From 19700b860d9ec70d01e885d92c3d4f62fd052873 Mon Sep 17 00:00:00 2001 From: Igor Pashev Date: Tue, 11 Sep 2012 19:12:10 +0400 Subject: Imported Upstream version 20060131 --- README | 35 + ReleaseNotes | 11 + usr/src/OPENSOLARIS.LICENSE | 377 +++ usr/src/harness/Makefile-os | 123 + usr/src/libm/inc/complex.h | 121 + usr/src/libm/inc/fenv.h | 246 ++ usr/src/libm/inc/floatingpoint.h | 202 ++ usr/src/libm/inc/iso/math_c99.h | 527 ++++ usr/src/libm/inc/iso/math_iso.h | 231 ++ usr/src/libm/inc/math.h | 350 +++ usr/src/libm/inc/sys/ieeefp.h | 117 + usr/src/libm/inc/tgmath.h | 170 ++ usr/src/libm/src/C/_SVID_error.c | 978 ++++++ usr/src/libm/src/C/_TBL_atan.c | 137 + usr/src/libm/src/C/_TBL_exp2.c | 78 + usr/src/libm/src/C/_TBL_ipio2.c | 86 + usr/src/libm/src/C/_TBL_log.c | 298 ++ usr/src/libm/src/C/_TBL_log2.c | 120 + usr/src/libm/src/C/_TBL_sin.c | 798 +++++ usr/src/libm/src/C/_TBL_tan.c | 84 + usr/src/libm/src/C/__cos.c | 126 + usr/src/libm/src/C/__lgamma.c | 268 ++ usr/src/libm/src/C/__libx_errno.c | 33 + usr/src/libm/src/C/__rem_pio2.c | 167 + usr/src/libm/src/C/__rem_pio2m.c | 362 +++ usr/src/libm/src/C/__sin.c | 128 + usr/src/libm/src/C/__sincos.c | 163 + usr/src/libm/src/C/__tan.c | 194 ++ usr/src/libm/src/C/__xpg6.c | 53 + usr/src/libm/src/C/_lib_version.c | 37 + usr/src/libm/src/C/acos.c | 162 + usr/src/libm/src/C/acosh.c | 105 + usr/src/libm/src/C/asin.c | 168 ++ usr/src/libm/src/C/asinh.c | 96 + usr/src/libm/src/C/atan.c | 197 ++ usr/src/libm/src/C/atan2.c | 498 +++ usr/src/libm/src/C/atan2pi.c | 50 + usr/src/libm/src/C/atanh.c | 69 + usr/src/libm/src/C/cbrt.c | 283 ++ usr/src/libm/src/C/ceil.c | 64 + usr/src/libm/src/C/copysign.c | 42 + usr/src/libm/src/C/cos.c | 222 ++ usr/src/libm/src/C/cosh.c | 89 + usr/src/libm/src/C/erf.c | 435 +++ usr/src/libm/src/C/exp.c | 356 +++ usr/src/libm/src/C/exp10.c | 109 + usr/src/libm/src/C/exp2.c | 87 + usr/src/libm/src/C/expm1.c | 270 ++ usr/src/libm/src/C/fabs.c | 51 + usr/src/libm/src/C/floor.c | 64 + 
usr/src/libm/src/C/fmod.c | 126 + usr/src/libm/src/C/gamma.c | 51 + usr/src/libm/src/C/gamma_r.c | 35 + usr/src/libm/src/C/hypot.c | 211 ++ usr/src/libm/src/C/ilogb.c | 93 + usr/src/libm/src/C/isnan.c | 47 + usr/src/libm/src/C/j0.c | 311 ++ usr/src/libm/src/C/j1.c | 329 ++ usr/src/libm/src/C/jn.c | 279 ++ usr/src/libm/src/C/lgamma.c | 51 + usr/src/libm/src/C/lgamma_r.c | 49 + usr/src/libm/src/C/libm.h | 203 ++ usr/src/libm/src/C/libm_macros.h | 76 + usr/src/libm/src/C/libm_protos.h | 217 ++ usr/src/libm/src/C/libm_synonyms.h | 748 +++++ usr/src/libm/src/C/libm_thread.h | 43 + usr/src/libm/src/C/libmv1.c | 661 ++++ usr/src/libm/src/C/log.c | 219 ++ usr/src/libm/src/C/log10.c | 217 ++ usr/src/libm/src/C/log1p.c | 201 ++ usr/src/libm/src/C/log2.c | 226 ++ usr/src/libm/src/C/logb.c | 84 + usr/src/libm/src/C/matherr.c | 37 + usr/src/libm/src/C/nextafter.c | 85 + usr/src/libm/src/C/pow.c | 342 +++ usr/src/libm/src/C/remainder.c | 86 + usr/src/libm/src/C/rint.c | 72 + usr/src/libm/src/C/scalb.c | 72 + usr/src/libm/src/C/scalbn.c | 119 + usr/src/libm/src/C/signgam.c | 34 + usr/src/libm/src/C/significand.c | 49 + usr/src/libm/src/C/sin.c | 188 ++ usr/src/libm/src/C/sincos.c | 367 +++ usr/src/libm/src/C/sincospi.c | 197 ++ usr/src/libm/src/C/sinh.c | 78 + usr/src/libm/src/C/sqrt.c | 149 + usr/src/libm/src/C/tan.c | 75 + usr/src/libm/src/C/tanh.c | 100 + usr/src/libm/src/C/xpg6.h | 67 + usr/src/libm/src/LD/_TBL_cosl.c | 194 ++ usr/src/libm/src/LD/_TBL_ipio2l.c | 503 ++++ usr/src/libm/src/LD/_TBL_sinl.c | 194 ++ usr/src/libm/src/LD/_TBL_tanl.c | 194 ++ usr/src/libm/src/LD/__cosl.c | 143 + usr/src/libm/src/LD/__lgammal.c | 395 +++ usr/src/libm/src/LD/__poly_libmq.c | 40 + usr/src/libm/src/LD/__rem_pio2l.c | 77 + usr/src/libm/src/LD/__sincosl.c | 151 + usr/src/libm/src/LD/__sinl.c | 145 + usr/src/libm/src/LD/__tanl.c | 168 ++ usr/src/libm/src/LD/acoshl.c | 56 + usr/src/libm/src/LD/asinhl.c | 58 + usr/src/libm/src/LD/atan2pil.c | 52 + usr/src/libm/src/LD/atanhl.c | 73 + 
usr/src/libm/src/LD/cbrtl.c | 73 + usr/src/libm/src/LD/coshl.c | 107 + usr/src/libm/src/LD/cosl.c | 105 + usr/src/libm/src/LD/erfl.c | 347 +++ usr/src/libm/src/LD/finitel.c | 51 + usr/src/libm/src/LD/gammal.c | 47 + usr/src/libm/src/LD/gammal_r.c | 41 + usr/src/libm/src/LD/hypotl.c | 146 + usr/src/libm/src/LD/isnanl.c | 53 + usr/src/libm/src/LD/j0l.c | 731 +++++ usr/src/libm/src/LD/j1l.c | 731 +++++ usr/src/libm/src/LD/jnl.c | 266 ++ usr/src/libm/src/LD/lgammal.c | 48 + usr/src/libm/src/LD/lgammal_r.c | 41 + usr/src/libm/src/LD/log1pl.c | 64 + usr/src/libm/src/LD/logbl.c | 82 + usr/src/libm/src/LD/longdouble.h | 155 + usr/src/libm/src/LD/nextafterl.c | 118 + usr/src/libm/src/LD/scalbl.c | 69 + usr/src/libm/src/LD/signgaml.c | 35 + usr/src/libm/src/LD/significandl.c | 41 + usr/src/libm/src/LD/sincosl.c | 112 + usr/src/libm/src/LD/sincospil.c | 205 ++ usr/src/libm/src/LD/sinhl.c | 86 + usr/src/libm/src/LD/sinl.c | 107 + usr/src/libm/src/LD/sinpil.c | 172 ++ usr/src/libm/src/LD/tanhl.c | 97 + usr/src/libm/src/LD/tanl.c | 96 + usr/src/libm/src/Q/_TBL_atanl.c | 234 ++ usr/src/libm/src/Q/_TBL_cosl.c | 191 ++ usr/src/libm/src/Q/_TBL_expl.c | 104 + usr/src/libm/src/Q/_TBL_expm1l.c | 367 +++ usr/src/libm/src/Q/_TBL_ipio2l.c | 503 ++++ usr/src/libm/src/Q/_TBL_logl.c | 168 ++ usr/src/libm/src/Q/_TBL_sinl.c | 191 ++ usr/src/libm/src/Q/_TBL_tanl.c | 191 ++ usr/src/libm/src/Q/__cosl.c | 137 + usr/src/libm/src/Q/__lgammal.c | 394 +++ usr/src/libm/src/Q/__poly_libmq.c | 40 + usr/src/libm/src/Q/__rem_pio2l.c | 84 + usr/src/libm/src/Q/__sincosl.c | 144 + usr/src/libm/src/Q/__sinl.c | 142 + usr/src/libm/src/Q/__tanl.c | 162 + usr/src/libm/src/Q/acoshl.c | 56 + usr/src/libm/src/Q/acosl.c | 66 + usr/src/libm/src/Q/asinhl.c | 58 + usr/src/libm/src/Q/asinl.c | 76 + usr/src/libm/src/Q/atan2l.c | 157 + usr/src/libm/src/Q/atan2pil.c | 42 + usr/src/libm/src/Q/atanhl.c | 60 + usr/src/libm/src/Q/atanl.c | 208 ++ usr/src/libm/src/Q/cbrtl.c | 68 + usr/src/libm/src/Q/copysignl.c | 42 + 
usr/src/libm/src/Q/coshl.c | 104 + usr/src/libm/src/Q/cosl.c | 92 + usr/src/libm/src/Q/erfl.c | 365 +++ usr/src/libm/src/Q/exp10l.c | 102 + usr/src/libm/src/Q/exp2l.c | 79 + usr/src/libm/src/Q/expl.c | 125 + usr/src/libm/src/Q/expm1l.c | 184 ++ usr/src/libm/src/Q/fabsl.c | 39 + usr/src/libm/src/Q/finitel.c | 51 + usr/src/libm/src/Q/floorl.c | 68 + usr/src/libm/src/Q/fmodl.c | 274 ++ usr/src/libm/src/Q/gammal.c | 47 + usr/src/libm/src/Q/gammal_r.c | 41 + usr/src/libm/src/Q/hypotl.c | 150 + usr/src/libm/src/Q/ieee_funcl.c | 112 + usr/src/libm/src/Q/ilogbl.c | 86 + usr/src/libm/src/Q/isnanl.c | 53 + usr/src/libm/src/Q/j0l.c | 736 +++++ usr/src/libm/src/Q/j1l.c | 732 +++++ usr/src/libm/src/Q/jnl.c | 269 ++ usr/src/libm/src/Q/lgammal.c | 47 + usr/src/libm/src/Q/lgammal_r.c | 41 + usr/src/libm/src/Q/log10l.c | 108 + usr/src/libm/src/Q/log1pl.c | 216 ++ usr/src/libm/src/Q/log2l.c | 64 + usr/src/libm/src/Q/logbl.c | 82 + usr/src/libm/src/Q/logl.c | 172 ++ usr/src/libm/src/Q/longdouble.h | 155 + usr/src/libm/src/Q/nextafterl.c | 118 + usr/src/libm/src/Q/powl.c | 319 ++ usr/src/libm/src/Q/remainderl.c | 86 + usr/src/libm/src/Q/rintl.c | 73 + usr/src/libm/src/Q/rndintl.c | 114 + usr/src/libm/src/Q/scalbl.c | 67 + usr/src/libm/src/Q/scalbnl.c | 84 + usr/src/libm/src/Q/signgaml.c | 33 + usr/src/libm/src/Q/significandl.c | 41 + usr/src/libm/src/Q/sincosl.c | 97 + usr/src/libm/src/Q/sincospil.c | 194 ++ usr/src/libm/src/Q/sinhl.c | 88 + usr/src/libm/src/Q/sinl.c | 91 + usr/src/libm/src/Q/sinpil.c | 168 ++ usr/src/libm/src/Q/sqrtl.c | 478 +++ usr/src/libm/src/Q/tanhl.c | 97 + usr/src/libm/src/Q/tanl.c | 80 + usr/src/libm/src/R/_TBL_r_atan_.c | 74 + usr/src/libm/src/R/__cosf.c | 85 + usr/src/libm/src/R/__sincosf.c | 100 + usr/src/libm/src/R/__sinf.c | 83 + usr/src/libm/src/R/__tanf.c | 95 + usr/src/libm/src/R/acosf.c | 42 + usr/src/libm/src/R/acoshf.c | 42 + usr/src/libm/src/R/asinf.c | 42 + usr/src/libm/src/R/asinhf.c | 40 + usr/src/libm/src/R/atan2f.c | 343 +++ 
usr/src/libm/src/R/atan2pif.c | 51 + usr/src/libm/src/R/atanf.c | 195 ++ usr/src/libm/src/R/atanhf.c | 44 + usr/src/libm/src/R/besself.c | 806 +++++ usr/src/libm/src/R/cbrtf.c | 41 + usr/src/libm/src/R/copysignf.c | 41 + usr/src/libm/src/R/cosf.c | 146 + usr/src/libm/src/R/coshf.c | 49 + usr/src/libm/src/R/erff.c | 68 + usr/src/libm/src/R/exp10f.c | 41 + usr/src/libm/src/R/exp2f.c | 41 + usr/src/libm/src/R/expf.c | 400 +++ usr/src/libm/src/R/expm1f.c | 41 + usr/src/libm/src/R/fabsf.c | 37 + usr/src/libm/src/R/floorf.c | 110 + usr/src/libm/src/R/fmodf.c | 175 ++ usr/src/libm/src/R/gammaf.c | 35 + usr/src/libm/src/R/gammaf_r.c | 35 + usr/src/libm/src/R/hypotf.c | 63 + usr/src/libm/src/R/ilogbf.c | 89 + usr/src/libm/src/R/isnanf.c | 39 + usr/src/libm/src/R/lgammaf.c | 43 + usr/src/libm/src/R/lgammaf_r.c | 37 + usr/src/libm/src/R/log10f.c | 54 + usr/src/libm/src/R/log1pf.c | 51 + usr/src/libm/src/R/log2f.c | 41 + usr/src/libm/src/R/logbf.c | 86 + usr/src/libm/src/R/logf.c | 147 + usr/src/libm/src/R/nextafterf.c | 80 + usr/src/libm/src/R/powf.c | 287 ++ usr/src/libm/src/R/remainderf.c | 46 + usr/src/libm/src/R/rintf.c | 165 + usr/src/libm/src/R/scalbf.c | 59 + usr/src/libm/src/R/scalbnf.c | 96 + usr/src/libm/src/R/signgamf.c | 33 + usr/src/libm/src/R/significandf.c | 47 + usr/src/libm/src/R/sincosf.c | 185 ++ usr/src/libm/src/R/sincospif.c | 48 + usr/src/libm/src/R/sinf.c | 149 + usr/src/libm/src/R/sinhf.c | 50 + usr/src/libm/src/R/sqrtf.c | 108 + usr/src/libm/src/R/tanf.c | 157 + usr/src/libm/src/R/tanhf.c | 41 + usr/src/libm/src/complex/cabs.c | 182 ++ usr/src/libm/src/complex/cabsf.c | 37 + usr/src/libm/src/complex/cabsl.c | 37 + usr/src/libm/src/complex/cacos.c | 403 +++ usr/src/libm/src/complex/cacosf.c | 45 + usr/src/libm/src/complex/cacosh.c | 69 + usr/src/libm/src/complex/cacoshf.c | 47 + usr/src/libm/src/complex/cacoshl.c | 67 + usr/src/libm/src/complex/cacosl.c | 270 ++ usr/src/libm/src/complex/carg.c | 52 + usr/src/libm/src/complex/cargf.c | 37 + 
usr/src/libm/src/complex/cargl.c | 37 + usr/src/libm/src/complex/casin.c | 378 +++ usr/src/libm/src/complex/casinf.c | 45 + usr/src/libm/src/complex/casinh.c | 51 + usr/src/libm/src/complex/casinhf.c | 44 + usr/src/libm/src/complex/casinhl.c | 44 + usr/src/libm/src/complex/casinl.c | 230 ++ usr/src/libm/src/complex/catan.c | 291 ++ usr/src/libm/src/complex/catanf.c | 137 + usr/src/libm/src/complex/catanh.c | 56 + usr/src/libm/src/complex/catanhf.c | 47 + usr/src/libm/src/complex/catanhl.c | 47 + usr/src/libm/src/complex/catanl.c | 327 ++ usr/src/libm/src/complex/ccos.c | 54 + usr/src/libm/src/complex/ccosf.c | 43 + usr/src/libm/src/complex/ccosh.c | 134 + usr/src/libm/src/complex/ccoshf.c | 99 + usr/src/libm/src/complex/ccoshl.c | 90 + usr/src/libm/src/complex/ccosl.c | 43 + usr/src/libm/src/complex/cexp.c | 115 + usr/src/libm/src/complex/cexpf.c | 95 + usr/src/libm/src/complex/cexpl.c | 86 + usr/src/libm/src/complex/cimag.c | 37 + usr/src/libm/src/complex/cimagf.c | 37 + usr/src/libm/src/complex/cimagl.c | 37 + usr/src/libm/src/complex/clog.c | 133 + usr/src/libm/src/complex/clogf.c | 81 + usr/src/libm/src/complex/clogl.c | 103 + usr/src/libm/src/complex/complex_wrapper.h | 176 ++ usr/src/libm/src/complex/conj.c | 38 + usr/src/libm/src/complex/conjf.c | 38 + usr/src/libm/src/complex/conjl.c | 38 + usr/src/libm/src/complex/cpow.c | 333 ++ usr/src/libm/src/complex/cpowf.c | 168 ++ usr/src/libm/src/complex/cpowl.c | 278 ++ usr/src/libm/src/complex/cproj.c | 68 + usr/src/libm/src/complex/cprojf.c | 57 + usr/src/libm/src/complex/cprojl.c | 57 + usr/src/libm/src/complex/creal.c | 37 + usr/src/libm/src/complex/crealf.c | 37 + usr/src/libm/src/complex/creall.c | 37 + usr/src/libm/src/complex/csin.c | 60 + usr/src/libm/src/complex/csinf.c | 47 + usr/src/libm/src/complex/csinh.c | 136 + usr/src/libm/src/complex/csinhf.c | 101 + usr/src/libm/src/complex/csinhl.c | 92 + usr/src/libm/src/complex/csinl.c | 47 + usr/src/libm/src/complex/csqrt.c | 209 ++ 
usr/src/libm/src/complex/csqrtf.c | 92 + usr/src/libm/src/complex/csqrtl.c | 144 + usr/src/libm/src/complex/ctan.c | 60 + usr/src/libm/src/complex/ctanf.c | 47 + usr/src/libm/src/complex/ctanh.c | 175 ++ usr/src/libm/src/complex/ctanhf.c | 114 + usr/src/libm/src/complex/ctanhl.c | 116 + usr/src/libm/src/complex/ctanl.c | 47 + usr/src/libm/src/complex/k_atan2.c | 549 ++++ usr/src/libm/src/complex/k_atan2l.c | 808 +++++ usr/src/libm/src/complex/k_cexp.c | 179 ++ usr/src/libm/src/complex/k_cexpl.c | 282 ++ usr/src/libm/src/complex/k_clog_r.c | 411 +++ usr/src/libm/src/complex/k_clog_rl.c | 620 ++++ usr/src/libm/src/i386/amd64/__swapFLAGS.s | 161 + usr/src/libm/src/i386/amd64/acosl.s | 70 + usr/src/libm/src/i386/amd64/asinl.s | 57 + usr/src/libm/src/i386/amd64/atan2l.s | 40 + usr/src/libm/src/i386/amd64/atanl.s | 40 + usr/src/libm/src/i386/amd64/copysignl.s | 44 + usr/src/libm/src/i386/amd64/exp10l.s | 115 + usr/src/libm/src/i386/amd64/exp2l.s | 99 + usr/src/libm/src/i386/amd64/expl.s | 124 + usr/src/libm/src/i386/amd64/expm1l.s | 123 + usr/src/libm/src/i386/amd64/fabsl.s | 40 + usr/src/libm/src/i386/amd64/floorl.s | 80 + usr/src/libm/src/i386/amd64/fmod.s | 69 + usr/src/libm/src/i386/amd64/fmodf.s | 53 + usr/src/libm/src/i386/amd64/fmodl.s | 45 + usr/src/libm/src/i386/amd64/ieee_funcl.s | 121 + usr/src/libm/src/i386/amd64/ilogbl.s | 86 + usr/src/libm/src/i386/amd64/libm.m4 | 290 ++ usr/src/libm/src/i386/amd64/log10l.s | 40 + usr/src/libm/src/i386/amd64/log2l.s | 40 + usr/src/libm/src/i386/amd64/logl.s | 40 + usr/src/libm/src/i386/amd64/powl.s | 419 +++ usr/src/libm/src/i386/amd64/remainder.s | 79 + usr/src/libm/src/i386/amd64/remainderf.s | 53 + usr/src/libm/src/i386/amd64/remainderl.s | 45 + usr/src/libm/src/i386/amd64/remquol.s | 67 + usr/src/libm/src/i386/amd64/rintl.s | 40 + usr/src/libm/src/i386/amd64/rndintl.s | 146 + usr/src/libm/src/i386/amd64/scalbnl.s | 44 + usr/src/libm/src/i386/amd64/sqrtl.s | 39 + usr/src/libm/src/i386/common/__reduction.s | 89 + 
usr/src/libm/src/i386/common/acos.s | 87 + usr/src/libm/src/i386/common/acosf.s | 77 + usr/src/libm/src/i386/common/acosl.s | 74 + usr/src/libm/src/i386/common/asin.s | 72 + usr/src/libm/src/i386/common/asinf.s | 62 + usr/src/libm/src/i386/common/asinl.s | 59 + usr/src/libm/src/i386/common/atan.s | 40 + usr/src/libm/src/i386/common/atan2.s | 69 + usr/src/libm/src/i386/common/atan2f.s | 41 + usr/src/libm/src/i386/common/atan2l.s | 40 + usr/src/libm/src/i386/common/atanl.s | 40 + usr/src/libm/src/i386/common/ceil.s | 55 + usr/src/libm/src/i386/common/copysign.s | 50 + usr/src/libm/src/i386/common/copysignf.s | 47 + usr/src/libm/src/i386/common/copysignl.s | 53 + usr/src/libm/src/i386/common/cos.s | 58 + usr/src/libm/src/i386/common/exp.s | 155 + usr/src/libm/src/i386/common/exp10.s | 132 + usr/src/libm/src/i386/common/exp10f.s | 122 + usr/src/libm/src/i386/common/exp10l.s | 114 + usr/src/libm/src/i386/common/exp2.s | 97 + usr/src/libm/src/i386/common/exp2f.s | 87 + usr/src/libm/src/i386/common/exp2l.s | 100 + usr/src/libm/src/i386/common/expl.s | 123 + usr/src/libm/src/i386/common/expm1.s | 129 + usr/src/libm/src/i386/common/expm1f.s | 152 + usr/src/libm/src/i386/common/expm1l.s | 122 + usr/src/libm/src/i386/common/fabs.s | 40 + usr/src/libm/src/i386/common/fabsf.s | 41 + usr/src/libm/src/i386/common/fabsl.s | 41 + usr/src/libm/src/i386/common/finitef.s | 43 + usr/src/libm/src/i386/common/finitel.s | 55 + usr/src/libm/src/i386/common/floor.s | 55 + usr/src/libm/src/i386/common/floorl.s | 80 + usr/src/libm/src/i386/common/fmod.s | 65 + usr/src/libm/src/i386/common/fmodf.s | 45 + usr/src/libm/src/i386/common/fmodl.s | 45 + usr/src/libm/src/i386/common/hypot.s | 137 + usr/src/libm/src/i386/common/hypotf.s | 69 + usr/src/libm/src/i386/common/ieee_funcl.s | 121 + usr/src/libm/src/i386/common/ilogb.s | 85 + usr/src/libm/src/i386/common/ilogbf.s | 90 + usr/src/libm/src/i386/common/ilogbl.s | 86 + usr/src/libm/src/i386/common/isnan.s | 63 + 
usr/src/libm/src/i386/common/isnanf.s | 54 + usr/src/libm/src/i386/common/isnanl.s | 55 + usr/src/libm/src/i386/common/libm.m4 | 445 +++ usr/src/libm/src/i386/common/llrint.s | 45 + usr/src/libm/src/i386/common/llrintf.s | 45 + usr/src/libm/src/i386/common/llrintl.s | 45 + usr/src/libm/src/i386/common/log.s | 95 + usr/src/libm/src/i386/common/log10.s | 95 + usr/src/libm/src/i386/common/log10f.s | 41 + usr/src/libm/src/i386/common/log10l.s | 40 + usr/src/libm/src/i386/common/log2.s | 40 + usr/src/libm/src/i386/common/log2f.s | 40 + usr/src/libm/src/i386/common/log2l.s | 40 + usr/src/libm/src/i386/common/logl.s | 40 + usr/src/libm/src/i386/common/lrint.s | 44 + usr/src/libm/src/i386/common/lrintf.s | 44 + usr/src/libm/src/i386/common/lrintl.s | 44 + usr/src/libm/src/i386/common/lround.s | 96 + usr/src/libm/src/i386/common/lroundl.s | 96 + usr/src/libm/src/i386/common/nextafter.s | 133 + usr/src/libm/src/i386/common/nextafterf.s | 113 + usr/src/libm/src/i386/common/nextafterl.s | 185 ++ usr/src/libm/src/i386/common/nexttowardl.s | 185 ++ usr/src/libm/src/i386/common/pow.s | 472 +++ usr/src/libm/src/i386/common/powf.s | 442 +++ usr/src/libm/src/i386/common/powl.s | 439 +++ usr/src/libm/src/i386/common/remainder.s | 81 + usr/src/libm/src/i386/common/remainderf.s | 45 + usr/src/libm/src/i386/common/remainderl.s | 45 + usr/src/libm/src/i386/common/remquo.s | 69 + usr/src/libm/src/i386/common/remquof.s | 69 + usr/src/libm/src/i386/common/remquol.s | 69 + usr/src/libm/src/i386/common/rint.s | 45 + usr/src/libm/src/i386/common/rintf.s | 45 + usr/src/libm/src/i386/common/rintl.s | 40 + usr/src/libm/src/i386/common/rndintl.s | 149 + usr/src/libm/src/i386/common/round.s | 94 + usr/src/libm/src/i386/common/roundl.s | 94 + usr/src/libm/src/i386/common/scalbln.s | 41 + usr/src/libm/src/i386/common/scalblnf.s | 41 + usr/src/libm/src/i386/common/scalblnl.s | 42 + usr/src/libm/src/i386/common/scalbn.s | 41 + usr/src/libm/src/i386/common/scalbnf.s | 41 + 
usr/src/libm/src/i386/common/scalbnl.s | 42 + usr/src/libm/src/i386/common/sin.s | 58 + usr/src/libm/src/i386/common/sincos.s | 81 + usr/src/libm/src/i386/common/sqrtl.s | 39 + usr/src/libm/src/i386/common/tan.s | 51 + usr/src/libm/src/i386/common/trunc.s | 55 + usr/src/libm/src/i386/common/truncl.s | 55 + usr/src/libm/src/m9x/__fenv_amd64.il | 349 +++ usr/src/libm/src/m9x/__fenv_i386.il | 411 +++ usr/src/libm/src/m9x/__fenv_sparc.il | 40 + usr/src/libm/src/m9x/__fex_hdlr.c | 850 ++++++ usr/src/libm/src/m9x/__fex_i386.c | 1671 ++++++++++ usr/src/libm/src/m9x/__fex_sparc.c | 864 ++++++ usr/src/libm/src/m9x/__fex_sse.c | 1581 ++++++++++ usr/src/libm/src/m9x/__fex_sym.c | 306 ++ usr/src/libm/src/m9x/fdim.c | 55 + usr/src/libm/src/m9x/fdimf.c | 58 + usr/src/libm/src/m9x/fdiml.c | 46 + usr/src/libm/src/m9x/feexcept.c | 135 + usr/src/libm/src/m9x/fenv.c | 116 + usr/src/libm/src/m9x/fenv_synonyms.h | 100 + usr/src/libm/src/m9x/feprec.c | 56 + usr/src/libm/src/m9x/feround.c | 81 + usr/src/libm/src/m9x/fex_handler.c | 91 + usr/src/libm/src/m9x/fex_handler.h | 215 ++ usr/src/libm/src/m9x/fex_log.c | 398 +++ usr/src/libm/src/m9x/fma.c | 608 ++++ usr/src/libm/src/m9x/fma.h | 125 + usr/src/libm/src/m9x/fmaf.c | 241 ++ usr/src/libm/src/m9x/fmal.c | 1224 ++++++++ usr/src/libm/src/m9x/fmax.c | 79 + usr/src/libm/src/m9x/fmaxf.c | 143 + usr/src/libm/src/m9x/fmaxl.c | 78 + usr/src/libm/src/m9x/fmin.c | 80 + usr/src/libm/src/m9x/fminf.c | 102 + usr/src/libm/src/m9x/fminl.c | 78 + usr/src/libm/src/m9x/frexp.c | 102 + usr/src/libm/src/m9x/frexpf.c | 69 + usr/src/libm/src/m9x/frexpl.c | 126 + usr/src/libm/src/m9x/ldexp.c | 56 + usr/src/libm/src/m9x/ldexpf.c | 38 + usr/src/libm/src/m9x/ldexpl.c | 38 + usr/src/libm/src/m9x/llrint.c | 80 + usr/src/libm/src/m9x/llrintf.c | 76 + usr/src/libm/src/m9x/llrintl.c | 173 ++ usr/src/libm/src/m9x/llround.c | 84 + usr/src/libm/src/m9x/llroundf.c | 72 + usr/src/libm/src/m9x/llroundl.c | 165 + usr/src/libm/src/m9x/lrint.c | 80 + 
usr/src/libm/src/m9x/lrintf.c | 71 + usr/src/libm/src/m9x/lrintl.c | 154 + usr/src/libm/src/m9x/lround.c | 82 + usr/src/libm/src/m9x/lroundf.c | 66 + usr/src/libm/src/m9x/lroundl.c | 144 + usr/src/libm/src/m9x/modf.c | 92 + usr/src/libm/src/m9x/modff.c | 68 + usr/src/libm/src/m9x/modfl.c | 149 + usr/src/libm/src/m9x/nan.c | 61 + usr/src/libm/src/m9x/nanf.c | 44 + usr/src/libm/src/m9x/nanl.c | 57 + usr/src/libm/src/m9x/nearbyint.c | 222 ++ usr/src/libm/src/m9x/nearbyintf.c | 185 ++ usr/src/libm/src/m9x/nearbyintl.c | 183 ++ usr/src/libm/src/m9x/nexttoward.c | 222 ++ usr/src/libm/src/m9x/nexttowardf.c | 184 ++ usr/src/libm/src/m9x/nexttowardl.c | 118 + usr/src/libm/src/m9x/regset.h | 128 + usr/src/libm/src/m9x/remquo.c | 267 ++ usr/src/libm/src/m9x/remquof.c | 267 ++ usr/src/libm/src/m9x/remquol.c | 344 +++ usr/src/libm/src/m9x/round.c | 75 + usr/src/libm/src/m9x/roundf.c | 65 + usr/src/libm/src/m9x/roundl.c | 165 + usr/src/libm/src/m9x/scalbln.c | 108 + usr/src/libm/src/m9x/scalblnf.c | 93 + usr/src/libm/src/m9x/scalblnl.c | 81 + usr/src/libm/src/m9x/tgamma.c | 1703 +++++++++++ usr/src/libm/src/m9x/tgammaf.c | 545 ++++ usr/src/libm/src/m9x/tgammal.c | 1166 +++++++ usr/src/libm/src/m9x/trunc.c | 69 + usr/src/libm/src/m9x/truncf.c | 62 + usr/src/libm/src/m9x/truncl.c | 109 + usr/src/libm/src/mvec/__vTBL_atan1.c | 616 ++++ usr/src/libm/src/mvec/__vTBL_atan2.c | 353 +++ usr/src/libm/src/mvec/__vTBL_rsqrt.c | 168 ++ usr/src/libm/src/mvec/__vTBL_sincos.c | 333 ++ usr/src/libm/src/mvec/__vTBL_sincos2.c | 145 + usr/src/libm/src/mvec/__vTBL_sqrtf.c | 553 ++++ usr/src/libm/src/mvec/__vatan.c | 315 ++ usr/src/libm/src/mvec/__vatan2.c | 451 +++ usr/src/libm/src/mvec/__vatan2f.c | 475 +++ usr/src/libm/src/mvec/__vatanf.c | 405 +++ usr/src/libm/src/mvec/__vc_abs.c | 43 + usr/src/libm/src/mvec/__vc_exp.c | 53 + usr/src/libm/src/mvec/__vc_log.c | 48 + usr/src/libm/src/mvec/__vc_pow.c | 55 + usr/src/libm/src/mvec/__vcos.c | 1098 +++++++ usr/src/libm/src/mvec/__vcosbig.c | 172 ++ 
usr/src/libm/src/mvec/__vcosbig_ultra3.c | 652 ++++ usr/src/libm/src/mvec/__vcosbigf.c | 173 ++ usr/src/libm/src/mvec/__vcosf.c | 376 +++ usr/src/libm/src/mvec/__vexp.c | 589 ++++ usr/src/libm/src/mvec/__vexpf.c | 350 +++ usr/src/libm/src/mvec/__vhypot.c | 394 +++ usr/src/libm/src/mvec/__vhypotf.c | 207 ++ usr/src/libm/src/mvec/__vlog.c | 786 +++++ usr/src/libm/src/mvec/__vlogf.c | 261 ++ usr/src/libm/src/mvec/__vpow.c | 1390 +++++++++ usr/src/libm/src/mvec/__vpowf.c | 823 +++++ usr/src/libm/src/mvec/__vrem_pio2m.c | 308 ++ usr/src/libm/src/mvec/__vrhypot.c | 428 +++ usr/src/libm/src/mvec/__vrhypotf.c | 462 +++ usr/src/libm/src/mvec/__vrsqrt.c | 412 +++ usr/src/libm/src/mvec/__vrsqrtf.c | 500 +++ usr/src/libm/src/mvec/__vsin.c | 1106 +++++++ usr/src/libm/src/mvec/__vsinbig.c | 171 ++ usr/src/libm/src/mvec/__vsinbig_ultra3.c | 652 ++++ usr/src/libm/src/mvec/__vsinbigf.c | 172 ++ usr/src/libm/src/mvec/__vsincos.c | 1545 ++++++++++ usr/src/libm/src/mvec/__vsincosbig.c | 173 ++ usr/src/libm/src/mvec/__vsincosbigf.c | 170 ++ usr/src/libm/src/mvec/__vsincosf.c | 313 ++ usr/src/libm/src/mvec/__vsinf.c | 380 +++ usr/src/libm/src/mvec/__vsqrt.c | 49 + usr/src/libm/src/mvec/__vsqrtf.c | 49 + usr/src/libm/src/mvec/__vz_abs.c | 43 + usr/src/libm/src/mvec/__vz_exp.c | 53 + usr/src/libm/src/mvec/__vz_log.c | 48 + usr/src/libm/src/mvec/__vz_pow.c | 55 + usr/src/libm/src/mvec/amd64/__vsqrtf.S | 127 + usr/src/libm/src/mvec/vatan2_.c | 91 + usr/src/libm/src/mvec/vatan2f_.c | 91 + usr/src/libm/src/mvec/vatan_.c | 87 + usr/src/libm/src/mvec/vatanf_.c | 87 + usr/src/libm/src/mvec/vc_abs_.c | 87 + usr/src/libm/src/mvec/vc_exp_.c | 90 + usr/src/libm/src/mvec/vc_log_.c | 87 + usr/src/libm/src/mvec/vc_pow_.c | 94 + usr/src/libm/src/mvec/vcos_.c | 157 + usr/src/libm/src/mvec/vcosf_.c | 87 + usr/src/libm/src/mvec/vexp_.c | 87 + usr/src/libm/src/mvec/vexpf_.c | 87 + usr/src/libm/src/mvec/vhypot_.c | 91 + usr/src/libm/src/mvec/vhypotf_.c | 91 + usr/src/libm/src/mvec/vis/__vatan.S | 571 ++++ 
usr/src/libm/src/mvec/vis/__vatan2.S | 1077 +++++++ usr/src/libm/src/mvec/vis/__vatan2f.S | 3378 +++++++++++++++++++++ usr/src/libm/src/mvec/vis/__vatanf.S | 1891 ++++++++++++ usr/src/libm/src/mvec/vis/__vcos.S | 3078 +++++++++++++++++++ usr/src/libm/src/mvec/vis/__vcos_ultra3.S | 3424 +++++++++++++++++++++ usr/src/libm/src/mvec/vis/__vcosf.S | 2101 +++++++++++++ usr/src/libm/src/mvec/vis/__vexp.S | 1281 ++++++++ usr/src/libm/src/mvec/vis/__vexpf.S | 2113 +++++++++++++ usr/src/libm/src/mvec/vis/__vhypot.S | 1242 ++++++++ usr/src/libm/src/mvec/vis/__vhypotf.S | 1226 ++++++++ usr/src/libm/src/mvec/vis/__vlog.S | 670 +++++ usr/src/libm/src/mvec/vis/__vlog_ultra3.S | 2904 ++++++++++++++++++ usr/src/libm/src/mvec/vis/__vlogf.S | 1276 ++++++++ usr/src/libm/src/mvec/vis/__vpow.S | 4352 +++++++++++++++++++++++++++ usr/src/libm/src/mvec/vis/__vpowf.S | 3138 +++++++++++++++++++ usr/src/libm/src/mvec/vis/__vrhypot.S | 3878 ++++++++++++++++++++++++ usr/src/libm/src/mvec/vis/__vrhypotf.S | 1518 ++++++++++ usr/src/libm/src/mvec/vis/__vrsqrt.S | 2156 +++++++++++++ usr/src/libm/src/mvec/vis/__vrsqrtf.S | 1718 +++++++++++ usr/src/libm/src/mvec/vis/__vsin.S | 3002 ++++++++++++++++++ usr/src/libm/src/mvec/vis/__vsin_ultra3.S | 3431 +++++++++++++++++++++ usr/src/libm/src/mvec/vis/__vsincos.S | 958 ++++++ usr/src/libm/src/mvec/vis/__vsincosf.S | 905 ++++++ usr/src/libm/src/mvec/vis/__vsinf.S | 2093 +++++++++++++ usr/src/libm/src/mvec/vis/__vsqrt.S | 1843 ++++++++++++ usr/src/libm/src/mvec/vis/__vsqrtf.S | 58 + usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S | 993 ++++++ usr/src/libm/src/mvec/vlog_.c | 157 + usr/src/libm/src/mvec/vlogf_.c | 87 + usr/src/libm/src/mvec/vpow_.c | 91 + usr/src/libm/src/mvec/vpowf_.c | 91 + usr/src/libm/src/mvec/vrhypot_.c | 91 + usr/src/libm/src/mvec/vrhypotf_.c | 91 + usr/src/libm/src/mvec/vrsqrt_.c | 88 + usr/src/libm/src/mvec/vrsqrtf_.c | 88 + usr/src/libm/src/mvec/vsin_.c | 157 + usr/src/libm/src/mvec/vsincos_.c | 91 + usr/src/libm/src/mvec/vsincosf_.c | 91 
+ usr/src/libm/src/mvec/vsinf_.c | 87 + usr/src/libm/src/mvec/vsqrt_.c | 87 + usr/src/libm/src/mvec/vsqrtf_.c | 157 + usr/src/libm/src/mvec/vz_abs_.c | 87 + usr/src/libm/src/mvec/vz_exp_.c | 90 + usr/src/libm/src/mvec/vz_log_.c | 87 + usr/src/libm/src/mvec/vz_pow_.c | 94 + usr/src/libm/src/sparc/common/copysign.S | 43 + usr/src/libm/src/sparc/common/fabs.S | 47 + usr/src/libm/src/sparc/common/libm.m4 | 2635 ++++++++++++++++ usr/src/libm/src/sparc/common/nextafter.S | 123 + usr/src/libm/src/sparc/v9/libm.m4 | 1278 ++++++++ usr/src/libm/wos/Integration.log | 140 + usr/src/libm/wos/Makefile | 1124 +++++++ usr/src/libm/wos/llib-lm | 46 + usr/src/libm/wos/mapfiles/libmv1-common | 205 ++ usr/src/libm/wos/mapfiles/libmv2-i386 | 692 +++++ usr/src/libm/wos/mapfiles/libmv2-sparc | 688 +++++ usr/src/libm/wos/mapfiles/libmvec-common | 128 + usr/src/libm/wos/mapfiles/libmvec-ia1 | 27 + usr/src/libm/wos/mapfiles/libmvec-v8plusa | 56 + usr/src/libm/wos/mapfiles/libmvec-v8plusb | 36 + usr/src/libm/wos64/Makefile | 1064 +++++++ usr/src/libm/wos64/llib-lm | 46 + usr/src/libm/wos64/mapfiles/libmv1-common | 200 ++ usr/src/libm/wos64/mapfiles/libmv2-i386 | 687 +++++ usr/src/libm/wos64/mapfiles/libmv2-sparc | 683 +++++ usr/src/libm/wos64/mapfiles/libmvec-common | 128 + usr/src/libm/wos64/mapfiles/libmvec-v9a | 56 + usr/src/libm/wos64/mapfiles/libmvec-v9b | 36 + 659 files changed, 167194 insertions(+) create mode 100644 README create mode 100644 ReleaseNotes create mode 100644 usr/src/OPENSOLARIS.LICENSE create mode 100644 usr/src/harness/Makefile-os create mode 100644 usr/src/libm/inc/complex.h create mode 100644 usr/src/libm/inc/fenv.h create mode 100644 usr/src/libm/inc/floatingpoint.h create mode 100644 usr/src/libm/inc/iso/math_c99.h create mode 100644 usr/src/libm/inc/iso/math_iso.h create mode 100644 usr/src/libm/inc/math.h create mode 100644 usr/src/libm/inc/sys/ieeefp.h create mode 100644 usr/src/libm/inc/tgmath.h create mode 100644 usr/src/libm/src/C/_SVID_error.c create mode 
100644 usr/src/libm/src/C/_TBL_atan.c create mode 100644 usr/src/libm/src/C/_TBL_exp2.c create mode 100644 usr/src/libm/src/C/_TBL_ipio2.c create mode 100644 usr/src/libm/src/C/_TBL_log.c create mode 100644 usr/src/libm/src/C/_TBL_log2.c create mode 100644 usr/src/libm/src/C/_TBL_sin.c create mode 100644 usr/src/libm/src/C/_TBL_tan.c create mode 100644 usr/src/libm/src/C/__cos.c create mode 100644 usr/src/libm/src/C/__lgamma.c create mode 100644 usr/src/libm/src/C/__libx_errno.c create mode 100644 usr/src/libm/src/C/__rem_pio2.c create mode 100644 usr/src/libm/src/C/__rem_pio2m.c create mode 100644 usr/src/libm/src/C/__sin.c create mode 100644 usr/src/libm/src/C/__sincos.c create mode 100644 usr/src/libm/src/C/__tan.c create mode 100644 usr/src/libm/src/C/__xpg6.c create mode 100644 usr/src/libm/src/C/_lib_version.c create mode 100644 usr/src/libm/src/C/acos.c create mode 100644 usr/src/libm/src/C/acosh.c create mode 100644 usr/src/libm/src/C/asin.c create mode 100644 usr/src/libm/src/C/asinh.c create mode 100644 usr/src/libm/src/C/atan.c create mode 100644 usr/src/libm/src/C/atan2.c create mode 100644 usr/src/libm/src/C/atan2pi.c create mode 100644 usr/src/libm/src/C/atanh.c create mode 100644 usr/src/libm/src/C/cbrt.c create mode 100644 usr/src/libm/src/C/ceil.c create mode 100644 usr/src/libm/src/C/copysign.c create mode 100644 usr/src/libm/src/C/cos.c create mode 100644 usr/src/libm/src/C/cosh.c create mode 100644 usr/src/libm/src/C/erf.c create mode 100644 usr/src/libm/src/C/exp.c create mode 100644 usr/src/libm/src/C/exp10.c create mode 100644 usr/src/libm/src/C/exp2.c create mode 100644 usr/src/libm/src/C/expm1.c create mode 100644 usr/src/libm/src/C/fabs.c create mode 100644 usr/src/libm/src/C/floor.c create mode 100644 usr/src/libm/src/C/fmod.c create mode 100644 usr/src/libm/src/C/gamma.c create mode 100644 usr/src/libm/src/C/gamma_r.c create mode 100644 usr/src/libm/src/C/hypot.c create mode 100644 usr/src/libm/src/C/ilogb.c create mode 100644 
usr/src/libm/src/C/isnan.c create mode 100644 usr/src/libm/src/C/j0.c create mode 100644 usr/src/libm/src/C/j1.c create mode 100644 usr/src/libm/src/C/jn.c create mode 100644 usr/src/libm/src/C/lgamma.c create mode 100644 usr/src/libm/src/C/lgamma_r.c create mode 100644 usr/src/libm/src/C/libm.h create mode 100644 usr/src/libm/src/C/libm_macros.h create mode 100644 usr/src/libm/src/C/libm_protos.h create mode 100644 usr/src/libm/src/C/libm_synonyms.h create mode 100644 usr/src/libm/src/C/libm_thread.h create mode 100644 usr/src/libm/src/C/libmv1.c create mode 100644 usr/src/libm/src/C/log.c create mode 100644 usr/src/libm/src/C/log10.c create mode 100644 usr/src/libm/src/C/log1p.c create mode 100644 usr/src/libm/src/C/log2.c create mode 100644 usr/src/libm/src/C/logb.c create mode 100644 usr/src/libm/src/C/matherr.c create mode 100644 usr/src/libm/src/C/nextafter.c create mode 100644 usr/src/libm/src/C/pow.c create mode 100644 usr/src/libm/src/C/remainder.c create mode 100644 usr/src/libm/src/C/rint.c create mode 100644 usr/src/libm/src/C/scalb.c create mode 100644 usr/src/libm/src/C/scalbn.c create mode 100644 usr/src/libm/src/C/signgam.c create mode 100644 usr/src/libm/src/C/significand.c create mode 100644 usr/src/libm/src/C/sin.c create mode 100644 usr/src/libm/src/C/sincos.c create mode 100644 usr/src/libm/src/C/sincospi.c create mode 100644 usr/src/libm/src/C/sinh.c create mode 100644 usr/src/libm/src/C/sqrt.c create mode 100644 usr/src/libm/src/C/tan.c create mode 100644 usr/src/libm/src/C/tanh.c create mode 100644 usr/src/libm/src/C/xpg6.h create mode 100644 usr/src/libm/src/LD/_TBL_cosl.c create mode 100644 usr/src/libm/src/LD/_TBL_ipio2l.c create mode 100644 usr/src/libm/src/LD/_TBL_sinl.c create mode 100644 usr/src/libm/src/LD/_TBL_tanl.c create mode 100644 usr/src/libm/src/LD/__cosl.c create mode 100644 usr/src/libm/src/LD/__lgammal.c create mode 100644 usr/src/libm/src/LD/__poly_libmq.c create mode 100644 usr/src/libm/src/LD/__rem_pio2l.c create mode 
100644 usr/src/libm/src/LD/__sincosl.c create mode 100644 usr/src/libm/src/LD/__sinl.c create mode 100644 usr/src/libm/src/LD/__tanl.c create mode 100644 usr/src/libm/src/LD/acoshl.c create mode 100644 usr/src/libm/src/LD/asinhl.c create mode 100644 usr/src/libm/src/LD/atan2pil.c create mode 100644 usr/src/libm/src/LD/atanhl.c create mode 100644 usr/src/libm/src/LD/cbrtl.c create mode 100644 usr/src/libm/src/LD/coshl.c create mode 100644 usr/src/libm/src/LD/cosl.c create mode 100644 usr/src/libm/src/LD/erfl.c create mode 100644 usr/src/libm/src/LD/finitel.c create mode 100644 usr/src/libm/src/LD/gammal.c create mode 100644 usr/src/libm/src/LD/gammal_r.c create mode 100644 usr/src/libm/src/LD/hypotl.c create mode 100644 usr/src/libm/src/LD/isnanl.c create mode 100644 usr/src/libm/src/LD/j0l.c create mode 100644 usr/src/libm/src/LD/j1l.c create mode 100644 usr/src/libm/src/LD/jnl.c create mode 100644 usr/src/libm/src/LD/lgammal.c create mode 100644 usr/src/libm/src/LD/lgammal_r.c create mode 100644 usr/src/libm/src/LD/log1pl.c create mode 100644 usr/src/libm/src/LD/logbl.c create mode 100644 usr/src/libm/src/LD/longdouble.h create mode 100644 usr/src/libm/src/LD/nextafterl.c create mode 100644 usr/src/libm/src/LD/scalbl.c create mode 100644 usr/src/libm/src/LD/signgaml.c create mode 100644 usr/src/libm/src/LD/significandl.c create mode 100644 usr/src/libm/src/LD/sincosl.c create mode 100644 usr/src/libm/src/LD/sincospil.c create mode 100644 usr/src/libm/src/LD/sinhl.c create mode 100644 usr/src/libm/src/LD/sinl.c create mode 100644 usr/src/libm/src/LD/sinpil.c create mode 100644 usr/src/libm/src/LD/tanhl.c create mode 100644 usr/src/libm/src/LD/tanl.c create mode 100644 usr/src/libm/src/Q/_TBL_atanl.c create mode 100644 usr/src/libm/src/Q/_TBL_cosl.c create mode 100644 usr/src/libm/src/Q/_TBL_expl.c create mode 100644 usr/src/libm/src/Q/_TBL_expm1l.c create mode 100644 usr/src/libm/src/Q/_TBL_ipio2l.c create mode 100644 usr/src/libm/src/Q/_TBL_logl.c create mode 
100644 usr/src/libm/src/Q/_TBL_sinl.c create mode 100644 usr/src/libm/src/Q/_TBL_tanl.c create mode 100644 usr/src/libm/src/Q/__cosl.c create mode 100644 usr/src/libm/src/Q/__lgammal.c create mode 100644 usr/src/libm/src/Q/__poly_libmq.c create mode 100644 usr/src/libm/src/Q/__rem_pio2l.c create mode 100644 usr/src/libm/src/Q/__sincosl.c create mode 100644 usr/src/libm/src/Q/__sinl.c create mode 100644 usr/src/libm/src/Q/__tanl.c create mode 100644 usr/src/libm/src/Q/acoshl.c create mode 100644 usr/src/libm/src/Q/acosl.c create mode 100644 usr/src/libm/src/Q/asinhl.c create mode 100644 usr/src/libm/src/Q/asinl.c create mode 100644 usr/src/libm/src/Q/atan2l.c create mode 100644 usr/src/libm/src/Q/atan2pil.c create mode 100644 usr/src/libm/src/Q/atanhl.c create mode 100644 usr/src/libm/src/Q/atanl.c create mode 100644 usr/src/libm/src/Q/cbrtl.c create mode 100644 usr/src/libm/src/Q/copysignl.c create mode 100644 usr/src/libm/src/Q/coshl.c create mode 100644 usr/src/libm/src/Q/cosl.c create mode 100644 usr/src/libm/src/Q/erfl.c create mode 100644 usr/src/libm/src/Q/exp10l.c create mode 100644 usr/src/libm/src/Q/exp2l.c create mode 100644 usr/src/libm/src/Q/expl.c create mode 100644 usr/src/libm/src/Q/expm1l.c create mode 100644 usr/src/libm/src/Q/fabsl.c create mode 100644 usr/src/libm/src/Q/finitel.c create mode 100644 usr/src/libm/src/Q/floorl.c create mode 100644 usr/src/libm/src/Q/fmodl.c create mode 100644 usr/src/libm/src/Q/gammal.c create mode 100644 usr/src/libm/src/Q/gammal_r.c create mode 100644 usr/src/libm/src/Q/hypotl.c create mode 100644 usr/src/libm/src/Q/ieee_funcl.c create mode 100644 usr/src/libm/src/Q/ilogbl.c create mode 100644 usr/src/libm/src/Q/isnanl.c create mode 100644 usr/src/libm/src/Q/j0l.c create mode 100644 usr/src/libm/src/Q/j1l.c create mode 100644 usr/src/libm/src/Q/jnl.c create mode 100644 usr/src/libm/src/Q/lgammal.c create mode 100644 usr/src/libm/src/Q/lgammal_r.c create mode 100644 usr/src/libm/src/Q/log10l.c create mode 100644 
usr/src/libm/src/Q/log1pl.c create mode 100644 usr/src/libm/src/Q/log2l.c create mode 100644 usr/src/libm/src/Q/logbl.c create mode 100644 usr/src/libm/src/Q/logl.c create mode 100644 usr/src/libm/src/Q/longdouble.h create mode 100644 usr/src/libm/src/Q/nextafterl.c create mode 100644 usr/src/libm/src/Q/powl.c create mode 100644 usr/src/libm/src/Q/remainderl.c create mode 100644 usr/src/libm/src/Q/rintl.c create mode 100644 usr/src/libm/src/Q/rndintl.c create mode 100644 usr/src/libm/src/Q/scalbl.c create mode 100644 usr/src/libm/src/Q/scalbnl.c create mode 100644 usr/src/libm/src/Q/signgaml.c create mode 100644 usr/src/libm/src/Q/significandl.c create mode 100644 usr/src/libm/src/Q/sincosl.c create mode 100644 usr/src/libm/src/Q/sincospil.c create mode 100644 usr/src/libm/src/Q/sinhl.c create mode 100644 usr/src/libm/src/Q/sinl.c create mode 100644 usr/src/libm/src/Q/sinpil.c create mode 100644 usr/src/libm/src/Q/sqrtl.c create mode 100644 usr/src/libm/src/Q/tanhl.c create mode 100644 usr/src/libm/src/Q/tanl.c create mode 100644 usr/src/libm/src/R/_TBL_r_atan_.c create mode 100644 usr/src/libm/src/R/__cosf.c create mode 100644 usr/src/libm/src/R/__sincosf.c create mode 100644 usr/src/libm/src/R/__sinf.c create mode 100644 usr/src/libm/src/R/__tanf.c create mode 100644 usr/src/libm/src/R/acosf.c create mode 100644 usr/src/libm/src/R/acoshf.c create mode 100644 usr/src/libm/src/R/asinf.c create mode 100644 usr/src/libm/src/R/asinhf.c create mode 100644 usr/src/libm/src/R/atan2f.c create mode 100644 usr/src/libm/src/R/atan2pif.c create mode 100644 usr/src/libm/src/R/atanf.c create mode 100644 usr/src/libm/src/R/atanhf.c create mode 100644 usr/src/libm/src/R/besself.c create mode 100644 usr/src/libm/src/R/cbrtf.c create mode 100644 usr/src/libm/src/R/copysignf.c create mode 100644 usr/src/libm/src/R/cosf.c create mode 100644 usr/src/libm/src/R/coshf.c create mode 100644 usr/src/libm/src/R/erff.c create mode 100644 usr/src/libm/src/R/exp10f.c create mode 100644 
usr/src/libm/src/R/exp2f.c create mode 100644 usr/src/libm/src/R/expf.c create mode 100644 usr/src/libm/src/R/expm1f.c create mode 100644 usr/src/libm/src/R/fabsf.c create mode 100644 usr/src/libm/src/R/floorf.c create mode 100644 usr/src/libm/src/R/fmodf.c create mode 100644 usr/src/libm/src/R/gammaf.c create mode 100644 usr/src/libm/src/R/gammaf_r.c create mode 100644 usr/src/libm/src/R/hypotf.c create mode 100644 usr/src/libm/src/R/ilogbf.c create mode 100644 usr/src/libm/src/R/isnanf.c create mode 100644 usr/src/libm/src/R/lgammaf.c create mode 100644 usr/src/libm/src/R/lgammaf_r.c create mode 100644 usr/src/libm/src/R/log10f.c create mode 100644 usr/src/libm/src/R/log1pf.c create mode 100644 usr/src/libm/src/R/log2f.c create mode 100644 usr/src/libm/src/R/logbf.c create mode 100644 usr/src/libm/src/R/logf.c create mode 100644 usr/src/libm/src/R/nextafterf.c create mode 100644 usr/src/libm/src/R/powf.c create mode 100644 usr/src/libm/src/R/remainderf.c create mode 100644 usr/src/libm/src/R/rintf.c create mode 100644 usr/src/libm/src/R/scalbf.c create mode 100644 usr/src/libm/src/R/scalbnf.c create mode 100644 usr/src/libm/src/R/signgamf.c create mode 100644 usr/src/libm/src/R/significandf.c create mode 100644 usr/src/libm/src/R/sincosf.c create mode 100644 usr/src/libm/src/R/sincospif.c create mode 100644 usr/src/libm/src/R/sinf.c create mode 100644 usr/src/libm/src/R/sinhf.c create mode 100644 usr/src/libm/src/R/sqrtf.c create mode 100644 usr/src/libm/src/R/tanf.c create mode 100644 usr/src/libm/src/R/tanhf.c create mode 100644 usr/src/libm/src/complex/cabs.c create mode 100644 usr/src/libm/src/complex/cabsf.c create mode 100644 usr/src/libm/src/complex/cabsl.c create mode 100644 usr/src/libm/src/complex/cacos.c create mode 100644 usr/src/libm/src/complex/cacosf.c create mode 100644 usr/src/libm/src/complex/cacosh.c create mode 100644 usr/src/libm/src/complex/cacoshf.c create mode 100644 usr/src/libm/src/complex/cacoshl.c create mode 100644 
usr/src/libm/src/complex/cacosl.c create mode 100644 usr/src/libm/src/complex/carg.c create mode 100644 usr/src/libm/src/complex/cargf.c create mode 100644 usr/src/libm/src/complex/cargl.c create mode 100644 usr/src/libm/src/complex/casin.c create mode 100644 usr/src/libm/src/complex/casinf.c create mode 100644 usr/src/libm/src/complex/casinh.c create mode 100644 usr/src/libm/src/complex/casinhf.c create mode 100644 usr/src/libm/src/complex/casinhl.c create mode 100644 usr/src/libm/src/complex/casinl.c create mode 100644 usr/src/libm/src/complex/catan.c create mode 100644 usr/src/libm/src/complex/catanf.c create mode 100644 usr/src/libm/src/complex/catanh.c create mode 100644 usr/src/libm/src/complex/catanhf.c create mode 100644 usr/src/libm/src/complex/catanhl.c create mode 100644 usr/src/libm/src/complex/catanl.c create mode 100644 usr/src/libm/src/complex/ccos.c create mode 100644 usr/src/libm/src/complex/ccosf.c create mode 100644 usr/src/libm/src/complex/ccosh.c create mode 100644 usr/src/libm/src/complex/ccoshf.c create mode 100644 usr/src/libm/src/complex/ccoshl.c create mode 100644 usr/src/libm/src/complex/ccosl.c create mode 100644 usr/src/libm/src/complex/cexp.c create mode 100644 usr/src/libm/src/complex/cexpf.c create mode 100644 usr/src/libm/src/complex/cexpl.c create mode 100644 usr/src/libm/src/complex/cimag.c create mode 100644 usr/src/libm/src/complex/cimagf.c create mode 100644 usr/src/libm/src/complex/cimagl.c create mode 100644 usr/src/libm/src/complex/clog.c create mode 100644 usr/src/libm/src/complex/clogf.c create mode 100644 usr/src/libm/src/complex/clogl.c create mode 100644 usr/src/libm/src/complex/complex_wrapper.h create mode 100644 usr/src/libm/src/complex/conj.c create mode 100644 usr/src/libm/src/complex/conjf.c create mode 100644 usr/src/libm/src/complex/conjl.c create mode 100644 usr/src/libm/src/complex/cpow.c create mode 100644 usr/src/libm/src/complex/cpowf.c create mode 100644 usr/src/libm/src/complex/cpowl.c create mode 100644 
usr/src/libm/src/complex/cproj.c create mode 100644 usr/src/libm/src/complex/cprojf.c create mode 100644 usr/src/libm/src/complex/cprojl.c create mode 100644 usr/src/libm/src/complex/creal.c create mode 100644 usr/src/libm/src/complex/crealf.c create mode 100644 usr/src/libm/src/complex/creall.c create mode 100644 usr/src/libm/src/complex/csin.c create mode 100644 usr/src/libm/src/complex/csinf.c create mode 100644 usr/src/libm/src/complex/csinh.c create mode 100644 usr/src/libm/src/complex/csinhf.c create mode 100644 usr/src/libm/src/complex/csinhl.c create mode 100644 usr/src/libm/src/complex/csinl.c create mode 100644 usr/src/libm/src/complex/csqrt.c create mode 100644 usr/src/libm/src/complex/csqrtf.c create mode 100644 usr/src/libm/src/complex/csqrtl.c create mode 100644 usr/src/libm/src/complex/ctan.c create mode 100644 usr/src/libm/src/complex/ctanf.c create mode 100644 usr/src/libm/src/complex/ctanh.c create mode 100644 usr/src/libm/src/complex/ctanhf.c create mode 100644 usr/src/libm/src/complex/ctanhl.c create mode 100644 usr/src/libm/src/complex/ctanl.c create mode 100644 usr/src/libm/src/complex/k_atan2.c create mode 100644 usr/src/libm/src/complex/k_atan2l.c create mode 100644 usr/src/libm/src/complex/k_cexp.c create mode 100644 usr/src/libm/src/complex/k_cexpl.c create mode 100644 usr/src/libm/src/complex/k_clog_r.c create mode 100644 usr/src/libm/src/complex/k_clog_rl.c create mode 100644 usr/src/libm/src/i386/amd64/__swapFLAGS.s create mode 100644 usr/src/libm/src/i386/amd64/acosl.s create mode 100644 usr/src/libm/src/i386/amd64/asinl.s create mode 100644 usr/src/libm/src/i386/amd64/atan2l.s create mode 100644 usr/src/libm/src/i386/amd64/atanl.s create mode 100644 usr/src/libm/src/i386/amd64/copysignl.s create mode 100644 usr/src/libm/src/i386/amd64/exp10l.s create mode 100644 usr/src/libm/src/i386/amd64/exp2l.s create mode 100644 usr/src/libm/src/i386/amd64/expl.s create mode 100644 usr/src/libm/src/i386/amd64/expm1l.s create mode 100644 
usr/src/libm/src/i386/amd64/fabsl.s create mode 100644 usr/src/libm/src/i386/amd64/floorl.s create mode 100644 usr/src/libm/src/i386/amd64/fmod.s create mode 100644 usr/src/libm/src/i386/amd64/fmodf.s create mode 100644 usr/src/libm/src/i386/amd64/fmodl.s create mode 100644 usr/src/libm/src/i386/amd64/ieee_funcl.s create mode 100644 usr/src/libm/src/i386/amd64/ilogbl.s create mode 100644 usr/src/libm/src/i386/amd64/libm.m4 create mode 100644 usr/src/libm/src/i386/amd64/log10l.s create mode 100644 usr/src/libm/src/i386/amd64/log2l.s create mode 100644 usr/src/libm/src/i386/amd64/logl.s create mode 100644 usr/src/libm/src/i386/amd64/powl.s create mode 100644 usr/src/libm/src/i386/amd64/remainder.s create mode 100644 usr/src/libm/src/i386/amd64/remainderf.s create mode 100644 usr/src/libm/src/i386/amd64/remainderl.s create mode 100644 usr/src/libm/src/i386/amd64/remquol.s create mode 100644 usr/src/libm/src/i386/amd64/rintl.s create mode 100644 usr/src/libm/src/i386/amd64/rndintl.s create mode 100644 usr/src/libm/src/i386/amd64/scalbnl.s create mode 100644 usr/src/libm/src/i386/amd64/sqrtl.s create mode 100644 usr/src/libm/src/i386/common/__reduction.s create mode 100644 usr/src/libm/src/i386/common/acos.s create mode 100644 usr/src/libm/src/i386/common/acosf.s create mode 100644 usr/src/libm/src/i386/common/acosl.s create mode 100644 usr/src/libm/src/i386/common/asin.s create mode 100644 usr/src/libm/src/i386/common/asinf.s create mode 100644 usr/src/libm/src/i386/common/asinl.s create mode 100644 usr/src/libm/src/i386/common/atan.s create mode 100644 usr/src/libm/src/i386/common/atan2.s create mode 100644 usr/src/libm/src/i386/common/atan2f.s create mode 100644 usr/src/libm/src/i386/common/atan2l.s create mode 100644 usr/src/libm/src/i386/common/atanl.s create mode 100644 usr/src/libm/src/i386/common/ceil.s create mode 100644 usr/src/libm/src/i386/common/copysign.s create mode 100644 usr/src/libm/src/i386/common/copysignf.s create mode 100644 
usr/src/libm/src/i386/common/copysignl.s create mode 100644 usr/src/libm/src/i386/common/cos.s create mode 100644 usr/src/libm/src/i386/common/exp.s create mode 100644 usr/src/libm/src/i386/common/exp10.s create mode 100644 usr/src/libm/src/i386/common/exp10f.s create mode 100644 usr/src/libm/src/i386/common/exp10l.s create mode 100644 usr/src/libm/src/i386/common/exp2.s create mode 100644 usr/src/libm/src/i386/common/exp2f.s create mode 100644 usr/src/libm/src/i386/common/exp2l.s create mode 100644 usr/src/libm/src/i386/common/expl.s create mode 100644 usr/src/libm/src/i386/common/expm1.s create mode 100644 usr/src/libm/src/i386/common/expm1f.s create mode 100644 usr/src/libm/src/i386/common/expm1l.s create mode 100644 usr/src/libm/src/i386/common/fabs.s create mode 100644 usr/src/libm/src/i386/common/fabsf.s create mode 100644 usr/src/libm/src/i386/common/fabsl.s create mode 100644 usr/src/libm/src/i386/common/finitef.s create mode 100644 usr/src/libm/src/i386/common/finitel.s create mode 100644 usr/src/libm/src/i386/common/floor.s create mode 100644 usr/src/libm/src/i386/common/floorl.s create mode 100644 usr/src/libm/src/i386/common/fmod.s create mode 100644 usr/src/libm/src/i386/common/fmodf.s create mode 100644 usr/src/libm/src/i386/common/fmodl.s create mode 100644 usr/src/libm/src/i386/common/hypot.s create mode 100644 usr/src/libm/src/i386/common/hypotf.s create mode 100644 usr/src/libm/src/i386/common/ieee_funcl.s create mode 100644 usr/src/libm/src/i386/common/ilogb.s create mode 100644 usr/src/libm/src/i386/common/ilogbf.s create mode 100644 usr/src/libm/src/i386/common/ilogbl.s create mode 100644 usr/src/libm/src/i386/common/isnan.s create mode 100644 usr/src/libm/src/i386/common/isnanf.s create mode 100644 usr/src/libm/src/i386/common/isnanl.s create mode 100644 usr/src/libm/src/i386/common/libm.m4 create mode 100644 usr/src/libm/src/i386/common/llrint.s create mode 100644 usr/src/libm/src/i386/common/llrintf.s create mode 100644 
usr/src/libm/src/i386/common/llrintl.s create mode 100644 usr/src/libm/src/i386/common/log.s create mode 100644 usr/src/libm/src/i386/common/log10.s create mode 100644 usr/src/libm/src/i386/common/log10f.s create mode 100644 usr/src/libm/src/i386/common/log10l.s create mode 100644 usr/src/libm/src/i386/common/log2.s create mode 100644 usr/src/libm/src/i386/common/log2f.s create mode 100644 usr/src/libm/src/i386/common/log2l.s create mode 100644 usr/src/libm/src/i386/common/logl.s create mode 100644 usr/src/libm/src/i386/common/lrint.s create mode 100644 usr/src/libm/src/i386/common/lrintf.s create mode 100644 usr/src/libm/src/i386/common/lrintl.s create mode 100644 usr/src/libm/src/i386/common/lround.s create mode 100644 usr/src/libm/src/i386/common/lroundl.s create mode 100644 usr/src/libm/src/i386/common/nextafter.s create mode 100644 usr/src/libm/src/i386/common/nextafterf.s create mode 100644 usr/src/libm/src/i386/common/nextafterl.s create mode 100644 usr/src/libm/src/i386/common/nexttowardl.s create mode 100644 usr/src/libm/src/i386/common/pow.s create mode 100644 usr/src/libm/src/i386/common/powf.s create mode 100644 usr/src/libm/src/i386/common/powl.s create mode 100644 usr/src/libm/src/i386/common/remainder.s create mode 100644 usr/src/libm/src/i386/common/remainderf.s create mode 100644 usr/src/libm/src/i386/common/remainderl.s create mode 100644 usr/src/libm/src/i386/common/remquo.s create mode 100644 usr/src/libm/src/i386/common/remquof.s create mode 100644 usr/src/libm/src/i386/common/remquol.s create mode 100644 usr/src/libm/src/i386/common/rint.s create mode 100644 usr/src/libm/src/i386/common/rintf.s create mode 100644 usr/src/libm/src/i386/common/rintl.s create mode 100644 usr/src/libm/src/i386/common/rndintl.s create mode 100644 usr/src/libm/src/i386/common/round.s create mode 100644 usr/src/libm/src/i386/common/roundl.s create mode 100644 usr/src/libm/src/i386/common/scalbln.s create mode 100644 usr/src/libm/src/i386/common/scalblnf.s create mode 
100644 usr/src/libm/src/i386/common/scalblnl.s create mode 100644 usr/src/libm/src/i386/common/scalbn.s create mode 100644 usr/src/libm/src/i386/common/scalbnf.s create mode 100644 usr/src/libm/src/i386/common/scalbnl.s create mode 100644 usr/src/libm/src/i386/common/sin.s create mode 100644 usr/src/libm/src/i386/common/sincos.s create mode 100644 usr/src/libm/src/i386/common/sqrtl.s create mode 100644 usr/src/libm/src/i386/common/tan.s create mode 100644 usr/src/libm/src/i386/common/trunc.s create mode 100644 usr/src/libm/src/i386/common/truncl.s create mode 100644 usr/src/libm/src/m9x/__fenv_amd64.il create mode 100644 usr/src/libm/src/m9x/__fenv_i386.il create mode 100644 usr/src/libm/src/m9x/__fenv_sparc.il create mode 100644 usr/src/libm/src/m9x/__fex_hdlr.c create mode 100644 usr/src/libm/src/m9x/__fex_i386.c create mode 100644 usr/src/libm/src/m9x/__fex_sparc.c create mode 100644 usr/src/libm/src/m9x/__fex_sse.c create mode 100644 usr/src/libm/src/m9x/__fex_sym.c create mode 100644 usr/src/libm/src/m9x/fdim.c create mode 100644 usr/src/libm/src/m9x/fdimf.c create mode 100644 usr/src/libm/src/m9x/fdiml.c create mode 100644 usr/src/libm/src/m9x/feexcept.c create mode 100644 usr/src/libm/src/m9x/fenv.c create mode 100644 usr/src/libm/src/m9x/fenv_synonyms.h create mode 100644 usr/src/libm/src/m9x/feprec.c create mode 100644 usr/src/libm/src/m9x/feround.c create mode 100644 usr/src/libm/src/m9x/fex_handler.c create mode 100644 usr/src/libm/src/m9x/fex_handler.h create mode 100644 usr/src/libm/src/m9x/fex_log.c create mode 100644 usr/src/libm/src/m9x/fma.c create mode 100644 usr/src/libm/src/m9x/fma.h create mode 100644 usr/src/libm/src/m9x/fmaf.c create mode 100644 usr/src/libm/src/m9x/fmal.c create mode 100644 usr/src/libm/src/m9x/fmax.c create mode 100644 usr/src/libm/src/m9x/fmaxf.c create mode 100644 usr/src/libm/src/m9x/fmaxl.c create mode 100644 usr/src/libm/src/m9x/fmin.c create mode 100644 usr/src/libm/src/m9x/fminf.c create mode 100644 
usr/src/libm/src/m9x/fminl.c create mode 100644 usr/src/libm/src/m9x/frexp.c create mode 100644 usr/src/libm/src/m9x/frexpf.c create mode 100644 usr/src/libm/src/m9x/frexpl.c create mode 100644 usr/src/libm/src/m9x/ldexp.c create mode 100644 usr/src/libm/src/m9x/ldexpf.c create mode 100644 usr/src/libm/src/m9x/ldexpl.c create mode 100644 usr/src/libm/src/m9x/llrint.c create mode 100644 usr/src/libm/src/m9x/llrintf.c create mode 100644 usr/src/libm/src/m9x/llrintl.c create mode 100644 usr/src/libm/src/m9x/llround.c create mode 100644 usr/src/libm/src/m9x/llroundf.c create mode 100644 usr/src/libm/src/m9x/llroundl.c create mode 100644 usr/src/libm/src/m9x/lrint.c create mode 100644 usr/src/libm/src/m9x/lrintf.c create mode 100644 usr/src/libm/src/m9x/lrintl.c create mode 100644 usr/src/libm/src/m9x/lround.c create mode 100644 usr/src/libm/src/m9x/lroundf.c create mode 100644 usr/src/libm/src/m9x/lroundl.c create mode 100644 usr/src/libm/src/m9x/modf.c create mode 100644 usr/src/libm/src/m9x/modff.c create mode 100644 usr/src/libm/src/m9x/modfl.c create mode 100644 usr/src/libm/src/m9x/nan.c create mode 100644 usr/src/libm/src/m9x/nanf.c create mode 100644 usr/src/libm/src/m9x/nanl.c create mode 100644 usr/src/libm/src/m9x/nearbyint.c create mode 100644 usr/src/libm/src/m9x/nearbyintf.c create mode 100644 usr/src/libm/src/m9x/nearbyintl.c create mode 100644 usr/src/libm/src/m9x/nexttoward.c create mode 100644 usr/src/libm/src/m9x/nexttowardf.c create mode 100644 usr/src/libm/src/m9x/nexttowardl.c create mode 100644 usr/src/libm/src/m9x/regset.h create mode 100644 usr/src/libm/src/m9x/remquo.c create mode 100644 usr/src/libm/src/m9x/remquof.c create mode 100644 usr/src/libm/src/m9x/remquol.c create mode 100644 usr/src/libm/src/m9x/round.c create mode 100644 usr/src/libm/src/m9x/roundf.c create mode 100644 usr/src/libm/src/m9x/roundl.c create mode 100644 usr/src/libm/src/m9x/scalbln.c create mode 100644 usr/src/libm/src/m9x/scalblnf.c create mode 100644 
usr/src/libm/src/m9x/scalblnl.c create mode 100644 usr/src/libm/src/m9x/tgamma.c create mode 100644 usr/src/libm/src/m9x/tgammaf.c create mode 100644 usr/src/libm/src/m9x/tgammal.c create mode 100644 usr/src/libm/src/m9x/trunc.c create mode 100644 usr/src/libm/src/m9x/truncf.c create mode 100644 usr/src/libm/src/m9x/truncl.c create mode 100644 usr/src/libm/src/mvec/__vTBL_atan1.c create mode 100644 usr/src/libm/src/mvec/__vTBL_atan2.c create mode 100644 usr/src/libm/src/mvec/__vTBL_rsqrt.c create mode 100644 usr/src/libm/src/mvec/__vTBL_sincos.c create mode 100644 usr/src/libm/src/mvec/__vTBL_sincos2.c create mode 100644 usr/src/libm/src/mvec/__vTBL_sqrtf.c create mode 100644 usr/src/libm/src/mvec/__vatan.c create mode 100644 usr/src/libm/src/mvec/__vatan2.c create mode 100644 usr/src/libm/src/mvec/__vatan2f.c create mode 100644 usr/src/libm/src/mvec/__vatanf.c create mode 100644 usr/src/libm/src/mvec/__vc_abs.c create mode 100644 usr/src/libm/src/mvec/__vc_exp.c create mode 100644 usr/src/libm/src/mvec/__vc_log.c create mode 100644 usr/src/libm/src/mvec/__vc_pow.c create mode 100644 usr/src/libm/src/mvec/__vcos.c create mode 100644 usr/src/libm/src/mvec/__vcosbig.c create mode 100644 usr/src/libm/src/mvec/__vcosbig_ultra3.c create mode 100644 usr/src/libm/src/mvec/__vcosbigf.c create mode 100644 usr/src/libm/src/mvec/__vcosf.c create mode 100644 usr/src/libm/src/mvec/__vexp.c create mode 100644 usr/src/libm/src/mvec/__vexpf.c create mode 100644 usr/src/libm/src/mvec/__vhypot.c create mode 100644 usr/src/libm/src/mvec/__vhypotf.c create mode 100644 usr/src/libm/src/mvec/__vlog.c create mode 100644 usr/src/libm/src/mvec/__vlogf.c create mode 100644 usr/src/libm/src/mvec/__vpow.c create mode 100644 usr/src/libm/src/mvec/__vpowf.c create mode 100644 usr/src/libm/src/mvec/__vrem_pio2m.c create mode 100644 usr/src/libm/src/mvec/__vrhypot.c create mode 100644 usr/src/libm/src/mvec/__vrhypotf.c create mode 100644 usr/src/libm/src/mvec/__vrsqrt.c create mode 100644 
usr/src/libm/src/mvec/__vrsqrtf.c create mode 100644 usr/src/libm/src/mvec/__vsin.c create mode 100644 usr/src/libm/src/mvec/__vsinbig.c create mode 100644 usr/src/libm/src/mvec/__vsinbig_ultra3.c create mode 100644 usr/src/libm/src/mvec/__vsinbigf.c create mode 100644 usr/src/libm/src/mvec/__vsincos.c create mode 100644 usr/src/libm/src/mvec/__vsincosbig.c create mode 100644 usr/src/libm/src/mvec/__vsincosbigf.c create mode 100644 usr/src/libm/src/mvec/__vsincosf.c create mode 100644 usr/src/libm/src/mvec/__vsinf.c create mode 100644 usr/src/libm/src/mvec/__vsqrt.c create mode 100644 usr/src/libm/src/mvec/__vsqrtf.c create mode 100644 usr/src/libm/src/mvec/__vz_abs.c create mode 100644 usr/src/libm/src/mvec/__vz_exp.c create mode 100644 usr/src/libm/src/mvec/__vz_log.c create mode 100644 usr/src/libm/src/mvec/__vz_pow.c create mode 100644 usr/src/libm/src/mvec/amd64/__vsqrtf.S create mode 100644 usr/src/libm/src/mvec/vatan2_.c create mode 100644 usr/src/libm/src/mvec/vatan2f_.c create mode 100644 usr/src/libm/src/mvec/vatan_.c create mode 100644 usr/src/libm/src/mvec/vatanf_.c create mode 100644 usr/src/libm/src/mvec/vc_abs_.c create mode 100644 usr/src/libm/src/mvec/vc_exp_.c create mode 100644 usr/src/libm/src/mvec/vc_log_.c create mode 100644 usr/src/libm/src/mvec/vc_pow_.c create mode 100644 usr/src/libm/src/mvec/vcos_.c create mode 100644 usr/src/libm/src/mvec/vcosf_.c create mode 100644 usr/src/libm/src/mvec/vexp_.c create mode 100644 usr/src/libm/src/mvec/vexpf_.c create mode 100644 usr/src/libm/src/mvec/vhypot_.c create mode 100644 usr/src/libm/src/mvec/vhypotf_.c create mode 100644 usr/src/libm/src/mvec/vis/__vatan.S create mode 100644 usr/src/libm/src/mvec/vis/__vatan2.S create mode 100644 usr/src/libm/src/mvec/vis/__vatan2f.S create mode 100644 usr/src/libm/src/mvec/vis/__vatanf.S create mode 100644 usr/src/libm/src/mvec/vis/__vcos.S create mode 100644 usr/src/libm/src/mvec/vis/__vcos_ultra3.S create mode 100644 usr/src/libm/src/mvec/vis/__vcosf.S 
create mode 100644 usr/src/libm/src/mvec/vis/__vexp.S create mode 100644 usr/src/libm/src/mvec/vis/__vexpf.S create mode 100644 usr/src/libm/src/mvec/vis/__vhypot.S create mode 100644 usr/src/libm/src/mvec/vis/__vhypotf.S create mode 100644 usr/src/libm/src/mvec/vis/__vlog.S create mode 100644 usr/src/libm/src/mvec/vis/__vlog_ultra3.S create mode 100644 usr/src/libm/src/mvec/vis/__vlogf.S create mode 100644 usr/src/libm/src/mvec/vis/__vpow.S create mode 100644 usr/src/libm/src/mvec/vis/__vpowf.S create mode 100644 usr/src/libm/src/mvec/vis/__vrhypot.S create mode 100644 usr/src/libm/src/mvec/vis/__vrhypotf.S create mode 100644 usr/src/libm/src/mvec/vis/__vrsqrt.S create mode 100644 usr/src/libm/src/mvec/vis/__vrsqrtf.S create mode 100644 usr/src/libm/src/mvec/vis/__vsin.S create mode 100644 usr/src/libm/src/mvec/vis/__vsin_ultra3.S create mode 100644 usr/src/libm/src/mvec/vis/__vsincos.S create mode 100644 usr/src/libm/src/mvec/vis/__vsincosf.S create mode 100644 usr/src/libm/src/mvec/vis/__vsinf.S create mode 100644 usr/src/libm/src/mvec/vis/__vsqrt.S create mode 100644 usr/src/libm/src/mvec/vis/__vsqrtf.S create mode 100644 usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S create mode 100644 usr/src/libm/src/mvec/vlog_.c create mode 100644 usr/src/libm/src/mvec/vlogf_.c create mode 100644 usr/src/libm/src/mvec/vpow_.c create mode 100644 usr/src/libm/src/mvec/vpowf_.c create mode 100644 usr/src/libm/src/mvec/vrhypot_.c create mode 100644 usr/src/libm/src/mvec/vrhypotf_.c create mode 100644 usr/src/libm/src/mvec/vrsqrt_.c create mode 100644 usr/src/libm/src/mvec/vrsqrtf_.c create mode 100644 usr/src/libm/src/mvec/vsin_.c create mode 100644 usr/src/libm/src/mvec/vsincos_.c create mode 100644 usr/src/libm/src/mvec/vsincosf_.c create mode 100644 usr/src/libm/src/mvec/vsinf_.c create mode 100644 usr/src/libm/src/mvec/vsqrt_.c create mode 100644 usr/src/libm/src/mvec/vsqrtf_.c create mode 100644 usr/src/libm/src/mvec/vz_abs_.c create mode 100644 
usr/src/libm/src/mvec/vz_exp_.c create mode 100644 usr/src/libm/src/mvec/vz_log_.c create mode 100644 usr/src/libm/src/mvec/vz_pow_.c create mode 100644 usr/src/libm/src/sparc/common/copysign.S create mode 100644 usr/src/libm/src/sparc/common/fabs.S create mode 100644 usr/src/libm/src/sparc/common/libm.m4 create mode 100644 usr/src/libm/src/sparc/common/nextafter.S create mode 100644 usr/src/libm/src/sparc/v9/libm.m4 create mode 100644 usr/src/libm/wos/Integration.log create mode 100644 usr/src/libm/wos/Makefile create mode 100644 usr/src/libm/wos/llib-lm create mode 100644 usr/src/libm/wos/mapfiles/libmv1-common create mode 100644 usr/src/libm/wos/mapfiles/libmv2-i386 create mode 100644 usr/src/libm/wos/mapfiles/libmv2-sparc create mode 100644 usr/src/libm/wos/mapfiles/libmvec-common create mode 100644 usr/src/libm/wos/mapfiles/libmvec-ia1 create mode 100644 usr/src/libm/wos/mapfiles/libmvec-v8plusa create mode 100644 usr/src/libm/wos/mapfiles/libmvec-v8plusb create mode 100644 usr/src/libm/wos64/Makefile create mode 100644 usr/src/libm/wos64/llib-lm create mode 100644 usr/src/libm/wos64/mapfiles/libmv1-common create mode 100644 usr/src/libm/wos64/mapfiles/libmv2-i386 create mode 100644 usr/src/libm/wos64/mapfiles/libmv2-sparc create mode 100644 usr/src/libm/wos64/mapfiles/libmvec-common create mode 100644 usr/src/libm/wos64/mapfiles/libmvec-v9a create mode 100644 usr/src/libm/wos64/mapfiles/libmvec-v9b diff --git a/README b/README new file mode 100644 index 0000000..c26b3b0 --- /dev/null +++ b/README @@ -0,0 +1,35 @@ +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "@(#)README 1.1 06/01/30 SMI" +# + +To build libm and libmvec: + + cd usr/src/harness; make -f Makefile-os \ + STUDIOBIN=/your/path/to/Sun/Studio11/bin \ + BUILDNAME=any_build_name_without_blanks \ + STATDIR=/your/path/to/scratch/directory \ + DESTDIR=/your/path/to/root/of/install/location + +The resulting files, symlinks and libraries will be installed under +$(DESTDIR). The default values for these macros are: + + STUDIOBIN=/opt/SUNWspro/bin + BUILDNAME="DEV" + STATDIR=statdir/`uname -p`/$(BUILDNAME) + DESTDIR=destdir/`uname -p`/$(BUILDNAME) + +If you're happy with the default values, there is no need to specify any. + +Also, the following derived macros can be set individually if so desired. +The default values are listed below for reference: + + CC=$(STUDIOBIN)/cc + LINT=$(STUDIOBIN)/lint + MAKE=$(STUDIOBIN)/dmake -m serial + DMAKE=$(STUDIOBIN)/dmake -m parallel -j $(DMAKE_MAX_JOBS) + DMAKE_MAX_JOBS # the default is 8 jobs per processor in on-line state + +Note that only Sun Studio 10 or Sun Studio 11 compilers are supported. diff --git a/ReleaseNotes b/ReleaseNotes new file mode 100644 index 0000000..04693e2 --- /dev/null +++ b/ReleaseNotes @@ -0,0 +1,11 @@ +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "@(#)ReleaseNotes 1.2 06/01/31 SMI" +# + +The 2006.01.31 release of the source code for libm and libmvec +was used to build the files and libraries delivered into snv_33. + +See README for build instructions. diff --git a/usr/src/OPENSOLARIS.LICENSE b/usr/src/OPENSOLARIS.LICENSE new file mode 100644 index 0000000..d838932 --- /dev/null +++ b/usr/src/OPENSOLARIS.LICENSE @@ -0,0 +1,377 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0 + +1. Definitions. + + 1.1. "Contributor" means each individual or entity that creates + or contributes to the creation of Modifications. + + 1.2. 
"Contributor Version" means the combination of the Original + Software, prior Modifications used by a Contributor (if any), + and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) + Modifications, or (c) the combination of files containing + Original Software with files containing Modifications, in + each case including portions thereof. + + 1.4. "Executable" means the Covered Software in any form other + than Source Code. + + 1.5. "Initial Developer" means the individual or entity that first + makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or + portions thereof with code not governed by the terms of this + License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed + herein. + + 1.9. "Modifications" means the Source Code and Executable form of + any of the following: + + A. Any file that results from an addition to, deletion from or + modification of the contents of a file containing Original + Software or previous Modifications; + + B. Any new file that contains any part of the Original + Software or previous Modifications; or + + C. Any new file that is contributed or otherwise made + available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable + form of computer software code that is originally released + under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, + process, and apparatus claims, in any patent Licensable by + grantor. + + 1.12. 
"Source Code" means (a) the common form of computer software + code in which modifications are made and (b) associated + documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms + of, this License. For legal entities, "You" includes any + entity which controls, is controlled by, or is under common + control with You. For purposes of this definition, + "control" means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty + percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. + + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, the Initial + Developer hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer, to use, + reproduce, modify, display, perform, sublicense and + distribute the Original Software (or portions thereof), + with or without Modifications, and/or as part of a Larger + Work; and + + (b) under Patent Claims infringed by the making, using or + selling of Original Software, to make, have made, use, + practice, sell, and offer for sale, and/or otherwise + dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are + effective on the date Initial Developer first distributes + or otherwise makes the Original Software available to a + third party under the terms of this License. 
+ + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: (1) for code that You delete from the Original + Software, or (2) for infringements caused by: (i) the + modification of the Original Software, or (ii) the + combination of the Original Software with other software + or devices. + + 2.2. Contributor Grant. + + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, each + Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor to use, reproduce, + modify, display, perform, sublicense and distribute the + Modifications created by such Contributor (or portions + thereof), either on an unmodified basis, with other + Modifications, as Covered Software and/or as part of a + Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either + alone and/or in combination with its Contributor Version + (or portions of such combination), to make, use, sell, + offer for sale, have made, and/or otherwise dispose of: + (1) Modifications made by that Contributor (or portions + thereof); and (2) the combination of Modifications made by + that Contributor with its Contributor Version (or portions + of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first distributes or + otherwise makes the Modifications available to a third + party. 
+ + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: (1) for any code that Contributor has deleted + from the Contributor Version; (2) for infringements caused + by: (i) third party modifications of Contributor Version, + or (ii) the combination of Modifications made by that + Contributor with other software (except as part of the + Contributor Version) or other devices; or (3) under Patent + Claims infringed by Covered Software in the absence of + Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. + + Any Covered Software that You distribute or otherwise make + available in Executable form must also be made available in Source + Code form and that Source Code form must be distributed only under + the terms of this License. You must include a copy of this + License with every copy of the Source Code form of the Covered + Software You distribute or otherwise make available. You must + inform recipients of any such Covered Software in Executable form + as to how they can obtain such Covered Software in Source Code + form in a reasonable manner on or through a medium customarily + used for software exchange. + + 3.2. Modifications. + + The Modifications that You create or to which You contribute are + governed by the terms of this License. You represent that You + believe Your Modifications are Your original creation(s) and/or + You have sufficient rights to grant the rights conveyed by this + License. + + 3.3. Required Notices. + + You must include a notice in each of Your Modifications that + identifies You as the Contributor of the Modification. You may + not remove or alter any copyright, patent or trademark notices + contained within the Covered Software, or any notices of licensing + or any descriptive text giving attribution to any Contributor or + the Initial Developer. + + 3.4. Application of Additional Terms. 
+ + You may not offer or impose any terms on any Covered Software in + Source Code form that alters or restricts the applicable version + of this License or the recipients' rights hereunder. You may + choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of + Covered Software. However, you may do so only on Your own behalf, + and not on behalf of the Initial Developer or any Contributor. + You must make it absolutely clear that any such warranty, support, + indemnity or liability obligation is offered by You alone, and You + hereby agree to indemnify the Initial Developer and every + Contributor for any liability incurred by the Initial Developer or + such Contributor as a result of warranty, support, indemnity or + liability terms You offer. + + 3.5. Distribution of Executable Versions. + + You may distribute the Executable form of the Covered Software + under the terms of this License or under the terms of a license of + Your choice, which may contain terms different from this License, + provided that You are in compliance with the terms of this License + and that the license for the Executable form does not attempt to + limit or alter the recipient's rights in the Source Code form from + the rights set forth in this License. If You distribute the + Covered Software in Executable form under a different license, You + must make it absolutely clear that any terms which differ from + this License are offered by You alone, not by the Initial + Developer or Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred + by the Initial Developer or such Contributor as a result of any + such terms You offer. + + 3.6. Larger Works. + + You may create a Larger Work by combining Covered Software with + other code not governed by the terms of this License and + distribute the Larger Work as a single product. 
In such a case, + You must make sure the requirements of this License are fulfilled + for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + + Sun Microsystems, Inc. is the initial license steward and may + publish revised and/or new versions of this License from time to + time. Each version will be given a distinguishing version number. + Except as provided in Section 4.3, no one other than the license + steward has the right to modify this License. + + 4.2. Effect of New Versions. + + You may always continue to use, distribute or otherwise make the + Covered Software available under the terms of the version of the + License under which You originally received the Covered Software. + If the Initial Developer includes a notice in the Original + Software prohibiting it from being distributed or otherwise made + available under any subsequent version of the License, You must + distribute and make the Covered Software available under the terms + of the version of the License under which You originally received + the Covered Software. Otherwise, You may also choose to use, + distribute or otherwise make the Covered Software available under + the terms of any subsequent version of the License published by + the license steward. + + 4.3. Modified Versions. + + When You are an Initial Developer and You want to create a new + license for Your Original Software, You may create and use a + modified version of this License if You: (a) rename the license + and remove any references to the name of the license steward + (except to note that the license differs from this License); and + (b) otherwise make it clear that the license contains terms which + differ from this License. + +5. DISCLAIMER OF WARRANTY. 
+ + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" + BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED + SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR + PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY + COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE + INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS + DISCLAIMER. + +6. TERMINATION. + + 6.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to + cure such breach within 30 days of becoming aware of the breach. + Provisions which, by their nature, must remain in effect beyond + the termination of this License shall survive. + + 6.2. 
If You assert a patent infringement claim (excluding + declaratory judgment actions) against Initial Developer or a + Contributor (the Initial Developer or Contributor against whom You + assert such claim is referred to as "Participant") alleging that + the Participant Software (meaning the Contributor Version where + the Participant is a Contributor or the Original Software where + the Participant is the Initial Developer) directly or indirectly + infringes any patent, then any and all rights granted directly or + indirectly to You by such Participant, the Initial Developer (if + the Initial Developer is not the Participant) and all Contributors + under Sections 2.1 and/or 2.2 of this License shall, upon 60 days + notice from Participant terminate prospectively and automatically + at the expiration of such 60 day notice period, unless if within + such 60 day period You withdraw Your claim with respect to the + Participant Software against such Participant either unilaterally + or pursuant to a written agreement with Participant. + + 6.3. In the event of termination under Sections 6.1 or 6.2 above, + all end user licenses that have been validly granted by You or any + distributor hereunder prior to termination (excluding licenses + granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. + + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE + INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF + COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE + LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR + CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT + LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK + STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. 
THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL + INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT + APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO + NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR + CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT + APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is + defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial + computer software" (as that term is defined at 48 + C.F.R. 252.227-7014(a)(1)) and "commercial computer software + documentation" as such terms are used in 48 C.F.R. 12.212 + (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 + C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all + U.S. Government End Users acquire Covered Software with only those + rights set forth herein. This U.S. Government Rights clause is in + lieu of, and supersedes, any other FAR, DFAR, or other clause or + provision that addresses Government rights in computer software + under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. This License shall be governed + by the law of the jurisdiction specified in a notice contained + within the Original Software (except to the extent applicable law, + if any, provides otherwise), excluding such jurisdiction's + conflict-of-law provisions. Any litigation relating to this + License shall be subject to the jurisdiction of the courts located + in the jurisdiction and venue specified in a notice contained + within the Original Software, with the losing party responsible + for costs, including, without limitation, court costs and + reasonable attorneys' fees and expenses. 
The application of the + United Nations Convention on Contracts for the International Sale + of Goods is expressly excluded. Any law or regulation which + provides that the language of a contract shall be construed + against the drafter shall not apply to this License. You agree + that You alone are responsible for compliance with the United + States export administration regulations (and the export control + laws and regulation of any other countries) when You use, + distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or + indirectly, out of its utilization of rights under this License + and You agree to work with Initial Developer and Contributors to + distribute such responsibility on an equitable basis. Nothing + herein is intended or shall be deemed to constitute any admission + of liability. + +-------------------------------------------------------------------- + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND +DISTRIBUTION LICENSE (CDDL) + +For Covered Software in this distribution, this License shall +be governed by the laws of the State of California (excluding +conflict-of-law provisions). + +Any litigation relating to this License shall be subject to the +jurisdiction of the Federal Courts of the Northern District of +California and the state courts of the State of California, with +venue lying in Santa Clara County, California. diff --git a/usr/src/harness/Makefile-os b/usr/src/harness/Makefile-os new file mode 100644 index 0000000..6f89613 --- /dev/null +++ b/usr/src/harness/Makefile-os @@ -0,0 +1,123 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "@(#)Makefile-os 1.1 06/01/30 SMI" +# + +MACH:sh = uname -p + +MACH64_1 = $(MACH:sparc=sparcv9) +MACH64 = $(MACH64_1:i386=amd64) + +CPU_1 = $(MACH:sparc=sparc-S2) +CPU = $(CPU_1:i386=intel-S2) + +CPU64_1 = $(MACH64:sparcv9=sparcv9-S2) +CPU64 = $(CPU64_1:amd64=amd64-S2) + +USRSRC:sh = cd ..; /bin/pwd # where source is +BUILDNAME:sh = if [ -f buildname ]; then cat buildname; else echo "DEV"; fi + +STATDIR = $(USRSRC)/../../statdir/$(MACH)/$(BUILDNAME) + +# location of where you want to install the binaries/files +DESTDIR = $(USRSRC)/../../destdir/$(MACH)/$(BUILDNAME) + +# We want to have 8 dmake jobs per processor +DMAKE_MAX_JOBS:sh = eval expr "`/usr/sbin/psrinfo | grep on-line | wc -l` \* 8" + +STUDIOBIN = /opt/SUNWspro/bin +CC = $(STUDIOBIN)/cc +LINT = $(STUDIOBIN)/lint +MAKE = $(STUDIOBIN)/dmake -m serial +DMAKE = $(STUDIOBIN)/dmake -m parallel -j $(DMAKE_MAX_JOBS) + +WORKDIRS = \ + $(STATDIR)/steps \ + $(STATDIR)/fail \ + $(STATDIR)/start \ + #end + +.INIT: + @echo `date` + @echo `uname -a` + @echo "Current directory: `pwd`" + @echo "--------------------------------------" + @env + @echo "--------------------------------------" + @echo "" + +.NO_PARALLEL: + +all: FRC \ + $(STATDIR)/steps/stepinstall \ + $(STATDIR)/steps/steplibm \ + $(STATDIR)/steps/step64libm \
+ #end + +$(WORKDIRS): + mkdir -p $@ + +$(STATDIR)/steps/stepinstall: $(WORKDIRS) + @echo "touch $@ `date`" + touch $@; echo `uptime` >> $@ + +$(STATDIR)/steps/steplibm: + touch $(STATDIR)/start/$(@F) + cd $(USRSRC)/libm/wos; $(MAKE) clobber + if (cd $(USRSRC)/libm/wos; $(DMAKE) $(MAKEFLAGS) install \ + CC=$(CC) LINT=$(LINT) \ + BUILDNAME=$(BUILDNAME) DESTDIR=$(DESTDIR)); then \ + echo "touch $@ `date`"; \ + touch $@; echo `uptime` >> $@; \ + else \ + touch $(STATDIR)/fail/$(@F); \ + exit 1; \ + fi + echo "touch $@ `date`" + touch $@; echo `uptime` >> $@ + +$(STATDIR)/steps/step64libm: + touch $(STATDIR)/start/$(@F) + cd $(USRSRC)/libm/wos64; $(MAKE) clobber + if (cd $(USRSRC)/libm/wos64; $(DMAKE) $(MAKEFLAGS) install \ + CC=$(CC) LINT=$(LINT) \ + BUILDNAME=$(BUILDNAME) DESTDIR=$(DESTDIR)); then \ + echo "touch $@ `date`"; \ + touch $@; echo `uptime` >> $@; \ + else \ + touch $(STATDIR)/fail/$(@F); \ + exit 1; \ + fi + echo "touch $@ `date`" + touch $@; echo `uptime` >> $@ + +clean: + -$(RM) -r $(STATDIR)/steps $(STATDIR)/start $(STATDIR)/fail + -find $(USRSRC) -name '.make.state*' -exec $(RM) '{}' \; + +clobber: clean + +FRC: diff --git a/usr/src/libm/inc/complex.h b/usr/src/libm/inc/complex.h new file mode 100644 index 0000000..f3d33df --- /dev/null +++ b/usr/src/libm/inc/complex.h @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _COMPLEX_H +#define _COMPLEX_H + +#pragma ident "@(#)complex.h 1.10 05/10/06 SMI" + +#if !defined(__cplusplus) + +/* + * Compilation environments for Solaris must provide the _Imaginary datatype + * and the compiler intrinsics _Complex_I and _Imaginary_I + */ +#define _Complex_I _Complex_I +#define complex _Complex +#define _Imaginary_I _Imaginary_I +#define imaginary _Imaginary +#undef I +#define I _Imaginary_I + +extern float cabsf(float complex); +extern float cargf(float complex); +extern float cimagf(float complex); +extern float crealf(float complex); +extern float complex cacosf(float complex); +extern float complex cacoshf(float complex); +extern float complex casinf(float complex); +extern float complex casinhf(float complex); +extern float complex catanf(float complex); +extern float complex catanhf(float complex); +extern float complex ccosf(float complex); +extern float complex ccoshf(float complex); +extern float complex cexpf(float complex); +extern float complex clogf(float complex); +extern float complex conjf(float complex); +extern float complex cpowf(float complex, float complex); +extern float complex cprojf(float complex); +extern float complex csinf(float complex); +extern float complex csinhf(float complex); +extern float complex csqrtf(float complex); +extern float complex ctanf(float complex); +extern float complex ctanhf(float complex); + +extern double cabs(double complex); +extern double carg(double complex); +extern double cimag(double complex); +extern double creal(double complex); +extern double complex cacos(double complex); +extern double complex cacosh(double complex); +extern 
double complex casin(double complex); +extern double complex casinh(double complex); +extern double complex catan(double complex); +extern double complex catanh(double complex); +extern double complex ccos(double complex); +extern double complex ccosh(double complex); +extern double complex cexp(double complex); +#if defined(__PRAGMA_REDEFINE_EXTNAME) +#pragma redefine_extname clog __clog +#else +#undef clog +#define clog __clog +#endif +extern double complex clog(double complex); +extern double complex conj(double complex); +extern double complex cpow(double complex, double complex); +extern double complex cproj(double complex); +extern double complex csin(double complex); +extern double complex csinh(double complex); +extern double complex csqrt(double complex); +extern double complex ctan(double complex); +extern double complex ctanh(double complex); + +extern long double cabsl(long double complex); +extern long double cargl(long double complex); +extern long double cimagl(long double complex); +extern long double creall(long double complex); +extern long double complex cacoshl(long double complex); +extern long double complex cacosl(long double complex); +extern long double complex casinhl(long double complex); +extern long double complex casinl(long double complex); +extern long double complex catanhl(long double complex); +extern long double complex catanl(long double complex); +extern long double complex ccoshl(long double complex); +extern long double complex ccosl(long double complex); +extern long double complex cexpl(long double complex); +extern long double complex clogl(long double complex); +extern long double complex conjl(long double complex); +extern long double complex cpowl(long double complex, long double complex); +extern long double complex cprojl(long double complex); +extern long double complex csinhl(long double complex); +extern long double complex csinl(long double complex); +extern long double complex csqrtl(long double complex); +extern 
long double complex ctanhl(long double complex); +extern long double complex ctanl(long double complex); + +#endif /* !defined(__cplusplus) */ + +#endif /* _COMPLEX_H */ diff --git a/usr/src/libm/inc/fenv.h b/usr/src/libm/inc/fenv.h new file mode 100644 index 0000000..511fea5 --- /dev/null +++ b/usr/src/libm/inc/fenv.h @@ -0,0 +1,246 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _FENV_H +#define _FENV_H + +#pragma ident "@(#)fenv.h 1.10 05/10/06 SMI" + +#include <sys/feature_tests.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +/* + * Rounding modes + */ +#if defined(__sparc) + +#define FE_TONEAREST 0 +#define FE_TOWARDZERO 1 +#define FE_UPWARD 2 +#define FE_DOWNWARD 3 + +#elif defined(__i386) || defined(__amd64) + +#define FE_TONEAREST 0 +#define FE_DOWNWARD 1 +#define FE_UPWARD 2 +#define FE_TOWARDZERO 3 + +#endif + +extern int fegetround __P((void)); +extern int fesetround __P((int)); + +#if (defined(__i386) || defined(__amd64)) && \ + (!defined(_STRICT_STDC) || defined(__EXTENSIONS__)) + +#define FE_FLTPREC 0 +#define FE_DBLPREC 2 +#define FE_LDBLPREC 3 + +extern int fegetprec __P((void)); +extern int fesetprec __P((int)); + +#endif + +/* + * Exception flags + */ +#if defined(__sparc) + +#define FE_INEXACT 0x01 +#define FE_DIVBYZERO 0x02 +#define FE_UNDERFLOW 0x04 +#define FE_OVERFLOW 0x08 +#define FE_INVALID 0x10 +#define FE_ALL_EXCEPT 0x1f + +#elif defined(__i386) || defined(__amd64) + +#define FE_INVALID 0x01 +#define FE_DIVBYZERO 0x04 +#define FE_OVERFLOW 0x08 +#define FE_UNDERFLOW 0x10 +#define FE_INEXACT 0x20 +#define FE_ALL_EXCEPT 0x3d + +#endif + +typedef int fexcept_t; + +extern int feclearexcept __P((int)); +extern int feraiseexcept __P((int)); +extern int fetestexcept __P((int)); +extern int fegetexceptflag __P((fexcept_t *, int)); +extern int fesetexceptflag __P((const fexcept_t *, int)); + +#if !defined(_STRICT_STDC) || defined(__EXTENSIONS__) + +/* + * Exception handling extensions + */ +#define FEX_NOHANDLER -1 +#define FEX_NONSTOP 0 +#define FEX_ABORT 1 +#define FEX_SIGNAL 2 +#define FEX_CUSTOM 3 + +#define FEX_INEXACT 0x001 +#define FEX_DIVBYZERO 0x002 +#define FEX_UNDERFLOW 0x004 +#define FEX_OVERFLOW 0x008 +#define FEX_INV_ZDZ 0x010 +#define FEX_INV_IDI 0x020 +#define FEX_INV_ISI 0x040 +#define FEX_INV_ZMI
0x080 +#define FEX_INV_SQRT 0x100 +#define FEX_INV_SNAN 0x200 +#define FEX_INV_INT 0x400 +#define FEX_INV_CMP 0x800 +#define FEX_INVALID 0xff0 +#define FEX_COMMON (FEX_INVALID | FEX_DIVBYZERO | FEX_OVERFLOW) +#define FEX_ALL (FEX_COMMON | FEX_UNDERFLOW | FEX_INEXACT) +#define FEX_NONE 0 + +#define FEX_NUM_EXC 12 + +/* structure to hold a numeric value in any format used by the FPU */ +typedef struct { + enum fex_nt { + fex_nodata = 0, + fex_int = 1, + fex_llong = 2, + fex_float = 3, + fex_double = 4, + fex_ldouble = 5 + } type; + union { + int i; +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) + long long l; +#else + struct { + int l[2]; + } l; +#endif + float f; + double d; + long double q; + } val; +} fex_numeric_t; + +/* structure to supply information about an exception to a custom handler */ +typedef struct { + enum fex_op { + fex_add = 0, + fex_sub = 1, + fex_mul = 2, + fex_div = 3, + fex_sqrt = 4, + fex_cnvt = 5, + fex_cmp = 6, + fex_other = 7 + } op; /* operation that caused the exception */ + int flags; /* flags to be set */ + fex_numeric_t op1, op2, res; /* operands and result */ +} fex_info_t; + +typedef struct fex_handler_data { + int __mode; + void (*__handler)(); +} fex_handler_t[FEX_NUM_EXC]; + +extern int fex_get_handling __P((int)); +extern int fex_set_handling __P((int, int, void (*)())); + +extern void fex_getexcepthandler __P((fex_handler_t *, int)); +extern void fex_setexcepthandler __P((const fex_handler_t *, int)); + +#ifdef __STDC__ +#include <stdio_tag.h> +#ifndef _FILEDEFED +#define _FILEDEFED +typedef __FILE FILE; +#endif +#endif +extern FILE *fex_get_log __P((void)); +extern int fex_set_log __P((FILE *)); +extern int fex_get_log_depth __P((void)); +extern int fex_set_log_depth __P((int)); +extern void fex_log_entry __P((const char *)); + +#define __fex_handler_t fex_handler_t + +#else + +typedef struct { + int __mode; + void (*__handler)(); +} __fex_handler_t[12]; + +#endif /*
!defined(_STRICT_STDC) || defined(__EXTENSIONS__) */ + +/* + * Environment as a whole + */ +typedef struct { + __fex_handler_t __handlers; + unsigned long __fsr; +} fenv_t; + +#ifdef __STDC__ +extern const fenv_t __fenv_dfl_env; +#else +extern fenv_t __fenv_dfl_env; +#endif + +#define FE_DFL_ENV (&__fenv_dfl_env) + +extern int fegetenv __P((fenv_t *)); +extern int fesetenv __P((const fenv_t *)); +extern int feholdexcept __P((fenv_t *)); +extern int feupdateenv __P((const fenv_t *)); + +#if !defined(_STRICT_STDC) || defined(__EXTENSIONS__) +extern void fex_merge_flags __P((const fenv_t *)); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _FENV_H */ diff --git a/usr/src/libm/inc/floatingpoint.h b/usr/src/libm/inc/floatingpoint.h new file mode 100644 index 0000000..d5ef956 --- /dev/null +++ b/usr/src/libm/inc/floatingpoint.h @@ -0,0 +1,202 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (C) 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _FLOATINGPOINT_H +#define _FLOATINGPOINT_H + +#pragma ident "@(#)floatingpoint.h 2.10 05/10/06 SMI" + +#ifdef __STDC__ +#include <stdio_tag.h> +#endif +#include <sys/ieeefp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * <floatingpoint.h> contains definitions for constants, types, variables, + * and functions for: + * IEEE floating-point arithmetic base conversion; + * IEEE floating-point arithmetic modes; + * IEEE floating-point arithmetic exception handling. + */ + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +#if defined(__STDC__) && !defined(_FILEDEFED) +#define _FILEDEFED +typedef __FILE FILE; +#endif + +#define N_IEEE_EXCEPTION 5 /* Number of floating-point exceptions. */ + +typedef int sigfpe_code_type; /* Type of SIGFPE code. */ + +typedef void (*sigfpe_handler_type)(); /* Pointer to exception handler */ + +#define SIGFPE_DEFAULT (void (*)())0 /* default exception handling */ +#define SIGFPE_IGNORE (void (*)())1 /* ignore this exception or code */ +#define SIGFPE_ABORT (void (*)())2 /* force abort on exception */ + +extern sigfpe_handler_type sigfpe __P((sigfpe_code_type, sigfpe_handler_type)); + +/* + * Types for IEEE floating point. + */ +typedef float single; + +#ifndef _EXTENDED +#define _EXTENDED +typedef unsigned extended[3]; +#endif + +typedef long double quadruple; /* Quadruple-precision type. */ + +typedef unsigned fp_exception_field_type; + /* + * A field containing fp_exceptions OR'ed + * together. + */ +/* + * Definitions for base conversion. + */ +#define DECIMAL_STRING_LENGTH 512 /* Size of buffer in decimal_record. */ + +typedef char decimal_string[DECIMAL_STRING_LENGTH]; + /* Decimal significand. */ + +typedef struct { + enum fp_class_type fpclass; + int sign; + int exponent; + decimal_string ds; /* Significand - each char contains an ascii */ + /* digit, except the string-terminating */ + /* ascii null.
*/ + int more; /* On conversion from decimal to binary, != 0 */ + /* indicates more non-zero digits following */ + /* ds. */ + int ndigits; /* On fixed_form conversion from binary to */ + /* decimal, contains number of digits */ + /* required for ds. */ +} decimal_record; + +enum decimal_form { + fixed_form, /* Fortran F format: ndigits specifies number */ + /* of digits after point; if negative, */ + /* specifies rounding to occur to left of */ + /* point. */ + floating_form /* Fortran E format: ndigits specifies number */ + /* of significant digits. */ +}; + +typedef struct { + enum fp_direction_type rd; + /* Rounding direction. */ + enum decimal_form df; /* Format for conversion from binary to */ + /* decimal. */ + int ndigits; /* Number of digits for conversion. */ +} decimal_mode; + +enum decimal_string_form { /* Valid decimal number string formats. */ + invalid_form, /* Not a valid decimal string format. */ + whitespace_form, /* All white space - valid in Fortran! */ + fixed_int_form, /* <int> */ + fixed_intdot_form, /* <int>. */ + fixed_dotfrac_form, /* .<frac> */ + fixed_intdotfrac_form, /* <int>.<frac> */ + floating_int_form, /* <int><exp> */ + floating_intdot_form, /* <int>.<exp> */ + floating_dotfrac_form, /* .<frac><exp> */ + floating_intdotfrac_form, /* <int>.<frac><exp>
*/ + inf_form, /* inf */ + infinity_form, /* infinity */ + nan_form, /* nan */ + nanstring_form /* nan(string) */ +}; + +extern void single_to_decimal __P((single *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void double_to_decimal __P((double *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void extended_to_decimal __P((extended *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); +extern void quadruple_to_decimal __P((quadruple *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); + +extern void decimal_to_single __P((single *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void decimal_to_double __P((double *, decimal_mode *, decimal_record *, + fp_exception_field_type *)); +extern void decimal_to_extended __P((extended *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); +extern void decimal_to_quadruple __P((quadruple *, decimal_mode *, + decimal_record *, fp_exception_field_type *)); + +extern void string_to_decimal __P((char **, int, int, decimal_record *, + enum decimal_string_form *, char **)); +extern void func_to_decimal __P((char **, int, int, decimal_record *, + enum decimal_string_form *, char **, + int (*)(void), int *, int (*)(int))); +extern void file_to_decimal __P((char **, int, int, decimal_record *, + enum decimal_string_form *, char **, + FILE *, int *)); + +extern char *seconvert __P((single *, int, int *, int *, char *)); +extern char *sfconvert __P((single *, int, int *, int *, char *)); +extern char *sgconvert __P((single *, int, int, char *)); +extern char *econvert __P((double, int, int *, int *, char *)); +extern char *fconvert __P((double, int, int *, int *, char *)); +extern char *gconvert __P((double, int, int, char *)); +extern char *qeconvert __P((quadruple *, int, int *, int *, char *)); +extern char *qfconvert __P((quadruple *, int, int *, int *, char *)); +extern char *qgconvert __P((quadruple *, int, 
int, char *)); + +extern char *ecvt __P((double, int, int *, int *)); +extern char *fcvt __P((double, int, int *, int *)); +extern char *gcvt __P((double, int, char *)); + +/* + * ANSI C Standard says the following entry points should be + * prototyped in <stdlib.h>. They are now, but weren't before. + */ +extern double atof __P((const char *)); +extern double strtod __P((const char *, char **)); + +#ifdef __cplusplus +} +#endif + +#endif /* _FLOATINGPOINT_H */ diff --git a/usr/src/libm/inc/iso/math_c99.h b/usr/src/libm/inc/iso/math_c99.h new file mode 100644 index 0000000..a418d18 --- /dev/null +++ b/usr/src/libm/inc/iso/math_c99.h @@ -0,0 +1,527 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _ISO_MATH_C99_H +#define _ISO_MATH_C99_H + +#pragma ident "@(#)math_c99.h 1.11 05/10/06 SMI" + +#include <sys/feature_tests.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +#if defined(_STDC_C99) || _XOPEN_SOURCE - 0 >= 600 || defined(__C99FEATURES__) +#if defined(__GNUC__) +#undef HUGE_VAL +#define HUGE_VAL (__builtin_huge_val()) +#undef HUGE_VALF +#define HUGE_VALF (__builtin_huge_valf()) +#undef HUGE_VALL +#define HUGE_VALL (__builtin_huge_vall()) +#undef INFINITY +#define INFINITY (__builtin_inff()) +#undef NAN +#define NAN (__builtin_nanf("")) + +/* + * C99 7.12.3 classification macros + */ +#undef isnan +#if __GNUC__ >= 4 +#define isnan(x) __builtin_isnan(x) +#else +#define isnan(x) __extension__( \ + { __typeof(x) __x_n = (x); \ + __builtin_isunordered(__x_n, __x_n); }) +#endif +#undef isinf +#define isinf(x) __extension__( \ + { __typeof(x) __x_i = (x); \ + __x_i == (__typeof(__x_i)) INFINITY || \ + __x_i == (__typeof(__x_i)) (-INFINITY); }) +#undef isfinite +#define isfinite(x) __extension__( \ + { __typeof(x) __x_f = (x); \ + !isnan(__x_f) && !isinf(__x_f); }) +#undef isnormal +#define isnormal(x) __extension__( \ + { __typeof(x) __x_r = (x); isfinite(__x_r) && \ + (sizeof (__x_r) == sizeof (float) ? \ + __builtin_fabsf(__x_r) >= __FLT_MIN__ : \ + sizeof (__x_r) == sizeof (double) ? \ + __builtin_fabs(__x_r) >= __DBL_MIN__ : \ + __builtin_fabsl(__x_r) >= __LDBL_MIN__); }) +#undef fpclassify +#define fpclassify(x) __extension__( \ + { __typeof(x) __x_c = (x); \ + isnan(__x_c) ? FP_NAN : \ + isinf(__x_c) ? FP_INFINITE : \ + isnormal(__x_c) ? FP_NORMAL : \ + __x_c == (__typeof(__x_c)) 0 ? 
FP_ZERO : \ + FP_SUBNORMAL; }) +#undef signbit +#if defined(__sparc) +#define signbit(x) __extension__( \ + { __typeof(x) __x_s = (x); \ + (int) (*(unsigned *) &__x_s >> 31); }) +#elif defined(__i386) || defined(__amd64) +#define signbit(x) __extension__( \ + { __typeof(x) __x_s = (x); \ + (sizeof (__x_s) == sizeof (float) ? \ + (int) (*(unsigned *) &__x_s >> 31) : \ + sizeof (__x_s) == sizeof (double) ? \ + (int) (((unsigned *) &__x_s)[1] >> 31) : \ + (int) (((unsigned short *) &__x_s)[4] >> 15)); }) +#endif + +/* + * C99 7.12.14 comparison macros + */ +#undef isgreater +#define isgreater(x, y) __builtin_isgreater(x, y) +#undef isgreaterequal +#define isgreaterequal(x, y) __builtin_isgreaterequal(x, y) +#undef isless +#define isless(x, y) __builtin_isless(x, y) +#undef islessequal +#define islessequal(x, y) __builtin_islessequal(x, y) +#undef islessgreater +#define islessgreater(x, y) __builtin_islessgreater(x, y) +#undef isunordered +#define isunordered(x, y) __builtin_isunordered(x, y) +#else /* defined(__GNUC__) */ +#undef HUGE_VAL +#define HUGE_VAL __builtin_huge_val +#undef HUGE_VALF +#define HUGE_VALF __builtin_huge_valf +#undef HUGE_VALL +#define HUGE_VALL __builtin_huge_vall +#undef INFINITY +#define INFINITY __builtin_infinity +#undef NAN +#define NAN __builtin_nan + +/* + * C99 7.12.3 classification macros + */ +#undef fpclassify +#define fpclassify(x) __builtin_fpclassify(x) +#undef isfinite +#define isfinite(x) __builtin_isfinite(x) +#undef isinf +#define isinf(x) __builtin_isinf(x) +#undef isnan +#define isnan(x) __builtin_isnan(x) +#undef isnormal +#define isnormal(x) __builtin_isnormal(x) +#undef signbit +#define signbit(x) __builtin_signbit(x) + +/* + * C99 7.12.14 comparison macros + */ +#undef isgreater +#define isgreater(x, y) ((x) __builtin_isgreater(y)) +#undef isgreaterequal +#define isgreaterequal(x, y) ((x) __builtin_isgreaterequal(y)) +#undef isless +#define isless(x, y) ((x) __builtin_isless(y)) +#undef islessequal +#define islessequal(x, 
y) ((x) __builtin_islessequal(y)) +#undef islessgreater +#define islessgreater(x, y) ((x) __builtin_islessgreater(y)) +#undef isunordered +#define isunordered(x, y) ((x) __builtin_isunordered(y)) +#endif /* defined(__GNUC__) */ +#endif /* defined(_STDC_C99) || _XOPEN_SOURCE - 0 >= 600 || ... */ + +#if defined(__EXTENSIONS__) || defined(_STDC_C99) || \ + (!defined(_STRICT_STDC) && !defined(__XOPEN_OR_POSIX)) || \ + defined(__C99FEATURES__) +#if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ - 0 == 0 +typedef float float_t; +typedef double double_t; +#elif __FLT_EVAL_METHOD__ - 0 == 1 +typedef double float_t; +typedef double double_t; +#elif __FLT_EVAL_METHOD__ - 0 == 2 +typedef long double float_t; +typedef long double double_t; +#elif defined(__sparc) || defined(__amd64) +typedef float float_t; +typedef double double_t; +#elif defined(__i386) +typedef long double float_t; +typedef long double double_t; +#endif + +#undef FP_ZERO +#define FP_ZERO 0 +#undef FP_SUBNORMAL +#define FP_SUBNORMAL 1 +#undef FP_NORMAL +#define FP_NORMAL 2 +#undef FP_INFINITE +#define FP_INFINITE 3 +#undef FP_NAN +#define FP_NAN 4 + +#undef FP_ILOGB0 +#define FP_ILOGB0 (-2147483647) +#undef FP_ILOGBNAN +#define FP_ILOGBNAN 2147483647 + +#undef MATH_ERRNO +#define MATH_ERRNO 1 +#undef MATH_ERREXCEPT +#define MATH_ERREXCEPT 2 +#undef math_errhandling +#define math_errhandling MATH_ERREXCEPT + +extern double acosh __P((double)); +extern double asinh __P((double)); +extern double atanh __P((double)); + +extern double exp2 __P((double)); +extern double expm1 __P((double)); +extern int ilogb __P((double)); +extern double log1p __P((double)); +extern double log2 __P((double)); +extern double logb __P((double)); +extern double scalbn __P((double, int)); +extern double scalbln __P((double, long int)); + +extern double cbrt __P((double)); +extern double hypot __P((double, double)); + +extern double erf __P((double)); +extern double erfc __P((double)); +extern double lgamma __P((double)); +extern 
double tgamma __P((double)); + +extern double nearbyint __P((double)); +extern double rint __P((double)); +extern long int lrint __P((double)); +extern double round __P((double)); +extern long int lround __P((double)); +extern double trunc __P((double)); + +extern double remainder __P((double, double)); +extern double remquo __P((double, double, int *)); + +extern double copysign __P((double, double)); +extern double nan __P((const char *)); +extern double nextafter __P((double, double)); +extern double nexttoward __P((double, long double)); + +extern double fdim __P((double, double)); +extern double fmax __P((double, double)); +extern double fmin __P((double, double)); + +extern double fma __P((double, double, double)); + +extern float acosf __P((float)); +extern float asinf __P((float)); +extern float atanf __P((float)); +extern float atan2f __P((float, float)); +extern float cosf __P((float)); +extern float sinf __P((float)); +extern float tanf __P((float)); + +extern float acoshf __P((float)); +extern float asinhf __P((float)); +extern float atanhf __P((float)); +extern float coshf __P((float)); +extern float sinhf __P((float)); +extern float tanhf __P((float)); + +extern float expf __P((float)); +extern float exp2f __P((float)); +extern float expm1f __P((float)); +extern float frexpf __P((float, int *)); +extern int ilogbf __P((float)); +extern float ldexpf __P((float, int)); +extern float logf __P((float)); +extern float log10f __P((float)); +extern float log1pf __P((float)); +extern float log2f __P((float)); +extern float logbf __P((float)); +extern float modff __P((float, float *)); +extern float scalbnf __P((float, int)); +extern float scalblnf __P((float, long int)); + +extern float cbrtf __P((float)); +extern float fabsf __P((float)); +extern float hypotf __P((float, float)); +extern float powf __P((float, float)); +extern float sqrtf __P((float)); + +extern float erff __P((float)); +extern float erfcf __P((float)); +extern float lgammaf __P((float)); 
+extern float tgammaf __P((float)); + +extern float ceilf __P((float)); +extern float floorf __P((float)); +extern float nearbyintf __P((float)); +extern float rintf __P((float)); +extern long int lrintf __P((float)); +extern float roundf __P((float)); +extern long int lroundf __P((float)); +extern float truncf __P((float)); + +extern float fmodf __P((float, float)); +extern float remainderf __P((float, float)); +extern float remquof __P((float, float, int *)); + +extern float copysignf __P((float, float)); +extern float nanf __P((const char *)); +extern float nextafterf __P((float, float)); +extern float nexttowardf __P((float, long double)); + +extern float fdimf __P((float, float)); +extern float fmaxf __P((float, float)); +extern float fminf __P((float, float)); + +extern float fmaf __P((float, float, float)); + +extern long double acosl __P((long double)); +extern long double asinl __P((long double)); +extern long double atanl __P((long double)); +extern long double atan2l __P((long double, long double)); +extern long double cosl __P((long double)); +extern long double sinl __P((long double)); +extern long double tanl __P((long double)); + +extern long double acoshl __P((long double)); +extern long double asinhl __P((long double)); +extern long double atanhl __P((long double)); +extern long double coshl __P((long double)); +extern long double sinhl __P((long double)); +extern long double tanhl __P((long double)); + +extern long double expl __P((long double)); +extern long double exp2l __P((long double)); +extern long double expm1l __P((long double)); +extern long double frexpl __P((long double, int *)); +extern int ilogbl __P((long double)); +extern long double ldexpl __P((long double, int)); +extern long double logl __P((long double)); +extern long double log10l __P((long double)); +extern long double log1pl __P((long double)); +extern long double log2l __P((long double)); +extern long double logbl __P((long double)); +extern long double modfl __P((long 
double, long double *)); +extern long double scalbnl __P((long double, int)); +extern long double scalblnl __P((long double, long int)); + +extern long double cbrtl __P((long double)); +extern long double fabsl __P((long double)); +extern long double hypotl __P((long double, long double)); +extern long double powl __P((long double, long double)); +extern long double sqrtl __P((long double)); + +extern long double erfl __P((long double)); +extern long double erfcl __P((long double)); +extern long double lgammal __P((long double)); +extern long double tgammal __P((long double)); + +extern long double ceill __P((long double)); +extern long double floorl __P((long double)); +extern long double nearbyintl __P((long double)); +extern long double rintl __P((long double)); +extern long int lrintl __P((long double)); +extern long double roundl __P((long double)); +extern long int lroundl __P((long double)); +extern long double truncl __P((long double)); + +extern long double fmodl __P((long double, long double)); +extern long double remainderl __P((long double, long double)); +extern long double remquol __P((long double, long double, int *)); + +extern long double copysignl __P((long double, long double)); +extern long double nanl __P((const char *)); +extern long double nextafterl __P((long double, long double)); +extern long double nexttowardl __P((long double, long double)); + +extern long double fdiml __P((long double, long double)); +extern long double fmaxl __P((long double, long double)); +extern long double fminl __P((long double, long double)); + +extern long double fmal __P((long double, long double, long double)); + +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) +extern long long int llrint __P((double)); +extern long long int llround __P((double)); + +extern long long int llrintf __P((float)); +extern long long int llroundf __P((float)); + +extern long long int llrintl __P((long double)); +extern long 
long int llroundl __P((long double)); +#endif + +#if !defined(__cplusplus) +#pragma does_not_read_global_data(asinh, exp2, expm1) +#pragma does_not_read_global_data(ilogb, log2) +#pragma does_not_read_global_data(scalbn, scalbln, cbrt) +#pragma does_not_read_global_data(erf, erfc, tgamma) +#pragma does_not_read_global_data(nearbyint, rint, lrint, round, lround, trunc) +#pragma does_not_read_global_data(remquo) +#pragma does_not_read_global_data(copysign, nan, nexttoward) +#pragma does_not_read_global_data(fdim, fmax, fmin, fma) +#pragma does_not_write_global_data(asinh, exp2, expm1) +#pragma does_not_write_global_data(ilogb, log2) +#pragma does_not_write_global_data(scalbn, scalbln, cbrt) +#pragma does_not_write_global_data(erf, erfc, tgamma) +#pragma does_not_write_global_data(nearbyint, rint, lrint, round, lround, trunc) +#pragma does_not_write_global_data(copysign, nan, nexttoward) +#pragma does_not_write_global_data(fdim, fmax, fmin, fma) + +#pragma does_not_read_global_data(acosf, asinf, atanf, atan2f) +#pragma does_not_read_global_data(cosf, sinf, tanf) +#pragma does_not_read_global_data(acoshf, asinhf, atanhf, coshf, sinhf, tanhf) +#pragma does_not_read_global_data(expf, exp2f, expm1f, frexpf, ilogbf, ldexpf) +#pragma does_not_read_global_data(logf, log10f, log1pf, log2f, logbf) +#pragma does_not_read_global_data(modff, scalbnf, scalblnf) +#pragma does_not_read_global_data(cbrtf, fabsf, hypotf, powf, sqrtf) +#pragma does_not_read_global_data(erff, erfcf, lgammaf, tgammaf) +#pragma does_not_read_global_data(ceilf, floorf, nearbyintf) +#pragma does_not_read_global_data(rintf, lrintf, roundf, lroundf, truncf) +#pragma does_not_read_global_data(fmodf, remainderf, remquof) +#pragma does_not_read_global_data(copysignf, nanf, nextafterf, nexttowardf) +#pragma does_not_read_global_data(fdimf, fmaxf, fminf, fmaf) +#pragma does_not_write_global_data(acosf, asinf, atanf, atan2f) +#pragma does_not_write_global_data(cosf, sinf, tanf) +#pragma 
does_not_write_global_data(acoshf, asinhf, atanhf, coshf, sinhf, tanhf) +#pragma does_not_write_global_data(expf, exp2f, expm1f, ilogbf, ldexpf) +#pragma does_not_write_global_data(logf, log10f, log1pf, log2f, logbf) +#pragma does_not_write_global_data(cbrtf, fabsf, hypotf, powf, sqrtf) +#pragma does_not_write_global_data(erff, erfcf, tgammaf) +#pragma does_not_write_global_data(ceilf, floorf, nearbyintf) +#pragma does_not_write_global_data(rintf, lrintf, roundf, lroundf, truncf) +#pragma does_not_write_global_data(fmodf, remainderf) +#pragma does_not_write_global_data(copysignf, nanf, nextafterf, nexttowardf) +#pragma does_not_write_global_data(fdimf, fmaxf, fminf, fmaf) + +#pragma does_not_read_global_data(acosl, asinl, atanl, atan2l) +#pragma does_not_read_global_data(cosl, sinl, tanl) +#pragma does_not_read_global_data(acoshl, asinhl, atanhl, coshl, sinhl, tanhl) +#pragma does_not_read_global_data(expl, exp2l, expm1l, frexpl, ilogbl, ldexpl) +#pragma does_not_read_global_data(logl, log10l, log1pl, log2l, logbl) +#pragma does_not_read_global_data(modfl, scalbnl, scalblnl) +#pragma does_not_read_global_data(cbrtl, fabsl, hypotl, powl, sqrtl) +#pragma does_not_read_global_data(erfl, erfcl, lgammal, tgammal) +#pragma does_not_read_global_data(ceill, floorl, nearbyintl) +#pragma does_not_read_global_data(rintl, lrintl, roundl, lroundl, truncl) +#pragma does_not_read_global_data(fmodl, remainderl, remquol) +#pragma does_not_read_global_data(copysignl, nanl, nextafterl, nexttowardl) +#pragma does_not_read_global_data(fdiml, fmaxl, fminl, fmal) +#pragma does_not_write_global_data(acosl, asinl, atanl, atan2l) +#pragma does_not_write_global_data(cosl, sinl, tanl) +#pragma does_not_write_global_data(acoshl, asinhl, atanhl, coshl, sinhl, tanhl) +#pragma does_not_write_global_data(expl, exp2l, expm1l, ilogbl, ldexpl) +#pragma does_not_write_global_data(logl, log10l, log1pl, log2l, logbl) +#pragma does_not_write_global_data(cbrtl, fabsl, hypotl, powl, sqrtl) +#pragma 
does_not_write_global_data(erfl, erfcl, tgammal) +#pragma does_not_write_global_data(ceill, floorl, nearbyintl) +#pragma does_not_write_global_data(rintl, lrintl, roundl, lroundl, truncl) +#pragma does_not_write_global_data(fmodl, remainderl) +#pragma does_not_write_global_data(copysignl, nanl, nextafterl, nexttowardl) +#pragma does_not_write_global_data(fdiml, fmaxl, fminl, fmal) + +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) +#pragma does_not_read_global_data(llrint, llround) +#pragma does_not_read_global_data(llrintf, llroundf, llrintl, llroundl) +#pragma does_not_write_global_data(llrint, llround) +#pragma does_not_write_global_data(llrintf, llroundf, llrintl, llroundl) +#endif +#endif /* !defined(__cplusplus) */ + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(acosh, atanh, hypot, lgamma, log1p, logb) +#pragma does_not_read_global_data(nextafter, remainder) +#pragma does_not_write_global_data(acosh, atanh, hypot, log1p, logb) +#pragma does_not_write_global_data(nextafter, remainder) + +#pragma no_side_effect(acosh, asinh, atanh, exp2, expm1) +#pragma no_side_effect(ilogb, log1p, log2, logb) +#pragma no_side_effect(scalbn, scalbln, cbrt, hypot) +#pragma no_side_effect(erf, erfc, tgamma) +#pragma no_side_effect(nearbyint, rint, lrint, round, lround, trunc) +#pragma no_side_effect(remainder) +#pragma no_side_effect(copysign, nan, nextafter, nexttoward) +#pragma no_side_effect(fdim, fmax, fmin, fma) + +#pragma no_side_effect(acosf, asinf, atanf, atan2f) +#pragma no_side_effect(cosf, sinf, tanf, coshf, sinhf, tanhf) +#pragma no_side_effect(acoshf, asinhf, atanhf, coshf, sinhf, tanhf) +#pragma no_side_effect(expf, exp2f, expm1f, ilogbf, ldexpf) +#pragma no_side_effect(logf, log10f, log1pf, log2f, logbf) +#pragma no_side_effect(cbrtf, fabsf, hypotf, powf, sqrtf) +#pragma no_side_effect(erff, erfcf, tgammaf) +#pragma no_side_effect(ceilf, floorf, nearbyintf) +#pragma 
no_side_effect(rintf, lrintf, roundf, lroundf, truncf) +#pragma no_side_effect(fmodf, remainderf) +#pragma no_side_effect(copysignf, nanf, nextafterf, nexttowardf) +#pragma no_side_effect(fdimf, fmaxf, fminf, fmaf) + +#pragma no_side_effect(acosl, asinl, atanl, atan2l) +#pragma no_side_effect(cosl, sinl, tanl, coshl, sinhl, tanhl) +#pragma no_side_effect(acoshl, asinhl, atanhl, coshl, sinhl, tanhl) +#pragma no_side_effect(expl, exp2l, expm1l, ilogbl, ldexpl) +#pragma no_side_effect(logl, log10l, log1pl, log2l, logbl) +#pragma no_side_effect(cbrtl, fabsl, hypotl, powl, sqrtl) +#pragma no_side_effect(erfl, erfcl, tgammal) +#pragma no_side_effect(ceill, floorl, nearbyintl) +#pragma no_side_effect(rintl, lrintl, roundl, lroundl, truncl) +#pragma no_side_effect(fmodl, remainderl) +#pragma no_side_effect(copysignl, nanl, nextafterl, nexttowardl) +#pragma no_side_effect(fdiml, fmaxl, fminl, fmal) + +#if !defined(_STRICT_STDC) && !defined(_NO_LONGLONG) || defined(_STDC_C99) || \ + defined(__C99FEATURES__) +#pragma no_side_effect(llrint, llround, llrintf, llroundf, llrintl, llroundl) +#endif +#endif /* defined(__MATHERR_ERRNO_DONTCARE) */ +#endif /* defined(__EXTENSIONS__) || defined(_STDC_C99) || ... */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ISO_MATH_C99_H */ diff --git a/usr/src/libm/inc/iso/math_iso.h b/usr/src/libm/inc/iso/math_iso.h new file mode 100644 index 0000000..763bf91 --- /dev/null +++ b/usr/src/libm/inc/iso/math_iso.h @@ -0,0 +1,231 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ISO_MATH_ISO_H +#define _ISO_MATH_ISO_H + +#pragma ident "@(#)math_iso.h 1.10 05/10/06 SMI" + +#include <sys/feature_tests.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +#if !defined(_STDC_C99) && _XOPEN_SOURCE - 0 < 600 && !defined(__C99FEATURES__) +typedef union _h_val { + unsigned long _i[sizeof (double) / sizeof (unsigned long)]; + double _d; +} _h_val; + +#ifdef __STDC__ +extern const _h_val __huge_val; +#else +extern _h_val __huge_val; +#endif +#undef HUGE_VAL +#define HUGE_VAL __huge_val._d +#endif /* !defined(_STDC_C99) && _XOPEN_SOURCE - 0 < 600 && ... 
*/ + +#if __cplusplus >= 199711L +namespace std { +#endif + +extern double acos __P((double)); +extern double asin __P((double)); +extern double atan __P((double)); +extern double atan2 __P((double, double)); +extern double cos __P((double)); +extern double sin __P((double)); +extern double tan __P((double)); + +extern double cosh __P((double)); +extern double sinh __P((double)); +extern double tanh __P((double)); + +extern double exp __P((double)); +extern double frexp __P((double, int *)); +extern double ldexp __P((double, int)); +extern double log __P((double)); +extern double log10 __P((double)); +extern double modf __P((double, double *)); + +extern double pow __P((double, double)); +extern double sqrt __P((double)); + +extern double ceil __P((double)); +extern double fabs __P((double)); +extern double floor __P((double)); +extern double fmod __P((double, double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(acos, asin, atan, atan2) +#pragma does_not_read_global_data(cos, sin, tan, cosh, sinh, tanh) +#pragma does_not_read_global_data(exp, log, log10, pow, sqrt) +#pragma does_not_read_global_data(frexp, ldexp, modf) +#pragma does_not_read_global_data(ceil, fabs, floor, fmod) +#pragma does_not_write_global_data(acos, asin, atan, atan2) +#pragma does_not_write_global_data(cos, sin, tan, cosh, sinh, tanh) +#pragma does_not_write_global_data(exp, log, log10, pow, sqrt) +#pragma does_not_write_global_data(ldexp) +#pragma does_not_write_global_data(ceil, fabs, floor, fmod) +#pragma no_side_effect(acos, asin, atan, atan2) +#pragma no_side_effect(cos, sin, tan, cosh, sinh, tanh) +#pragma no_side_effect(exp, log, log10, pow, sqrt) +#pragma no_side_effect(ldexp) +#pragma no_side_effect(ceil, fabs, floor, fmod) +#endif + +#if __cplusplus >= 199711L +extern float __acosf(float); +extern float __asinf(float); +extern float __atanf(float); +extern float __atan2f(float, float); +extern float __ceilf(float); +extern float __cosf(float); +extern 
float __coshf(float); +extern float __expf(float); +extern float __fabsf(float); +extern float __floorf(float); +extern float __fmodf(float, float); +extern float __frexpf(float, int *); +extern float __ldexpf(float, int); +extern float __logf(float); +extern float __log10f(float); +extern float __modff(float, float *); +extern float __powf(float, float); +extern float __sinf(float); +extern float __sinhf(float); +extern float __sqrtf(float); +extern float __tanf(float); +extern float __tanhf(float); + +extern long double __acosl(long double); +extern long double __asinl(long double); +extern long double __atanl(long double); +extern long double __atan2l(long double, long double); +extern long double __ceill(long double); +extern long double __cosl(long double); +extern long double __coshl(long double); +extern long double __expl(long double); +extern long double __fabsl(long double); +extern long double __floorl(long double); +extern long double __fmodl(long double, long double); +extern long double __frexpl(long double, int *); +extern long double __ldexpl(long double, int); +extern long double __logl(long double); +extern long double __log10l(long double); +extern long double __modfl(long double, long double *); +extern long double __powl(long double, long double); +extern long double __sinl(long double); +extern long double __sinhl(long double); +extern long double __sqrtl(long double); +extern long double __tanl(long double); +extern long double __tanhl(long double); + +extern "C++" { +#undef __X +#undef __Y + inline double abs(double __X) { return fabs(__X); } + inline double pow(double __X, int __Y) { return + pow(__X, (double) (__Y)); } + + inline float abs(float __X) { return __fabsf(__X); } + inline float acos(float __X) { return __acosf(__X); } + inline float asin(float __X) { return __asinf(__X); } + inline float atan(float __X) { return __atanf(__X); } + inline float atan2(float __X, float __Y) { return __atan2f(__X, __Y); } + inline float ceil(float 
__X) { return __ceilf(__X); } + inline float cos(float __X) { return __cosf(__X); } + inline float cosh(float __X) { return __coshf(__X); } + inline float exp(float __X) { return __expf(__X); } + inline float fabs(float __X) { return __fabsf(__X); } + inline float floor(float __X) { return __floorf(__X); } + inline float fmod(float __X, float __Y) { return __fmodf(__X, __Y); } + inline float frexp(float __X, int *__Y) { return __frexpf(__X, __Y); } + inline float ldexp(float __X, int __Y) { return __ldexpf(__X, __Y); } + inline float log(float __X) { return __logf(__X); } + inline float log10(float __X) { return __log10f(__X); } + inline float modf(float __X, float *__Y) { return __modff(__X, __Y); } + inline float pow(float __X, float __Y) { return __powf(__X, __Y); } + inline float pow(float __X, int __Y) { return + pow((double) (__X), (double) (__Y)); } + inline float sin(float __X) { return __sinf(__X); } + inline float sinh(float __X) { return __sinhf(__X); } + inline float sqrt(float __X) { return __sqrtf(__X); } + inline float tan(float __X) { return __tanf(__X); } + inline float tanh(float __X) { return __tanhf(__X); } + + inline long double abs(long double __X) { return __fabsl(__X); } + inline long double acos(long double __X) { return __acosl(__X); } + inline long double asin(long double __X) { return __asinl(__X); } + inline long double atan(long double __X) { return __atanl(__X); } + inline long double atan2(long double __X, long double __Y) { return + __atan2l(__X, __Y); } + inline long double ceil(long double __X) { return __ceill(__X); } + inline long double cos(long double __X) { return __cosl(__X); } + inline long double cosh(long double __X) { return __coshl(__X); } + inline long double exp(long double __X) { return __expl(__X); } + inline long double fabs(long double __X) { return __fabsl(__X); } + inline long double floor(long double __X) { return __floorl(__X); } + inline long double fmod(long double __X, long double __Y) { return + 
__fmodl(__X, __Y); } + inline long double frexp(long double __X, int *__Y) { return + __frexpl(__X, __Y); } + inline long double ldexp(long double __X, int __Y) { return + __ldexpl(__X, __Y); } + inline long double log(long double __X) { return __logl(__X); } + inline long double log10(long double __X) { return __log10l(__X); } + inline long double modf(long double __X, long double *__Y) { return + __modfl(__X, __Y); } + inline long double pow(long double __X, long double __Y) { return + __powl(__X, __Y); } + inline long double pow(long double __X, int __Y) { return + __powl(__X, (long double) (__Y)); } + inline long double sin(long double __X) { return __sinl(__X); } + inline long double sinh(long double __X) { return __sinhl(__X); } + inline long double sqrt(long double __X) { return __sqrtl(__X); } + inline long double tan(long double __X) { return __tanl(__X); } + inline long double tanh(long double __X) { return __tanhl(__X); } +} /* end of extern "C++" */ +#endif /* __cplusplus >= 199711L */ + +#if __cplusplus >= 199711L +} /* end of namespace std */ +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _ISO_MATH_ISO_H */ diff --git a/usr/src/libm/inc/math.h b/usr/src/libm/inc/math.h new file mode 100644 index 0000000..60800bb --- /dev/null +++ b/usr/src/libm/inc/math.h @@ -0,0 +1,350 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _MATH_H +#define _MATH_H + +#pragma ident "@(#)math.h 2.25 05/10/06 SMI" + +#include <iso/math_iso.h> /* C90 functions; supplies the std:: names used below */ +#include <iso/math_c99.h> + +#if __cplusplus >= 199711L +using std::abs; +using std::acos; +using std::asin; +using std::atan2; +using std::atan; +using std::ceil; +using std::cos; +using std::cosh; +using std::exp; +using std::fabs; +using std::floor; +using std::fmod; +using std::frexp; +using std::ldexp; +using std::log10; +using std::log; +using std::modf; +using std::pow; +using std::sin; +using std::sinh; +using std::sqrt; +using std::tan; +using std::tanh; +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__cplusplus) && defined(__GNUC__) +#define exception __math_exception +#endif + +#ifndef __P +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif +#endif /* !defined(__P) */ + +#if defined(__EXTENSIONS__) || defined(_XOPEN_SOURCE) || \ + !defined(_STRICT_STDC) && !defined(_POSIX_C_SOURCE) +/* + * SVID & X/Open + */ +#define M_E 2.7182818284590452354 +#define M_LOG2E 1.4426950408889634074 +#define M_LOG10E 0.43429448190325182765 +#define M_LN2 0.69314718055994530942 +#define M_LN10 2.30258509299404568402 +#define M_PI 3.14159265358979323846 +#define M_PI_2 1.57079632679489661923 +#define M_PI_4 0.78539816339744830962 +#define M_1_PI 0.31830988618379067154 +#define M_2_PI 0.63661977236758134308 +#define M_2_SQRTPI 1.12837916709551257390 +#define M_SQRT2 1.41421356237309504880 +#define M_SQRT1_2 0.70710678118654752440 + +extern int signgam; + +#define MAXFLOAT ((float)3.40282346638528860e+38) + +#if defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) +/* + * SVID + */ +enum version {libm_ieee = -1,
c_issue_4, ansi_1, strict_ansi}; + +#ifdef __STDC__ +extern const enum version _lib_version; +#else +extern enum version _lib_version; +#endif + +struct exception { + int type; + char *name; + double arg1; + double arg2; + double retval; +}; + +#define HUGE MAXFLOAT + +#define _ABS(x) ((x) < 0 ? -(x) : (x)) + +#define _REDUCE(TYPE, X, XN, C1, C2) { \ + double x1 = (double)(TYPE)X, x2 = X - x1; \ + X = x1 - (XN) * (C1); X += x2; X -= (XN) * (C2); } + +#define DOMAIN 1 +#define SING 2 +#define OVERFLOW 3 +#define UNDERFLOW 4 +#define TLOSS 5 +#define PLOSS 6 + +#define _POLY1(x, c) ((c)[0] * (x) + (c)[1]) +#define _POLY2(x, c) (_POLY1((x), (c)) * (x) + (c)[2]) +#define _POLY3(x, c) (_POLY2((x), (c)) * (x) + (c)[3]) +#define _POLY4(x, c) (_POLY3((x), (c)) * (x) + (c)[4]) +#define _POLY5(x, c) (_POLY4((x), (c)) * (x) + (c)[5]) +#define _POLY6(x, c) (_POLY5((x), (c)) * (x) + (c)[6]) +#define _POLY7(x, c) (_POLY6((x), (c)) * (x) + (c)[7]) +#define _POLY8(x, c) (_POLY7((x), (c)) * (x) + (c)[8]) +#define _POLY9(x, c) (_POLY8((x), (c)) * (x) + (c)[9]) +#endif /* defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) */ + +/* + * SVID & X/Open + */ +/* BEGIN adopted by C99 */ +extern double erf __P((double)); +extern double erfc __P((double)); +extern double hypot __P((double, double)); +extern double lgamma __P((double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(erf, erfc, hypot) +#pragma does_not_write_global_data(erf, erfc, hypot) +#pragma no_side_effect(erf, erfc, hypot) +#endif + +#if !defined(_STDC_C99) && _XOPEN_SOURCE - 0 < 600 && !defined(__C99FEATURES__) +extern int isnan __P((double)); + +#pragma does_not_read_global_data(isnan) +#pragma does_not_write_global_data(isnan) +#pragma no_side_effect(isnan) +#endif +/* END adopted by C99 */ + +#if defined(__EXTENSIONS__) || _XOPEN_SOURCE - 0 < 600 +extern double gamma __P((double)); /* deprecated; use lgamma */ +#endif +extern double j0 __P((double)); +extern double j1 __P((double)); 
+extern double jn __P((int, double)); +extern double y0 __P((double)); +extern double y1 __P((double)); +extern double yn __P((int, double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(j0, j1, jn, y0, y1, yn) +#pragma does_not_write_global_data(j0, j1, jn, y0, y1, yn) +#pragma no_side_effect(j0, j1, jn, y0, y1, yn) +#endif +#if defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) || \ + _XOPEN_SOURCE - 0 >= 500 || \ + defined(_XOPEN_SOURCE) && _XOPEN_SOURCE_EXTENDED - 0 == 1 +/* + * SVID & XPG 4.2/5 + */ +extern double scalb __P((double, double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(scalb) +#pragma does_not_write_global_data(scalb) +#pragma no_side_effect(scalb) +#endif + +/* BEGIN adopted by C99 */ +extern double acosh __P((double)); +extern double asinh __P((double)); +extern double atanh __P((double)); +extern double cbrt __P((double)); +extern double logb __P((double)); +extern double nextafter __P((double, double)); +extern double remainder __P((double, double)); + +/* + * XPG 4.2/5 + */ +extern double expm1 __P((double)); +extern int ilogb __P((double)); +extern double log1p __P((double)); +extern double rint __P((double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(acosh, asinh, atanh, cbrt) +#pragma does_not_read_global_data(logb, nextafter, remainder) +#pragma does_not_read_global_data(expm1, ilogb, log1p, rint) +#pragma does_not_write_global_data(acosh, asinh, atanh, cbrt) +#pragma does_not_write_global_data(logb, nextafter, remainder) +#pragma does_not_write_global_data(expm1, ilogb, log1p, rint) +#pragma no_side_effect(acosh, asinh, atanh, cbrt) +#pragma no_side_effect(logb, nextafter, remainder) +#pragma no_side_effect(expm1, ilogb, log1p, rint) +#endif +/* END adopted by C99 */ +#endif /* defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) || ... 
*/ + +#if defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) +/* + * SVID + */ +extern int matherr __P((struct exception *)); + +/* + * IEEE Test Vector + */ +extern double significand __P((double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(significand) +#pragma does_not_write_global_data(significand) +#pragma no_side_effect(significand) +#endif + +extern int signgamf; /* deprecated; use signgam */ +extern int signgaml; /* deprecated; use signgam */ + +extern int isnanf __P((float)); +extern int isnanl __P((long double)); +extern float gammaf __P((float)); /* deprecated; use lgammaf */ +extern float gammaf_r __P((float, int *)); /* deprecated; use lgammaf_r */ +extern float j0f __P((float)); +extern float j1f __P((float)); +extern float jnf __P((int, float)); +extern float lgammaf_r __P((float, int *)); +extern float scalbf __P((float, float)); +extern float significandf __P((float)); +extern float y0f __P((float)); +extern float y1f __P((float)); +extern float ynf __P((int, float)); +extern long double gammal __P((long double)); /* deprecated; use lgammal */ +extern long double gammal_r __P((long double, int *)); /* deprecated */ +extern long double j0l __P((long double)); +extern long double j1l __P((long double)); +extern long double jnl __P((int, long double)); +extern long double lgammal_r __P((long double, int *)); +extern long double scalbl __P((long double, long double)); +extern long double significandl __P((long double)); +extern long double y0l __P((long double)); +extern long double y1l __P((long double)); +extern long double ynl __P((int, long double)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(isnanf, isnanl) +#pragma does_not_write_global_data(isnanf, isnanl) +#pragma no_side_effect(isnanf, isnanl) +#pragma does_not_read_global_data(gammaf_r, j0f, j1f, jnf, lgammaf_r, scalbf) +#pragma does_not_read_global_data(significandf, y0f, y1f, ynf) +#pragma does_not_write_global_data(j0f, 
j1f, jnf, scalbf) +#pragma does_not_write_global_data(significandf, y0f, y1f, ynf) +#pragma no_side_effect(j0f, j1f, jnf, scalbf) +#pragma no_side_effect(significandf, y0f, y1f, ynf) +#pragma does_not_read_global_data(gammal_r, j0l, j1l, jnl, lgammal_r, scalbl) +#pragma does_not_read_global_data(significandl, y0l, y1l, ynl) +#pragma does_not_write_global_data(j0l, j1l, jnl, scalbl) +#pragma does_not_write_global_data(significandl, y0l, y1l, ynl) +#pragma no_side_effect(j0l, j1l, jnl, scalbl) +#pragma no_side_effect(significandl, y0l, y1l, ynl) +#endif + +/* + * for sin+cos->sincos transformation + */ +extern void sincos __P((double, double *, double *)); +extern void sincosf __P((float, float *, float *)); +extern void sincosl __P((long double, long double *, long double *)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(sincos, sincosf, sincosl) +#endif + +/* BEGIN adopted by C99 */ +/* + * Functions callable from C, intended to support IEEE arithmetic. + */ +extern double copysign __P((double, double)); +extern double scalbn __P((double, int)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(copysign, scalbn) +#pragma does_not_write_global_data(copysign, scalbn) +#pragma no_side_effect(copysign, scalbn) +#endif +/* END adopted by C99 */ + +/* + * Reentrant version of gamma & lgamma; passes signgam back by reference + * as the second argument; user must allocate space for signgam. 
+ */ +extern double gamma_r __P((double, int *)); /* deprecated; use lgamma_r */ +extern double lgamma_r __P((double, int *)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(gamma_r, lgamma_r) +#endif + +/* BEGIN adopted by C99 */ +extern float modff __P((float, float *)); + +#if defined(__MATHERR_ERRNO_DONTCARE) +#pragma does_not_read_global_data(modff) +#endif +/* END adopted by C99 */ + +#if defined(__EXTENSIONS__) || !defined(__cplusplus) +#include <floatingpoint.h> /* C always; C++ only with __EXTENSIONS__ */ +#endif +#endif /* defined(__EXTENSIONS__) || !defined(_XOPEN_SOURCE) */ +#endif /* defined(__EXTENSIONS__) || defined(_XOPEN_SOURCE) || ... */ + +#if defined(__cplusplus) && defined(__GNUC__) +#undef exception +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _MATH_H */ diff --git a/usr/src/libm/inc/sys/ieeefp.h b/usr/src/libm/inc/sys/ieeefp.h new file mode 100644 index 0000000..03b1585 --- /dev/null +++ b/usr/src/libm/inc/sys/ieeefp.h @@ -0,0 +1,117 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#ifndef _SYS_IEEEFP_H +#define _SYS_IEEEFP_H + +#pragma ident "@(#)ieeefp.h 2.13 05/10/06 SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Sun types for IEEE floating point; the enumerator values below are architecture-specific. + */ +#if defined(__sparc) /* SPARC numbering */ +enum fp_direction_type { /* rounding direction */ + fp_nearest = 0, + fp_tozero = 1, + fp_positive = 2, + fp_negative = 3 +}; + +enum fp_precision_type { /* extended rounding precision */ + fp_extended = 0, + fp_single = 1, + fp_double = 2, + fp_precision_3 = 3 +}; + +enum fp_exception_type { /* exceptions according to bit number */ + fp_inexact = 0, + fp_division = 1, + fp_underflow = 2, + fp_overflow = 3, + fp_invalid = 4 +}; + +enum fp_trap_enable_type { /* trap enable bits according to bit number */ + fp_trap_inexact = 0, + fp_trap_division = 1, + fp_trap_underflow = 2, + fp_trap_overflow = 3, + fp_trap_invalid = 4 +}; +#endif /* defined(__sparc) */ + +#if defined(__i386) || defined(__amd64) /* x86 numbering: differs from SPARC and adds the denormalized entries */ +enum fp_direction_type { /* rounding direction */ + fp_nearest = 0, + fp_negative = 1, + fp_positive = 2, + fp_tozero = 3 +}; + +enum fp_precision_type { /* extended rounding precision */ + fp_single = 0, + fp_precision_3 = 1, + fp_double = 2, + fp_extended = 3 +}; + +enum fp_exception_type { /* exceptions according to bit number */ + fp_invalid = 0, + fp_denormalized = 1, + fp_division = 2, + fp_overflow = 3, + fp_underflow = 4, + fp_inexact = 5 +}; + +enum fp_trap_enable_type { /* trap enable bits according to bit number */ + fp_trap_invalid = 0, + fp_trap_denormalized = 1, + fp_trap_division = 2, + fp_trap_overflow = 3, + fp_trap_underflow = 4, + fp_trap_inexact = 5 +}; +#endif /* defined(__i386) || defined(__amd64) */ + +enum fp_class_type { /* floating-point classes; declared outside the per-architecture blocks */ + fp_zero = 0, + fp_subnormal = 1, + fp_normal = 2, + fp_infinity = 3, + fp_quiet = 4, + fp_signaling = 5 +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_IEEEFP_H */ diff --git a/usr/src/libm/inc/tgmath.h b/usr/src/libm/inc/tgmath.h new file mode 100644 index 0000000..c59311c ---
/dev/null +++ b/usr/src/libm/inc/tgmath.h @@ -0,0 +1,170 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _TGMATH_H +#define _TGMATH_H + +#pragma ident "@(#)tgmath.h 1.8 05/10/06 SMI" + +#if !defined(__cplusplus) + +#include <math.h> /* real-floating functions wrapped below */ +#include <complex.h> /* complex functions wrapped below */ + +/* + * real-floating and complex + */ +#undef acos +#define acos(x) __tgmath_acos(x) +#undef asin +#define asin(x) __tgmath_asin(x) +#undef atan +#define atan(x) __tgmath_atan(x) +#undef acosh +#define acosh(x) __tgmath_acosh(x) +#undef asinh +#define asinh(x) __tgmath_asinh(x) +#undef atanh +#define atanh(x) __tgmath_atanh(x) +#undef cos +#define cos(x) __tgmath_cos(x) +#undef sin +#define sin(x) __tgmath_sin(x) +#undef tan +#define tan(x) __tgmath_tan(x) +#undef cosh +#define cosh(x) __tgmath_cosh(x) +#undef sinh +#define sinh(x) __tgmath_sinh(x) +#undef tanh +#define tanh(x) __tgmath_tanh(x) +#undef exp +#define exp(x) __tgmath_exp(x) +#undef log +#define log(x) __tgmath_log(x) +#undef pow +#define pow(x, y) __tgmath_pow(x, y) +#undef sqrt +#define sqrt(x) __tgmath_sqrt(x) +#undef fabs +#define fabs(x) __tgmath_fabs(x) + +/* + * real-floating only + */ +#undef atan2 +#define atan2(y, x) __tgmath_atan2(y, x) +#undef cbrt +#define cbrt(x) __tgmath_cbrt(x) +#undef ceil +#define ceil(x) __tgmath_ceil(x) +#undef copysign +#define copysign(x, y) __tgmath_copysign(x, y) +#undef erf +#define erf(x) __tgmath_erf(x) +#undef erfc +#define erfc(x) __tgmath_erfc(x) +#undef exp2 +#define exp2(x) __tgmath_exp2(x) +#undef expm1 +#define expm1(x) __tgmath_expm1(x) +#undef fdim +#define fdim(x, y) __tgmath_fdim(x, y) +#undef floor +#define floor(x) __tgmath_floor(x) +#undef fma +#define fma(x, y, z) __tgmath_fma(x, y, z) +#undef fmax +#define fmax(x, y) __tgmath_fmax(x, y) +#undef fmin +#define fmin(x, y) __tgmath_fmin(x, y) +#undef fmod +#define fmod(x, y) __tgmath_fmod(x, y) +#undef frexp +#define frexp(x, ip) __tgmath_frexp(x, ip) +#undef hypot +#define hypot(x, y) __tgmath_hypot(x, y) +#undef ilogb +#define ilogb(x) __tgmath_ilogb(x) +#undef ldexp +#define ldexp(x, i) __tgmath_ldexp(x, i) +#undef lgamma +#define lgamma(x)
__tgmath_lgamma(x) +#undef llrint +#define llrint(x) __tgmath_llrint(x) +#undef llround +#define llround(x) __tgmath_llround(x) +#undef log10 +#define log10(x) __tgmath_log10(x) +#undef log1p +#define log1p(x) __tgmath_log1p(x) +#undef log2 +#define log2(x) __tgmath_log2(x) +#undef logb +#define logb(x) __tgmath_logb(x) +#undef lrint +#define lrint(x) __tgmath_lrint(x) +#undef lround +#define lround(x) __tgmath_lround(x) +#undef nearbyint +#define nearbyint(x) __tgmath_nearbyint(x) +#undef nextafter +#define nextafter(x, y) __tgmath_nextafter(x, y) +#undef nexttoward +#define nexttoward(x, y) __tgmath_nexttoward(x, y) +#undef remainder +#define remainder(x, y) __tgmath_remainder(x, y) +#undef remquo +#define remquo(x, y, ip) __tgmath_remquo(x, y, ip) +#undef rint +#define rint(x) __tgmath_rint(x) +#undef round +#define round(x) __tgmath_round(x) +#undef scalbln +#define scalbln(x, l) __tgmath_scalbln(x, l) +#undef scalbn +#define scalbn(x, i) __tgmath_scalbn(x, i) +#undef tgamma +#define tgamma(x) __tgmath_tgamma(x) +#undef trunc +#define trunc(x) __tgmath_trunc(x) + +/* + * complex only + */ +#undef carg +#define carg(x) __tgmath_carg(x) +#undef cimag +#define cimag(x) __tgmath_cimag(x) +#undef conj +#define conj(x) __tgmath_conj(x) +#undef cproj +#define cproj(x) __tgmath_cproj(x) +#undef creal +#define creal(x) __tgmath_creal(x) + +#endif /* !defined(__cplusplus) */ + +#endif /* _TGMATH_H */ diff --git a/usr/src/libm/src/C/_SVID_error.c b/usr/src/libm/src/C/_SVID_error.c new file mode 100644 index 0000000..3443657 --- /dev/null +++ b/usr/src/libm/src/C/_SVID_error.c @@ -0,0 +1,978 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_SVID_error.c 1.75 06/01/23 SMI" + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#include +#include /* DBL_MAX, DBL_MIN */ +#include /* write */ +#if defined(__i386) || defined(i386) +#include +#undef fp_class +#define fp_class fpclass +#define fp_quiet FP_QNAN +#endif +#include +#undef fflush +#include + +/* INDENT OFF */ +/* + * Report libm exception error according to System V Interface Definition + * (SVID). 
+ * Error mapping: + * 1 -- acos(|x|>1) + * 2 -- asin(|x|>1) + * 3 -- atan2(+-0,+-0) + * 4 -- hypot overflow + * 5 -- cosh overflow + * 6 -- exp overflow + * 7 -- exp underflow + * 8 -- y0(0) + * 9 -- y0(-ve) + * 10-- y1(0) + * 11-- y1(-ve) + * 12-- yn(0) + * 13-- yn(-ve) + * 14-- lgamma(finite) overflow + * 15-- lgamma(-integer) + * 16-- log(0) + * 17-- log(x<0) + * 18-- log10(0) + * 19-- log10(x<0) + * 20-- pow(0.0,0.0) + * 21-- pow(x,y) overflow + * 22-- pow(x,y) underflow + * 23-- pow(0,negative) + * 24-- pow(neg,non-integral) + * 25-- sinh(finite) overflow + * 26-- sqrt(negative) + * 27-- fmod(x,0) + * 28-- remainder(x,0) + * 29-- acosh(x<1) + * 30-- atanh(|x|>1) + * 31-- atanh(|x|=1) + * 32-- scalb overflow + * 33-- scalb underflow + * 34-- j0(|x|>X_TLOSS) + * 35-- y0(x>X_TLOSS) + * 36-- j1(|x|>X_TLOSS) + * 37-- y1(x>X_TLOSS) + * 38-- jn(|x|>X_TLOSS, n) + * 39-- yn(x>X_TLOSS, n) + * 40-- gamma(finite) overflow + * 41-- gamma(-integer) + * 42-- pow(NaN,0.0) return NaN for SVID/XOPEN + * 43-- log1p(-1) + * 44-- log1p(x<-1) + * 45-- logb(0) + * 46-- nextafter overflow + * 47-- scalb(x,inf) + */ +/* INDENT ON */ + +static double setexception(int, double); + +static const union { + unsigned x[2]; + double d; +} C[] = { +#ifdef _LITTLE_ENDIAN + { 0xffffffff, 0x7fffffff }, + { 0x54442d18, 0x400921fb }, +#else + { 0x7fffffff, 0xffffffff }, + { 0x400921fb, 0x54442d18 }, +#endif +}; + +#define NaN C[0].d +#define PI_RZ C[1].d + +#define __HI(x) ((unsigned *)&x)[HIWORD] +#define __LO(x) ((unsigned *)&x)[LOWORD] +#undef Inf +#define Inf HUGE_VAL + +double +_SVID_libm_err(double x, double y, int type) { + struct exception exc; + double t, w, ieee_retval; + enum version lib_version = _lib_version; + int iy; + + /* force libm_ieee behavior in SUSv3 mode */ + if ((__xpg6 & _C99SUSv3_math_errexcept) != 0) + lib_version = libm_ieee; + if (lib_version == c_issue_4) { + (void) fflush(stdout); + } + exc.arg1 = x; + exc.arg2 = y; + switch (type) { + case 1: + /* acos(|x|>1) */ + 
exc.type = DOMAIN; + exc.name = "acos"; + ieee_retval = setexception(3, 1.0); + exc.retval = 0.0; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "acos: DOMAIN error\n", 19); + } + errno = EDOM; + } + break; + case 2: + /* asin(|x|>1) */ + exc.type = DOMAIN; + exc.name = "asin"; + exc.retval = 0.0; + ieee_retval = setexception(3, 1.0); + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "asin: DOMAIN error\n", 19); + } + errno = EDOM; + } + break; + case 3: + /* atan2(+-0,+-0) */ + exc.arg1 = y; + exc.arg2 = x; + exc.type = DOMAIN; + exc.name = "atan2"; + ieee_retval = copysign(1.0, x) == 1.0 ? y : + copysign(PI_RZ + DBL_MIN, y); + exc.retval = 0.0; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "atan2: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 4: + /* hypot(finite,finite) overflow */ + exc.type = OVERFLOW; + exc.name = "hypot"; + ieee_retval = Inf; + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 5: + /* cosh(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "cosh"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 6: + /* exp(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "exp"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 7: + /* exp(finite) underflow */ + 
exc.type = UNDERFLOW; + exc.name = "exp"; + ieee_retval = setexception(1, 1.0); + exc.retval = 0.0; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 8: + /* y0(0) = -inf */ + exc.type = DOMAIN; /* should be SING for IEEE */ + exc.name = "y0"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y0: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 9: + /* y0(x<0) = NaN */ + exc.type = DOMAIN; + exc.name = "y0"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y0: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 10: + /* y1(0) = -inf */ + exc.type = DOMAIN; /* should be SING for IEEE */ + exc.name = "y1"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y1: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 11: + /* y1(x<0) = NaN */ + exc.type = DOMAIN; + exc.name = "y1"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "y1: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 12: + /* yn(n,0) = -inf */ + exc.type = DOMAIN; /* should be SING for IEEE */ + exc.name = "yn"; + ieee_retval = setexception(0, -1.0); + if (lib_version == 
c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "yn: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 13: + /* yn(x<0) = NaN */ + exc.type = DOMAIN; + exc.name = "yn"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "yn: DOMAIN error\n", 17); + } + errno = EDOM; + } + break; + case 14: + /* lgamma(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "lgamma"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 15: + /* lgamma(-integer) or lgamma(0) */ + exc.type = SING; + exc.name = "lgamma"; + ieee_retval = setexception(0, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "lgamma: SING error\n", 19); + } + errno = EDOM; + } + break; + case 16: + /* log(0) */ + exc.type = SING; + exc.name = "log"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log: SING error\n", 16); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 17: + /* log(x<0) */ + exc.type = DOMAIN; + exc.name = "log"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if 
(lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log: DOMAIN error\n", 18); + } + errno = EDOM; + } + break; + case 18: + /* log10(0) */ + exc.type = SING; + exc.name = "log10"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log10: SING error\n", 18); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 19: + /* log10(x<0) */ + exc.type = DOMAIN; + exc.name = "log10"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log10: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 20: + /* pow(0.0,0.0) */ + /* error only if lib_version == c_issue_4 */ + exc.type = DOMAIN; + exc.name = "pow"; + exc.retval = 0.0; + ieee_retval = 1.0; + if (lib_version != c_issue_4) { + exc.retval = 1.0; + } else if (!matherr(&exc)) { + (void) write(2, "pow(0,0): DOMAIN error\n", 23); + errno = EDOM; + } + break; + case 21: + /* pow(x,y) overflow */ + exc.type = OVERFLOW; + exc.name = "pow"; + exc.retval = (lib_version == c_issue_4)? 
HUGE : HUGE_VAL; + if (signbit(x)) { + t = rint(y); + if (t == y) { + w = rint(0.5 * y); + if (t != w + w) { /* y is odd */ + exc.retval = -exc.retval; + } + } + } + ieee_retval = setexception(2, exc.retval); + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 22: + /* pow(x,y) underflow */ + exc.type = UNDERFLOW; + exc.name = "pow"; + exc.retval = 0.0; + if (signbit(x)) { + t = rint(y); + if (t == y) { + w = rint(0.5 * y); + if (t != w + w) /* y is odd */ + exc.retval = -exc.retval; + } + } + ieee_retval = setexception(1, exc.retval); + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 23: + /* (+-0)**neg */ + exc.type = DOMAIN; + exc.name = "pow"; + ieee_retval = setexception(0, 1.0); + { + int ahy, k, j, yisint, ly, hx; + /* INDENT OFF */ + /* + * determine if y is an odd int when x = -0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + /* INDENT ON */ + hx = __HI(x); + ahy = __HI(y)&0x7fffffff; + ly = __LO(y); + + yisint = 0; + if (ahy >= 0x43400000) { + yisint = 2; /* even integer y */ + } else if (ahy >= 0x3ff00000) { + k = (ahy >> 20) - 0x3ff; /* exponent */ + if (k > 20) { + j = ly >> (52 - k); + if ((j << (52 - k)) == ly) + yisint = 2 - (j & 1); + } else if (ly == 0) { + j = ahy >> (20 - k); + if ((j << (20 - k)) == ahy) + yisint = 2 - (j & 1); + } + } + if (hx < 0 && yisint == 1) + ieee_retval = -ieee_retval; + } + if (lib_version == c_issue_4) + exc.retval = 0.0; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "pow(0,neg): DOMAIN error\n", + 25); + } + errno = EDOM; + } + break; + case 24: + /* neg**non-integral */ + exc.type = DOMAIN; + exc.name = "pow"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = 0.0; + else + exc.retval = ieee_retval; /* X/Open allow NaN */ + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, + "neg**non-integral: DOMAIN error\n", 32); + } + errno = EDOM; + } + break; + case 25: + /* sinh(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "sinh"; + ieee_retval = copysign(Inf, x); + if (lib_version == c_issue_4) + exc.retval = x > 0.0 ? HUGE : -HUGE; + else + exc.retval = x > 0.0 ? 
HUGE_VAL : -HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 26: + /* sqrt(x<0) */ + exc.type = DOMAIN; + exc.name = "sqrt"; + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = 0.0; + else + exc.retval = ieee_retval; /* quiet NaN */ + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "sqrt: DOMAIN error\n", 19); + } + errno = EDOM; + } + break; + case 27: + /* fmod(x,0) */ + exc.type = DOMAIN; + exc.name = "fmod"; + if (fp_class(x) == fp_quiet) + ieee_retval = NaN; + else + ieee_retval = setexception(3, 1.0); + if (lib_version == c_issue_4) + exc.retval = x; + else + exc.retval = ieee_retval; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "fmod: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 28: + /* remainder(x,0) */ + exc.type = DOMAIN; + exc.name = "remainder"; + if (fp_class(x) == fp_quiet) + ieee_retval = NaN; + else + ieee_retval = setexception(3, 1.0); + exc.retval = NaN; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "remainder: DOMAIN error\n", + 24); + } + errno = EDOM; + } + break; + case 29: + /* acosh(x<1) */ + exc.type = DOMAIN; + exc.name = "acosh"; + ieee_retval = setexception(3, 1.0); + exc.retval = NaN; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "acosh: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 30: + /* atanh(|x|>1) */ + exc.type = DOMAIN; + exc.name = "atanh"; + ieee_retval = setexception(3, 1.0); + exc.retval = NaN; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "atanh: 
DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 31: + /* atanh(|x|=1) */ + exc.type = SING; + exc.name = "atanh"; + ieee_retval = setexception(0, x); + exc.retval = ieee_retval; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "atanh: SING error\n", 18); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 32: + /* scalb overflow; SVID also returns +-HUGE_VAL */ + exc.type = OVERFLOW; + exc.name = "scalb"; + ieee_retval = setexception(2, x); + exc.retval = x > 0.0 ? HUGE_VAL : -HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 33: + /* scalb underflow */ + exc.type = UNDERFLOW; + exc.name = "scalb"; + ieee_retval = setexception(1, x); + exc.retval = ieee_retval; /* +-0.0 */ + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 34: + /* j0(|x|>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "j0"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 35: + /* y0(x>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "y0"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 36: + /* j1(|x|>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "j1"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + 
break; + case 37: + /* y1(x>X_TLOSS) */ + exc.type = TLOSS; + exc.name = "y1"; + exc.retval = 0.0; + ieee_retval = y; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 38: + /* jn(|x|>X_TLOSS) */ + /* incorrect ieee value: ieee should never be here */ + exc.type = TLOSS; + exc.name = "jn"; + exc.retval = 0.0; + ieee_retval = 0.0; /* shall not be used */ + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 39: + /* yn(x>X_TLOSS) */ + /* incorrect ieee value: ieee should never be here */ + exc.type = TLOSS; + exc.name = "yn"; + exc.retval = 0.0; + ieee_retval = 0.0; /* shall not be used */ + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, exc.name, 2); + (void) write(2, ": TLOSS error\n", 14); + } + errno = ERANGE; + } + break; + case 40: + /* gamma(finite) overflow */ + exc.type = OVERFLOW; + exc.name = "gamma"; + ieee_retval = setexception(2, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 41: + /* gamma(-integer) or gamma(0) */ + exc.type = SING; + exc.name = "gamma"; + ieee_retval = setexception(0, 1.0); + if (lib_version == c_issue_4) + exc.retval = HUGE; + else + exc.retval = HUGE_VAL; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "gamma: SING error\n", 18); + } + errno = EDOM; + } + break; + case 42: + /* pow(NaN,0.0) */ + /* error if lib_version == c_issue_4 or ansi_1 */ + 
exc.type = DOMAIN; + exc.name = "pow"; + exc.retval = x; + ieee_retval = 1.0; + if (lib_version == strict_ansi) { + exc.retval = 1.0; + } else if (!matherr(&exc)) { + switch (lib_version) { + case c_issue_4: + case ansi_1: + errno = EDOM; + } + } + break; + case 43: + /* log1p(-1) */ + exc.type = SING; + exc.name = "log1p"; + ieee_retval = setexception(0, -1.0); + if (lib_version == c_issue_4) + exc.retval = -HUGE; + else + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) { + errno = ERANGE; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log1p: SING error\n", 18); + errno = EDOM; + } else { + errno = ERANGE; + } + } + break; + case 44: + /* log1p(x<-1) */ + exc.type = DOMAIN; + exc.name = "log1p"; + ieee_retval = setexception(3, 1.0); + exc.retval = ieee_retval; + if (lib_version == strict_ansi) { + errno = EDOM; + } else if (!matherr(&exc)) { + if (lib_version == c_issue_4) { + (void) write(2, "log1p: DOMAIN error\n", 20); + } + errno = EDOM; + } + break; + case 45: + /* logb(0) */ + exc.type = DOMAIN; + exc.name = "logb"; + ieee_retval = setexception(0, -1.0); + exc.retval = -HUGE_VAL; + if (lib_version == strict_ansi) + errno = EDOM; + else if (!matherr(&exc)) + errno = EDOM; + break; + case 46: + /* nextafter overflow */ + exc.type = OVERFLOW; + exc.name = "nextafter"; + /* + * The value as returned by setexception is +/-DBL_MAX in + * round-to-{zero,-/+Inf} mode respectively, which is not + * usable. + */ + (void) setexception(2, x); + ieee_retval = x > 0 ? Inf : -Inf; + exc.retval = x > 0 ? 
HUGE_VAL : -HUGE_VAL; + if (lib_version == strict_ansi) + errno = ERANGE; + else if (!matherr(&exc)) + errno = ERANGE; + break; + case 47: + /* scalb(x,inf) */ + iy = ((int *)&y)[HIWORD]; + if (lib_version == c_issue_4) + /* SVID3: ERANGE in all cases */ + errno = ERANGE; + else if ((x == 0.0 && iy > 0) || (!finite(x) && iy < 0)) + /* EDOM for scalb(0,+inf) or scalb(inf,-inf) */ + errno = EDOM; + exc.retval = ieee_retval = ((iy < 0)? x / -y : x * y); + break; + } + switch (lib_version) { + case c_issue_4: + case ansi_1: + case strict_ansi: + return (exc.retval); + /* NOTREACHED */ + default: + return (ieee_retval); + } + /* NOTREACHED */ +} + +static double +setexception(int n, double x) { + /* + * n = + * 0 division by zero + * 1 underflow + * 2 overflow + * 3 invalid + */ + volatile double one = 1.0, zero = 0.0, retv; + + switch (n) { + case 0: /* division by zero */ + retv = copysign(one / zero, x); + break; + case 1: /* underflow */ + retv = DBL_MIN * copysign(DBL_MIN, x); + break; + case 2: /* overflow */ + retv = DBL_MAX * copysign(DBL_MAX, x); + break; + case 3: /* invalid */ + retv = zero * Inf; /* for Cheetah */ + break; + } + return (retv); +} diff --git a/usr/src/libm/src/C/_TBL_atan.c b/usr/src/libm/src/C/_TBL_atan.c new file mode 100644 index 0000000..a4dbc3a --- /dev/null +++ b/usr/src/libm/src/C/_TBL_atan.c @@ -0,0 +1,137 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)_TBL_atan.c 1.11 06/01/31 SMI"
+
+#include "libm_protos.h"
+
+/*
+ * Let y[j] = _TBL_atan[2j], atan_y[j] = _TBL_atan[2j+1], j = 0, 1, ..., 95.
+ * {y[j], 0 <= j < 96} is a set of break points in (-1/8, 8) chosen so that
+ * the high part of y[j] is very close to 0x3fc08000 + (j << 16),
+ * and atan_y[j] = atan(y[j]) rounded has relative error bounded by 2^-60.
+ *
+ * -- K.C. Ng, 10/17/2004
+ *
+ * Layout: 192 doubles, i.e. the 96 (y[j], atan_y[j]) pairs stored
+ * interleaved, one pair per row below, in increasing order of y[j].
+ *
+ * NOTE(review): every y[j] listed below is positive (roughly 0.129 up
+ * to 7.875), so the interval "(-1/8, 8)" above reads like a typo for
+ * (1/8, 8) -- confirm against the code that indexes this table.
+ */
+
+const double _TBL_atan[] = {
+1.28906287871928065814e-01, 1.28199318484201185697e-01,
+1.36718905591866640714e-01, 1.35876480966603985223e-01,
+1.44531257606217988787e-01, 1.43537301152401930437e-01,
+1.52343679482641575218e-01, 1.51181262880709432750e-01,
+1.60156177403962790562e-01, 1.58807537535115006477e-01,
+1.67968772982362929413e-01, 1.66415323534856884891e-01,
+1.75781211596017922227e-01, 1.74003563682464612583e-01,
+1.83593807762862160082e-01, 1.81571767039387044207e-01,
+1.91406205589629646591e-01, 1.89118806085245338977e-01,
+1.99218440148815872925e-01, 1.96643947167121080355e-01,
+2.07031180070658488157e-01, 2.04147078126891479144e-01,
+2.14843557086546094181e-01, 2.11626624363759674452e-01,
+2.22656308649619494311e-01, 2.19082566659412503185e-01,
+2.30468759807905931858e-01, 2.26513550670145669130e-01,
+2.38281413377399470255e-01, 2.33919360814280885563e-01,
+2.46093763828156536499e-01, 2.41298839969374956382e-01,
+2.57812599322508773092e-01, 2.52318074018685223336e-01,
+2.73437443946477509726e-01, 2.66912935433335718471e-01,
+2.89062532292519769328e-01, 2.81392462451501401688e-01,
+3.04687577351389293767e-01, 2.95751756530947318424e-01,
+3.20312405527377053183e-01, 3.09986305565206343715e-01,
+3.35937715576634265968e-01, 3.24092664204967739749e-01,
+3.51562621385942464247e-01, 3.38066230870244233131e-01,
+3.67187719833070636000e-01, 3.51904019130060419229e-01,
+3.82812538440931826589e-01, 3.65602365234580339859e-01,
+3.98437724467857745658e-01, 3.79158862748537828224e-01,
+4.14062683287296784407e-01, 3.92570291474021892952e-01,
+4.29687654458357937148e-01, 4.05834423459965343284e-01,
+4.45312642848883721847e-01, 4.18949086342842669239e-01,
+4.60937644536906665493e-01, 4.31912354681638355203e-01,
+4.76563149131543906112e-01, 4.44722952952162131623e-01,
+4.92187842452541601812e-01, 4.57378374341803173309e-01,
+5.15624825518001039804e-01, 4.76069192487019954285e-01,
+5.46874516057966109095e-01, 5.00440440618262982753e-01,
+5.78125566624434150675e-01, 5.24180053466007933594e-01,
+6.09375102172641347487e-01, 5.47284455493244337276e-01,
+6.40624936950189516338e-01, 5.69756408779493739303e-01,
+6.71875248719545625775e-01, 5.91599881698465779323e-01,
+7.03124988865964306584e-01, 6.12820194714659649549e-01,
+7.34376295967088421612e-01, 6.33426724884753156175e-01,
+7.65624929092156736310e-01, 6.53426296477277901431e-01,
+7.96874196003358736817e-01, 6.72832055855442590087e-01,
+8.28125565205639735389e-01, 6.91656957129326954714e-01,
+8.59375453355927021448e-01, 7.09911879233846576653e-01,
+8.90625694745052709500e-01, 7.27611720056701827275e-01,
+9.21875110259870345075e-01, 7.44770185320721367361e-01,
+9.53125042657123722201e-01, 7.61402792157321428590e-01,
+9.84374765277631902372e-01, 7.77524191164056688308e-01,
+1.03126494373528343473e+00, 8.00788807142382097481e-01,
+1.09374968909110092952e+00, 8.30144253291031475328e-01,
+1.15625019152505204012e+00, 8.57735575892430546219e-01,
+1.21874985186151341132e+00, 8.83672057048812575886e-01,
+1.28124876006842702836e+00, 9.08066349515326720621e-01,
+1.34375006271148444981e+00, 9.31026566320014126177e-01,
+1.40627222899692072566e+00, 9.52659566341466756967e-01,
+1.46874957658300542285e+00, 9.73037801091363618866e-01,
+1.53124999999999555911e+00, 9.92272112377190040888e-01,
+1.59375089676214143353e+00, 1.01043670320979472876e+00,
+1.65624949800269094524e+00, 1.02760661639661776690e+00,
+1.71874946971376685312e+00, 1.04385296549501305208e+00,
+1.78125111924655166185e+00, 1.05924046784549474864e+00,
+1.84374921332370989013e+00, 1.07382754310190620117e+00,
+1.90625055239083862624e+00, 1.08767078118685489585e+00,
+1.96874992734227549640e+00, 1.10081967347672460278e+00,
+2.06250046973591683042e+00, 1.11934332464931074469e+00,
+2.18749905173933534286e+00, 1.14201813543610697366e+00,
+2.31249933788800232648e+00, 1.16264711873167669864e+00,
+2.43749855191054187742e+00, 1.18147939634549814514e+00,
+2.56251104936881235474e+00, 1.19873002825057639598e+00,
+2.68750036758144528193e+00, 1.21457671610223272296e+00,
+2.81249907059852954916e+00, 1.22918073183895870670e+00,
+2.93749583903062294610e+00, 1.24267599964591468620e+00,
+3.06250108260464948273e+00, 1.25518076906426045980e+00,
+3.18750016629930410517e+00, 1.26679540235591403530e+00,
+3.31250071362610132297e+00, 1.27760948984166233799e+00,
+3.43749999999999333866e+00, 1.28770054149540058575e+00,
+3.56249877589327157423e+00, 1.29713691630583838332e+00,
+3.68750696071718842006e+00, 1.30597947372626776996e+00,
+3.81250023149192607264e+00, 1.31427972905173717777e+00,
+3.93749827850909683846e+00, 1.32208623339324304879e+00,
+4.12500187917697846984e+00, 1.33296050364557672196e+00,
+4.37499759905160701123e+00, 1.34608503917096200553e+00,
+4.62500066729278191957e+00, 1.35785800701782477518e+00,
+4.87499852385410648026e+00, 1.36847463881194641999e+00,
+5.12499918742110072145e+00, 1.37809553833018583191e+00,
+5.37500000000004529710e+00, 1.38685287025772296943e+00,
+5.62499999999991828759e+00, 1.39485670134236627860e+00,
+5.87499417854096694924e+00, 1.40219922327269230777e+00,
+6.12500000000013233858e+00, 1.40895889555647713109e+00,
+6.37499999999991828759e+00, 1.41520149881786494461e+00,
+6.62499933107761584949e+00, 1.42098385532083781868e+00,
+6.87500431528593747288e+00, 1.42635483782722261026e+00,
+7.12499228632883863099e+00, 1.43135612069194451124e+00,
+7.37499257154547205317e+00, 1.43602490820671135907e+00,
+7.62499911873607416624e+00, 1.44039300400460135165e+00,
+7.87500000000018918200e+00, 1.44448820973165936721e+00,
+};
diff --git a/usr/src/libm/src/C/_TBL_exp2.c b/usr/src/libm/src/C/_TBL_exp2.c
new file mode 100644
index 0000000..7b44b96
--- /dev/null
+++ b/usr/src/libm/src/C/_TBL_exp2.c
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)_TBL_exp2.c 1.10 06/01/31 SMI"
+
+#include "libm_protos.h"
+
+const double _TBL_exp2_hi[] = {	/*
+	 * 64 entries, three per row.  Entry j is approximately 2^(j/64)
+	 * for j = 0 .. 63: entry 0 is 1, entry 16 is 2^(1/4) and
+	 * entry 32 is sqrt(2).
+	 */
+ 1.00000000000000000e+00, 1.01088928605170048e+00, 1.02189714865411663e+00,
+ 1.03302487902122841e+00, 1.04427378242741375e+00, 1.05564517836055716e+00,
+ 1.06714040067682370e+00, 1.07876079775711986e+00, 1.09050773266525769e+00,
+ 1.10238258330784089e+00, 1.11438674259589243e+00, 1.12652161860824185e+00,
+ 1.13878863475669156e+00, 1.15118922995298267e+00, 1.16372485877757748e+00,
+ 1.17639699165028122e+00, 1.18920711500272103e+00, 1.20215673145270308e+00,
+ 1.21524735998046896e+00, 1.22848053610687002e+00, 1.24185781207348400e+00,
+ 1.25538075702469110e+00, 1.26905095719173322e+00, 1.28287001607877826e+00,
+ 1.29683955465100964e+00, 1.31096121152476441e+00, 1.32523664315974132e+00,
+ 1.33966752405330292e+00, 1.35425554693689265e+00, 1.36900242297459052e+00,
+ 1.38390988196383202e+00, 1.39897967253831124e+00, 1.41421356237309515e+00,
+ 1.42961333839197002e+00, 1.44518080697704665e+00, 1.46091779418064704e+00,
+ 1.47682614593949935e+00, 1.49290772829126484e+00, 1.50916442759342284e+00,
+ 1.52559815074453820e+00, 1.54221082540794074e+00, 1.55900440023783693e+00,
+ 1.57598084510788650e+00, 1.59314215134226700e+00, 1.61049033194925428e+00,
+ 1.62802742185734783e+00, 1.64575547815396495e+00, 1.66367658032673638e+00,
+ 1.68179283050742900e+00, 1.70010635371852348e+00, 1.71861929812247793e+00,
+ 1.73733383527370622e+00, 1.75625216037329945e+00, 1.77537649252652119e+00,
+ 1.79470907500310717e+00, 1.81425217550039886e+00, 1.83400808640934243e+00,
+ 1.85397912508338547e+00, 1.87416763411029996e+00, 1.89457598158696561e+00,
+ 1.91520656139714740e+00, 1.93606179349229435e+00, 1.95714412417540018e+00,
+ 1.97845602638795093e+00,
+};
+const double _TBL_exp2_lo[] = {	/*
+	 * 64 entries parallel to _TBL_exp2_hi, all of magnitude below
+	 * about 1.2e-16.  Entry j looks like the rounding residual
+	 * 2^(j/64) - _TBL_exp2_hi[j]: for j = 32 the hi entry is
+	 * sqrt(2) rounded up and the lo entry is about -9.67e-17.
+	 * NOTE(review): confirm the hi + lo usage against exp2.c.
+	 */
+ 0.00000000000000000e+00,-1.52347786033685772e-17, 5.10922502897344389e-17,
+ 7.60083887402708849e-18, 8.55188970553796366e-17, 1.75932573877209198e-18,
+-7.89985396684158212e-17,-6.65666043605659260e-17,-3.04678207981247115e-17,
+ 5.26603687157069439e-17, 1.04102784568455710e-16, 5.16585675879545612e-17,
+ 8.91281267602540778e-17, 3.25071021886382721e-17, 3.82920483692409350e-17,
+ 5.55420325421807896e-17, 3.98201523146564611e-17, 6.64498149925230124e-17,
+-7.71263069268148813e-17,-1.89878163130252995e-17, 4.65802759183693679e-17,
+-6.71138982129687842e-18, 2.66793213134218610e-18, 1.71359491824356097e-17,
+ 2.53825027948883150e-17,-7.18153613551945386e-17,-2.85873121003886076e-17,
+ 8.92728259483173198e-17, 7.70094837980298946e-17, 9.59379791911884877e-17,
+-6.77051165879478629e-17,-9.61421320905132307e-17,-9.66729331345291345e-17,
+-1.20316424890536552e-17,-3.02375813499398732e-17,-5.60037718607521580e-17,
+-3.48399455689279580e-17, 1.41929201542840358e-17,-1.01645532775429504e-16,
+ 1.11795187801605699e-16, 7.94983480969762086e-17, 3.78120705335752750e-17,
+-1.01369164712783040e-17,-1.00944065423119625e-16, 2.47071925697978879e-17,
+-6.71295508470708409e-17,-1.01256799136747726e-16, 5.89099269671309967e-17,
+ 8.19901002058149652e-17,-8.02371937039770025e-18,-1.85138041826311099e-17,
+ 3.16438929929295695e-17, 2.96014069544887331e-17, 6.42973179655657203e-17,
+ 1.82274584279120868e-17,-9.96953153892034882e-17, 3.28310722424562659e-17,
+ 9.76188749072759354e-17,-6.12276341300414256e-17, 3.40340353521652967e-17,
+-1.06199460561959626e-16, 1.03323859606763257e-16, 8.96076779103666777e-17,
+ 4.03887531092781666e-17,
+};
diff --git a/usr/src/libm/src/C/_TBL_ipio2.c b/usr/src/libm/src/C/_TBL_ipio2.c
new file mode 100644
index 0000000..8419b96
--- /dev/null
+++ b/usr/src/libm/src/C/_TBL_ipio2.c
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)_TBL_ipio2.c 1.10 06/01/31 SMI"
+
+#include "libm_protos.h"
+
+/*
+ * Table of constants for 2/pi, used in __rem_pio2 (trigl) function.
+ */
+
+/*
+ * 396 Hex digits (476 decimal) of 2/pi
+ *
+ * Stored as 66 ints, each holding 24 bits (6 hex digits) of the
+ * fraction, leading digits first: 2/pi = 0x0.A2F983 6E4E44 1529FC ...
+ */
+const int _TBL_ipio2_inf[] = {
+0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
+0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
+0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
+0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
+0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
+0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
+0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
+0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
+0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
+0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
+0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
+};
+
+#if 0 /* remove from SVR4 */
+/*
+ * 396 Hex digits (476 decimal) of 2/PI, PI = 66 bits of pi
+ *
+ * Same 66 x 24-bit layout as _TBL_ipio2_inf; the first two words are
+ * identical and the digits diverge from the third word on.  Compiled
+ * out by the enclosing #if 0.
+ */
+const int _TBL_ipio2_66[] = {
+0xA2F983, 0x6E4E44, 0x152A00, 0x062BC4, 0x0DA276, 0xBED4C1,
+0xFDF905, 0x5CD5BA, 0x767CEC, 0x1F80D6, 0xC26053, 0x3A0070,
+0x107C2A, 0xF68EE9, 0x687B7A, 0xB990AA, 0x38DE4B, 0x96CFF3,
+0x92735E, 0x8B34F6, 0x195BFC, 0x27F88E, 0xA93EC5, 0x3958A5,
+0x3E5D13, 0x1C55A8, 0x5B4A8B, 0xA42E04, 0x12D105, 0x35580D,
+0xF62347, 0x450900, 0xB98BCA, 0xF7E8A4, 0xA2E5D5, 0x69BC52,
+0xF0381D, 0x1A0A88, 0xFE8714, 0x7F6735, 0xBB7D4D, 0xC6F642,
+0xB27E80, 0x6191BF, 0xB6B750, 0x52776E, 0xD60FD0, 0x607DCC,
+0x68BFAF, 0xED69FC, 0x6EB305, 0xD2557D, 0x25BDFB, 0x3E4AA1,
+0x84472D, 0x8B0376, 0xF77740, 0xD290DF, 0x15EC8C, 0x45A5C3,
+0x6181EF, 0xC5E7E8, 0xD8909C, 0xF62144, 0x298428, 0x6E5D9D,
+};
+
+/*
+ * 396 Hex digits (476 decimal) of 2/PI, PI = 53 bits of pi
+ *
+ * Same 66 x 24-bit layout as _TBL_ipio2_inf; the first two words are
+ * identical and the digits diverge from the third word on.  Compiled
+ * out by the enclosing #if 0.
+ */
+const int _TBL_ipio2_53[] = {
+0xA2F983, 0x6E4E44, 0x16F3C4, 0xEA69B5, 0xD3E131, 0x60E1D2,
+0xD7982A, 0xC031F5, 0xD67BCC, 0xFD1375, 0x60919B, 0x3FA0BB,
+0x612ABB, 0x714F9B, 0x03DA8A, 0xC05948, 0xD023F4, 0x5AFA37,
+0x51631D, 0xCD7A90, 0xC0474A, 0xF6A6F3, 0x1A52E1, 0x5C3927,
+0x3ADA45, 0x4E2DB5, 0x64E8C4, 0x274A5B, 0xB74ADC, 0x1E6591,
+0x2822BE, 0x4771F5, 0x12A63F, 0x83BD35, 0x2488CA, 0x1FE1BE,
+0x42C21A, 0x682569, 0x2AFB91, 0x68ADE1, 0x4A42E5, 0x9BE357,
+0xB79675, 0xCE998A, 0x83AF8B, 0xE645E6, 0xDF0789, 0x9E9747,
+0xAA15FF, 0x358C3F, 0xAF3141, 0x72A3F7, 0x2BF1D4, 0xF3AD96,
+0x7D759F, 0x257FCE, 0x29FB69, 0xB1B42C, 0xC32DE1, 0x8C0BBD,
+0x31EC2F, 0x942026, 0x85DCE7, 0x653FF3, 0x136FA7, 0x0D7A5F,
+};
+#endif
diff --git a/usr/src/libm/src/C/_TBL_log.c b/usr/src/libm/src/C/_TBL_log.c
new file mode 100644
index 0000000..12d9f73
--- /dev/null
+++ b/usr/src/libm/src/C/_TBL_log.c
@@ -0,0 +1,298 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_log.c 1.11 06/01/31 SMI" + +#include "libm_protos.h" + +/* + * Table of constants for log, log2, and log10 + * By K.C. Ng, November 21, 2004 + * + * Y[j], 1/Y[j], log(Y[j]) for j = 0 to 255 + * where HIWORD(Y[j]) ~ 0x3fb84000 + (j<<15) + * That is, the 256 Y[j] are spaced logarithmically between 0.09375 and 24, and + * each is chosen so that 1/Y[j] and log(Y[j]) are very close to an IEEE + * double. In addition, each log(Y[j]) has 3 trailing zeros. + */ +const double _TBL_log[] = { +9.47265623608246343e-02, 1.05567010464380857e+01, -2.35676082856530300e+00, +9.66796869131412717e-02, 1.03434344062203838e+01, -2.33635196153499791e+00, +9.86328118117651004e-02, 1.01386139321308306e+01, -2.31635129573594156e+00, +1.00585936733578435e-01, 9.94174764856737347e+00, -2.29674282498938709e+00, +1.02539062499949152e-01, 9.75238095238578850e+00, -2.27751145544242561e+00, +1.04492186859904843e-01, 9.57009351656812157e+00, -2.25864297726331742e+00, +1.06445312294918631e-01, 9.39449543094380957e+00, -2.24012392529694537e+00, +1.08398437050250693e-01, 9.22522526350104144e+00, -2.22194160843615762e+00, +1.10351562442130582e-01, 9.06194690740703912e+00, -2.20408398741152212e+00, +1.12304686894746625e-01, 8.90434787407592943e+00, -2.18653968262558962e+00, +1.14257811990227776e-01, 8.75213679118525256e+00, -2.16929787526329321e+00, +1.16210936696872255e-01, 8.60504207627572093e+00, -2.15234831939887172e+00, +1.18164061975360682e-01, 8.46280995492959498e+00, 
-2.13568126444263484e+00, +1.20117187499996322e-01, 8.32520325203277523e+00, -2.11928745022706622e+00, +1.22070312499895098e-01, 8.19200000000703987e+00, -2.10315806829801133e+00, +1.24023436774175100e-01, 8.06299217317146599e+00, -2.08728472499318229e+00, +1.26953123746900931e-01, 7.87692315467275872e+00, -2.06393736501443570e+00, +1.30859374098123454e-01, 7.64179109744297769e+00, -2.03363201254049386e+00, +1.34765623780674720e-01, 7.42028992220936967e+00, -2.00421812948999545e+00, +1.38671874242985771e-01, 7.21126764500034501e+00, -1.97564475345722457e+00, +1.42578124148616536e-01, 7.01369867201821506e+00, -1.94786518986246371e+00, +1.46484374166731490e-01, 6.82666670549979404e+00, -1.92083651719164372e+00, +1.50390624434435488e-01, 6.64935067435644189e+00, -1.89451920694646070e+00, +1.54296874339723084e-01, 6.48101268596180624e+00, -1.86887677685174936e+00, +1.58203124999987427e-01, 6.32098765432149001e+00, -1.84387547036714849e+00, +1.62109374999815342e-01, 6.16867469880220742e+00, -1.81948401724404896e+00, +1.66015624243955634e-01, 6.02352943919619310e+00, -1.79567337310324682e+00, +1.69921874302298687e-01, 5.88505749542848644e+00, -1.77241651049093640e+00, +1.73828124315277527e-01, 5.75280901142480605e+00, -1.74968825924644555e+00, +1.77734374286237506e-01, 5.62637364896854919e+00, -1.72746512253855222e+00, +1.81640624146994889e-01, 5.50537636993989743e+00, -1.70572513658236602e+00, +1.85546874316304788e-01, 5.38947370406942916e+00, -1.68444773712372431e+00, +1.89453124405085355e-01, 5.27835053203882509e+00, -1.66361364967629299e+00, +1.93359374570531595e-01, 5.17171718320401652e+00, -1.64320477712600699e+00, +1.97265624263334577e-01, 5.06930694962380368e+00, -1.62320411193263148e+00, +2.01171874086291030e-01, 4.97087380898513764e+00, -1.60359564135180399e+00, +2.05078123979995308e-01, 4.87619050044336610e+00, -1.58436427985572159e+00, +2.08984373896073439e-01, 4.78504675424820736e+00, -1.56549579585994181e+00, +2.12890623963011144e-01, 
4.69724772930228163e+00, -1.54697674768135762e+00, +2.16796874723889421e-01, 4.61261261848719517e+00, -1.52879442500076479e+00, +2.20703124198150608e-01, 4.53097346778917753e+00, -1.51093680996032553e+00, +2.24609374375627030e-01, 4.45217392541970725e+00, -1.49339249945607477e+00, +2.28515625000094036e-01, 4.37606837606657528e+00, -1.47615069024134016e+00, +2.32421873924349737e-01, 4.30252102831546246e+00, -1.45920113655598627e+00, +2.36328123935216378e-01, 4.23140497774241098e+00, -1.44253408394829741e+00, +2.40234375000066919e-01, 4.16260162601510064e+00, -1.42614026966681173e+00, +2.44140623863132178e-01, 4.09600001907347711e+00, -1.41001089239381727e+00, +2.48046874999894917e-01, 4.03149606299383390e+00, -1.39413753858134015e+00, +2.53906248590769879e-01, 3.93846156032078243e+00, -1.37079018013412401e+00, +2.61718748558906533e-01, 3.82089554342693294e+00, -1.34048483059486401e+00, +2.69531249159214337e-01, 3.71014493910979404e+00, -1.31107094300173976e+00, +2.77343749428383191e-01, 3.60563381024826013e+00, -1.28249756949928795e+00, +2.85156249289339359e-01, 3.50684932380819214e+00, -1.25471800582335113e+00, +2.92968749999700462e-01, 3.41333333333682321e+00, -1.22768933094427446e+00, +3.00781248554318814e-01, 3.32467534065511261e+00, -1.20137202743229921e+00, +3.08593748521894806e-01, 3.24050634463533127e+00, -1.17572959680235023e+00, +3.16406249999639899e-01, 3.16049382716409077e+00, -1.15072828980826181e+00, +3.24218749999785061e-01, 3.08433734939963511e+00, -1.12633683668362750e+00, +3.32031248841858584e-01, 3.01176471638753718e+00, -1.10252619147729547e+00, +3.39843749265406558e-01, 2.94252874199264314e+00, -1.07926932798654107e+00, +3.47656249999834799e-01, 2.87640449438338930e+00, -1.05654107474789782e+00, +3.55468749999899247e-01, 2.81318681318761055e+00, -1.03431793796299587e+00, +3.63281249999864997e-01, 2.75268817204403371e+00, -1.01257795132667816e+00, +3.71093749064121570e-01, 2.69473684890124421e+00, -9.91300555400967731e-01, 
+3.78906249999751032e-01, 2.63917525773369288e+00, -9.70466465976836723e-01, +3.86718748879039009e-01, 2.58585859335407608e+00, -9.50057597243619156e-01, +3.94531249999987899e-01, 2.53465346534661240e+00, -9.30056927638333697e-01, +4.02343749999485523e-01, 2.48543689320706163e+00, -9.10448456251205407e-01, +4.10156249578856991e-01, 2.43809524059864202e+00, -8.91217095348825872e-01, +4.17968749447214571e-01, 2.39252336765021800e+00, -8.72348611340208357e-01, +4.25781248601723117e-01, 2.34862386092395203e+00, -8.53829565534445223e-01, +4.33593749393073047e-01, 2.30630630953458038e+00, -8.35647244566987801e-01, +4.41406248572254134e-01, 2.26548673299152270e+00, -8.17789629001761220e-01, +4.49218749348472501e-01, 2.22608695975035964e+00, -8.00245317566669279e-01, +4.57031249277175089e-01, 2.18803419149470768e+00, -7.83003511263371976e-01, +4.64843748529596368e-01, 2.15126051100659366e+00, -7.66053954531254355e-01, +4.72656248830947701e-01, 2.11570248457175136e+00, -7.49386901356188240e-01, +4.80468748609962581e-01, 2.08130081902951236e+00, -7.32993092000230995e-01, +4.88281249241778237e-01, 2.04800000318021258e+00, -7.16863708730099525e-01, +4.96093748931098810e-01, 2.01574803583926521e+00, -7.00990360175606675e-01, +5.07812497779701388e-01, 1.96923077784079825e+00, -6.77642998396260410e-01, +5.23437498033319737e-01, 1.91044776837204044e+00, -6.47337648285891021e-01, +5.39062498006593560e-01, 1.85507247062801328e+00, -6.17923763020271188e-01, +5.54687498964024250e-01, 1.80281690477552603e+00, -5.89350388745976339e-01, +5.70312499806522322e-01, 1.75342465812909332e+00, -5.61570823110474571e-01, +5.85937497921867001e-01, 1.70666667271966777e+00, -5.34542153929987052e-01, +6.01562498226483444e-01, 1.66233766723853860e+00, -5.08224845014116688e-01, +6.17187498682654212e-01, 1.62025316801528496e+00, -4.82582413587029357e-01, +6.32812500000264566e-01, 1.58024691357958624e+00, -4.57581109246760320e-01, +6.48437499353274216e-01, 1.54216867623689291e+00, 
-4.33189657120379490e-01, +6.64062498728508976e-01, 1.50588235582451335e+00, -4.09379009344016609e-01, +6.79687498865382267e-01, 1.47126437027210688e+00, -3.86122146934356092e-01, +6.95312498728747119e-01, 1.43820224982050338e+00, -3.63393896015796081e-01, +7.10937499999943157e-01, 1.40659340659351906e+00, -3.41170757402847080e-01, +7.26562499999845568e-01, 1.37634408602179792e+00, -3.19430770766573779e-01, +7.42187500000120126e-01, 1.34736842105241350e+00, -2.98153372318914478e-01, +7.57812499999581890e-01, 1.31958762886670744e+00, -2.77319285416786077e-01, +7.73437498602746576e-01, 1.29292929526503420e+00, -2.56910415591577124e-01, +7.89062500000142664e-01, 1.26732673267303819e+00, -2.36909747078176913e-01, +8.04687500000259015e-01, 1.24271844660154174e+00, -2.17301275689659512e-01, +8.20312499999677036e-01, 1.21904761904809900e+00, -1.98069913762487504e-01, +8.35937499999997113e-01, 1.19626168224299478e+00, -1.79201429457714445e-01, +8.51562499999758749e-01, 1.17431192660583728e+00, -1.60682381690756770e-01, +8.67187500000204725e-01, 1.15315315315288092e+00, -1.42500062607046951e-01, +8.82812500000407896e-01, 1.13274336283133503e+00, -1.24642445206814556e-01, +8.98437499999816813e-01, 1.11304347826109651e+00, -1.07098135556570995e-01, +9.14062499999708455e-01, 1.09401709401744296e+00, -8.98563291221800009e-02, +9.29687500000063949e-01, 1.07563025210076635e+00, -7.29067708080189947e-02, +9.45312499999844014e-01, 1.05785123966959604e+00, -5.62397183230410880e-02, +9.60937500000120459e-01, 1.04065040650393459e+00, -3.98459085470743157e-02, +9.76562499999976685e-01, 1.02400000000002445e+00, -2.37165266173399170e-02, +9.92187500000169420e-01, 1.00787401574785940e+00, -7.84317746085513856e-03, +1.01562500000004907e+00, 9.84615384615337041e-01, 1.55041865360135717e-02, +1.04687500000009237e+00, 9.55223880596930641e-01, 4.58095360313824362e-02, +1.07812500000002154e+00, 9.27536231884039442e-01, 7.52234212376075018e-02, +1.10937499999982481e+00, 9.01408450704367703e-01, 
1.03796793681485644e-01, +1.14062500000007416e+00, 8.76712328767066285e-01, 1.31576357788784293e-01, +1.17187500000009659e+00, 8.53333333333263000e-01, 1.58605030176721007e-01, +1.20312499999950173e+00, 8.31168831169175393e-01, 1.84922338493597849e-01, +1.23437500000022027e+00, 8.10126582278336449e-01, 2.10564769107528083e-01, +1.26562500000064615e+00, 7.90123456789720069e-01, 2.35566071313277448e-01, +1.29687500000144706e+00, 7.71084337348537208e-01, 2.59957524438041876e-01, +1.32812499999945932e+00, 7.52941176470894757e-01, 2.83768173130237500e-01, +1.35937500055846350e+00, 7.35632183605830825e-01, 3.07025035705735583e-01, +1.39062499999999467e+00, 7.19101123595508374e-01, 3.29753286372464149e-01, +1.42187500000017564e+00, 7.03296703296616421e-01, 3.51976423157301710e-01, +1.45312500161088876e+00, 6.88172042247866766e-01, 3.73716410902152685e-01, +1.48437500134602307e+00, 6.73684209915422660e-01, 3.94993809147663466e-01, +1.51562499999932343e+00, 6.59793814433284220e-01, 4.15827895143264570e-01, +1.54687500000028200e+00, 6.46464646464528614e-01, 4.36236766775100371e-01, +1.57812500000061906e+00, 6.33663366336385092e-01, 4.56237433481979870e-01, +1.60937500243255216e+00, 6.21359222361793417e-01, 4.75845906381452632e-01, +1.64062500000026312e+00, 6.09523809523711768e-01, 4.95077266798011895e-01, +1.67187500000027911e+00, 5.98130841121395473e-01, 5.13945751102401260e-01, +1.70312500224662178e+00, 5.87155962528224662e-01, 5.32464800188589216e-01, +1.73437500283893620e+00, 5.76576575632799071e-01, 5.50647119589526390e-01, +1.76562500399259092e+00, 5.66371680135198341e-01, 5.68504737613959144e-01, +1.79687500443862880e+00, 5.56521737755718449e-01, 5.86049047473771623e-01, +1.82812500114411280e+00, 5.47008546666207462e-01, 6.03290852063923744e-01, +1.85937500250667465e+00, 5.37815125325376786e-01, 6.20240411099985067e-01, +1.89062500504214515e+00, 5.28925618424108568e-01, 6.36907464903988974e-01, +1.92187500371610143e+00, 5.20325202245941476e-01, 
6.53301273946326866e-01, +1.95312500494870611e+00, 5.11999998702726389e-01, 6.69430656476366792e-01, +1.98437500351688123e+00, 5.03937006980894941e-01, 6.85304004871206018e-01, +2.03125000000003997e+00, 4.92307692307682621e-01, 7.08651367095930240e-01, +2.09375000579615866e+00, 4.77611938976327366e-01, 7.38956719359554093e-01, +2.15625000000061062e+00, 4.63768115941897652e-01, 7.68370601797816022e-01, +2.21875000323311955e+00, 4.50704224695355204e-01, 7.96943975698769513e-01, +2.28125000853738547e+00, 4.38356162743050726e-01, 8.24723542091080120e-01, +2.34374999999916556e+00, 4.26666666666818573e-01, 8.51752210736227866e-01, +2.40625000438447856e+00, 4.15584414827170512e-01, 8.78069520876078258e-01, +2.46875000884389584e+00, 4.05063289688167072e-01, 9.03711953249632494e-01, +2.53124999999940403e+00, 3.95061728395154743e-01, 9.28713251872476775e-01, +2.59375000434366632e+00, 3.85542168029044230e-01, 9.53104706671537905e-01, +2.65625000734081196e+00, 3.76470587194880080e-01, 9.76915356454189698e-01, +2.71875000787161980e+00, 3.67816090889081959e-01, 1.00017221875016560e+00, +2.78125001557333462e+00, 3.59550559784484969e-01, 1.02290047253181449e+00, +2.84375001147093220e+00, 3.51648350229895601e-01, 1.04512360775085789e+00, +2.90625000771072894e+00, 3.44086020592463127e-01, 1.06686359300668343e+00, +2.96875001371853831e+00, 3.36842103706616824e-01, 1.08814099342179560e+00, +3.03125000512624965e+00, 3.29896906658595002e-01, 1.10897507739479018e+00, +3.09375001373132807e+00, 3.23232321797685962e-01, 1.12938395177327244e+00, +3.15625001204422961e+00, 3.16831681959289180e-01, 1.14938461785752644e+00, +3.21875000888250318e+00, 3.10679610793130057e-01, 1.16899308818952186e+00, +3.28125000000102052e+00, 3.04761904761809976e-01, 1.18822444735810784e+00, +3.34375001587649123e+00, 2.99065419140752298e-01, 1.20709293641028914e+00, +3.40625000791328070e+00, 2.93577980969346064e-01, 1.22561198175258212e+00, +3.46875000615970519e+00, 2.88288287776354346e-01, 
1.24379430028837845e+00, +3.53125000516822674e+00, 2.83185840293502689e-01, 1.26165191737618265e+00, +3.59375001425228779e+00, 2.78260868461675415e-01, 1.27919622952937750e+00, +3.65625001719730669e+00, 2.73504272217836075e-01, 1.29643803670156643e+00, +3.71875000856489324e+00, 2.68907562405871714e-01, 1.31338759261496740e+00, +3.78125001788371806e+00, 2.64462808666557803e-01, 1.33005464752659286e+00, +3.84375001532508964e+00, 2.60162600588744020e-01, 1.34644845655970613e+00, +3.90625000429340918e+00, 2.55999999718627136e-01, 1.36257783560168733e+00, +3.96875001912740766e+00, 2.51968502722644594e-01, 1.37845118847836900e+00, +4.06250002536431332e+00, 2.46153844616978895e-01, 1.40179855389937913e+00, +4.18750001743208244e+00, 2.38805969155131859e-01, 1.43210390131407017e+00, +4.31250002253733200e+00, 2.31884056759177282e-01, 1.46151778758352613e+00, +4.43750000671406397e+00, 2.25352112335092170e-01, 1.49009115631456268e+00, +4.56250002627485340e+00, 2.19178080929562313e-01, 1.51787072466748185e+00, +4.68750001185115028e+00, 2.13333332793974317e-01, 1.54489939382477459e+00, +4.81250001682742301e+00, 2.07792207065640028e-01, 1.57121670311050998e+00, +4.93750000000042366e+00, 2.02531645569602875e-01, 1.59685913022732606e+00, +5.06249999999927613e+00, 1.97530864197559108e-01, 1.62186043243251454e+00, +5.18750002327641901e+00, 1.92771083472381588e-01, 1.64625189004383721e+00, +5.31250002381002329e+00, 1.88235293273997795e-01, 1.67006253873242194e+00, +5.43750000000577405e+00, 1.83908045976816203e-01, 1.69331939641586438e+00, +5.56250002193114934e+00, 1.79775280190080267e-01, 1.71604765143503712e+00, +5.68749999999938005e+00, 1.75824175824194989e-01, 1.73827078427695980e+00, +5.81250002749782002e+00, 1.72043009938785768e-01, 1.76001077564428243e+00, +5.93749999999874767e+00, 1.68421052631614471e-01, 1.78128816936054868e+00, +6.06250001966917473e+00, 1.64948453073088669e-01, 1.80212225950800153e+00, +6.18750003004243609e+00, 1.61616160831459688e-01, 
1.82253113275015188e+00, +6.31250002448351388e+00, 1.58415840969730465e-01, 1.84253179848005466e+00, +6.43750001359968849e+00, 1.55339805497076044e-01, 1.86214026810242750e+00, +6.56250003345742350e+00, 1.52380951604072529e-01, 1.88137163301601618e+00, +6.68750002403557531e+00, 1.49532709742937614e-01, 1.90024011581622965e+00, +6.81250003423489581e+00, 1.46788990088028509e-01, 1.91875916501466826e+00, +6.93750003062940923e+00, 1.44144143507740546e-01, 1.93694148348760287e+00, +7.06250002747386052e+00, 1.41592919803171097e-01, 1.95479910036266347e+00, +7.18750003617887856e+00, 1.39130434082284093e-01, 1.97234341115705192e+00, +7.31250000000050537e+00, 1.36752136752127301e-01, 1.98958521255804399e+00, +7.43750002212249761e+00, 1.34453781112678528e-01, 2.00653477384620160e+00, +7.56250003604752941e+00, 1.32231404328381430e-01, 2.02320182812357530e+00, +7.68750005007207449e+00, 1.30081299965731312e-01, 2.03959563964607682e+00, +7.81249996125652668e+00, 1.28000000634773070e-01, 2.05572501010335529e+00, +7.93750005224239974e+00, 1.25984251139310915e-01, 2.07159837080052966e+00, +8.12500004244456164e+00, 1.23076922433975874e-01, 2.09494573343974722e+00, +8.37500006149772425e+00, 1.19402984197849338e-01, 2.12525108505414195e+00, +8.62500006593247370e+00, 1.15942028099206410e-01, 2.15466497056176820e+00, +8.87500007743793873e+00, 1.12676055354884341e-01, 2.18323834408688100e+00, +9.12500001754142609e+00, 1.09589040885222130e-01, 2.21101790139090326e+00, +9.37500007707016181e+00, 1.06666665789779500e-01, 2.23804658007729174e+00, +9.62500004426353151e+00, 1.03896103418305616e-01, 2.26436388477265638e+00, +9.87500006518495788e+00, 1.01265822116353585e-01, 2.29000631738819393e+00, +1.01250000000026539e+01, 9.87654320987395445e-02, 2.31500761299286495e+00, +1.03750000409819823e+01, 9.63855417879450060e-02, 2.33939907006683256e+00, +1.06250000362555337e+01, 9.41176467376672460e-02, 2.36320971822276604e+00, +1.08750000879032314e+01, 9.19540222452362582e-02, 
2.38646658505780351e+00, +1.11250000697274576e+01, 8.98876398860551373e-02, 2.40919483431994053e+00, +1.13750000462194141e+01, 8.79120875548795450e-02, 2.43141796890025930e+00, +1.16250000714972366e+01, 8.60215048472860316e-02, 2.45315795762371991e+00, +1.18750000788855150e+01, 8.42105257563797310e-02, 2.47443535656369562e+00, +1.21250000895724916e+01, 8.24742261948517991e-02, 2.49526944421096886e+00, +1.23750000985058719e+01, 8.08080801648427965e-02, 2.51567831641482442e+00, +1.26250000894226950e+01, 7.92079202310506381e-02, 2.53567898224440924e+00, +1.28750000768594433e+01, 7.76699024489580225e-02, 2.55528745251946532e+00, +1.31250000578007420e+01, 7.61904758549435401e-02, 2.57451881288155349e+00, +1.33750000809310077e+01, 7.47663546877819496e-02, 2.59338729883298669e+00, +1.36250000915049636e+01, 7.33944949199294983e-02, 2.61190634726526838e+00, +1.38750000830616607e+01, 7.20720716406179490e-02, 2.63008866561892418e+00, +1.41249999999960103e+01, 7.07964601770111474e-02, 2.64794627703222218e+00, +1.43750000290097564e+01, 6.95652172509168693e-02, 2.66549058870148414e+00, +1.46250000868097665e+01, 6.83760679702078294e-02, 2.68273239905363070e+00, +1.48750000966053975e+01, 6.72268903196987927e-02, 2.69968195792617394e+00, +1.51250001097012756e+01, 6.61157019998031836e-02, 2.71634901116988203e+00, +1.53750000510427132e+01, 6.50406501905787804e-02, 2.73274281701243282e+00, +1.56250001080665442e+01, 6.39999995573594382e-02, 2.74887220253872400e+00, +1.58750000434989929e+01, 6.29921258116476201e-02, 2.76474554751884938e+00, +1.62500000641781739e+01, 6.15384612954199342e-02, 2.78809291272517257e+00, +1.67500001015987401e+01, 5.97014921751882754e-02, 2.81839826433667184e+00, +1.72500001048300184e+01, 5.79710141404578272e-02, 2.84781214955447126e+00, +1.77500001262529885e+01, 5.63380277682904579e-02, 2.87638552303426920e+00, +1.82500001543340602e+01, 5.47945200845665337e-02, 2.90416508848516131e+00, +1.87500001096404212e+01, 5.33333330214672482e-02, 
2.93119375826390893e+00, +1.92500001680268191e+01, 5.19480514946147609e-02, 2.95751106946245912e+00, +1.97500000329124035e+01, 5.06329113080278073e-02, 2.98315349301358168e+00, +2.02500001270002485e+01, 4.93827157396732261e-02, 3.00815479982416534e+00, +2.07500001519906796e+01, 4.81927707313324349e-02, 3.03254625400155930e+00, +2.12500001425219267e+01, 4.70588232137922752e-02, 3.05635690207734001e+00, +2.17500000758314478e+01, 4.59770113339538697e-02, 3.07961376102119644e+00, +2.22500001767207358e+01, 4.49438198677525880e-02, 3.10234201655475417e+00, +2.27500001365873317e+01, 4.39560436921389575e-02, 3.12456515140079816e+00, +2.32500001697599998e+01, 4.30107523741288036e-02, 3.14630513933487066e+00, +2.37500001766865303e+01, 4.21052628446554611e-02, 3.16758253792008304e+00, +}; diff --git a/usr/src/libm/src/C/_TBL_log2.c b/usr/src/libm/src/C/_TBL_log2.c new file mode 100644 index 0000000..fe4028a --- /dev/null +++ b/usr/src/libm/src/C/_TBL_log2.c @@ -0,0 +1,120 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)_TBL_log2.c 1.9 06/01/31 SMI" + +#include "libm_protos.h" + +const double _TBL_log2_hi[] = { + 0.00000000000000000e+00, 1.12272500991821289e-02, 2.23678052425384521e-02, + 3.34229767322540283e-02, 4.43941056728363037e-02, 5.52824139595031738e-02, + 6.60891532897949219e-02, 7.68155455589294434e-02, 8.74627828598022461e-02, + 9.80320572853088379e-02, 1.08524441719055176e-01, 1.18941068649291992e-01, + 1.29282951354980469e-01, 1.39551281929016113e-01, 1.49747014045715332e-01, + 1.59871220588684082e-01, 1.69924974441528320e-01, 1.79908990859985352e-01, + 1.89824461936950684e-01, 1.99672341346740723e-01, 2.09453344345092773e-01, + 2.19168424606323242e-01, 2.28818655014038086e-01, 2.38404631614685059e-01, + 2.47927427291870117e-01, 2.57387638092041016e-01, 2.66786336898803711e-01, + 2.76124238967895508e-01, 2.85402059555053711e-01, 2.94620513916015625e-01, + 3.03780555725097656e-01, 3.12882900238037109e-01, 3.21928024291992188e-01, + 3.30916643142700195e-01, 3.39849948883056641e-01, 3.48727941513061523e-01, + 3.57551813125610352e-01, 3.66322040557861328e-01, 3.75039339065551758e-01, + 3.83704185485839844e-01, 3.92317295074462891e-01, 4.00879383087158203e-01, + 4.09390926361083984e-01, 4.17852401733398438e-01, 4.26264524459838867e-01, + 4.34628009796142578e-01, 4.42943334579467773e-01, 4.51210975646972656e-01, + 4.59431409835815430e-01, 4.67605352401733398e-01, 4.75733280181884766e-01, + 4.83815670013427734e-01, 4.91852998733520508e-01, 4.99845743179321289e-01, + 5.07794380187988281e-01, 5.15699386596679688e-01, 5.23561954498291016e-01, + 5.31381130218505859e-01, 5.39158344268798828e-01, 5.46894073486328125e-01, + 5.54588794708251953e-01, 5.62242031097412109e-01, 5.69855213165283203e-01, + 5.77428817749023438e-01, 5.84962368011474609e-01, 5.92456817626953125e-01, + 5.99912643432617188e-01, 6.07329845428466797e-01, 6.14709377288818359e-01, + 6.22051715850830078e-01, 6.29356384277343750e-01, 6.36624336242675781e-01, + 6.43856048583984375e-01, 
6.51051521301269531e-01, 6.58211231231689453e-01, + 6.65335655212402344e-01, 6.72425270080566406e-01, 6.79480075836181641e-01, + 6.86500072479248047e-01, 6.93486690521240234e-01, 7.00439453125000000e-01, + 7.07358837127685547e-01, 7.14245319366455078e-01, 7.21098899841308594e-01, + 7.27920055389404297e-01, 7.34709262847900391e-01, 7.41466522216796875e-01, + 7.48192787170410156e-01, 7.54887104034423828e-01, 7.61550903320312500e-01, + 7.68184185028076172e-01, 7.74786949157714844e-01, 7.81359672546386719e-01, + 7.87902355194091797e-01, 7.94415473937988281e-01, 8.00899505615234375e-01, + 8.07354450225830078e-01, 8.13780784606933594e-01, 8.20178508758544922e-01, + 8.26548099517822266e-01, 8.32889556884765625e-01, 8.39203357696533203e-01, + 8.45489978790283203e-01, 8.51748943328857422e-01, 8.57980728149414062e-01, + 8.64185810089111328e-01, 8.70364665985107422e-01, 8.76516819000244141e-01, + 8.82642745971679688e-01, 8.88742923736572266e-01, 8.94817352294921875e-01, + 9.00866508483886719e-01, 9.06890392303466797e-01, 9.12889003753662109e-01, + 9.18862819671630859e-01, 9.24812316894531250e-01, 9.30737018585205078e-01, + 9.36637878417968750e-01, 9.42514419555664062e-01, 9.48367118835449219e-01, + 9.54195976257324219e-01, 9.60001468658447266e-01, 9.65784072875976562e-01, + 9.71543312072753906e-01, 9.77279663085937500e-01, 9.82993125915527344e-01, + 9.88684654235839844e-01, 9.94353294372558594e-01, +}; +const double _TBL_log2_lo[] = { + 0.00000000000000000e+00, 5.32407199143163062e-09, 7.78591605611869461e-09, + 2.48051962506972834e-08, 1.36856171339421649e-08, 2.15416864274073636e-08, + 3.71679775110542797e-08, 5.14919014488721604e-08, 5.83905371621603131e-08, + 2.56752178779050280e-08, 1.50591138779666358e-08, 4.07421543880223335e-09, + 6.55899859865622946e-08, 7.04697774403433060e-08, 1.05458966729375492e-07, + 1.16189705334564924e-07, 2.70007840425949794e-08, 9.91549491170275978e-08, + 9.69430665462702729e-08, 3.48962367368142750e-09, 2.12838570084203029e-08, + 
9.58558383294243244e-08, 3.54818427912568755e-08, 1.07710393847949145e-07, + 8.61517153766060168e-08, 2.04600610755536536e-07, 2.03796097652703831e-07, + 1.66306342048863931e-07, 1.59307194630913047e-07, 2.34975611381410033e-07, + 1.92452005268177275e-07, 5.50463182513595194e-08, 7.05953701603703195e-08, + 2.34971916784423615e-07, 5.40015680851899589e-08, 2.12718016029126278e-07, + 1.91492473341603465e-07, 1.73687954457398432e-07, 9.22813729985471341e-08, + 1.06988212380721318e-07, 1.27704297398270718e-07, 5.31950261176686284e-08, + 9.77661777174938596e-09, 1.13152499419201003e-07, 2.30242259071696645e-07, + 2.17840582054596399e-07, 1.61269260528736021e-07, 1.36185356146932601e-07, + 2.08801481826511869e-07, 1.97681264041823641e-07, 1.50784512989339287e-07, + 1.07250828689716638e-07, 9.75961542029652924e-08, 1.43903884071471071e-07, + 2.60010707986588806e-07, 4.51687362770425967e-07, 1.55872185666914818e-09, + 3.30297806270353139e-07, 4.66839232562134881e-07, 3.86401308539453419e-07, + 5.69693854190458130e-08, 3.93123660542428204e-07, 3.95165664638538863e-07, + 1.02867252517587785e-08, 1.32709681572078730e-07, 2.19641127294637299e-07, + 1.98754510492326232e-07, 4.68321143892845854e-07, 4.66826389855508924e-07, + 1.03605546188658804e-07, 2.35802265869106829e-07, 2.84300973057307715e-07, + 1.41190740320740639e-07, 1.69877659083133016e-07, 2.51520105284046651e-07, + 2.61972773884411727e-07, 7.18909291834578061e-08, 2.36692644004112907e-08, + 4.54703970334185855e-07, 2.66978085000826612e-07, 2.65016092160396791e-07, + 2.94953197203117899e-07, 1.98299667558641024e-07, 2.88865876540408914e-07, + 3.99173794882405776e-07, 3.57377937852235498e-07, 4.64184350072864601e-07, + 6.24190501305044646e-08, 3.98129044716236242e-07, 3.29124166816248113e-07, + 1.39748850186603795e-07, 1.10443458567567753e-07, 4.09782728853196823e-08, + 2.04197339771775867e-07, 3.92412117682061536e-07, 3.94305070358032831e-07, + 4.71831774029316962e-07, 4.06610103464898125e-07, 4.53656642786443564e-07, 
+ 3.87773092718157073e-07, 4.57279976050247260e-07, 4.30400410735578705e-07, + 7.21540920170394723e-08, 9.80872001232200742e-08, 2.66978158058219765e-07, + 3.34565168908893463e-07, 5.35982971014292903e-08, 1.27564755579416119e-07, + 3.03390161571307385e-07, 3.25161686840256005e-07, 4.11013021640696012e-07, + 2.99496861839592342e-07, 2.03305051732449063e-07, 3.32476299509608735e-07, + 4.17602963653023739e-07, 1.86711249657268702e-07, 3.18977681198347184e-07, + 6.05846018127542565e-08, 8.57835758121197076e-08, 1.12749228435440334e-07, + 3.34129550990056099e-07, 4.63409633672188390e-07, 2.11786110481110945e-07, + 2.41878018084726962e-07, 2.60413978970349421e-07, 4.48778782784743522e-07, + 3.25363260095300064e-08, 1.42486299343828112e-07, +}; diff --git a/usr/src/libm/src/C/_TBL_sin.c b/usr/src/libm/src/C/_TBL_sin.c new file mode 100644 index 0000000..97f3f66 --- /dev/null +++ b/usr/src/libm/src/C/_TBL_sin.c @@ -0,0 +1,798 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)_TBL_sin.c 1.10 06/01/23 SMI" + +#include "libm_protos.h" + +/* + * Table of constants for x[i],sin(x[i]),cos(x[i]), where + * x[i] ~ (i+10.5)/64 chosen to make the value of sine and + * cosine nearly representable in double (with error less + * than 2**-8 ulp) + * By K.C. Ng, May 5, 1995 + * + * For each i, _TBL_sincosx[i] := x[i], _TBL_sincos[2*i] := + * sin(x[i]), and _TBL_sincos[2*i+1] := cos(x[i]). + */ + +const double _TBL_sincos[] = { + 1.63327491736778435127e-01, 9.86571908399470176576e-01, + 1.78722113534634630128e-01, 9.83899591489758251761e-01, + 1.94073102892906523831e-01, 9.80987069605669836925e-01, + 2.09376712086097482857e-01, 9.77835053797937558961e-01, + 2.24629204957583178404e-01, 9.74444313586017130113e-01, + 2.39826857830661321902e-01, 9.70815676770349522684e-01, + 2.54965960415442560727e-01, 9.66950029230792762469e-01, + 2.70042816718758793559e-01, 9.62848314709330965755e-01, + 2.85053745940880454146e-01, 9.58511534581129587274e-01, + 2.99995083378835347698e-01, 9.53940747608846839611e-01, + 3.14863181320744367486e-01, 9.49137069684131584602e-01, + 3.29654409930721814526e-01, 9.44101673557052656349e-01, + 3.44365158144533722862e-01, 9.38835788546692695533e-01, + 3.58991834544317267586e-01, 9.33340700243220688925e-01, + 3.73530868238515501023e-01, 9.27617750192923362640e-01, + 3.87978709726743087316e-01, 9.21668335573470609567e-01, + 4.02331831777567594521e-01, 9.15493908848391546584e-01, + 4.16586730281922223984e-01, 9.09095977415485534401e-01, + 4.30739925110786514573e-01, 9.02476103237949467406e-01, + 4.44787960958008266044e-01, 8.95635902466408118094e-01, + 4.58727408216676513231e-01, 8.88577045028066558885e-01, + 4.72554863751536879946e-01, 8.81301254251215970825e-01, + 4.86266951795427115890e-01, 8.73810306411857196096e-01, + 4.99860324731856597857e-01, 8.66106030321324382726e-01, + 5.13331663943585647658e-01, 8.58190306862591900661e-01, + 5.26677680590333596733e-01, 8.50065068549453184410e-01, + 
5.39895116435048061376e-01, 8.41732299041438647436e-01, + 5.52980744632255882820e-01, 8.33194032663434169805e-01, + 5.65931370507619768695e-01, 8.24452353914625679643e-01, + 5.78743832357296650315e-01, 8.15509396946711651033e-01, + 5.91415002201596706755e-01, 8.06367345054898265744e-01, + 6.03941786558566895415e-01, 7.97028430138126520177e-01, + 6.16321127179607297641e-01, 7.87494932169127248578e-01, + 6.28550001844884853597e-01, 7.77769178600434929471e-01, + 6.40625425044079821468e-01, 7.67853543839638774671e-01, + 6.52544448725672743272e-01, 7.57750448655299613243e-01, + 6.64304163044103668234e-01, 7.47462359562187539375e-01, + 6.75901697026429104653e-01, 7.36991788256011193248e-01, + 6.87334219302880855551e-01, 7.26341290975047959577e-01, + 6.98598938789923074033e-01, 7.15513467882745946014e-01, + 7.09693105361432152733e-01, 7.04510962443060329008e-01, + 7.20614010544995853280e-01, 6.93336460750663685637e-01, + 7.31358988151144640000e-01, 6.81992690906972898190e-01, + 7.41925414945620254059e-01, 6.70482422333180339002e-01, + 7.52310711296420575600e-01, 6.58808465085774175307e-01, + 7.62512341773335489137e-01, 6.46973669204044199432e-01, + 7.72527815799416095466e-01, 6.34980923978180178402e-01, + 7.82354688238184881044e-01, 6.22833157267443926486e-01, + 7.91990560000511156780e-01, 6.10533334773848967991e-01, + 8.01433078627164507957e-01, 5.98084459321745920413e-01, + 8.10679938859144910701e-01, 5.85489570130274028514e-01, + 8.19728883213368231253e-01, 5.72751742053888568407e-01, + 8.28577702516849257108e-01, 5.59874084854710574177e-01, + 8.37224236455711978699e-01, 5.46859742430497508536e-01, + 8.45666374107491569667e-01, 5.33711892039036461810e-01, + 8.53902054441761149128e-01, 5.20433743544881588505e-01, + 8.61929266833302509809e-01, 5.07028538621057900393e-01, + 8.69746051561515076678e-01, 4.93499549942200410602e-01, + 8.77350500260862697921e-01, 4.79850080433476600117e-01, + 8.84740756420631879742e-01, 4.66083462405874393575e-01, + 8.91915015812867362222e-01, 
4.52203056787028545571e-01, + 8.98871526946913745881e-01, 4.38212252275223035358e-01, + 9.05608591487805036913e-01, 4.24114464529888268718e-01, + 9.12124564678846838639e-01, 4.09913135321892496687e-01, + 9.18417855741508804002e-01, 3.95611731695571844369e-01, + 9.24486928255549345046e-01, 3.81213745141251114656e-01, + 9.30330300545781363475e-01, 3.66722690716563270996e-01, + 9.35946546034209125864e-01, 3.52142106210879102246e-01, + 9.41334293596668869597e-01, 3.37475551260917883134e-01, + 9.46492227896101323559e-01, 3.22726606483374089951e-01, + 9.51419089686698082886e-01, 3.07898872650964328113e-01, + 9.56113676155394554002e-01, 2.92995969713948978264e-01, + 9.60574841181938254842e-01, 2.78021536015277237475e-01, + 9.64801495637480077683e-01, 2.62979227346346711158e-01, + 9.68792607644664016675e-01, 2.47872716072285947941e-01, + 9.72547202831614887586e-01, 2.32705690227810568782e-01, + 9.76064364566613607010e-01, 2.17481852629530458820e-01, + 9.79343234187565414572e-01, 2.02204919947659544910e-01, + 9.82383011202836109454e-01, 1.86878621837941599759e-01, + 9.85182953494231017366e-01, 1.71506699998524386741e-01, + 9.87742377497998091940e-01, 1.56092907252707135957e-01, + 9.90060658366647028394e-01, 1.40641006660935985462e-01, + 9.92137230124395808062e-01, 1.25154770588626285122e-01, + 9.93971585806359803072e-01, 1.09637979777038541140e-01, + 9.95563277581850036846e-01, 9.40944224196323536491e-02, + 9.96911916861350277941e-01, 7.85278932598362233719e-02, + 9.98017174394052908326e-01, 6.29421926414276133865e-02, + 9.98878780347215333713e-01, 4.73411255892753485286e-02, + 9.99496524372108563483e-01, 3.17285008797294557081e-02, + 9.99870255655346151791e-01, 1.61081301122361006395e-02, + 9.99999882955821872699e-01, 4.83826769160181427432e-04, + 9.99885374626887313276e-01, -1.51405946795101862407e-02, + 9.99526758624139421983e-01, -3.07613197753498594789e-02, + 9.98924122498464628350e-01, -4.63745349375326090802e-02, + 9.98077613374894423437e-01, 
-6.19764284214149308028e-02, + 9.96987437916807328619e-01, -7.75631912448182664344e-02, + 9.95653862273598311283e-01, -9.31310181393209396417e-02, + 9.94077212020575529117e-01, -1.08676108420387079745e-01, + 9.92257872072439317535e-01, -1.24194666996109981394e-01, + 9.90196286596708996619e-01, -1.39682905217811598186e-01, + 9.87892958898728967831e-01, -1.55137041864005509328e-01, + 9.85348451302295424981e-01, -1.70553304031812708041e-01, + 9.82563385014030843401e-01, -1.85927928052160545969e-01, + 9.79538439968065888230e-01, -2.01257160431443482551e-01, + 9.76274354660002341433e-01, -2.16537258764389006771e-01, + 9.72771925969731610095e-01, -2.31764492632368090952e-01, + 9.69032008956924317822e-01, -2.46935144556029828600e-01, + 9.65055516693764658953e-01, -2.62045510739892129060e-01, + 9.60843419958733790942e-01, -2.77091902303196746526e-01, + 9.56396747083171572257e-01, -2.92070645852553878452e-01, + 9.51716583658057113659e-01, -3.06978084543891138747e-01, + 9.46804072278775166183e-01, -3.21810578937871294425e-01, + 9.41660412264228252610e-01, -3.36564507894643705210e-01, + 9.36286859366077139910e-01, -3.51236269451786098372e-01, + 9.30684725460523609719e-01, -3.65822281708577445869e-01, + 9.24855378224429758305e-01, -3.80318983708869018390e-01, + 9.18800240811794344253e-01, -3.94722836284130795814e-01, + 9.12520791499566663596e-01, -4.09030322935848345001e-01, + 9.06018563323250702979e-01, -4.23237950701107090712e-01, + 8.99295143708603639254e-01, -4.37342250991282488481e-01, + 8.92352174084417359978e-01, -4.51339780439098559039e-01, + 8.85191349474114597129e-01, -4.65227121754735961634e-01, + 8.77814418087698666859e-01, -4.79000884547570504601e-01, + 8.70223180902864101860e-01, -4.92657706140177065190e-01, + 8.62419491209962973954e-01, -5.06194252418129098103e-01, + 8.54405254167239447405e-01, -5.19607218629047684644e-01, + 8.46182426332270809510e-01, -5.32893330195106762481e-01, + 8.37753015193838712626e-01, -5.46049343497116423940e-01, + 
8.29119078677651999421e-01, -5.59072046674417011403e-01, + 8.20282724626069215113e-01, -5.71958260435175724901e-01, + 8.11246110312714763246e-01, -5.84704838788333125521e-01, + 8.02011441899084687179e-01, -5.97308669837422590021e-01, + 7.92580973890125495274e-01, -6.09766676547169317324e-01, + 7.82957008603788473522e-01, -6.22075817467780289860e-01, + 7.73141895594474215514e-01, -6.34233087497477643346e-01, + 7.63138031079152456826e-01, -6.46235518615801973752e-01, + 7.52947857359473227135e-01, -6.58080180599429964694e-01, + 7.42573862219235825144e-01, -6.69764181745192588302e-01, + 7.32018578314804879703e-01, -6.81284669577975954269e-01, + 7.21284582577006005977e-01, -6.92638831525286491342e-01, + 7.10374495555637031075e-01, -7.03823895633044149811e-01, + 6.99290980797484418297e-01, -7.14837131223114541356e-01, + 6.88036744157449198234e-01, -7.25675849597612554476e-01, + 6.76614533221899572268e-01, -7.36337404613476742554e-01, + 6.65027136549188546688e-01, -7.46819193415104276568e-01, + 6.53277383052505156158e-01, -7.57118657009633100330e-01, + 6.41368141233487065733e-01, -7.67233280958732777322e-01, + 6.29302318589868403542e-01, -7.77160595898567008177e-01, + 6.17082860810903133242e-01, -7.86898178224750721732e-01, + 6.04712751105658807838e-01, -7.96443650643424705393e-01, + 5.92195009450509846083e-01, -8.05794682770934245220e-01, + 5.79532691867931770702e-01, -8.14948991689853463605e-01, + 5.66728889706594629594e-01, -8.23904342488817387213e-01, + 5.53786728799491090314e-01, -8.32658548869558479133e-01, + 5.40709368819720759269e-01, -8.41209473597735457595e-01, + 5.27500002380493770993e-01, -8.49555029111463189118e-01, + 5.14161854409658891640e-01, -8.57693177931374672873e-01, + 5.00698181184736190730e-01, -8.65621933270118271153e-01, + 4.87112269682015319727e-01, -8.73339359427499739574e-01, + 4.73407436683839610847e-01, -8.80843572317148937323e-01, + 4.59587028080454429446e-01, -8.88132739865035936155e-01, + 4.45654417892204612883e-01, 
-8.95205082544307417791e-01, + 4.31613007576607476956e-01, -9.02058873738668665077e-01, + 4.17466225094956511210e-01, -9.08692440215591923369e-01, + 4.03217524247773739798e-01, -9.15104162453376668296e-01, + 3.88870383625079307777e-01, -9.21292475134407928827e-01, + 3.74428305866518040812e-01, -9.27255867474522488259e-01, + 3.59894816812003803808e-01, -9.32992883591217014860e-01, + 3.45273464602750546071e-01, -9.38502122875176647554e-01, + 3.30567818825136694461e-01, -9.43782240327286303661e-01, + 3.15781469657649860316e-01, -9.48831946880402399280e-01, + 3.00918026974915431282e-01, -9.53650009721346392233e-01, + 2.85981119468962208252e-01, -9.58235252590552089025e-01, + 2.70974393771316324209e-01, -9.62586556066657106356e-01, + 2.55901513568614069616e-01, -9.66702857838587559236e-01, + 2.40766158683884484715e-01, -9.70583152971761897732e-01, + 2.25572024178931879179e-01, -9.74226494152062860721e-01, + 2.10322819513115238932e-01, -9.77631991902911057224e-01, + 1.95022267545207572681e-01, -9.80798814824694553671e-01, + 1.79674103687683967001e-01, -9.83726189782516358129e-01, + 1.64282074965636487596e-01, -9.86413402101261493904e-01, + 1.48849939140241666058e-01, -9.88859795733422641817e-01, + 1.33381463740289751829e-01, -9.91064773428305123559e-01, + 1.17880425165185737102e-01, -9.93027796873216961338e-01, + 1.02350607771443738447e-01, -9.94748386823932517764e-01, + 8.67958029390951818494e-02, -9.96226123223115322958e-01, + 7.12198081674702832000e-02, -9.97460645301151083153e-01, + 5.56264261071372570489e-02, -9.98451651667994988237e-01, + 4.00194636390110436430e-02, -9.99198900384726140800e-01, + 2.44027309972172715136e-02, -9.99702209020204901613e-01, + 8.78004077991816241078e-03, -9.99961454699081375708e-01, + -6.84479296391702837776e-03, -9.99976574130254869388e-01, + -2.24679556394218951643e-02, -9.99747563622630175395e-01, + -3.80856331006515710924e-02, -9.99274479085362488107e-01, + -5.36940124898220294547e-02, -9.98557436015947375019e-01, + 
-6.92892832575160572128e-02, -9.97596609469809547655e-01, + -8.48676380386628043118e-02, -9.96392234019183087312e-01, + -1.00425273601341916163e-01, -9.94944603695148255262e-01, + -1.15958391781735684067e-01, -9.93254071914831615508e-01, + -1.31463200384306394541e-01, -9.91321051397939245753e-01, + -1.46935914119801724897e-01, -9.89146014065556578032e-01, + -1.62372755568482129984e-01, -9.86729490918913376696e-01, + -1.77769956039573850948e-01, -9.84072071918356994225e-01, + -1.93123756521520834051e-01, -9.81174405835688490107e-01, + -2.08430408606563005725e-01, -9.78037200094199477007e-01, + -2.23686175400125447643e-01, -9.74661220596605204491e-01, + -2.38887332428331961021e-01, -9.71047291539024692852e-01, + -2.54030168529570332669e-01, -9.67196295214595047618e-01, + -2.69110986809851404633e-01, -9.63109171785954898404e-01, + -2.84126105504238113397e-01, -9.58786919065437892584e-01, + -2.99071858881536201125e-01, -9.54230592270622235418e-01, + -3.13944598143160502612e-01, -9.49441303765919730751e-01, + -3.28740692363219233485e-01, -9.44420222774031481450e-01, + -3.43456529243486463621e-01, -9.39168575134420757777e-01, + -3.58088516132365641820e-01, -9.33687642958886176991e-01, + -3.72633080853157161449e-01, -9.27978764333475703019e-01, + -3.87086672547184373894e-01, -9.22043333003578990947e-01, + -4.01445762590873223008e-01, -9.15882798013933796533e-01, + -4.15706845395529489551e-01, -9.09498663380709615467e-01, + -4.29866439353555507275e-01, -9.02892487684716527063e-01, + -4.43921087571260808424e-01, -8.96065883743795366101e-01, + -4.57867358817895864220e-01, -8.89020518171051099543e-01, + -4.71701848327647499381e-01, -8.81758110982984399939e-01, + -4.85421178579811707365e-01, -8.74280435207254624785e-01, + -4.99022000232008211551e-01, -8.66589316391822128693e-01, + -5.12500992809901023683e-01, -8.58686632229048840692e-01, + -5.25854865641323332426e-01, -8.50574312027670975667e-01, + -5.39080358520030999969e-01, -8.42254336324791408330e-01, + 
-5.52174242663304060130e-01, -8.33728736304085060738e-01, + -5.65133321393192722404e-01, -8.24999593364201810886e-01, + -5.77954430931352902689e-01, -8.16069038603239760299e-01, + -5.90634441175508673183e-01, -8.06939252296785092256e-01, + -6.03170256463835929850e-01, -7.97612463366358381833e-01, + -6.15558816459891189332e-01, -7.88090948735295393490e-01, + -6.27797096543907584554e-01, -7.78377033044423516372e-01, + -6.39882108993420795073e-01, -7.68473087746169514212e-01, + -6.51810903392718188343e-01, -7.58381530773507561705e-01, + -6.63580567511655061708e-01, -7.48104825823834529430e-01, + -6.75188227925781481176e-01, -7.37645481834222960238e-01, + -6.86631050850229573967e-01, -7.27006052250123602221e-01, + -6.97906242654146802273e-01, -7.16189134561793783185e-01, + -7.09011050643817641870e-01, -7.05197369581700761465e-01, + -7.19942763756367454242e-01, -6.94033440775618015728e-01, + -7.30698713155769064009e-01, -6.82700073672548479742e-01, + -7.41276272975477157345e-01, -6.71200035103981407225e-01, + -7.51672860805046583188e-01, -6.59536132694151233657e-01, + -7.61885938516202787518e-01, -6.47711213961349341339e-01, + -7.71913012640803364306e-01, -6.35728165897814334606e-01, + -7.81751635309322678857e-01, -6.23589913878664137137e-01, + -7.91399404523052685256e-01, -6.11299421331770620469e-01, + -8.00853964899717496451e-01, -5.98859688829029512824e-01, + -8.10113008319712335492e-01, -5.86273753251146056975e-01, + -8.19174274236826760465e-01, -5.73544687385881157837e-01, + -8.28035550507897455397e-01, -5.60675598804766250893e-01, + -8.36694673776658404130e-01, -5.47669629314764150330e-01, + -8.45149530028187490061e-01, -5.34529954158916909002e-01, + -8.53398055161871504914e-01, -5.21259781151332757254e-01, + -8.61438235389631601358e-01, -5.07862350060326539491e-01, + -8.69268107829002323328e-01, -4.94340931656873816546e-01, + -8.76885760925650292741e-01, -4.80698827006935058836e-01, + -8.84289334936661730602e-01, -4.66939366639049280305e-01, + 
-8.91477022398163843064e-01, -4.53065909704210345588e-01, + -8.98447068525225711610e-01, -4.39081843234753188554e-01, + -9.05197771673453388530e-01, -4.24990581257296273776e-01, + -9.11727483791179293959e-01, -4.10795563875518132679e-01, + -9.18034610707084031134e-01, -3.96500256675695827990e-01, + -9.24117612643078456536e-01, -3.82108149615860981374e-01, + -9.29975004511545022545e-01, -3.67622756346437040698e-01, + -9.35605356329172521690e-01, -3.53047613231079804308e-01, + -9.41007293511755382731e-01, -3.38386278619096869669e-01, + -9.46179497257704227309e-01, -3.23642331855951870256e-01, + -9.51120704853153031699e-01, -3.08819372448752571536e-01, + -9.55829709968717189383e-01, -2.93921019223052470970e-01, + -9.60305362967905695726e-01, -2.78950909399985458315e-01, + -9.64546571183209522360e-01, -2.63912697721639999404e-01, + -9.68552299193694232748e-01, -2.48810055517474232323e-01, + -9.72321569045517364316e-01, -2.33646669928897571245e-01, + -9.75853460530087812863e-01, -2.18426242863471758993e-01, + -9.79147111396304836717e-01, -2.03152490125698942380e-01, + -9.82201717531947959827e-01, -1.87829140649930864670e-01, + -9.85016533205280153673e-01, -1.72459935382833828843e-01, + -9.87590871221861066331e-01, -1.57048626479970948600e-01, + -9.89924103089018792012e-01, -1.41598976420741456961e-01, + -9.92015659185421450061e-01, -1.26114756991058563074e-01, + -9.93865028889118318212e-01, -1.10599748423005059261e-01, + -9.95471760691319929037e-01, -9.50577385914659067634e-02, + -9.96835462344218936614e-01, -7.94925217425341834598e-02, + -9.97955800916290658442e-01, -6.39078979276023889655e-02, + -9.98832502892746831868e-01, -4.83076719063431220258e-02, + -9.99465354238023406808e-01, -3.26956522776712110723e-02, + -9.99854200451614993916e-01, -1.70756504784357332483e-02, + -9.99998946602528415717e-01, -1.45147987706449187358e-03, + -9.99899557352339485305e-01, 1.41730450713867285606e-02, + -9.99556056965451689145e-01, 2.97941098823021957576e-02, + 
-9.98968529303411734155e-01, 4.54079008695470534573e-02, + -9.98137117802025963798e-01, 6.10106061751933687054e-02, + -9.97062025438244736719e-01, 7.65984166219185330649e-02, + -9.95743514682696617690e-01, 9.21675266422526118237e-02, + -9.94181907425219280050e-01, 1.07714135322866152999e-01, + -9.92377584917212285376e-01, 1.23234447107459038628e-01, + -9.90330987653228356216e-01, 1.38724672981345553691e-01, + -9.88042615281836456020e-01, 1.54181031216647779214e-01, + -9.85513026476385278762e-01, 1.69599748364658631239e-01, + -9.82742838804682716791e-01, 1.84977060140206039929e-01, + -9.79732728555263054915e-01, 2.00309212463279484595e-01, + -9.76483430616286174342e-01, 2.15592462140605983789e-01, + -9.72995738247511954278e-01, 2.30823078032026951512e-01, + -9.69270502929450938900e-01, 2.45997341755737758406e-01, + -9.65308634114379171542e-01, 2.61111548776057855736e-01, + -9.61111099038787108917e-01, 2.76162009162112587202e-01, + -9.56678922485658334018e-01, 2.91145048509638459944e-01, + -9.52013186489632401432e-01, 3.06057008986653333871e-01, + -9.47115030121562395671e-01, 3.20894250054175877995e-01, + -9.41985649202698782645e-01, 3.35653149391108962529e-01, + -9.36626296000886870985e-01, 3.50330103815899684960e-01, + -9.31038278925287121623e-01, 3.64921530162087393023e-01, + -9.25222962204842236389e-01, 3.79423866156172462372e-01, + -9.19181765559584973424e-01, 3.93833571274420646269e-01, + -9.12916163872961705650e-01, 4.08147127564896294860e-01, + -9.06427686803489396361e-01, 4.22361040575566504263e-01, + -8.99717918410242289973e-01, 4.36471840204543548580e-01, + -8.92788496793018526709e-01, 4.50476081489419755144e-01, + -8.85641113671704172106e-01, 4.64370345494136360642e-01, + -8.78277513965914136129e-01, 4.78151240155093082418e-01, + -8.70699495405757306621e-01, 4.91815401040023969514e-01, + -8.62908908048144129843e-01, 5.05359492253939501794e-01, + -8.54907653871092576559e-01, 5.18780207171229856833e-01, + -8.46697686222891654495e-01, 
5.32074269388026044325e-01, + -8.38281009508205721126e-01, 5.45238433254574883513e-01, + -8.29659678498936070667e-01, 5.58269484991829045839e-01, + -8.20835797971514846694e-01, 5.71164243250981584765e-01, + -8.11811522157973475267e-01, 5.83919559949445554636e-01, + -8.02589054191142126093e-01, 5.96532321079560556853e-01, + -7.93170645644559635379e-01, 6.08999447362468804279e-01, + -7.83558595847759331576e-01, 6.21317895181756174594e-01, + -7.73755251444074421130e-01, 6.33484657164415598807e-01, + -7.63763005819449558587e-01, 6.45496762921116018497e-01, + -7.53584298396099971917e-01, 6.57351279918779618505e-01, + -7.43221614171602262822e-01, 6.69045314031985416392e-01, + -7.32677483058112311021e-01, 6.80576010317458623966e-01, + -7.21954479231692536345e-01, 6.91940553745258979390e-01, + -7.11055220593523329420e-01, 7.03136169789818077369e-01, + -6.99982367997418419847e-01, 7.14160125246941168697e-01, + -6.88738624756222161949e-01, 7.25009728740868553132e-01, + -6.77326735865867002317e-01, 7.35682331500009611958e-01, + -6.65749487360529190738e-01, 7.46175327975397872926e-01, + -6.54009705667427665432e-01, 7.56486156444917789976e-01, + -6.42110256878597240870e-01, 7.66612299673897656938e-01, + -6.30054046069779882799e-01, 7.76551285512489308793e-01, + -6.17844016641709514737e-01, 7.86300687459981162419e-01, + -6.05483149427811451204e-01, 7.95858125396090021475e-01, + -5.92974462184078454641e-01, 8.05221265986873269149e-01, + -5.80321008740226185196e-01, 8.14387823346301220617e-01, + -5.67525878248187232167e-01, 8.23355559596596009442e-01, + -5.54592194460652221366e-01, 8.32122285390385463266e-01, + -5.41523114921985349035e-01, 8.40685860476545809838e-01, + -5.28321830279222970361e-01, 8.49044194168013799384e-01, + -5.14991563445484024086e-01, 8.57195245892075630145e-01, + -5.01535568812419785267e-01, 8.65137025687840122146e-01, + -4.87957131464199334037e-01, 8.72867594686175807261e-01, + -4.74259566375507701785e-01, 8.80385065582847903265e-01, + 
-4.60446217616484521074e-01, 8.87687603091691812551e-01, + -4.46520457522199765155e-01, 8.94773424400929218159e-01, + -4.32485685857187662773e-01, 9.01640799614035870491e-01, + -4.18345329015600841949e-01, 9.08288052156819181171e-01, + -4.04102839158860249746e-01, 9.14713559199681003342e-01, + -3.89761693387688290535e-01, 9.20915752046603697245e-01, + -3.75325392887144893006e-01, 9.26893116521052995438e-01, + -3.60797462091837162212e-01, 9.32644193327814230443e-01, + -3.46181447754430826613e-01, 9.38167578437160809557e-01, + -3.31480918189186846146e-01, 9.43461923384538936332e-01, + -3.16699462305234713533e-01, 9.48525935636751693636e-01, + -3.01840688808588275549e-01, 9.53358378879399670502e-01, + -2.86908225223433177575e-01, 9.57958073351407035645e-01, + -2.71905717092672694069e-01, 9.62323896103759568454e-01, + -2.56836827106365517270e-01, 9.66454781271185447977e-01, + -2.41705234084330145006e-01, 9.70349720366960877271e-01, + -2.26514632188532627488e-01, 9.74007762497041795768e-01, + -2.11268729991721526673e-01, 9.77428014601425809715e-01, + -1.95971249573089145724e-01, 9.80609641672343546048e-01, + -1.80625925602857506647e-01, 9.83551866959801457391e-01, + -1.65236504388101917984e-01, 9.86253972168224413153e-01, + -1.49806743037279310737e-01, 9.88715297616337362996e-01, + -1.34340408538235145386e-01, 9.90935242401732363504e-01, + -1.18841276732320783038e-01, 9.92913264562737096774e-01, + -1.03313131549733094872e-01, 9.94648881188425981748e-01, + -8.77597639605576101962e-02, 9.96141668554020087711e-01, + -7.21849710624100776579e-02, 9.97391262219956997725e-01, + -5.65925552406322598942e-02, 9.98397357113557260000e-01, + -4.09863231473179684405e-02, 9.99159707611782965664e-01, + -2.53700848500082870585e-02, 9.99678127596429599855e-01, + -9.74765281333290004029e-03, 9.99952490503739244154e-01, + 5.87715899658624793545e-03, 9.99982729351926780126e-01, + 2.15005359577336609134e-02, 9.99768836758543000265e-01, + 3.71186638844091532086e-02, 
9.99310864942154153390e-01, + 5.27277298119544560184e-02, 9.98608925710599448777e-01, + 6.83239230305028866219e-02, 9.97663190431380964007e-01, + 8.39034359259593492952e-02, 9.96473889994022088423e-01, + 9.94624650198910192911e-02, 9.95041314746361149624e-01, + 1.14997211772979862632e-01, 9.93365814433152527485e-01, + 1.30503883601530007441e-01, 9.91447798103822663940e-01, + 1.45978694798140268274e-01, 9.89287734011208397256e-01, + 1.61417867390196478894e-01, 9.86886149506213783411e-01, + 1.76817632086965909055e-01, 9.84243630908099076393e-01, + 1.92174229316510819521e-01, 9.81360823340021615202e-01, + 2.07483909972419000578e-01, 9.78238430599900898876e-01, + 2.22742936384479617296e-01, 9.74877214981876405453e-01, + 2.37947583337762752498e-01, 9.71277997065576714775e-01, + 2.53094138761417730699e-01, 9.67441655566172231673e-01, + 2.68178904913313143066e-01, 9.63369127053330553956e-01, + 2.83198199008898365836e-01, 9.59061405791160170864e-01, + 2.98148354355895761625e-01, 9.54519543432648220893e-01, + 3.13025720984674848957e-01, 9.49744648840952665481e-01, + 3.27826666953088208256e-01, 9.44737887688658850571e-01, + 3.42547578723123691269e-01, 9.39500482336717790410e-01, + 3.57184862422095295020e-01, 9.34033711413302714099e-01, + 3.71734944552921997563e-01, 9.28338909557407276907e-01, + 3.86194272922183889918e-01, 9.22417467073399333088e-01, + 4.00559317492650446280e-01, 9.16270829596698477282e-01, + 4.14826571255144882500e-01, 9.09900497736263580428e-01, + 4.28992551069135752417e-01, 9.03308026714694345394e-01, + 4.43053798493044215245e-01, 8.96495025998965022751e-01, + 4.57006880688855809947e-01, 8.89463158879018389591e-01, + 4.70848391227359996947e-01, 8.82214142075838037016e-01, + 4.84574950851524355322e-01, 8.74749745359918784438e-01, + 4.98183208470846405902e-01, 8.67071791028685923131e-01, + 5.11669841801385194557e-01, 8.59182153557058847504e-01, + 5.25031558273095666500e-01, 8.51082759088283347104e-01, + 5.38265095838926344030e-01, 8.42775584958125989488e-01, 
+ 5.51367223674840811753e-01, 8.34262659272904327779e-01, + 5.64334743129053073574e-01, 8.25546060312485341370e-01, + 5.77164488339731551747e-01, 8.16627916127985353789e-01, + 5.89853327114563730227e-01, 8.07510403952716560028e-01, + 6.02398161667909270989e-01, 7.98195749687458211419e-01, + 6.14795929310800737255e-01, 7.88686227407876638829e-01, + 6.27043603440996633047e-01, 7.78984158621810585110e-01, + 6.39138193783907904155e-01, 7.69091912092854990135e-01, + 6.51076747732772576072e-01, 7.59011902779999636515e-01, + 6.62856350634406732425e-01, 7.48746591594002808279e-01, + 6.74474126652610750376e-01, 7.38298484676894073431e-01, + 6.85927239488512419108e-01, 7.27670132771483846312e-01, + 6.97212893028884672653e-01, 7.16864130637244967303e-01, + 7.08328332056515685977e-01, 7.05883116391116449684e-01, + 7.19270842858181325141e-01, 6.94729770928295131682e-01, + 7.30037753982098469585e-01, 6.83406817174640912604e-01, + 7.40626436901593243611e-01, 6.71917019402284765306e-01, + 7.51034306483622016160e-01, 6.60263182742052423535e-01, + 7.61258821807797358971e-01, 6.48448152298858992992e-01, + 7.71297486713702129535e-01, 6.36474812533164180373e-01, + 7.81147850424134593261e-01, 6.24346086539952493943e-01, + 7.90807508031525441261e-01, 6.12064935477412253029e-01, + 8.00274101326324149852e-01, 5.99634357543281759639e-01, + 8.09545319236430693799e-01, 5.87057387401253572001e-01, + 8.18618898249645288168e-01, 5.74337095640301553701e-01, + 8.27492623168671781464e-01, 5.61476587758947043305e-01, + 8.36164327654276950952e-01, 5.48479003388890884452e-01, + 8.44631894603476873762e-01, 5.35347515748920921297e-01, + 8.52893256793554432882e-01, 5.22085330684634363330e-01, + 8.60946397328884449607e-01, 5.08695685971892852528e-01, + 8.68789350153792105935e-01, 4.95181850494696151888e-01, + 8.76420200509378299891e-01, 4.81547123487516159912e-01, + 8.83837085454141080376e-01, 4.67794833635354845303e-01, + 8.91038194240763248288e-01, 4.53928338401734798868e-01, + 
8.98021768869999070795e-01, 4.39951022996421692302e-01, + 9.04786104293555437650e-01, 4.25866300001880193626e-01, + 9.11329549200603827863e-01, 4.11677607787725330368e-01, + 9.17650506064666360295e-01, 3.97388410398770597354e-01, + 9.23747431723077494503e-01, 3.83002196318791732210e-01, + 9.29618837697029576361e-01, 3.68522477738907783262e-01, + 9.35263290560562010612e-01, 3.53952789690701208336e-01, + 9.40679412304837647696e-01, 3.39296689146571628370e-01, + 9.45865880661811875285e-01, 3.24557754182295044032e-01, + 9.50821429431150111355e-01, 3.09739583092804637854e-01, + 9.55544848784945832776e-01, 2.94845793526980815003e-01, + 9.60034985637467253028e-01, 2.79880021352128749434e-01, + 9.64290743580318188144e-01, 2.64845920952762714506e-01, + 9.68311083831862262628e-01, 2.49747162002622591359e-01, + 9.72095024823529496594e-01, 2.34587430851146777622e-01, + 9.75641642748477533331e-01, 2.19370428579268944569e-01, + 9.78950071770562035844e-01, 2.04099870113656461923e-01, + 9.82019504170923540620e-01, 1.88779483598969205493e-01, + 9.84849190595408208182e-01, 1.73413009268535894813e-01, + 9.87438440210647860873e-01, 1.58004198660550820854e-01, + 9.89786620894844482166e-01, 1.42556813578185059832e-01, + 9.91893159367450705233e-01, 1.27074625319365142051e-01, + 9.93757541353338824663e-01, 1.11561413595234956708e-01, + 9.95379311685924861308e-01, 9.60209657713066710993e-02, + 9.96758074438687136087e-01, 8.04570757688883031467e-02, + 9.97893492999770703733e-01, 6.48735433648924275651e-02, + 9.98785290176304019205e-01, 4.92741730264058125366e-02, + 9.99433248251151762354e-01, 3.36627730609296640929e-02, + 9.99837209032161888800e-01, 1.80431547900308138221e-02, + 9.99997073896832011641e-01, 2.41913161566219324719e-03, + 9.99912803818512774257e-01, -1.32054821873487954892e-02, + 9.99584419370001642235e-01, -2.88268720595330242562e-02, + 9.99012000721555049054e-01, -4.44412242666163900817e-02, + 9.98195687620527460915e-01, -6.00447267941353959864e-02, + 
9.97135679355775073063e-01, -7.56335702958476074897e-02, + 9.95832234722008102779e-01, -9.12039488650100010902e-02, + 9.94285671925894676271e-01, -1.06752061351864019345e-01, + 9.92496368545773943737e-01, -1.22274111828511194977e-01, + 9.90464761404806215417e-01, -1.37766310886661608182e-01 +}; + +const double _TBL_sincosx[] = { + 1.64062500000167837966e-01, 1.79687499999472477530e-01, + 1.95312499999996669331e-01, 2.10937500000106192832e-01, + 2.26562499999874683576e-01, 2.42187499999999750200e-01, + 2.57812499999549193941e-01, 2.73437500000180466753e-01, + 2.89062500000347444296e-01, 3.04687500000159650071e-01, + 3.20312500001052657961e-01, 3.35937499999853450561e-01, + 3.51562499998759436792e-01, 3.67187499998127386824e-01, + 3.82812499999808708573e-01, 3.98437499999694078046e-01, + 4.14062499999775512904e-01, 4.29687499999869215728e-01, + 4.45312499999981514787e-01, 4.60937499992721433362e-01, + 4.76562499999932387418e-01, 4.92187500000263733479e-01, + 5.07812500002462252624e-01, 5.23437499998664290679e-01, + 5.39062500000133337785e-01, 5.54687499999937494444e-01, + 5.70312499999814259688e-01, 5.85937500002074562744e-01, + 6.01562499999652833260e-01, 6.17187499999419131314e-01, + 6.32812500000347721851e-01, 6.48437500005533351555e-01, + 6.64062499997531863194e-01, 6.79687499999813815599e-01, + 6.95312500005013212068e-01, 7.10937499999876987289e-01, + 7.26562500001548428052e-01, 7.42187500000339617223e-01, + 7.57812499998633315457e-01, 7.73437500000337285755e-01, + 7.89062499996497468402e-01, 8.04687500000179967152e-01, + 8.20312500001350475287e-01, 8.35937499996779354028e-01, + 8.51562500000668243239e-01, 8.67187499999485522650e-01, + 8.82812500000538014078e-01, 8.98437500000525690602e-01, + 9.14062500000757727214e-01, 9.29687500002357114504e-01, + 9.45312499999430455588e-01, 9.60937500000796696042e-01, + 9.76562500001389000026e-01, 9.92187499998313238159e-01, + 1.00781250000027000624e+00, 1.02343750000073119288e+00, + 1.03906249999567279474e+00, 
1.05468750000121480603e+00, + 1.07031249999813948826e+00, 1.08593749999936250994e+00, + 1.10156249999885291757e+00, 1.11718750000074029671e+00, + 1.13281249999926680871e+00, 1.14843749999650057703e+00, + 1.16406249999956079577e+00, 1.17968749999995736744e+00, + 1.19531250000235189646e+00, 1.21093750000001554312e+00, + 1.22656249999714606069e+00, 1.24218750000679789558e+00, + 1.25781249999789324079e+00, 1.27343750000030864200e+00, + 1.28906250000041366910e+00, 1.30468750000013344881e+00, + 1.32031249999823008245e+00, 1.33593749999817146268e+00, + 1.35156249999504352033e+00, 1.36718750000051336713e+00, + 1.38281250000255573340e+00, 1.39843749999889488400e+00, + 1.41406250000066702199e+00, 1.42968750000377853304e+00, + 1.44531250000268074452e+00, 1.46093749999857935862e+00, + 1.47656250000000177636e+00, 1.49218750000007549517e+00, + 1.50781249999986965982e+00, 1.52343749999979238829e+00, + 1.53906250000026356695e+00, 1.55468750000024247271e+00, + 1.57031250000686006807e+00, 1.58593749999970379250e+00, + 1.60156249999876076906e+00, 1.61718749999920530236e+00, + 1.63281249999894950697e+00, 1.64843749999433342168e+00, + 1.66406250000158717484e+00, 1.67968749999775224246e+00, + 1.69531250000185917948e+00, 1.71093749999863442568e+00, + 1.72656249999789279670e+00, 1.74218750000263478128e+00, + 1.75781250000296740410e+00, 1.77343749999920641258e+00, + 1.78906249999844191301e+00, 1.80468749999888578017e+00, + 1.82031250003296385387e+00, 1.83593749999912847493e+00, + 1.85156249999896371783e+00, 1.86718749999873900869e+00, + 1.88281249999986122212e+00, 1.89843750000025601743e+00, + 1.91406250000089750429e+00, 1.92968749999936717288e+00, + 1.94531249999502553472e+00, 1.96093749999814637164e+00, + 1.97656250000163713487e+00, 1.99218750000058819616e+00, + 2.00781250000015099033e+00, 2.02343750000025890401e+00, + 2.03906249999571986820e+00, 2.05468749999347455315e+00, + 2.07031249999880184731e+00, 2.08593749999950617280e+00, + 2.10156249999859534583e+00, 2.11718749999749178414e+00, 
+ 2.13281250000269562150e+00, 2.14843750000770983277e+00, + 2.16406250000204325445e+00, 2.17968750000288169488e+00, + 2.19531250000207567297e+00, 2.21093749999685940111e+00, + 2.22656249999882449586e+00, 2.24218750000040500936e+00, + 2.25781249999956967756e+00, 2.27343749999970867748e+00, + 2.28906249999833111275e+00, 2.30468749999696020936e+00, + 2.32031250000405675493e+00, 2.33593750000527755617e+00, + 2.35156250000277511347e+00, 2.36718749998901101250e+00, + 2.38281250000068833828e+00, 2.39843750000151390012e+00, + 2.41406250000618571860e+00, 2.42968749999278221807e+00, + 2.44531250000394617672e+00, 2.46093750000379341003e+00, + 2.47656250000329514194e+00, 2.49218749999781508109e+00, + 2.50781249999807354101e+00, 2.52343750000954214485e+00, + 2.53906250000098099306e+00, 2.55468750001107025582e+00, + 2.57031250000341415785e+00, 2.58593750002171240965e+00, + 2.60156250000635891340e+00, 2.61718750000451771953e+00, + 2.63281250000028421709e+00, 2.64843750001994493459e+00, + 2.66406250000455235849e+00, 2.67968749999316235844e+00, + 2.69531249997396704643e+00, 2.71093749999957500663e+00, + 2.72656249999638511383e+00, 2.74218749999314947985e+00, + 2.75781249999954258811e+00, 2.77343750000063726802e+00, + 2.78906249999834177089e+00, 2.80468750000019895197e+00, + 2.82031249999983835153e+00, 2.83593749999777511306e+00, + 2.85156249999855315735e+00, 2.86718750000235678144e+00, + 2.88281249999902611236e+00, 2.89843749999328359479e+00, + 2.91406250000365130148e+00, 2.92968749999994892974e+00, + 2.94531249999847322130e+00, 2.96093749999701350006e+00, + 2.97656250000468292072e+00, 2.99218750000308997272e+00, + 3.00781249999819877416e+00, 3.02343749999709299203e+00, + 3.03906249999948618878e+00, 3.05468750000752597984e+00, + 3.07031250000433075797e+00, 3.08593749999511279825e+00, + 3.10156249999957589480e+00, 3.11718749999961186603e+00, + 3.13281249999836441944e+00, 3.14843750000262057043e+00, + 3.16406249999657873673e+00, 3.17968750000540190115e+00, + 
3.19531250000325739435e+00, 3.21093750000270583556e+00, + 3.22656250000035882408e+00, 3.24218749999618305324e+00, + 3.25781250000001199041e+00, 3.27343750000431255032e+00, + 3.28906249999634914261e+00, 3.30468749999773381276e+00, + 3.32031250000108801856e+00, 3.33593750000042854609e+00, + 3.35156249999819699781e+00, 3.36718749999951061369e+00, + 3.38281250000727817806e+00, 3.39843750000385558252e+00, + 3.41406250000184297022e+00, 3.42968750000183808524e+00, + 3.44531249999830135877e+00, 3.46093749998354383024e+00, + 3.47656249999984101606e+00, 3.49218750000081934459e+00, + 3.50781249999577759979e+00, 3.52343749999866640010e+00, + 3.53906249999683852892e+00, 3.55468750000498978636e+00, + 3.57031249999826005848e+00, 3.58593750001092637092e+00, + 3.60156250000782085507e+00, 3.61718749999987299049e+00, + 3.63281250000544186918e+00, 3.64843749999226352188e+00, + 3.66406250000062438943e+00, 3.67968749999757616109e+00, + 3.69531250001872235700e+00, 3.71093750000574784664e+00, + 3.72656249999563016218e+00, 3.74218749999581179466e+00, + 3.75781250000033528735e+00, 3.77343749999415045693e+00, + 3.78906249995283994636e+00, 3.80468750000592104143e+00, + 3.82031249998920063859e+00, 3.83593750000164934733e+00, + 3.85156250000057731597e+00, 3.86718750000405053768e+00, + 3.88281249997192157153e+00, 3.89843749998371702503e+00, + 3.91406249999986277643e+00, 3.92968749999597033451e+00, + 3.94531249999519229021e+00, 3.96093749997563104870e+00, + 3.97656250000223510099e+00, 3.99218750000022870594e+00, + 4.00781250004454392410e+00, 4.02343749999355093649e+00, + 4.03906249999698196973e+00, 4.05468749999250022142e+00, + 4.07031249994990851349e+00, 4.08593749999590372113e+00, + 4.10156249999066258027e+00, 4.11718749999303490483e+00, + 4.13281249999853184107e+00, 4.14843749998088373587e+00, + 4.16406249999834177089e+00, 4.17968749999662758654e+00, + 4.19531249999891109326e+00, 4.21093749999872102308e+00, + 4.22656249999120881000e+00, 4.24218750000338129524e+00, + 4.25781250000494537744e+00, 
4.27343749997698019172e+00, + 4.28906250000330668826e+00, 4.30468749999959232611e+00, + 4.32031250000562039304e+00, 4.33593749999550670537e+00, + 4.35156250000948219281e+00, 4.36718750000763922259e+00, + 4.38281249999987476684e+00, 4.39843750000314237525e+00, + 4.41406250000408473255e+00, 4.42968750000079314333e+00, + 4.44531249998868371875e+00, 4.46093750000322319949e+00, + 4.47656249999480770896e+00, 4.49218749997964028609e+00, + 4.50781250000320810045e+00, 4.52343749999724753508e+00, + 4.53906249999181721222e+00, 4.55468750000258193467e+00, + 4.57031249999976196818e+00, 4.58593750000821920310e+00, + 4.60156250004601385939e+00, 4.61718750000444977388e+00, + 4.63281249999695177166e+00, 4.64843749999638600201e+00, + 4.66406250000544897460e+00, 4.67968749999663469197e+00, + 4.69531249998381028377e+00, 4.71093749999796340688e+00, + 4.72656250000119992905e+00, 4.74218750001258992910e+00, + 4.75781250000492050845e+00, 4.77343750000340971695e+00, + 4.78906250000747402140e+00, 4.80468749998990762862e+00, + 4.82031250001256594828e+00, 4.83593750000031530334e+00, + 4.85156250000026023628e+00, 4.86718750000094679820e+00, + 4.88281250000185362836e+00, 4.89843749997600141910e+00, + 4.91406249999471889112e+00, 4.92968749998860822359e+00, + 4.94531250000475353090e+00, 4.96093749999659205940e+00, + 4.97656250000856825721e+00, 4.99218750002637179364e+00, + 5.00781249999760014191e+00, 5.02343749998691091463e+00, + 5.03906249999699618058e+00, 5.05468750000537525580e+00, + 5.07031250000353406193e+00, 5.08593749999286881547e+00, + 5.10156249998831601289e+00, 5.11718750000479172257e+00, + 5.13281250001085087575e+00, 5.14843750000346744855e+00, + 5.16406250000581845683e+00, 5.17968750000350119933e+00, + 5.19531249999482636071e+00, 5.21093750000432454073e+00, + 5.22656250000434585701e+00, 5.24218750001077093970e+00, + 5.25781249998869881779e+00, 5.27343750002139977084e+00, + 5.28906249999702104958e+00, 5.30468749998945909851e+00, + 5.32031249999385913441e+00, 5.33593749999546851370e+00, 
+ 5.35156250001908162517e+00, 5.36718749999724487054e+00, + 5.38281249999679634044e+00, 5.39843750001770139590e+00, + 5.41406249999678212959e+00, 5.42968749999906563630e+00, + 5.44531250000517097476e+00, 5.46093749999811794993e+00, + 5.47656250001082511858e+00, 5.49218749999457500621e+00, + 5.50781250001214406353e+00, 5.52343750001415045858e+00, + 5.53906250000498356911e+00, 5.55468750000498889818e+00, + 5.57031250000316013882e+00, 5.58593750000908428888e+00, + 5.60156250002763478335e+00, 5.61718749999503863535e+00, + 5.63281250000129496414e+00, 5.64843750001081890133e+00, + 5.66406250000738609174e+00, 5.67968750000023270275e+00, + 5.69531249998335997731e+00, 5.71093749999160404940e+00, + 5.72656250000217958984e+00, 5.74218750000474997819e+00, + 5.75781250000163868918e+00, 5.77343749999750688318e+00, + 5.78906249999925304195e+00, 5.80468749999988631316e+00, + 5.82031249999487254598e+00, 5.83593749999551025809e+00, + 5.85156249999513455862e+00, 5.86718749999803179662e+00, + 5.88281250000295141689e+00, 5.89843750000985433957e+00, + 5.91406249999845634591e+00, 5.92968750000455990801e+00, + 5.94531250000243982612e+00, 5.96093750000733901828e+00, + 5.97656249999234212567e+00, 5.99218749999141753193e+00, + 6.00781250000843591863e+00, 6.02343749999880984092e+00, + 6.03906249999745359247e+00, 6.05468750000370548037e+00, + 6.07031250001220445967e+00, 6.08593750001188915633e+00, + 6.10156249999290700714e+00, 6.11718749998957456171e+00, + 6.13281249999975663911e+00, 6.14843749999015098950e+00, + 6.16406250000358646446e+00, 6.17968750000026467717e+00, + 6.19531249998414246249e+00, 6.21093749998937294521e+00, + 6.22656249999281197205e+00, 6.24218750000707967018e+00, + 6.25781250000234834374e+00, 6.27343749999462829692e+00, + 6.28906250001052136156e+00, 6.30468750000171862524e+00, + 6.32031250000594013727e+00, 6.33593750000045385917e+00, + 6.35156250000499689179e+00, 6.36718749999230215764e+00, + 6.38281249999868105505e+00, 6.39843749999853628196e+00, + 
6.41406249999377564563e+00, 6.42968750000876010375e+00, + 6.44531250002396838283e+00, 6.46093750000062527761e+00, + 6.47656249999929212180e+00, 6.49218750000642064180e+00, + 6.50781249999003996720e+00, 6.52343750000912248055e+00, + 6.53906249998720845440e+00, 6.55468749999868371958e+00, + 6.57031249998638067211e+00, 6.58593750000546407364e+00, + 6.60156249994729282804e+00, 6.61718749997319211076e+00, + 6.63281249997879296387e+00, 6.64843749999244426618e+00, + 6.66406249999900524017e+00, 6.67968749999884092716e+00, + 6.69531249999227373593e+00, 6.71093749999063504674e+00, + 6.72656249999940136775e+00, 6.74218749999563193853e+00, + 6.75781249999463895506e+00, 6.77343750001427569174e+00, + 6.78906249999704858311e+00, 6.80468750000215738538e+00, + 6.82031250000341859874e+00, 6.83593749999844302323e+00, + 6.85156250001598987609e+00, 6.86718750000203925765e+00, + 6.88281250000989430760e+00, 6.89843750000604671868e+00, + 6.91406249999750777135e+00, 6.92968749999960120789e+00, + 6.94531249995244071016e+00, 6.96093750002739852789e+00, + 6.97656249999430233544e+00, 6.99218749999911892701e+00, + 7.00781250000804245559e+00, 7.02343750000080380147e+00, + 7.03906249999665778461e+00, 7.05468749999575539533e+00, + 7.07031250001700328767e+00, 7.08593750000647215614e+00, + 7.10156249997034372257e+00, 7.11718749999698641062e+00, + 7.13281250000188915550e+00, 7.14843750000192734717e+00, + 7.16406250000996358551e+00, 7.17968750005667022407e+00, + 7.19531250000950883816e+00, 7.21093749995827248966e+00, + 7.22656250000638511466e+00, 7.24218750002578115499e+00, + 7.25781249999116351290e+00, 7.27343749999614619384e+00, + 7.28906249998626343256e+00, 7.30468749998397015588e+00, + 7.32031249998488320330e+00, 7.33593749999550048813e+00, + 7.35156249998663557932e+00, 7.36718750000183675297e+00, + 7.38281249999652722238e+00, 7.39843750007829115134e+00, + 7.41406250000048494542e+00, 7.42968750000089794838e+00, + 7.44531249999165467557e+00, 7.46093749999333422096e+00, + 7.47656250000219557705e+00, 
7.49218750000104360964e+00, + 7.50781249999853983468e+00, 7.52343749999573585541e+00, + 7.53906249999752819946e+00, 7.55468749999868549594e+00, + 7.57031250024692781153e+00, 7.58593750000690736357e+00, + 7.60156249999365662973e+00, 7.61718749999451283372e+00, + 7.63281249995806998498e+00, 7.64843749997276400876e+00, + 7.66406249998022381931e+00, 7.67968750000013145041e+00, + 7.69531249999808597551e+00, 7.71093750001158539931e+00, + 7.72656249999979038989e+00, 7.74218750000620037355e+00, + 7.75781249999318234245e+00, 7.77343750001715427800e+00, + 7.78906250000142730272e+00, 7.80468749997501465288e+00, + 7.82031249999300381859e+00, 7.83593750003314948316e+00, + 7.85156249999927524641e+00, 7.86718749999776001403e+00, + 7.88281249999449951105e+00, 7.89843749999351540936e+00, + 7.91406250000050803806e+00, 7.92968750004068656523e+00, + 7.94531249999952127183e+00, 7.96093750001230571200e+00, + 7.97656249999947331020e+00, 7.99218750003934363946e+00 +}; diff --git a/usr/src/libm/src/C/_TBL_tan.c b/usr/src/libm/src/C/_TBL_tan.c new file mode 100644 index 0000000..1ad97b4 --- /dev/null +++ b/usr/src/libm/src/C/_TBL_tan.c @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_tan.c 1.9 06/01/31 SMI" + +#include "libm_protos.h" + +const double _TBL_tan_hi[] = { + 1.57534107325271622e-01, 1.61539784049521462e-01, 1.65550519273933966e-01, + 1.69566445219766521e-01, 1.73587694767981526e-01, 1.77614401477446726e-01, + 1.81646699603321415e-01, 1.85684724115634414e-01, 1.89728610718059132e-01, + 1.93778495866891859e-01, 1.97834516790238668e-01, 2.01896811507417145e-01, + 2.05965518848578860e-01, 2.10040778474558987e-01, 2.14122730896958657e-01, + 2.18211517498467428e-01, 2.22307280553431325e-01, 2.26410163248673829e-01, + 2.30520309704576154e-01, 2.34637864996423667e-01, 2.38762975176025932e-01, + 2.42895787293616550e-01, 2.47036449420041271e-01, 2.51185110669240763e-01, + 2.55341921221036272e-01, 2.63680596419996804e-01, 2.72053698658770882e-01, + 2.80462470145251386e-01, 2.88908172440514699e-01, 2.97392087269024608e-01, + 3.05915517353059274e-01, 3.14479787272571532e-01, 3.23086244351745544e-01, + 3.31736259573572778e-01, 3.40431228523830398e-01, 3.49172572365910372e-01, + 3.57961738848017019e-01, 3.66800203344323394e-01, 3.75689469931754838e-01, + 3.84631072504149241e-01, 3.93626575925632771e-01, 4.02677577225140193e-01, + 4.11785706834108478e-01, 4.20952629869475847e-01, 4.30180047464230053e-01, + 4.39469698147866239e-01, 4.48823359279239720e-01, 4.58242848534432368e-01, + 4.67730025452391784e-01, 4.77286793041252266e-01, 4.86915099448406330e-01, + 4.96616939697565651e-01, 5.06394357496229852e-01, 5.16249447117175131e-01, + 5.26184355357779188e-01, 5.36201283581215993e-01, 5.46302489843790484e-01, + 5.66767065580586427e-01, 5.87597367591443209e-01, 
6.08813740324380737e-01, + 6.30437673835884782e-01, 6.52491897928808018e-01, 6.75000485144242934e-01, + 6.97988963623599301e-01, 7.21484440990904474e-01, 7.45515740559391960e-01, + 7.70113551344208669e-01, 7.95310593568674173e-01, 8.21141801589894138e-01, + 8.47644526446552637e-01, 8.74858760554482306e-01, 9.02827387452673547e-01, + 9.31596459944072475e-01, 9.61215510494370373e-01, 9.91737898363268644e-01, +}; +const double _TBL_tan_lo[] = { +-1.10615392752930551e-17, 1.42255435911932711e-17, 1.02781342487141920e-17, +-1.04735896510580927e-17,-5.46679990560150911e-18, 1.50201543247778489e-18, + 1.22522327805930836e-17,-2.52772423968968903e-18, 9.78955701743985001e-19, + 4.61515122717816178e-18, 7.14813042382104539e-19,-1.25529909642919992e-17, + 1.19416304006222131e-17,-5.91325462642753544e-18, 7.53213214053688138e-18, + 4.77223821731568090e-18, 6.32882137760769522e-18, 8.33823681661647871e-18, +-1.25419320906151988e-17, 1.16585041935775587e-17,-1.19653634178542542e-17, +-7.22806346068389604e-18,-6.16674472236513534e-18, 4.26199277415660669e-18, +-5.58935834356478328e-18,-4.56998635843850688e-18, 1.78004627511465564e-18, + 1.74249040881549088e-17, 2.70817328270223006e-17,-1.80870634839170844e-17, +-1.00676145758650168e-17,-1.53577462986005684e-17,-2.38939880909534397e-17, +-1.08193046058071237e-17,-1.06856311222117164e-17,-1.96951245902998606e-17, +-2.08660034657941102e-17, 2.82596474303348100e-17, 2.34797942068937341e-18, +-1.76131026613802985e-17,-1.29729310968305823e-17, 1.87495311063417555e-17, +-2.29163073231136327e-18,-2.51936954463539765e-17,-4.11327516430776285e-18, + 1.50393242431203736e-18,-1.09029595007501330e-17,-6.87284752683418342e-19, + 1.55195027932634982e-17,-4.62284921534513474e-18,-5.45294879014110259e-18, +-2.56576334605328725e-17,-4.00960685506800741e-17, 1.35860113023765056e-17, +-4.34857062258506890e-17, 3.85791583096984630e-17, 2.90965762168371759e-17, + 1.90815918857458480e-17, 1.21159907937263400e-17,-1.52112721227855650e-17, 
+-1.51838757657007437e-17,-2.51352280752587451e-17,-2.66690480643161193e-17, +-4.59728584599455591e-17,-5.42439848134543255e-17, 3.56284233494755594e-17, + 3.61475127591663133e-17, 1.22197541073075113e-17,-1.61356193051149559e-17, + 1.66243632690603545e-17, 4.30578558405427098e-17,-4.43234026650131250e-17, +-1.35473813965930355e-17, 4.30118334112910435e-17, 3.62593428168003066e-17, +}; diff --git a/usr/src/libm/src/C/__cos.c b/usr/src/libm/src/C/__cos.c new file mode 100644 index 0000000..3ce23e5 --- /dev/null +++ b/usr/src/libm/src/C/__cos.c @@ -0,0 +1,126 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__cos.c 1.10 06/01/23 SMI" + +/* INDENT OFF */ +/* + * __k_cos(double x; double y) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. 
+ * + * Algorithm: see __sincos.c + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* + * |cos(x) - (1+q1*x^2+...+q4*x^8)| <= 2^-55.86 for |x| <= 0.1640625 (10.5/64) + */ +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] + +extern const double _TBL_sincos[], _TBL_sincosx[]; /* lookup tables defined outside this file */ + /* __k_cos: small |x| is handled by a polynomial in x*x, larger |x| by the _TBL_sincos tables. NOTE(review): the tail y is only read on the table path; the polynomial path ignores it. */ +double +__k_cos(double x, double y) { + double z, w, s, v, p, q; + int i, j, n, hx, ix; + + hx = ((int *)&x)[HIWORD]; /* word of x selected by HIWORD (macro not defined in this file; presumably the high-order 32 bits -- confirm in libm.h) */ + ix = hx & ~0x80000000; /* hx with bit 31 cleared, so the threshold compares below act on the magnitude */ + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) /* |x| < 2**-27 */ + if ((int)x == 0) + return (ONE); /* x truncates to 0: the result is exactly 1 */ + z = x * x; + if (ix < 0x3f800000) /* |x| < 0.008 */ + q = z * (QQ1 + z * QQ2); + else + q = z * ((Q1 + z * Q2) + (z * z) * (Q3 + z * Q4)); + return (ONE 
+ q); + } else { /* 0.164062500 < |x| < ~pi/4 */ + n = ix >> 20; /* ix shifted down 20 bits; presumably the biased exponent field -- confirm */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; /* the smallest ix on this branch (0x3fc50001) yields i = 10, so j starts at 0 */ + if (hx < 0) + v = -y - (_TBL_sincosx[j] + x); + else + v = y - (_TBL_sincosx[j] - x); /* negative x gives v = -(x + y) - _TBL_sincosx[j], otherwise v = (x + y) - _TBL_sincosx[j] */ + s = v * v; + j <<= 1; /* _TBL_sincos is read in pairs, two entries per slot */ + w = _TBL_sincos[j]; + z = _TBL_sincos[j+1]; /* presumably w and z are sin and cos at the breakpoint -- confirm in __sincos.c */ + p = s * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); + p = v + v * p; /* p now follows the sin polynomial in v; q follows the cos polynomial minus 1 */ + return (z - (w * p - z * q)); /* same value as z*(1 + q) - w*p, the angle-addition form */ + } +} diff --git a/usr/src/libm/src/C/__lgamma.c b/usr/src/libm/src/C/__lgamma.c new file mode 100644 index 0000000..656c1cd --- /dev/null +++ b/usr/src/libm/src/C/__lgamma.c @@ -0,0 +1,268 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__lgamma.c 1.9 06/01/23 SMI" + +/* + * double __k_lgamma(double x, int *signgamp); + * + * K.C. Ng, March, 1989. + * + * Part of the algorithm is based on W. Cody's lgamma function. 
+ */ + +#include "libm.h" + +static const double +one = 1.0, +zero = 0.0, +hln2pi = 0.9189385332046727417803297, /* log(2*pi)/2 */ +pi = 3.1415926535897932384626434, +two52 = 4503599627370496.0, /* 43300000,00000000 (used by sin_pi) */ +/* + * Numerator and denominator coefficients for rational minimax Approximation + * P/Q over (0.5,1.5). + */ +D1 = -5.772156649015328605195174e-1, +p7 = 4.945235359296727046734888e0, +p6 = 2.018112620856775083915565e2, +p5 = 2.290838373831346393026739e3, +p4 = 1.131967205903380828685045e4, +p3 = 2.855724635671635335736389e4, +p2 = 3.848496228443793359990269e4, +p1 = 2.637748787624195437963534e4, +p0 = 7.225813979700288197698961e3, +q7 = 6.748212550303777196073036e1, +q6 = 1.113332393857199323513008e3, +q5 = 7.738757056935398733233834e3, +q4 = 2.763987074403340708898585e4, +q3 = 5.499310206226157329794414e4, +q2 = 6.161122180066002127833352e4, +q1 = 3.635127591501940507276287e4, +q0 = 8.785536302431013170870835e3, +/* + * Numerator and denominator coefficients for rational minimax Approximation + * G/H over (1.5,4.0). + */ +D2 = 4.227843350984671393993777e-1, +g7 = 4.974607845568932035012064e0, +g6 = 5.424138599891070494101986e2, +g5 = 1.550693864978364947665077e4, +g4 = 1.847932904445632425417223e5, +g3 = 1.088204769468828767498470e6, +g2 = 3.338152967987029735917223e6, +g1 = 5.106661678927352456275255e6, +g0 = 3.074109054850539556250927e6, +h7 = 1.830328399370592604055942e2, +h6 = 7.765049321445005871323047e3, +h5 = 1.331903827966074194402448e5, +h4 = 1.136705821321969608938755e6, +h3 = 5.267964117437946917577538e6, +h2 = 1.346701454311101692290052e7, +h1 = 1.782736530353274213975932e7, +h0 = 9.533095591844353613395747e6, +/* + * Numerator and denominator coefficients for rational minimax Approximation + * U/V over (4.0,12.0). 
+ */ +D4 = 1.791759469228055000094023e0, +u7 = 1.474502166059939948905062e4, +u6 = 2.426813369486704502836312e6, +u5 = 1.214755574045093227939592e8, +u4 = 2.663432449630976949898078e9, +u3 = 2.940378956634553899906876e10, +u2 = 1.702665737765398868392998e11, +u1 = 4.926125793377430887588120e11, +u0 = 5.606251856223951465078242e11, +v7 = 2.690530175870899333379843e3, +v6 = 6.393885654300092398984238e5, +v5 = 4.135599930241388052042842e7, +v4 = 1.120872109616147941376570e9, +v3 = 1.488613728678813811542398e10, +v2 = 1.016803586272438228077304e11, +v1 = 3.417476345507377132798597e11, +v0 = 4.463158187419713286462081e11, +/* + * Coefficients for minimax approximation over (12, INF). + */ +c5 = -1.910444077728e-03, +c4 = 8.4171387781295e-04, +c3 = -5.952379913043012e-04, +c2 = 7.93650793500350248e-04, +c1 = -2.777777777777681622553e-03, +c0 = 8.333333333333333331554247e-02, +c6 = 5.7083835261e-03; + +/* + * Return sin(pi*x). We assume x is finite and negative, and if it + * is an integer, then the sign of the zero returned doesn't matter. + */ +static double +sin_pi(double x) { + double y, z; + int n; + + y = -x; /* y is the magnitude of the (assumed negative) argument */ + if (y <= 0.25) + return (__k_sin(pi * x, 0.0)); + if (y >= two52) + return (zero); /* two52 is 2^52; at or above it y is treated as an integer */ + z = floor(y); + if (y == z) + return (zero); + + /* argument reduction: set y = |x| mod 2 */ + y *= 0.5; + y = 2.0 * (y - floor(y)); + + /* now floor(y * 4) tells which octant y is in */ + n = (int)(y * 4.0); + switch (n) { + case 0: + y = __k_sin(pi * y, 0.0); + break; + case 1: + case 2: + y = __k_cos(pi * (0.5 - y), 0.0); /* sin(pi*y) = cos(pi*(0.5 - y)) */ + break; + case 3: + case 4: + y = __k_sin(pi * (1.0 - y), 0.0); /* sin(pi*y) = sin(pi*(1 - y)) */ + break; + case 5: + case 6: + y = -__k_cos(pi * (y - 1.5), 0.0); /* sin(pi*y) = -cos(pi*(y - 1.5)) */ + break; + default: + y = __k_sin(pi * (y - 2.0), 0.0); /* sin(pi*y) = sin(pi*(y - 2)) */ + break; + } + return (-y); /* y was computed for the magnitude; negate for the negative argument */ +} + +static double +neg(double z, int *signgamp) { + double t, p; + + /* + * written by K.C. Ng, Feb 2, 1989. 
+ * + * Since + * -z*G(-z)*G(z) = pi/sin(pi*z), + * we have + * G(-z) = -pi/(sin(pi*z)*G(z)*z) + * = pi/(sin(pi*(-z))*G(z)*z) + * Algorithm + * z = |z| + * t = sin_pi(z); ...note that when z>2**52, z is an int + * and hence t=0. + * + * if(t==0.0) return 1.0/0.0; + * if(t< 0.0) *signgamp = -1; else t= -t; + * if(z+1.0==1.0) ...tiny z + * return -log(z); + * else + * return log(pi/(t*z))-__k_lgamma(z, signgamp); + */ + + t = sin_pi(z); /* t := sin(pi*z) */ + if (t == zero) /* return 1.0/0.0 = +INF */ + return (one / fabs(t)); + z = -z; /* from here on z is the magnitude of the caller's argument */ + p = z + one; + if (p == one) + p = -log(z); + else + p = log(pi / (fabs(t) * z)) - __k_lgamma(z, signgamp); + if (t < zero) + *signgamp = -1; /* written after the __k_lgamma call above, which stores 1 into *signgamp */ + return (p); +} + +double +__k_lgamma(double x, int *signgamp) { + double t, p, q, cr, y; + + /* purge off +-inf, NaN and negative arguments */ + if (!finite(x)) + return (x * x); /* NOTE(review): *signgamp is left untouched on this path */ + *signgamp = 1; + if (signbit(x)) + return (neg(x, signgamp)); + + /* lgamma(x) ~ log(1/x) for really tiny x */ + t = one + x; + if (t == one) { + if (x == zero) + return (one / x); /* sign-bit arguments already went to neg(), so this divides by +0 */ + return (-log(x)); + } + + /* for tiny < x < inf */ + if (x <= 1.5) { + if (x < 0.6796875) { + cr = -log(x); /* correction term: the fit below is evaluated as if for x + 1 */ + y = x; + } else { + cr = zero; + y = x - one; + } + + if (x <= 0.5 || x >= 0.6796875) { + if (x == one) + return (zero); + p = p0+y*(p1+y*(p2+y*(p3+y*(p4+y*(p5+y*(p6+y*p7)))))); + q = q0+y*(q1+y*(q2+y*(q3+y*(q4+y*(q5+y*(q6+y* + (q7+y))))))); + return (cr+y*(D1+y*(p/q))); + } else { + y = x - one; /* 0.5 < x < 0.6796875: same as (x + 1) - 2, the offset used by the G/H branch below */ + p = g0+y*(g1+y*(g2+y*(g3+y*(g4+y*(g5+y*(g6+y*g7)))))); + q = h0+y*(h1+y*(h2+y*(h3+y*(h4+y*(h5+y*(h6+y* + (h7+y))))))); + return (cr+y*(D2+y*(p/q))); + } + } else if (x <= 4.0) { + if (x == 2.0) + return (zero); + y = x - 2.0; + p = g0+y*(g1+y*(g2+y*(g3+y*(g4+y*(g5+y*(g6+y*g7)))))); + q = h0+y*(h1+y*(h2+y*(h3+y*(h4+y*(h5+y*(h6+y*(h7+y))))))); + return (y*(D2+y*(p/q))); + } else if (x <= 12.0) { + y = x - 4.0; + p = u0+y*(u1+y*(u2+y*(u3+y*(u4+y*(u5+y*(u6+y*u7)))))); + q = v0+y*(v1+y*(v2+y*(v3+y*(v4+y*(v5+y*(v6+y*(v7-y))))))); /* NOTE(review): the innermost term is (v7-y), unlike (q7+y) and (h7+y) above -- confirm against the reference coefficients */ + return 
(D4+y*(p/q)); + } else if (x <= 1.0e17) { /* x ~< 2**(prec+3) */ + t = one / x; + y = t * t; + p = hln2pi+t*(c0+y*(c1+y*(c2+y*(c3+y*(c4+y*(c5+y*c6)))))); + q = log(x); + return (x*(q-one)-(0.5*q-p)); /* algebraically (x - 0.5)*log(x) - x + p */ + } else { /* may overflow */ + return (x * (log(x) - 1.0)); + } +} diff --git a/usr/src/libm/src/C/__libx_errno.c b/usr/src/libm/src/C/__libx_errno.c new file mode 100644 index 0000000..41d81db --- /dev/null +++ b/usr/src/libm/src/C/__libx_errno.c @@ -0,0 +1,33 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__libx_errno.c 1.14 06/01/25 SMI" + +extern int *___errno(void); + +int * +__libm_errno(void) { + return (___errno()); /* forwards the pointer returned by ___errno() unchanged */ +} diff --git a/usr/src/libm/src/C/__rem_pio2.c b/usr/src/libm/src/C/__rem_pio2.c new file mode 100644 index 0000000..2862e74 --- /dev/null +++ b/usr/src/libm/src/C/__rem_pio2.c @@ -0,0 +1,167 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__rem_pio2.c 1.13 06/01/23 SMI" + +/* + * __rem_pio2(x, y) passes back a better-than-double-precision + * approximation to x mod pi/2 in y[0]+y[1] and returns an integer + * congruent mod 8 to the integer part of x/(pi/2). + * + * This implementation tacitly assumes that x is finite and at + * least about pi/4 in magnitude. 
+ */ + +#include "libm.h" + +extern const int _TBL_ipio2_inf[]; + +/* INDENT OFF */ +/* + * invpio2: 53 bits of 2/pi + * pio2_1: first 33 bits of pi/2 + * pio2_1t: pi/2 - pio2_1 + * pio2_2: second 33 bits of pi/2 + * pio2_2t: pi/2 - (pio2_1+pio2_2) + * pio2_3: third 33 bits of pi/2 + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) + */ +static const double + half = 0.5, + invpio2 = 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + pio2_1 = 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + pio2_1t = 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ + pio2_2 = 6.077100506303965976596e-11, /* 2^-34 * 1.0B4611A600000 */ + pio2_2t = 2.022266248795950732400e-21, /* 2^-69 * 1.3198A2E037073 */ + pio2_3 = 2.022266248711166455796e-21, /* 2^-69 * 1.3198A2E000000 */ + pio2_3t = 8.478427660368899643959e-32; /* 2^-104 * 1.B839A252049C1 */ +/* INDENT ON */ + +int +__rem_pio2(double x, double *y) { + double w, t, r, fn; + double tx[3]; + int e0, i, j, nx, n, ix, hx, lx; + + hx = ((int *)&x)[HIWORD]; /* word of x selected by HIWORD (defined outside this file; presumably the high-order 32 bits -- confirm) */ + ix = hx & 0x7fffffff; /* hx with the top bit masked off */ + + if (ix < 0x4002d97c) { + /* |x| < 3pi/4, special case with n=1 */ + t = fabs(x) - pio2_1; + if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */ + y[0] = t - pio2_1t; + y[1] = (t - y[0]) - pio2_1t; + } else { /* near pi/2, use 33+33+53 bit pi */ + t -= pio2_2; + y[0] = t - pio2_2t; + y[1] = (t - y[0]) - pio2_2t; + } + if (hx < 0) { + y[0] = -y[0]; + y[1] = -y[1]; + return (-1); + } + return (1); + } + + if (ix <= 0x413921fb) { + /* |x| <= 2^19 pi */ + t = fabs(x); + n = (int)(t * invpio2 + half); /* adds 0.5 before truncating: nearest multiple of pi/2 */ + fn = (double)n; + r = t - fn * pio2_1; + j = ix >> 20; + w = fn * pio2_1t; /* 1st round good to 85 bit */ + y[0] = r - w; + i = j - ((((int *)y)[HIWORD] >> 20) & 0x7ff); /* j minus the same 11-bit field taken from y[0]; presumably the exponent drop caused by cancellation -- confirm */ + if (i > 16) { /* 2nd iteration (rare) */ + /* 2nd round good to 118 bit */ + if (i < 35) { + t = r; /* r-fn*pio2_2 may not be exact */ + w = fn * pio2_2; + r = t - w; + w = fn * pio2_2t - ((t - r) - w); + y[0] = r - w; + } else { + r -= fn * pio2_2; + w = fn * pio2_2t; + y[0] = r - w; + i = j - 
((((int *)y)[HIWORD] >> 20) & 0x7ff); + if (i > 49) { + /* 3rd iteration (extremely rare) */ + if (i < 68) { + t = r; + w = fn * pio2_3; + r = t - w; + w = fn * pio2_3t - + ((t - r) - w); + y[0] = r - w; + } else { + /* + * 3rd round good to 151 bits; + * covered all possible cases + */ + r -= fn * pio2_3; + w = fn * pio2_3t; + y[0] = r - w; + } + } + } + } + y[1] = (r - y[0]) - w; /* tail: what was lost when r - w was rounded into y[0] */ + if (hx < 0) { + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); + } + + e0 = (ix >> 20) - 1046; /* e0 = ilogb(x)-23; */ + + /* break x into three 24 bit pieces */ + lx = ((int *)&x)[LOWORD]; + i = (lx & 0x1f) << 19; + tx[2] = (double)i; + j = (lx >> 5) & 0xffffff; + tx[1] = (double)j; + tx[0] = (double)((((ix & 0xfffff) | 0x100000) << 3) | + ((unsigned)lx >> 29)); + nx = 3; + if (i == 0) { + /* skip zero term */ + nx--; + if (j == 0) + nx--; + } + n = __rem_pio2m(tx, y, e0, nx, 2, _TBL_ipio2_inf); /* prec argument is 2; the pieces in tx[] were built from ix, so they carry no sign */ + if (hx < 0) { + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); +} diff --git a/usr/src/libm/src/C/__rem_pio2m.c b/usr/src/libm/src/C/__rem_pio2m.c new file mode 100644 index 0000000..55d7aaf --- /dev/null +++ b/usr/src/libm/src/C/__rem_pio2m.c @@ -0,0 +1,362 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__rem_pio2m.c 1.19 06/01/23 SMI" + +/* + * int __rem_pio2m(x,y,e0,nx,prec,ipio2) + * double x[],y[]; int e0,nx,prec; const int ipio2[]; + * + * __rem_pio2m returns the last three digits of N with + * y = x - N*pi/2 + * so that |y| < pi/4. + * + * The method is to compute the integer (mod 8) and fraction parts of + * (2/pi)*x without doing the full multiplication. In general we + * skip the parts of the product that are known to be a huge integer ( + * more accurately, = 0 mod 8 ). Thus the number of operations is + * independent of the exponent of the input. + * + * (2/PI) is represented by an array of 24-bit integers in ipio2[]. + * Here PI could as well be a machine value pi. + * + * Input parameters: + * x[] The input value (must be positive) is broken into nx + * pieces of 24-bit integers in double precision format. + * x[i] will be the i-th 24 bit of x. The scaled exponent + * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 + * matches x up to 24 bits). + * + * Example of breaking a double z into x[0]+x[1]+x[2]: + * e0 = ilogb(z)-23 + * z = scalbn(z,-e0) + * for i = 0,1,2 + * x[i] = floor(z) + * z = (z-x[i])*2**24 + * + * + * y[] output result in an array of double precision numbers. + * The dimension of y[] is: + * 24-bit precision 1 + * 53-bit precision 2 + * 64-bit precision 2 + * 113-bit precision 3 + * The actual value is the sum of them. 
Thus for 113-bit + * precision, one may have to do something like: + * + * long double t,w,r_head, r_tail; + * t = (long double)y[2] + (long double)y[1]; + * w = (long double)y[0]; + * r_head = t+w; + * r_tail = w - (r_head - t); + * + * e0 The exponent of x[0] + * + * nx dimension of x[] + * + * prec an integer indicating the precision: + * 0 24 bits (single) + * 1 53 bits (double) + * 2 64 bits (extended) + * 3 113 bits (quad) + * + * ipio2[] + * integer array, contains the (24*i)-th to (24*i+23)-th + * bit of 2/pi or 2/PI after binary point. The corresponding + * floating value is + * + * ipio2[i] * 2^(-24(i+1)). + * + * External function: + * double scalbn( ), floor( ); + * + * + * Here is the description of some local variables: + * + * jk jk+1 is the initial number of terms of ipio2[] needed + * in the computation. The recommended value is 3,4,4, + * 6 for single, double, extended, and quad. + * + * jz local integer variable indicating the number of + * terms of ipio2[] used. + * + * jx nx - 1 + * + * jv index for pointing to the suitable ipio2[] for the + * computation. In general, we want + * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 + * to be an integer. Thus + * e0-3-24*jv >= 0 or (e0-3)/24 >= jv + * Hence jv = max(0,(e0-3)/24). + * + * jp jp+1 is the number of terms in pio2[] needed, jp = jk. + * + * q[] double array with integral value, representing the + * 24-bits chunk of the product of x and 2/pi. + * + * q0 the corresponding exponent of q[0]. Note that the + * exponent for q[i] would be q0-24*i. + * + * pio2[] double precision array, obtained by cutting pi/2 + * into 24 bits chunks. + * + * f[] ipio2[] in floating point + * + * iq[] integer array by breaking up q[] in 24-bits chunk. + * + * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] + * + * ih integer. If >0 it indicates q[] is >= 0.5, hence + * it also indicates the *sign* of the result. 
+ * + */ + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const int init_jk[] = { 3, 4, 4, 6 }; /* initial value for jk */ + +static const double pio2[] = { + 1.57079625129699707031e+00, + 7.54978941586159635335e-08, + 5.39030252995776476554e-15, + 3.28200341580791294123e-22, + 1.27065575308067607349e-29, + 1.22933308981111328932e-36, + 2.73370053816464559624e-44, + 2.16741683877804819444e-51, +}; + +static const double + zero = 0.0, + one = 1.0, + half = 0.5, + eight = 8.0, + eighth = 0.125, + two24 = 16777216.0, /* 2^24 */ + twon24 = 5.960464477539062500E-8; /* 2^-24 */ + +int +__rem_pio2m(double *x, double *y, int e0, int nx, int prec, const int *ipio2) +{ + int jz, jx, jv, jp, jk, carry, n, iq[20]; + int i, j, k, m, q0, ih; + double z, fw, f[20], fq[20], q[20]; +#if defined(__i386) && !defined(__amd64) + int rp; + + rp = __swapRP(fp_extended); /* previous setting is kept in rp and handed back to __swapRP before returning */ +#endif + + /* initialize jk */ + jp = jk = init_jk[prec]; /* NOTE(review): prec indexes the 4-entry init_jk[] without a range check */ + + /* determine jx,jv,q0, note that 3>q0 */ + jx = nx - 1; + jv = (e0 - 3) / 24; + if (jv < 0) + jv = 0; + q0 = e0 - 24 * (jv + 1); + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + j = jv - jx; + m = jx + jk; + for (i = 0; i <= m; i++, j++) + f[i] = (j < 0)? 
zero : (double)ipio2[j]; /* slots that would fall before ipio2[0] are filled with zero */ + + /* compute q[0],q[1],...q[jk] */ + for (i = 0; i <= jk; i++) { + for (j = 0, fw = zero; j <= jx; j++) + fw += x[j] * f[jx+i-j]; + q[i] = fw; + } + + jz = jk; +recompute: + /* distill q[] into iq[] reversingly */ + for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { + fw = (double)((int)(twon24 * z)); + iq[i] = (int)(z - two24 * fw); /* what is left of z below 2^24; fw carries the rest into the next term */ + z = q[j-1] + fw; + } + + /* compute n */ + z = scalbn(z, q0); /* actual value of z */ + z -= eight * floor(z * eighth); /* trim off integer >= 8 */ + n = (int)z; + z -= (double)n; + ih = 0; + if (q0 > 0) { /* need iq[jz-1] to determine n */ + i = (iq[jz-1] >> (24 - q0)); + n += i; + iq[jz-1] -= i << (24 - q0); + ih = iq[jz-1] >> (23 - q0); + } else if (q0 == 0) { + ih = iq[jz-1] >> 23; + } else if (z >= half) { + ih = 2; + } + + if (ih > 0) { /* q > 0.5 */ + n += 1; + carry = 0; + for (i = 0; i < jz; i++) { /* compute 1-q */ + j = iq[i]; + if (carry == 0) { + if (j != 0) { + carry = 1; + iq[i] = 0x1000000 - j; + } + } else { + iq[i] = 0xffffff - j; + } + } + if (q0 > 0) { /* rare case: chance is 1 in 12 */ + switch (q0) { + case 1: + iq[jz-1] &= 0x7fffff; + break; + case 2: + iq[jz-1] &= 0x3fffff; + break; + } + } + if (ih == 2) { + z = one - z; + if (carry != 0) + z -= scalbn(one, q0); + } + } + + /* check if recomputation is needed */ + if (z == zero) { + j = 0; + for (i = jz - 1; i >= jk; i--) + j |= iq[i]; + if (j == 0) { /* need recomputation */ + /* set k to no. 
of terms needed */ + for (k = 1; iq[jk-k] == 0; k++) + ; /* NOTE(review): the scan has no lower bound on jk-k; it relies on a nonzero iq[] entry existing below index jk */ + + /* add q[jz+1] to q[jz+k] */ + for (i = jz + 1; i <= jz + k; i++) { + f[jx+i] = (double)ipio2[jv+i]; /* NOTE(review): f[], q[] and iq[] hold 20 entries each; the growth of jz is not range-checked here */ + for (j = 0, fw = zero; j <= jx; j++) + fw += x[j] * f[jx+i-j]; + q[i] = fw; + } + jz += k; + goto recompute; + } + } + + /* cut out zero terms */ + if (z == zero) { + jz -= 1; + q0 -= 24; + while (iq[jz] == 0) { + jz--; + q0 -= 24; + } + } else { /* break z into 24-bit if necessary */ + z = scalbn(z, -q0); + if (z >= two24) { + fw = (double)((int)(twon24 * z)); + iq[jz] = (int)(z - two24 * fw); + jz += 1; + q0 += 24; + iq[jz] = (int)fw; + } else { + iq[jz] = (int)z; + } + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(one, q0); + for (i = jz; i >= 0; i--) { + q[i] = fw * (double)iq[i]; + fw *= twon24; + } + + /* compute pio2[0,...,jp]*q[jz,...,0] */ + for (i = jz; i >= 0; i--) { + for (fw = zero, k = 0; k <= jp && k <= jz - i; k++) + fw += pio2[k] * q[i+k]; + fq[jz-i] = fw; + } + + /* compress fq[] into y[] */ + switch (prec) { + case 0: + fw = zero; + for (i = jz; i >= 0; i--) + fw += fq[i]; + y[0] = (ih == 0)? fw : -fw; + break; + + case 1: + case 2: + fw = zero; + for (i = jz; i >= 0; i--) + fw += fq[i]; + y[0] = (ih == 0)? fw : -fw; + fw = fq[0] - fw; /* restart from the rounding residue of the first sum */ + for (i = 1; i <= jz; i++) + fw += fq[i]; + y[1] = (ih == 0)? 
fw : -fw; + break; + + default: /* reached for prec 3 and for any value other than 0, 1, 2; writes y[0], y[1] and y[2] */ + for (i = jz; i > 0; i--) { + fw = fq[i-1] + fq[i]; + fq[i] += fq[i-1] - fw; + fq[i-1] = fw; + } + for (i = jz; i > 1; i--) { + fw = fq[i-1] + fq[i]; + fq[i] += fq[i-1] - fw; + fq[i-1] = fw; + } + for (fw = zero, i = jz; i >= 2; i--) + fw += fq[i]; + if (ih == 0) { + y[0] = fq[0]; + y[1] = fq[1]; + y[2] = fw; + } else { + y[0] = -fq[0]; + y[1] = -fq[1]; + y[2] = -fw; + } + } + +#if defined(__i386) && !defined(__amd64) + (void) __swapRP(rp); +#endif + return (n & 7); /* only the low three bits of n are reported */ +} diff --git a/usr/src/libm/src/C/__sin.c b/usr/src/libm/src/C/__sin.c new file mode 100644 index 0000000..50fe96d --- /dev/null +++ b/usr/src/libm/src/C/__sin.c @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__sin.c 1.10 06/01/23 SMI" + +/* INDENT OFF */ +/* + * __k_sin( double x; double y ) + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. 
+ * + * Algorithm: see __sincos.c + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* + * |cos(x) - (1+q1*x^2+...+q4*x^8)| <= 2^-55.86 for |x| <= 0.1640625 (10.5/64) + */ +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +__k_sin(double x, double y) { + double z, w, s, v, p, q; + int i, j, n, hx, ix; + + hx = ((int *)&x)[HIWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) /* |x| < 2**-27 */ + if ((int)x == 0) + return (x + y); + z = x * x; + if (ix < 0x3f800000) /* |x| < 0.008 */ + p = (x * z) * (PP1 + z * PP2) + y; + else + p = (x * z) * ((P1 + z * P2) + (z * z) * (P3 + + z * 
P4)) + y; + return (x + p); + } else { /* 0.164062500 < |x| < ~pi/4 */ + n = ix >> 20; + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + if (hx < 0) + v = -y - (_TBL_sincosx[j] + x); + else + v = y - (_TBL_sincosx[j] - x); + s = v * v; + j <<= 1; + w = _TBL_sincos[j]; + z = _TBL_sincos[j+1]; + p = s * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); + p = v + v * p; + s = w * q + z * p; + return ((hx >= 0)? w + s : -(w + s)); + } +} diff --git a/usr/src/libm/src/C/__sincos.c b/usr/src/libm/src/C/__sincos.c new file mode 100644 index 0000000..d6ced7e --- /dev/null +++ b/usr/src/libm/src/C/__sincos.c @@ -0,0 +1,163 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__sincos.c 1.15 06/01/23 SMI" + +/* INDENT OFF */ +/* + * double __k_sincos(double x, double y, double *c); + * kernel sincos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * return sin(x) with *c = cos(x) + * + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. + * + * 1. 
Reduce x to x>0 by sin(-x)=-sin(x),cos(-x)=cos(x). + * 2. For 0<= x < pi/4, let i = (64*x chopped)-10. Let d = x - a[i], where + * a[i] is a double that is close to (i+10.5)/64 and such that + * sin(a[i]) and cos(a[i]) is close to a double (with error less + * than 2**-8 ulp). Then + * cos(x) = cos(a[i]+d) = cos(a[i])cos(d) - sin(a[i])*sin(d) + * = TBL_cos_a[i]*(1+QQ1*d^2+QQ2*d^4) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_cos_a[i] + (TBL_cos_a[i]*d^2*(QQ1+QQ2*d^2) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5)) + * sin(x) = sin(a[i]+d) = sin(a[i])cos(d) + cos(a[i])*sin(d) + * = TBL_sin_a[i]*(1+QQ1*d^2+QQ2*d^4) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_sin_a[i] + (TBL_sin_a[i]*d^2*(QQ1+QQ2*d^2) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5)) + * + * For |y| less than 10.5/64 = 0.1640625, use + * sin(y) = y + y^3*(p1+y^2*(p2+y^2*(p3+y^2*p4))) + * cos(y) = 1 + y^2*(q1+y^2*(q2+y^2*(q3+y^2*q4))) + * + * For |y| less than 0.008, use + * sin(y) = y + y^3*(pp1+y^2*pp2) + * cos(y) = 1 + y^2*(qq1+y^2*qq2) + * + * Accuracy: + * TRIG(x) returns trig(x) nearly rounded (less than 1 ulp) + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* + * |cos(x) 
- (1+q1*x^2+...+q4*x^8)| <= 2^-55.86 for |x| <= 0.1640625 (10.5/64) + */ +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +double +__k_sincos(double x, double y, double *c) { + double z, w, s, v, p, q; + int i, j, n, hx, ix; + + hx = ((int *)&x)[HIWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) { /* |x| < 2**-27 */ + if ((int)x == 0) + *c = ONE; + return (x + y); + } else { + z = x * x; + if (ix < 0x3f800000) { /* |x| < 0.008 */ + q = z * (QQ1 + z * QQ2); + p = (x * z) * (PP1 + z * PP2) + y; + } else { + q = z * ((Q1 + z * Q2) + (z * z) * (Q3 + + z * Q4)); + p = (x * z) * ((P1 + z * P2) + (z * z) * (P3 + + z * P4)) + y; + } + *c = ONE + q; + return (x + p); + } + } else { /* 0.164062500 < |x| < ~pi/4 */ + n = ix >> 20; + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + if (hx < 0) + v = -y - (_TBL_sincosx[j] + x); + else + v = y - (_TBL_sincosx[j] - x); + s = v * v; + j <<= 1; + w = _TBL_sincos[j]; + z = _TBL_sincos[j+1]; + p = s * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); + p = v + v * p; + *c = z - (w * p - z * q); + s = w * q + z * p; + return ((hx >= 0)? 
w + s : -(w + s)); + } +} diff --git a/usr/src/libm/src/C/__tan.c b/usr/src/libm/src/C/__tan.c new file mode 100644 index 0000000..d9697e2 --- /dev/null +++ b/usr/src/libm/src/C/__tan.c @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__tan.c 1.15 06/01/31 SMI" + +/* INDENT OFF */ +/* + * __k_tan( double x; double y; int k ) + * kernel tan/cotan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input k indicate -- tan if k=0; else -1/tan + * + * Table look up algorithm + * 1. by tan(-x) = -tan(x), need only to consider positive x + * 2. if x < 5/32 = [0x3fc40000, 0] = 0.15625 , then + * if x < 2^-27 (hx < 0x3e400000 0), set w=x with inexact if x!= 0 + * else + * z = x*x; + * w = x + (y+(x*z)*(t1+z*(t2+z*(t3+z*(t4+z*(t5+z*t6)))))) + * return (k==0)? w: 1/w; + * 3. 
else + * ht = (hx + 0x4000)&0x7fff8000 (round x to a break point t) + * lt = 0 + * i = (hy-0x3fc40000)>>15; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7) + * By + * tan(t+x') + * = (tan(t)+tan(x'))/(1-tan(x')tan(t)) + * We have + * sin(x')+tan(t)*(tan(t)*sin(x')) + * = tan(t) + ------------------------------- for k=0 + * cos(x') - tan(t)*sin(x') + * + * cos(x') - tan(t)*sin(x') + * = - -------------------------------------- for k=1 + * tan(t) + tan(t)*(cos(x')-1) + sin(x') + * + * + * where tan(t) is from the table, + * sin(x') = x + pp1*x^3 + pp2*x^5 + * cos(x') = 1 + qq1*x^2 + qq2*x^4 + */ + +#include "libm.h" + +extern const double _TBL_tan_hi[], _TBL_tan_lo[]; +static const double q[] = { +/* one = */ 1.0, +/* + * 2 2 -59.56 + * |sin(x) - pp1*x*(pp2+x *(pp3+x )| <= 2 for |x|<1/64 + */ +/* pp1 = */ 8.33326120969096230395312119298978359438478946686e-0003, +/* pp2 = */ 1.20001038589438965215025680596868692381425944526e+0002, +/* pp3 = */ -2.00001730975089451192161504877731204032897949219e+0001, + +/* + * 2 2 -56.19 + * |cos(x) - (1+qq1*x (qq2+x ))| <= 2 for |x|<=1/128 + */ +/* qq1 = */ 4.16665486385721928197511942926212213933467864990e-0002, +/* qq2 = */ -1.20000339921340035687080671777948737144470214844e+0001, + +/* + * |tan(x) - PF(x)| + * |--------------| <= 2^-58.57 for |x|<0.15625 + * | x | + * + * where (let z = x*x) + * PF(x) = x + (t1*x*z)(t2 + z(t3 + z))(t4 + z)(t5 + z(t6 + z)) + */ +/* t1 = */ 3.71923358986516816929168705030406272271648049355e-0003, +/* t2 = */ 6.02645120354857866118436504621058702468872070312e+0000, +/* t3 = */ 2.42627327587398156083509093150496482849121093750e+0000, +/* t4 = */ 2.44968983934252770851003333518747240304946899414e+0000, +/* t5 = */ 6.07089252571767978849948121933266520500183105469e+0000, +/* t6 = */ -2.49403756995593761658369658107403665781021118164e+0000, +}; + + +#define one q[0] +#define pp1 q[1] +#define pp2 q[2] +#define pp3 q[3] +#define qq1 q[4] +#define qq2 q[5] +#define t1 q[6] +#define t2 q[7] +#define t3 q[8] 
+#define t4 q[9] +#define t5 q[10] +#define t6 q[11] + +/* INDENT ON */ + + +double +__k_tan(double x, double y, int k) { + double a, t, z, w, s, c, r, rh, xh, xl; + int i, j, hx, ix; + + t = one; + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix < 0x3fc40000) { + if (ix < 0x3e400000) { + if ((i = (int) x) == 0) /* generate inexact */ + w = x; + t = y; + } else { + z = x * x; + t = y + (((t1 * x) * z) * (t2 + z * (t3 + z))) * + ((t4 + z) * (t5 + z * (t6 + z))); + w = x + t; + } + if (k == 0) + return (w); + /* + * Compute -1/(x+T) with great care + * Let r = -1/(x+T), rh = r chopped to 20 bits. + * Also let xh = x+T chopped to 20 bits, xl = (x-xh)+T. Then + * -1/(x+T) = rh + (-1/(x+T)-rh) = rh + r*(1+rh*(x+T)) + * = rh + r*((1+rh*xh)+rh*xl). + */ + rh = r = -one / w; + ((int *) &rh)[LOWORD] = 0; + xh = w; + ((int *) &xh)[LOWORD] = 0; + xl = (x - xh) + t; + return (rh + r * ((one + rh * xh) + rh * xl)); + } + j = (ix + 0x4000) & 0x7fff8000; + i = (j - 0x3fc40000) >> 15; + ((int *) &t)[HIWORD] = j; + if (hx > 0) + x = y - (t - x); + else + x = -y - (t + x); + a = _TBL_tan_hi[i]; + z = x * x; + s = (pp1 * x) * (pp2 + z * (pp3 + z)); /* sin(x) */ + t = (qq1 * z) * (qq2 + z); /* cos(x) - 1 */ + if (k == 0) { + w = a * s; + t = _TBL_tan_lo[i] + (s + a * w) / (one - (w - t)); + return (hx < 0 ? -a - t : a + t); + } else { + w = s + a * t; + c = w + _TBL_tan_lo[i]; + t = a * s - t; + /* + * Now try to compute [(1-T)/(a+c)] accurately + * + * Let r = 1/(a+c), rh = (1-T)*r chopped to 20 bits. + * Also let xh = a+c chopped to 20 bits, xl = (a-xh)+c. Then + * (1-T)/(a+c) = rh + ((1-T)/(a+c)-rh) + * = rh + r*(1-T-rh*(a+c)) + * = rh + r*((1-T-rh*xh)-rh*xl) + * = rh + r*(((1-rh*xh)-T)-rh*xl) + */ + r = one / (a + c); + rh = (one - t) * r; + ((int *) &rh)[LOWORD] = 0; + xh = a + c; + ((int *) &xh)[LOWORD] = 0; + xl = (a - xh) + c; + z = rh + r * (((one - rh * xh) - t) - rh * xl); + return (hx >= 0 ? 
-z : z); + } +} diff --git a/usr/src/libm/src/C/__xpg6.c b/usr/src/libm/src/C/__xpg6.c new file mode 100644 index 0000000..5cc8f69 --- /dev/null +++ b/usr/src/libm/src/C/__xpg6.c @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__xpg6.c 1.3 06/01/31 SMI (lib/libc/port/gen/xpg6.c 1.1 02/09/24)" + +/*LINTLIBRARY*/ + +/* + * See /ws/unix200x-gate/usr/src/lib/libc/port/gen/xpg6.c for libc default. + * __xpg6 (C99/SUSv3) is first included in Solaris 10 libc and libm + * as well as the K2 (S1S8) libsunmath and libmopt. + * + * The default setting, _C99SUSv3_mode_OFF, means to retain current Solaris + * behavior which is NOT C99/SUSv3 compliant. This is normal. These libraries + * determine which standard to use based on how applications are built. These + * libraries at runtime determine which behavior to choose based on the value + * of __xpg6. By default they retain their original Solaris behavior. + * + * __xpg6 is used to control certain behaviors between the C99 standard, the + * SUSv3 standard, and Solaris. 
More explanation in lib/libc/inc/xpg6.h. + * The XPG6 C compiler utility (c99) will add an object file that contains + * an alternate definition for __xpg6. The symbol interposition provided + * by the linker will allow these libraries to find that symbol instead. + * + * Possible settings are available and documented in lib/libc/inc/xpg6.h. + */ + +#include "xpg6.h" + +unsigned int __xpg6 = _C99SUSv3_mode_OFF; diff --git a/usr/src/libm/src/C/_lib_version.c b/usr/src/libm/src/C/_lib_version.c new file mode 100644 index 0000000..3ee4d60 --- /dev/null +++ b/usr/src/libm/src/C/_lib_version.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_lib_version.c 1.8 06/01/31 SMI" + +/* + * values-{X,x}?.o should define + initialize an *actual* symbol _lib_version. 
+ */ + +#include + +#pragma weak _lib_version = __libm_lib_version + +const enum version __libm_lib_version = libm_ieee; diff --git a/usr/src/libm/src/C/acos.c b/usr/src/libm/src/C/acos.c new file mode 100644 index 0000000..7f7d18d --- /dev/null +++ b/usr/src/libm/src/C/acos.c @@ -0,0 +1,162 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)acos.c 1.18 06/01/31 SMI" + +#pragma weak acos = __acos + +/* INDENT OFF */ +/* acos(x) + * Method : + * acos(x) = pi/2 - asin(x) + * acos(-x) = pi/2 + asin(x) + * For |x|<=0.5 + * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) + * For x>0.5 + * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) + * = 2asin(sqrt((1-x)/2)) + * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) + * = 2f + (2c + 2s*z*R(z)) + * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term + * for f so that f+c ~ sqrt(z). + * For x<-0.5 + * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) + * = pi - 2*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. 
+ * + * Function needed: sqrt + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __acos, __sqrt, __isnan */ +#include "libm_protos.h" /* _SVID_libm_error */ +#include + +/* INDENT OFF */ +static const double xxx[] = { +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* pi */ 3.14159265358979311600e+00, /* 400921FB, 54442D18 */ +/* pio2_hi */ 1.57079632679489655800e+00, /* 3FF921FB, 54442D18 */ +/* pio2_lo */ 6.12323399573676603587e-17, /* 3C91A626, 33145C07 */ +/* pS0 */ 1.66666666666666657415e-01, /* 3FC55555, 55555555 */ +/* pS1 */ -3.25565818622400915405e-01, /* BFD4D612, 03EB6F7D */ +/* pS2 */ 2.01212532134862925881e-01, /* 3FC9C155, 0E884455 */ +/* pS3 */ -4.00555345006794114027e-02, /* BFA48228, B5688F3B */ +/* pS4 */ 7.91534994289814532176e-04, /* 3F49EFE0, 7501B288 */ +/* pS5 */ 3.47933107596021167570e-05, /* 3F023DE1, 0DFDF709 */ +/* qS1 */ -2.40339491173441421878e+00, /* C0033A27, 1C8A2D4B */ +/* qS2 */ 2.02094576023350569471e+00, /* 40002AE5, 9C598AC8 */ +/* qS3 */ -6.88283971605453293030e-01, /* BFE6066C, 1B8D0159 */ +/* qS4 */ 7.70381505559019352791e-02 /* 3FB3B8C5, B12E9282 */ +}; +#define one xxx[0] +#define pi xxx[1] +#define pio2_hi xxx[2] +#define pio2_lo xxx[3] +#define pS0 xxx[4] +#define pS1 xxx[5] +#define pS2 xxx[6] +#define pS3 xxx[7] +#define pS4 xxx[8] +#define pS5 xxx[9] +#define qS1 xxx[10] +#define qS2 xxx[11] +#define qS3 xxx[12] +#define qS4 xxx[13] +/* INDENT ON */ + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif + +double +acos(double x) { + double z, p, q, r, w, s, c, df; + int hx, ix; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x3ff00000) { /* |x| >= 1 */ + if (((ix - 0x3ff00000) | ((int *) &x)[LOWORD]) == 0) { + /* |x| == 1 */ + if (hx > 0) /* acos(1) = 0 */ + return 0.0; + else /* acos(-1) = pi */ + return pi + 2.0 * pio2_lo; + } + else if (isnan(x)) +#if 
defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ix >= 0x7ff80000 ? x : (x - x) / (x - x); + /* assumes sparc-like QNaN */ +#else + return (x - x) / (x - x); /* acos(|x|>1) is NaN */ +#endif + else + return _SVID_libm_err(x, x, 1); + } + if (ix < 0x3fe00000) { /* |x| < 0.5 */ + if (ix <= 0x3c600000) + return pio2_hi + pio2_lo; /* if |x| < 2**-57 */ + z = x * x; + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + + z * (pS4 + z * pS5))))); + q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); + r = p / q; + return pio2_hi - (x - (pio2_lo - x * r)); + } + else if (hx < 0) { /* x < -0.5 */ + z = (one + x) * 0.5; + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + + z * (pS4 + z * pS5))))); + q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); + s = sqrt(z); + r = p / q; + w = r * s - pio2_lo; + return pi - 2.0 * (s + w); + } + else { /* x > 0.5 */ + z = (one - x) * 0.5; + s = sqrt(z); + df = s; + ((int *) &df)[LOWORD] = 0; + c = (z - df * df) / (s + df); + p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + + z * (pS4 + z * pS5))))); + q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); + r = p / q; + w = r * s + c; + return 2.0 * (df + w); + } +} diff --git a/usr/src/libm/src/C/acosh.c b/usr/src/libm/src/C/acosh.c new file mode 100644 index 0000000..4e84b93 --- /dev/null +++ b/usr/src/libm/src/C/acosh.c @@ -0,0 +1,105 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)acosh.c 1.19 06/01/31 SMI" + +#pragma weak acosh = __acosh + +/* INDENT OFF */ +/* acosh(x) + * Method : + * Based on + * acosh(x) = log [ x + sqrt(x*x-1) ] + * we have + * acosh(x) := log(x)+ln2, if x is large; else + * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x > 2; else + * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t = x-1. + * + * Special cases: + * acosh(x) is NaN with signal if x < 1. + * acosh(NaN) is NaN without signal. + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __acosh, __log, __log1p */ +#include "libm_protos.h" /* _SVID_libm_err */ +#include + +static const double + one = 1.0, + ln2 = 6.93147180559945286227e-01; /* 3FE62E42, FEFA39EF */ + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif + +double +acosh(double x) { + double t; + int hx; + + hx = ((int *) &x)[HIWORD]; + if (hx < 0x3ff00000) { /* x < 1, or a NaN with the sign bit set */ + if (isnan(x)) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return hx >= 0xfff80000 ? x : (x - x) / (x - x); + /* assumes sparc-like QNaN */ +#else + return (x - x) / (x - x); +#endif + else + return _SVID_libm_err(x, x, 29); + } + else if (hx >= 0x41b00000) { /* x >= 2**28 */ + if (hx >= 0x7ff00000) { /* x is inf or NaN */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return hx >= 0x7ff80000 ? 
x : x + x; + /* assumes sparc-like QNaN */ +#else + return x + x; +#endif + } + else /* acosh(huge)=log(2x) */ + return log(x) + ln2; + } + else if (((hx - 0x3ff00000) | ((int *) &x)[LOWORD]) == 0) { + return 0.0; /* acosh(1) = 0 */ + } + else if (hx > 0x40000000) { /* 2**28 > x > 2 */ + t = x * x; + return log(2.0 * x - one / (x + sqrt(t - one))); + } + else { /* 1 < x < 2 */ + t = x - one; + return log1p(t + sqrt(2.0 * t + t * t)); + } +} diff --git a/usr/src/libm/src/C/asin.c b/usr/src/libm/src/C/asin.c new file mode 100644 index 0000000..222bf87 --- /dev/null +++ b/usr/src/libm/src/C/asin.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)asin.c 1.20 06/01/31 SMI" + +#pragma weak asin = __asin + +/* INDENT OFF */ +/* asin(x) + * Method : + * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... 
+ * we approximate asin(x) on [0,0.5] by + * asin(x) = x + x*x^2*R(x^2) + * where + * R(x^2) is a rational approximation of (asin(x)-x)/x^3 + * and its remez error is bounded by + * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) + * + * For x in [0.5,1] + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; + * then for x>0.98 + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) + * For x<=0.98, let pio4_hi = pio2_hi/2, then + * f = hi part of s; + * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) + * and + * asin(x) = pi/2 - 2*(s+s*z*R(z)) + * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo) + * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c)) + * + * Special cases: + * if x is NaN, return x itself; + * if |x|>1, return NaN with invalid signal. + * + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __asin, __sqrt, __isnan */ +#include "libm_protos.h" /* _SVID_libm_error */ +#include + +/* INDENT OFF */ +static const double xxx[] = { +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* huge */ 1.000e+300, +/* pio2_hi */ 1.57079632679489655800e+00, /* 3FF921FB, 54442D18 */ +/* pio2_lo */ 6.12323399573676603587e-17, /* 3C91A626, 33145C07 */ +/* pio4_hi */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */ +/* coefficient for R(x^2) */ +/* pS0 */ 1.66666666666666657415e-01, /* 3FC55555, 55555555 */ +/* pS1 */ -3.25565818622400915405e-01, /* BFD4D612, 03EB6F7D */ +/* pS2 */ 2.01212532134862925881e-01, /* 3FC9C155, 0E884455 */ +/* pS3 */ -4.00555345006794114027e-02, /* BFA48228, B5688F3B */ +/* pS4 */ 7.91534994289814532176e-04, /* 3F49EFE0, 7501B288 */ +/* pS5 */ 3.47933107596021167570e-05, /* 3F023DE1, 0DFDF709 */ +/* qS1 */ -2.40339491173441421878e+00, /* C0033A27, 1C8A2D4B */ +/* qS2 */ 2.02094576023350569471e+00, /* 40002AE5, 9C598AC8 */ +/* qS3 */ -6.88283971605453293030e-01, /* BFE6066C, 1B8D0159 */ +/* qS4 */ 7.70381505559019352791e-02 /* 3FB3B8C5, B12E9282 */ +}; +#define one xxx[0] 
+#define huge xxx[1] +#define pio2_hi xxx[2] +#define pio2_lo xxx[3] +#define pio4_hi xxx[4] +#define pS0 xxx[5] +#define pS1 xxx[6] +#define pS2 xxx[7] +#define pS3 xxx[8] +#define pS4 xxx[9] +#define pS5 xxx[10] +#define qS1 xxx[11] +#define qS2 xxx[12] +#define qS3 xxx[13] +#define qS4 xxx[14] +/* INDENT ON */ + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif + +double +asin(double x) { + double t, w, p, q, c, r, s; + int hx, ix; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x3ff00000) { /* |x| >= 1 */ + if (((ix - 0x3ff00000) | ((int *) &x)[LOWORD]) == 0) + /* asin(1)=+-pi/2 with inexact */ + return x * pio2_hi + x * pio2_lo; + else if (isnan(x)) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ix >= 0x7ff80000 ? x : (x - x) / (x - x); + /* assumes sparc-like QNaN */ +#else + return (x - x) / (x - x); /* asin(|x|>1) is NaN */ +#endif + else + return _SVID_libm_err(x, x, 2); + } + else if (ix < 0x3fe00000) { /* |x| < 0.5 */ + if (ix < 0x3e400000) { /* if |x| < 2**-27 */ + if (huge + x > one) + return x; /* return x with inexact if + * x != 0 */ + } + else + t = x * x; + p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + + t * (pS4 + t * pS5))))); + q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4))); + w = p / q; + return x + x * w; + } + /* 1 > |x| >= 0.5 */ + w = one - fabs(x); + t = w * 0.5; + p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5))))); + q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4))); + s = sqrt(t); + if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */ + w = p / q; + t = pio2_hi - (2.0 * (s + s * w) - pio2_lo); + } + else { + w = s; + ((int *) &w)[LOWORD] = 0; + c = (t - w * w) / (s + w); + r = p / q; + p = 2.0 * s * r - (pio2_lo - 2.0 * c); + q = pio4_hi - 2.0 * w; + t = pio4_hi - (p - q); + } + return hx > 0 ? 
t : -t; +} diff --git a/usr/src/libm/src/C/asinh.c b/usr/src/libm/src/C/asinh.c new file mode 100644 index 0000000..29301ba --- /dev/null +++ b/usr/src/libm/src/C/asinh.c @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)asinh.c 1.18 06/01/31 SMI" + +#pragma weak asinh = __asinh + +/* INDENT OFF */ +/* asinh(x) + * Method : + * Based on + * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] + * we have + * asinh(x) := x if 1+x*x == 1, + * := sign(x)*(log(x)+ln2)) for large |x|, else + * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x| > 2, else + * := sign(x)*log1p(|x|+x^2/(1+sqrt(1+x^2))) + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __asinh */ +#include + +static const double xxx[] = { +/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ +/* ln2 */ 6.93147180559945286227e-01, /* 3FE62E42, FEFA39EF */ +/* huge */ 1.00000000000000000000e+300 +}; +#define one xxx[0] +#define ln2 xxx[1] +#define huge xxx[2] + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif + +double +asinh(double x) { + double t, w; + int hx, ix; + + hx = ((int *) &x)[HIWORD]; + ix = hx & 0x7fffffff; + if (ix >= 0x7ff00000) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ix >= 0x7ff80000 ? x : x + x; + /* assumes sparc-like QNaN */ +#else + return x + x; /* x is inf or NaN */ +#endif + if (ix < 0x3e300000) { /* |x|<2**-28 */ + if (huge + x > one) + return x; /* return x inexact except 0 */ + } + if (ix > 0x41b00000) { /* |x| > 2**28 */ + w = log(fabs(x)) + ln2; + } + else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */ + t = fabs(x); + w = log(2.0 * t + one / (sqrt(x * x + one) + t)); + } + else { /* 2.0 > |x| > 2**-28 */ + t = x * x; + w = log1p(fabs(x) + t / (one + sqrt(one + t))); + } + return hx > 0 ? w : -w; +} diff --git a/usr/src/libm/src/C/atan.c b/usr/src/libm/src/C/atan.c new file mode 100644 index 0000000..a398953 --- /dev/null +++ b/usr/src/libm/src/C/atan.c @@ -0,0 +1,197 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)atan.c 1.22 06/01/31 SMI" + +#pragma weak atan = __atan + +/* INDENT OFF */ +/* + * atan(x) + * Accurate Table look-up algorithm with polynomial approximation in + * partially product form. + * + * -- K.C. Ng, October 17, 2004 + * + * Algorithm + * + * (1). Purge off Inf and NaN and 0 + * (2). Reduce x to positive by atan(x) = -atan(-x). + * (3). For x <= 1/8 and let z = x*x, return + * (2.1) if x < 2^(-prec/2), atan(x) = x with inexact flag raised + * (2.2) if x < 2^(-prec/4-1), atan(x) = x+(x/3)(x*x) + * (2.3) if x < 2^(-prec/6-2), atan(x) = x+(z-5/3)(z*x/5) + * (2.4) Otherwise + * atan(x) = poly1(x) = x + A * B, + * where + * A = (p1*x*z) * (p2+z(p3+z)) + * B = (p4+z)+z*z) * (p5+z(p6+z)) + * Note: (i) domain of poly1 is [0, 1/8], (ii) remez relative + * approximation error of poly1 is bounded by + * |(atan(x)-poly1(x))/x| <= 2^-57.61 + * (4). 
For x >= 8 then + * (3.1) if x >= 2^prec, atan(x) = atan(inf) - pio2lo + * (3.2) if x >= 2^(prec/3), atan(x) = atan(inf) - 1/x + * (3.3) if x <= 65, atan(x) = atan(inf) - poly1(1/x) + * (3.4) otherwise atan(x) = atan(inf) - poly2(1/x) + * where + * poly2(r) = (q1*r) * (q2+z(q3+z)) * (q4+z), + * its domain is [0, 0.0154]; and its remez absolute + * approximation error is bounded by + * |atan(x)-poly2(x)|<= 2^-59.45 + * + * (5). Now x is in (0.125, 8). + * Recall identity + * atan(x) = atan(y) + atan((x-y)/(1+x*y)). + * Let j = (ix - 0x3fc00000) >> 16, 0 <= j < 96, where ix is the high + * part of x in IEEE double format. Then + * atan(x) = atan(y[j]) + poly2((x-y[j])/(1+x*y[j])) + * where y[j] are carefully chosen so that it matches x to around 4.5 + * bits and at the same time atan(y[j]) is very close to an IEEE double + * floating point number. Calculation indicates that + * max|(x-y[j])/(1+x*y[j])| < 0.0154 + * j,x + * + * Accuracy: Maximum error observed is bounded by 0.6 ulp after testing + * more than 10 million random arguments + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" + +extern const double _TBL_atan[]; +static const double g[] = { +/* one = */ 1.0, +/* p1 = */ 8.02176624254765935351230154992663301527500152588e-0002, +/* p2 = */ 1.27223421700559402580665846471674740314483642578e+0000, +/* p3 = */ -1.20606901800503640842521235754247754812240600586e+0000, +/* p4 = */ -2.36088967922325565496066701598465442657470703125e+0000, +/* p5 = */ 1.38345799501389166152875986881554126739501953125e+0000, +/* p6 = */ 1.06742368078953453469637224770849570631980895996e+0000, +/* q1 = */ -1.42796626333911796935538518482644576579332351685e-0001, +/* q2 = */ 3.51427110447873227059810477159863497078605962912e+0000, +/* q3 = */ 5.92129112708164262457444237952586263418197631836e-0001, +/* q4 = */ -1.99272234785683144409063061175402253866195678711e+0000, +/* pio2hi */ 1.570796326794896558e+00, +/* pio2lo */ 
6.123233995736765886e-17, +/* t1 = */ -0.333333333333333333333333333333333, +/* t2 = */ 0.2, +/* t3 = */ -1.666666666666666666666666666666666, +}; + +#define one g[0] +#define p1 g[1] +#define p2 g[2] +#define p3 g[3] +#define p4 g[4] +#define p5 g[5] +#define p6 g[6] +#define q1 g[7] +#define q2 g[8] +#define q3 g[9] +#define q4 g[10] +#define pio2hi g[11] +#define pio2lo g[12] +#define t1 g[13] +#define t2 g[14] +#define t3 g[15] + + +double +atan(double x) { + double y, z, r, p, s; + int ix, lx, hx, j; + + hx = ((int *) &x)[HIWORD]; + lx = ((int *) &x)[LOWORD]; + ix = hx & ~0x80000000; + j = ix >> 20; + + /* for |x| < 1/8 */ + if (j < 0x3fc) { + if (j < 0x3f5) { /* when |x| < 2**(-prec/6-2) */ + if (j < 0x3e3) { /* if |x| < 2**(-prec/2-2) */ + return ((int) x == 0 ? x : one); + } + if (j < 0x3f1) { /* if |x| < 2**(-prec/4-1) */ + return (x + (x * t1) * (x * x)); + } else { /* if |x| < 2**(-prec/6-2) */ + z = x * x; + s = t2 * x; + return (x + (t3 + z) * (s * z)); + } + } + z = x * x; s = p1 * x; + return (x + ((s * z) * (p2 + z * (p3 + z))) * + (((p4 + z) + z * z) * (p5 + z * (p6 + z)))); + } + + /* for |x| >= 8.0 */ + if (j >= 0x402) { + if (j < 0x436) { + r = one / x; + if (hx >= 0) { + y = pio2hi; p = pio2lo; + } else { + y = -pio2hi; p = -pio2lo; + } + if (ix < 0x40504000) { /* x < 65 */ + z = r * r; + s = p1 * r; + return (y + ((p - r) - ((s * z) * + (p2 + z * (p3 + z))) * + (((p4 + z) + z * z) * + (p5 + z * (p6 + z))))); + } else if (j < 0x412) { + z = r * r; + return (y + (p - ((q1 * r) * (q4 + z)) * + (q2 + z * (q3 + z)))); + } else + return (y + (p - r)); + } else { + if (j >= 0x7ff) /* x is inf or NaN */ + if (((ix - 0x7ff00000) | lx) != 0) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : x - x); + /* assumes sparc-like QNaN */ +#else + return (x - x); +#endif + y = -pio2lo; + return (hx >= 0 ? 
pio2hi - y : y - pio2hi); + } + } else { /* now x is between 1/8 and 8 */ + double *w, w0, w1, s, z; + w = (double *) _TBL_atan + (((ix - 0x3fc00000) >> 16) << 1); + w0 = (hx >= 0)? w[0] : -w[0]; + s = (x - w0) / (one + x * w0); + w1 = (hx >= 0)? w[1] : -w[1]; + z = s * s; + return (((q1 * s) * (q4 + z)) * (q2 + z * (q3 + z)) + w1); + } +} diff --git a/usr/src/libm/src/C/atan2.c b/usr/src/libm/src/C/atan2.c new file mode 100644 index 0000000..9767f3a --- /dev/null +++ b/usr/src/libm/src/C/atan2.c @@ -0,0 +1,498 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)atan2.c 1.22 06/01/23 SMI" + +#pragma weak atan2 = __atan2 + +#include "libm.h" + +/* + * Let t(0) = 1 and for i = 1, ..., 160, let t(i) be the slope of + * the line bisecting the conical hull of the set of points (x,y) + * where x and y are positive normal floating point numbers and + * the high order words hx and hy of their binary representations + * satisfy |hx - hy - i * 0x8000| <= 0x4000. 
Then: + * + * TBL[4*i+2] is t(i) rounded to 21 significant bits (i.e., the + * low order word is zero), and + * + * TBL[4*i] + TBL[4*i+1] form a doubled-double approximation to + * atan(TBL[4*i+2]). + * + * Finally, TBL[4*161] = TBL[4*161+1] = TBL[4*161+2] = 0. + * + * Now for any (x,y) with 0 < y <= x and any 0 < t <= 1, we have + * atan(y/x) = atan(t) + atan((y-t*x)/(x+t*y)). By choosing t = + * TBL[4*i+2], where i is the multiple of 0x8000 nearest hx - hy, + * if this multiple is less than 161, and i = 161 otherwise, we + * find that |(y-t*x)/(x+t*y)| <~ 2^-5. + */ +static const double TBL[] = { + 7.8539816339744827900e-01, +3.0616169978683830179e-17, + 1.0000000000000000000e+00, +0, + 7.7198905126506112140e-01, +2.6989956960083153652e-16, + 9.7353506088256835938e-01, +0, + 7.6068143954461309164e-01, -3.5178810518941914972e-16, + 9.5174932479858398438e-01, +0, + 7.4953661876353638860e-01, -3.2548100004524337476e-16, + 9.3073129653930664062e-01, +0, + 7.3854614984728339522e-01, -2.0775571023910406668e-16, + 9.1042709350585937500e-01, +0, + 7.2770146962041337702e-01, +3.8883249403168348802e-16, + 8.9078664779663085938e-01, +0, + 7.1699492488093774512e-01, -4.0468841511547224071e-16, + 8.7176513671875000000e-01, +0, + 7.0641813488653149022e-01, +5.6902424353981484031e-17, + 8.5331964492797851562e-01, +0, + 6.9596351101035658360e-01, +2.8245513321075021303e-16, + 8.3541154861450195312e-01, +0, + 6.8562363680534943455e-01, -4.2316970721658854064e-16, + 8.1800508499145507812e-01, +0, + 6.7539055666438230219e-01, +4.3535917281300047233e-16, + 8.0106592178344726562e-01, +0, + 6.6525763346931832132e-01, +1.1830431602404727977e-17, + 7.8456401824951171875e-01, +0, + 6.5521767574310185722e-01, -1.7435923100651044208e-16, + 7.6847028732299804688e-01, +0, + 6.4526390999481897381e-01, -1.4741927403093983947e-16, + 7.5275802612304687500e-01, +0, + 6.3538979894204850041e-01, +1.5734535069995660853e-16, + 7.3740243911743164062e-01, +0, + 6.2558914346942717799e-01, 
-2.8175588856316910960e-16, + 7.2238063812255859375e-01, +0, + 6.1585586476157949676e-01, -4.3056167357725226449e-16, + 7.0767116546630859375e-01, +0, + 6.0618408027576098362e-01, +1.5018013918429320289e-16, + 6.9325399398803710938e-01, +0, + 5.9656817827486730010e-01, +5.5271942033557644157e-17, + 6.7911052703857421875e-01, +0, + 5.8700289083426504533e-01, -8.2411369282676383293e-17, + 6.6522359848022460938e-01, +0, + 5.7748303053627658699e-01, +4.9400383775709159558e-17, + 6.5157699584960937500e-01, +0, + 5.6800353968303252117e-01, +2.9924431103311109543e-16, + 6.3815546035766601562e-01, +0, + 5.5855953863493823519e-01, -2.0306003403868777403e-16, + 6.2494468688964843750e-01, +0, + 5.4914706708329674711e-01, +2.8255378613779667461e-17, + 6.1193227767944335938e-01, +0, + 5.3976176660618069292e-01, +1.6370248781078747995e-16, + 5.9910583496093750000e-01, +0, + 5.3039888601412332747e-01, -7.6196097360093680134e-17, + 5.8645296096801757812e-01, +0, + 5.2105543924318808990e-01, -2.2400815668154739561e-16, + 5.7396411895751953125e-01, +0, + 5.1172778873967050828e-01, -3.6888136019899681185e-16, + 5.6162929534912109375e-01, +0, + 5.0241199666452196482e-01, -2.5412891474397011281e-16, + 5.4943847656250000000e-01, +0, + 4.9310493954293743712e-01, +4.4132186128251152229e-16, + 5.3738307952880859375e-01, +0, + 4.8380436844750995817e-01, -2.7844387907776656488e-16, + 5.2545595169067382812e-01, +0, + 4.7450670361463753721e-01, -2.0494355197368286028e-16, + 5.1364850997924804688e-01, +0, + 4.6367660027976320691e-01, +3.1709878607954760668e-16, + 5.0003623962402343750e-01, +0, + 4.5304753104003925301e-01, +3.3593436122420574865e-16, + 4.8681926727294921875e-01, +0, + 4.4423658037407065535e-01, +2.1987183192008082015e-17, + 4.7596645355224609375e-01, +0, + 4.3567016972500294258e-01, +3.0118422805369552650e-16, + 4.6550178527832031250e-01, +0, + 4.2733152672544871820e-01, -3.2667693224866479909e-16, + 4.5539522171020507812e-01, +0, + 4.1920540176693954493e-01, 
-2.2454273841113897647e-16, + 4.4561982154846191406e-01, +0, + 4.1127722812701872357e-01, -3.1620568973494653391e-16, + 4.3615055084228515625e-01, +0, + 4.0353384063084263289e-01, -3.5932009901481421723e-16, + 4.2696499824523925781e-01, +0, + 3.9596319345246833166e-01, -4.0281533417458698585e-16, + 4.1804289817810058594e-01, +0, + 3.8855405220339722661e-01, +1.6132231486045176674e-16, + 4.0936565399169921875e-01, +0, + 3.8129566313738116889e-01, +1.7684657060650804570e-16, + 4.0091586112976074219e-01, +0, + 3.7417884791401867517e-01, +2.6897604227426977619e-16, + 3.9267849922180175781e-01, +0, + 3.6719421967585041955e-01, -4.5886151448673745001e-17, + 3.8463878631591796875e-01, +0, + 3.6033388248727771241e-01, +1.5804115573136074946e-16, + 3.7678408622741699219e-01, +0, + 3.5358982224579182940e-01, +1.2624619863035782939e-16, + 3.6910200119018554688e-01, +0, + 3.4695498404186952968e-01, +9.3221684607372865177e-17, + 3.6158156394958496094e-01, +0, + 3.4042268308109679964e-01, +2.7697913559445449137e-16, + 3.5421252250671386719e-01, +0, + 3.3398684598563566084e-01, +3.6085337449716011085e-16, + 3.4698557853698730469e-01, +0, + 3.2764182824591436827e-01, +2.0581506352606456186e-16, + 3.3989214897155761719e-01, +0, + 3.2138200938788497041e-01, -1.9015787485430693661e-16, + 3.3292388916015625000e-01, +0, + 3.1520245348069497737e-01, +2.6961839659264087022e-16, + 3.2607340812683105469e-01, +0, + 3.0909871873117023000e-01, -1.5641891686756272625e-16, + 3.1933403015136718750e-01, +0, + 3.0306644308947827682e-01, +2.8801634211591956223e-16, + 3.1269931793212890625e-01, +0, + 2.9710135482774191473e-01, -4.3148994478973365819e-16, + 3.0616307258605957031e-01, +0, + 2.9120015759141004708e-01, -6.8539854790808585159e-17, + 2.9972028732299804688e-01, +0, + 2.8535879880370362827e-01, -1.2231638445300492682e-16, + 2.9336524009704589844e-01, +0, + 2.7957422506893880865e-01, -4.6707752931043135528e-17, + 2.8709340095520019531e-01, +0, + 2.7384352102802367313e-01, 
-4.1215636366229625876e-16, + 2.8090047836303710938e-01, +0, + 2.6816369484161040049e-01, -2.3700583122400495333e-16, + 2.7478218078613281250e-01, +0, + 2.6253212627627764419e-01, +2.3123213692190889610e-16, + 2.6873469352722167969e-01, +0, + 2.5694635355759309903e-01, -4.0638513814701264145e-16, + 2.6275444030761718750e-01, +0, + 2.5140385572454615470e-01, -3.4795333793554943723e-16, + 2.5683784484863281250e-01, +0, + 2.4500357070096612233e-01, +6.6542334848010259289e-17, + 2.5002646446228027344e-01, +0, + 2.3877766609573036760e-01, -2.7756633678549343650e-16, + 2.4342155456542968750e-01, +0, + 2.3365669377188336142e-01, +3.2700803838522067998e-16, + 2.3800384998321533203e-01, +0, + 2.2870810463931334766e-01, -4.4279127662219799521e-16, + 2.3278105258941650391e-01, +0, + 2.2391820542294382790e-01, +3.7558889374284208052e-16, + 2.2773718833923339844e-01, +0, + 2.1927501815429550902e-01, -1.4829838176513811186e-16, + 2.2285830974578857422e-01, +0, + 2.1476740847367459253e-01, -2.0535381496063397578e-17, + 2.1813154220581054688e-01, +0, + 2.1038568111737454558e-01, -4.2826767738736168650e-16, + 2.1354568004608154297e-01, +0, + 2.0612057974373865221e-01, +4.2108051749502232359e-16, + 2.0909011363983154297e-01, +0, + 2.0196410359405447821e-01, +3.5157118083511092869e-16, + 2.0475566387176513672e-01, +0, + 1.9790861144712756925e-01, +3.7894950972257700994e-16, + 2.0053362846374511719e-01, +0, + 1.9394752160084305359e-01, +2.8270367403478935534e-16, + 1.9641649723052978516e-01, +0, + 1.9007440763641536563e-01, -2.0842758095683676397e-16, + 1.9239699840545654297e-01, +0, + 1.8628369629742813629e-01, +3.4710917040399448932e-16, + 1.8846881389617919922e-01, +0, + 1.8256998712939509488e-01, +1.1053834120570125251e-16, + 1.8462586402893066406e-01, +0, + 1.7892875067284830237e-01, +3.0486232913366680305e-16, + 1.8086302280426025391e-01, +0, + 1.7535529778449010507e-01, -2.3810135019970148624e-16, + 1.7717504501342773438e-01, +0, + 1.7184559192514736736e-01, 
+5.1432582846210893916e-17, + 1.7355740070343017578e-01, +0, + 1.6839590847744290159e-01, +3.1605623296041433586e-18, + 1.7000591754913330078e-01, +0, + 1.6500283902547518977e-01, +1.5405422268770998251e-16, + 1.6651678085327148438e-01, +0, + 1.6166306303174859949e-01, +4.0042241517254928672e-16, + 1.6308629512786865234e-01, +0, + 1.5837358268281231943e-01, -2.2786616251622967291e-16, + 1.5971112251281738281e-01, +0, + 1.5513160990288810126e-01, -3.7547723514797166336e-16, + 1.5638816356658935547e-01, +0, + 1.5193468535499299321e-01, +4.3497510505554267446e-16, + 1.5311467647552490234e-01, +0, + 1.4878033155427861089e-01, -2.3102860235324261895e-16, + 1.4988791942596435547e-01, +0, + 1.4566628729590647140e-01, +9.9227592950040279415e-17, + 1.4670538902282714844e-01, +0, + 1.4259050967286590605e-01, -3.3869909683813096906e-18, + 1.4356482028961181641e-01, +0, + 1.3955105903633846509e-01, +1.5500435650773331566e-17, + 1.4046406745910644531e-01, +0, + 1.3654610022831903393e-01, +3.3965918616682805753e-16, + 1.3740110397338867188e-01, +0, + 1.3357402082462854764e-01, +2.7572431581527535421e-16, + 1.3437414169311523438e-01, +0, + 1.3063319828908959153e-01, -3.4667213797076707331e-16, + 1.3138139247894287109e-01, +0, + 1.2772200049776749609e-01, +3.1089261947725651968e-16, + 1.2842106819152832031e-01, +0, + 1.2436931430778752627e-01, -4.0654251891464630059e-16, + 1.2501454353332519531e-01, +0, + 1.2111683701666819957e-01, -3.9381654342464836012e-16, + 1.2171256542205810547e-01, +0, + 1.1844801833536511282e-01, -3.6673155595150283444e-16, + 1.1900508403778076172e-01, +0, + 1.1587365536613614125e-01, -1.5026628801318421951e-16, + 1.1639505624771118164e-01, +0, + 1.1338607085741525538e-01, +1.2886806274050538880e-16, + 1.1387449502944946289e-01, +0, + 1.1097844020819369604e-01, +2.3848343623577768044e-16, + 1.1143630743026733398e-01, +0, + 1.0864456107308662069e-01, +4.2065430313285469408e-16, + 1.0907405614852905273e-01, +0, + 1.0637891628473727934e-01, 
-4.6883543790348472687e-18, + 1.0678201913833618164e-01, +0, + 1.0417650062205296990e-01, +1.4774925414624453292e-16, + 1.0455501079559326172e-01, +0, + 1.0203276464730581807e-01, -1.5677032794816452332e-16, + 1.0238832235336303711e-01, +0, + 9.9943617083734892503e-02, +3.4511310907979792828e-16, + 1.0027772188186645508e-01, +0, + 9.7905249824711049200e-02, +3.4489485563461708496e-16, + 9.8219275474548339844e-02, +0, + 9.5914316649349906641e-02, -1.3214510886789011569e-17, + 9.6209526062011718750e-02, +0, + 9.3967698614664918466e-02, +1.1048427091217964090e-16, + 9.4245254993438720703e-02, +0, + 9.2062564267554769515e-02, -3.7297463814697759309e-16, + 9.2323541641235351562e-02, +0, + 9.0196252506350660383e-02, -3.5280143043576718079e-16, + 9.0441644191741943359e-02, +0, + 8.8366391663268650802e-02, -6.1140673227541621183e-17, + 8.8597118854522705078e-02, +0, + 8.6570782100201526532e-02, -2.0998844594957629702e-16, + 8.6787700653076171875e-02, +0, + 8.4807337678923566671e-02, +3.9530981588194673068e-16, + 8.5011243820190429688e-02, +0, + 8.3074323040850828193e-02, -4.3022503210464894539e-17, + 8.3265960216522216797e-02, +0, + 8.1369880712663267275e-02, -6.3063867569127169744e-18, + 8.1549942493438720703e-02, +0, + 7.9692445771216036121e-02, -5.0787623072962671502e-17, + 7.9861581325531005859e-02, +0, + 7.8040568735575632786e-02, -3.8810063021216721741e-16, + 7.8199386596679687500e-02, +0, + 7.6412797391314235540e-02, +4.1246529500495762995e-16, + 7.6561868190765380859e-02, +0, + 7.4807854772808823896e-02, -3.7025599052186724156e-16, + 7.4947714805603027344e-02, +0, + 7.3224639528778112663e-02, +4.2209138483206712401e-17, + 7.3355793952941894531e-02, +0, + 7.1661929761571485642e-02, -3.2074473649855177622e-16, + 7.1784853935241699219e-02, +0, + 7.0118738881148168218e-02, -2.5371257235753296804e-16, + 7.0233881473541259766e-02, +0, + 6.8594137996416115755e-02, +3.3796987842548399135e-16, + 6.8701922893524169922e-02, +0, + 6.7087137393172291411e-02, 
+5.5061492696328852397e-17, + 6.7187964916229248047e-02, +0, + 6.5596983299946565182e-02, -2.1580863111502565280e-16, + 6.5691232681274414062e-02, +0, + 6.4122802037412718335e-02, -3.1315661827469233434e-16, + 6.4210832118988037109e-02, +0, + 6.2426231582525915087e-02, -2.5758980071296622188e-16, + 6.2507450580596923828e-02, +0, + 6.0781559928021700046e-02, +1.3736899336217710591e-16, + 6.0856521129608154297e-02, +0, + 5.9432882624005145544e-02, +2.2246097394328856474e-16, + 5.9502959251403808594e-02, +0, + 5.8132551274581167888e-02, -6.2525053236379489390e-18, + 5.8198124170303344727e-02, +0, + 5.6876611930681164608e-02, -2.6589930995607417149e-16, + 5.6938022375106811523e-02, +0, + 5.5661522654748551986e-02, -4.2736362859832186197e-16, + 5.5719077587127685547e-02, +0, + 5.4484124463757943602e-02, -1.6708067365310384253e-16, + 5.4538100957870483398e-02, +0, + 5.3341582449436764080e-02, +3.3271673004611311850e-17, + 5.3392231464385986328e-02, +0, + 5.2231267345892007370e-02, -3.5593396674200571616e-16, + 5.2278816699981689453e-02, +0, + 5.1150874758829623090e-02, +1.4432815841187114832e-16, + 5.1195532083511352539e-02, +0, + 5.0098306612679444072e-02, +9.4680943793589404083e-17, + 5.0140261650085449219e-02, +0, + 4.9071641675614507960e-02, +2.1131168520301896817e-16, + 4.9111068248748779297e-02, +0, + 4.8069135772851545596e-02, +1.6035336741307516296e-16, + 4.8106193542480468750e-02, +0, + 4.7089192241088539959e-02, -2.2491738698796901479e-16, + 4.7124028205871582031e-02, +0, + 4.6130362086062248750e-02, -1.5111423469578965206e-16, + 4.6163111925125122070e-02, +0, + 4.5191314382707403752e-02, +4.1989325207399786612e-16, + 4.5222103595733642578e-02, +0, + 4.4270836390474244126e-02, -4.1432635292331004454e-16, + 4.4299781322479248047e-02, +0, + 4.3367774164955186222e-02, -3.0615383054587355892e-16, + 4.3394982814788818359e-02, +0, + 4.2481121875321825598e-02, -3.6730166956273555173e-16, + 4.2506694793701171875e-02, +0, + 4.1609902899457651415e-02, 
-4.4226425958068821782e-16,
+	4.1633933782577514648e-02, +0,
+	4.0753259129372665370e-02, +1.9801161516527046872e-16,
+	4.0775835514068603516e-02, +0,
+	3.9910361780060910064e-02, +8.2560620036613164573e-18,
+	3.9931565523147583008e-02, +0,
+	3.9080441183869218946e-02, +3.9908991939242971628e-17,
+	3.9100348949432373047e-02, +0,
+	3.8262816593271686827e-02, +9.5182237812195590276e-17,
+	3.8281500339508056641e-02, +0,
+	3.7456806948784837630e-02, +1.5213508760679563439e-16,
+	3.7474334239959716797e-02, +0,
+	3.6661849947035918262e-02, +7.3335516005184616486e-17,
+	3.6678284406661987305e-02, +0,
+	3.5877353272533163420e-02, -1.3007348019891714540e-16,
+	3.5892754793167114258e-02, +0,
+	3.5102754135096780885e-02, -2.9903662298950558656e-16,
+	3.5117179155349731445e-02, +0,
+	3.4337638360670830195e-02, +2.9656295131966114331e-16,
+	3.4351140260696411133e-02, +0,
+	3.3581472523789734907e-02, +3.4810947205572817820e-16,
+	3.3594101667404174805e-02, +0,
+	3.2833871859357266487e-02, -3.8885440174405159838e-16,
+	3.2845675945281982422e-02, +0,
+	3.2094421679560447558e-02, +5.8805134853032009978e-17,
+	3.2105445861816406250e-02, +0,
+	3.1243584858944295490e-02, +2.8737383773884313066e-17,
+	3.1253755092620849609e-02, +0,
+	0, 0, 0, 0
+};
+
+static const double C[] = {
+	0.0,				/* zero */
+	0.125,				/* twom3: 2**-3 */
+	1.2980742146337069071e+33,	/* two110: 2**110 */
+	7.8539816339744827900e-01,	/* pio4 */
+	1.5707963267948965580e+00,	/* pio2 */
+	6.1232339957367658860e-17,	/* pio2_lo */
+	-3.1415926535897931160e+00,	/* mpi */
+	-1.2246467991473531772e-16,	/* mpi_lo */
+	-3.33333333333327571893331786354179101074860633009e-0001,	/* p1 */
+	+1.99999999942671624230086497610394721817438631379e-0001,	/* p2 */
+	-1.42856965565428636896183013324727205980484158356e-0001,	/* p3 */
+	+1.10894981496317081405107718475040168084164825641e-0001,	/* p4 */
+};
+
+#define	zero	C[0]
+#define	twom3	C[1]
+#define	two110	C[2]
+#define	pio4	C[3]
+#define	pio2	C[4]
+#define	pio2_lo	C[5]
+#define	mpi	C[6]
+#define	mpi_lo	C[7]
+#define	p1	C[8]
+#define	p2	C[9]
+#define	p3	C[10]
+#define	p4	C[11]
+/*
+ * atan2(oy, ox), using the table-driven reduction described in the
+ * comment that precedes TBL.
+ *
+ * hx and hy are the high words of the arguments with the sign bit
+ * removed; sx and sy hold the sign bits.  The magnitudes are ordered so
+ * that x is the larger and y the smaller, and ah + al is set to the
+ * base angle (0, -pi or +-pi/2) to which atan(y/x) is added.  sy is
+ * toggled whenever that sum must be negated to give the final result.
+ *
+ * Cases returned before the table lookup:
+ *	x is NaN			ox * oy
+ *	x is infinite			+-ah (pio4 is added if y is too)
+ *	hx - hy >= 0x03600000		+-ah, or +-y/x when (int)ah is 0
+ *	y == 0 and x == 0		_SVID_libm_err(ox, oy, 3)
+ *	y == 0 and x != 0		+-ah
+ */
+double
+atan2(double oy, double ox) {
+	double ah, al, t, xh, x, y, z;
+	int i, k, hx, hy, sx, sy;
+#ifndef lint
+	volatile int inexact;	/* written only for its side effect */
+#endif
+
+	hy = ((int *)&oy)[HIWORD];
+	sy = hy & 0x80000000;
+	hy &= ~0x80000000;
+
+	hx = ((int *)&ox)[HIWORD];
+	sx = hx & 0x80000000;
+	hx &= ~0x80000000;
+
+	if (hy > hx || (hy == hx && ((unsigned *)&oy)[LOWORD] >
+	    ((unsigned *)&ox)[LOWORD])) {	/* |oy| > |ox|: swap roles */
+		i = hx;
+		hx = hy;
+		hy = i;
+		x = fabs(oy);
+		y = fabs(ox);
+		if (sx) {
+			ah = pio2;
+			al = pio2_lo;
+		} else {
+			ah = -pio2;
+			al = -pio2_lo;
+			sy ^= 0x80000000;
+		}
+	} else {
+		x = fabs(ox);
+		y = fabs(oy);
+		if (sx) {
+			ah = mpi;
+			al = mpi_lo;
+			sy ^= 0x80000000;
+		} else {
+			ah = al = zero;
+		}
+	}
+
+	if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) {
+		if (hx >= 0x7ff00000) {	/* x is inf or NaN */
+			if (((hx ^ 0x7ff00000) | ((int *)&x)[LOWORD]) != 0)
+				return (ox * oy);	/* x is NaN */
+			if (hy >= 0x7ff00000)
+				ah += pio4;
+#ifndef lint
+			inexact = (int)ah;	/* inexact if ah != 0 */
+#endif
+			return ((sy)? -ah : ah);
+		}
+		if (hx - hy >= 0x03600000) {
+			if ((int)ah == 0)
+				ah = y / x;
+			return ((sy)? -ah : ah);
+		}
+		y *= twom3;
+		x *= twom3;
+		hy -= 0x00300000;	/* keep high words in step with 2**-3 */
+		hx -= 0x00300000;
+	} else if (hy < 0x00100000) {	/* exponent field of y is zero */
+		if ((hy | ((int *)&y)[LOWORD]) == 0) {	/* y == 0 */
+			if ((hx | ((int *)&x)[LOWORD]) == 0)	/* x == 0 too */
+				return (_SVID_libm_err(ox, oy, 3));
+#ifndef lint
+			inexact = (int)ah;	/* inexact if ah != 0 */
+#endif
+			return ((sy)? -ah : ah);
+		}
+		y *= two110;
+		x *= two110;
+		hy = ((int *)&y)[HIWORD];	/* re-read after scaling */
+		hx = ((int *)&x)[HIWORD];
+	}
+
+	/* k = 4 * (multiple of 0x8000 nearest hx - hy), capped at 4 * 161 */
+	k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
+	if (k > 644)
+		k = 644;
+	ah += TBL[k];
+	al += TBL[k+1];
+	t = TBL[k+2];
+
+	xh = x;
+	((int *)&xh)[LOWORD] = 0;	/* xh = x with its low word cleared */
+	z = ((y - t * xh) - t * (x - xh)) / (x + y * t);
+	x = z * z;
+	t = ah + (z + (al + (z * x) * (p1 + x * (p2 + x * (p3 + x * p4)))));
+	return ((sy)? -t : t);
+}
diff --git a/usr/src/libm/src/C/atan2pi.c b/usr/src/libm/src/C/atan2pi.c
new file mode 100644
index 0000000..85a6171
--- /dev/null
+++ b/usr/src/libm/src/C/atan2pi.c
@@ -0,0 +1,50 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)atan2pi.c 1.6 06/01/23 SMI"
+
+#pragma weak atan2pi = __atan2pi
+
+/*
+ * atan2pi(y, x) = atan2(y, x)/pi
+ *
+ * When both arguments are zero the result is formed directly from the
+ * sign bits: y itself is returned if the sign bit of x is clear,
+ * otherwise 1.0 or -1.0 according to the sign bit of y.
+ */
+
+#include "libm.h"
+
+static const double invpi = 0.3183098861837906715377675;
+
+double
+atan2pi(double y, double x) {
+	int ix, iy;
+
+	if (x == 0.0 && y == 0.0) {
+		ix = ((int *)&x)[HIWORD];
+		iy = ((int *)&y)[HIWORD];
+		if (ix >= 0)	/* sign bit of x clear */
+			return (y);
+		return ((iy >= 0)? 1.0 : -1.0);
+	}
+	return (atan2(y, x) * invpi);
+}
diff --git a/usr/src/libm/src/C/atanh.c b/usr/src/libm/src/C/atanh.c
new file mode 100644
index 0000000..23607cd
--- /dev/null
+++ b/usr/src/libm/src/C/atanh.c
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)atanh.c 1.17 06/01/31 SMI"
+
+#pragma weak atanh = __atanh
+
+/* INDENT OFF */
+/* atanh(x)
+ * Code originated from 4.3bsd.
+ * Modified by K.C. Ng for SUN 4.0 libm.
+ * Method :
+ *                  1              2x                          x
+ *      atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
+ *                  2             1 - x                      1 - x
+ * Note: to guarantee atanh(-x) = -atanh(x), we use
+ *                 sign(x)             |x|
+ *      atanh(x) = ------- * log1p(2*-------).
+ *                    2              1 - |x|
+ *
+ * Special cases:
+ *	atanh(x) is NaN if |x| > 1 with signal;
+ *	atanh(NaN) is that NaN with no signal;
+ *	atanh(+-1) is +-INF with signal.
+ */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include + +double +atanh(double x) { + double t; + + if (isnan(x)) + return x * x; /* switched from x + x for Cheetah */ + t = fabs(x); + if (t > 1.0) + return _SVID_libm_err(x, x, 30); /* sNaN */ + if (t == 1.0) + return _SVID_libm_err(x, x, 31); /* x/0; */ + t = t / (1.0 - t); + return copysign(0.5, x) * log1p(t + t); +} diff --git a/usr/src/libm/src/C/cbrt.c b/usr/src/libm/src/C/cbrt.c new file mode 100644 index 0000000..7969a29 --- /dev/null +++ b/usr/src/libm/src/C/cbrt.c @@ -0,0 +1,283 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cbrt.c 1.16 06/01/31 SMI" + +/* INDENT OFF */ + +/* + * cbrt: double precision cube root + * + * Algorithm: bit hacking, table lookup, and polynomial approximation + * + * For normal x, write x = s*2^(3j)*z where s = +/-1, j is an integer, + * and 1 <= z < 8. Let y := s*2^j. From a table, find u such that + * u^3 is computable exactly and |(z-u^3)/u^3| <~ 2^-8. 
We construct + * y, z, and the table index from x by a few integer operations. + * + * Now cbrt(x) = y*u*(1+t)^(1/3) where t = (z-u^3)/u^3. We approximate + * (1+t)^(1/3) by a polynomial 1+p(t), where p(t) := t*(p1+t*(p2+...+ + * (p5+t*p6))). By computing the result as y*(u+u*p(t)), we can bound + * the worst case error by .51 ulp. + * + * Notes: + * + * 1. For subnormal x, we scale x by 2^54, compute the cube root, and + * scale the result by 2^-18. + * + * 2. cbrt(+/-inf) = +/-inf and cbrt(NaN) is NaN. + */ + +/* + * for i = 0, ..., 385 + * form x(i) with high word 0x3ff00000 + (i << 13) and low word 0; + * then TBL[i] = cbrt(x(i)) rounded to 17 significant bits + */ +static const double __libm_TBL_cbrt[] = { + 1.00000000000000000e+00, 1.00259399414062500e+00, 1.00518798828125000e+00, + 1.00775146484375000e+00, 1.01031494140625000e+00, 1.01284790039062500e+00, + 1.01538085937500000e+00, 1.01791381835937500e+00, 1.02041625976562500e+00, + 1.02290344238281250e+00, 1.02539062500000000e+00, 1.02786254882812500e+00, + 1.03031921386718750e+00, 1.03277587890625000e+00, 1.03520202636718750e+00, + 1.03762817382812500e+00, 1.04003906250000000e+00, 1.04244995117187500e+00, + 1.04483032226562500e+00, 1.04721069335937500e+00, 1.04959106445312500e+00, + 1.05194091796875000e+00, 1.05429077148437500e+00, 1.05662536621093750e+00, + 1.05895996093750000e+00, 1.06127929687500000e+00, 1.06358337402343750e+00, + 1.06587219238281250e+00, 1.06816101074218750e+00, 1.07044982910156250e+00, + 1.07270812988281250e+00, 1.07496643066406250e+00, 1.07722473144531250e+00, + 1.07945251464843750e+00, 1.08168029785156250e+00, 1.08390808105468750e+00, + 1.08612060546875000e+00, 1.08831787109375000e+00, 1.09051513671875000e+00, + 1.09269714355468750e+00, 1.09487915039062500e+00, 1.09704589843750000e+00, + 1.09921264648437500e+00, 1.10136413574218750e+00, 1.10350036621093750e+00, + 1.10563659667968750e+00, 1.10775756835937500e+00, 1.10987854003906250e+00, + 1.11198425292968750e+00, 
1.11408996582031250e+00, 1.11618041992187500e+00, + 1.11827087402343750e+00, 1.12034606933593750e+00, 1.12242126464843750e+00, + 1.12448120117187500e+00, 1.12654113769531250e+00, 1.12858581542968750e+00, + 1.13063049316406250e+00, 1.13265991210937500e+00, 1.13468933105468750e+00, + 1.13670349121093750e+00, 1.13871765136718750e+00, 1.14073181152343750e+00, + 1.14273071289062500e+00, 1.14471435546875000e+00, 1.14669799804687500e+00, + 1.14868164062500000e+00, 1.15065002441406250e+00, 1.15260314941406250e+00, + 1.15457153320312500e+00, 1.15650939941406250e+00, 1.15846252441406250e+00, + 1.16040039062500000e+00, 1.16232299804687500e+00, 1.16424560546875000e+00, + 1.16616821289062500e+00, 1.16807556152343750e+00, 1.16998291015625000e+00, + 1.17189025878906250e+00, 1.17378234863281250e+00, 1.17567443847656250e+00, + 1.17755126953125000e+00, 1.17942810058593750e+00, 1.18128967285156250e+00, + 1.18315124511718750e+00, 1.18501281738281250e+00, 1.18685913085937500e+00, + 1.18870544433593750e+00, 1.19055175781250000e+00, 1.19238281250000000e+00, + 1.19421386718750000e+00, 1.19602966308593750e+00, 1.19786071777343750e+00, + 1.19966125488281250e+00, 1.20147705078125000e+00, 1.20327758789062500e+00, + 1.20507812500000000e+00, 1.20686340332031250e+00, 1.20864868164062500e+00, + 1.21043395996093750e+00, 1.21220397949218750e+00, 1.21397399902343750e+00, + 1.21572875976562500e+00, 1.21749877929687500e+00, 1.21925354003906250e+00, + 1.22099304199218750e+00, 1.22274780273437500e+00, 1.22448730468750000e+00, + 1.22621154785156250e+00, 1.22795104980468750e+00, 1.22967529296875000e+00, + 1.23138427734375000e+00, 1.23310852050781250e+00, 1.23481750488281250e+00, + 1.23652648925781250e+00, 1.23822021484375000e+00, 1.23991394042968750e+00, + 1.24160766601562500e+00, 1.24330139160156250e+00, 1.24497985839843750e+00, + 1.24665832519531250e+00, 1.24833679199218750e+00, 1.25000000000000000e+00, + 1.25166320800781250e+00, 1.25332641601562500e+00, 1.25497436523437500e+00, + 
1.25663757324218750e+00, 1.25828552246093750e+00, 1.25991821289062500e+00, + 1.26319885253906250e+00, 1.26644897460937500e+00, 1.26968383789062500e+00, + 1.27290344238281250e+00, 1.27612304687500000e+00, 1.27931213378906250e+00, + 1.28248596191406250e+00, 1.28564453125000000e+00, 1.28878784179687500e+00, + 1.29191589355468750e+00, 1.29502868652343750e+00, 1.29812622070312500e+00, + 1.30120849609375000e+00, 1.30427551269531250e+00, 1.30732727050781250e+00, + 1.31036376953125000e+00, 1.31340026855468750e+00, 1.31640625000000000e+00, + 1.31941223144531250e+00, 1.32238769531250000e+00, 1.32536315917968750e+00, + 1.32832336425781250e+00, 1.33126831054687500e+00, 1.33419799804687500e+00, + 1.33712768554687500e+00, 1.34002685546875000e+00, 1.34292602539062500e+00, + 1.34580993652343750e+00, 1.34867858886718750e+00, 1.35153198242187500e+00, + 1.35437011718750000e+00, 1.35720825195312500e+00, 1.36003112792968750e+00, + 1.36283874511718750e+00, 1.36564636230468750e+00, 1.36842346191406250e+00, + 1.37120056152343750e+00, 1.37396240234375000e+00, 1.37672424316406250e+00, + 1.37945556640625000e+00, 1.38218688964843750e+00, 1.38491821289062500e+00, + 1.38761901855468750e+00, 1.39031982421875000e+00, 1.39302062988281250e+00, + 1.39569091796875000e+00, 1.39836120605468750e+00, 1.40101623535156250e+00, + 1.40367126464843750e+00, 1.40631103515625000e+00, 1.40893554687500000e+00, + 1.41156005859375000e+00, 1.41416931152343750e+00, 1.41676330566406250e+00, + 1.41935729980468750e+00, 1.42193603515625000e+00, 1.42449951171875000e+00, + 1.42706298828125000e+00, 1.42962646484375000e+00, 1.43215942382812500e+00, + 1.43469238281250000e+00, 1.43722534179687500e+00, 1.43974304199218750e+00, + 1.44224548339843750e+00, 1.44474792480468750e+00, 1.44723510742187500e+00, + 1.44972229003906250e+00, 1.45219421386718750e+00, 1.45466613769531250e+00, + 1.45712280273437500e+00, 1.45956420898437500e+00, 1.46200561523437500e+00, + 1.46444702148437500e+00, 1.46687316894531250e+00, 1.46928405761718750e+00, 
+ 1.47169494628906250e+00, 1.47409057617187500e+00, 1.47648620605468750e+00, + 1.47886657714843750e+00, 1.48124694824218750e+00, 1.48361206054687500e+00, + 1.48597717285156250e+00, 1.48834228515625000e+00, 1.49067687988281250e+00, + 1.49302673339843750e+00, 1.49536132812500000e+00, 1.49768066406250000e+00, + 1.50000000000000000e+00, 1.50230407714843750e+00, 1.50460815429687500e+00, + 1.50691223144531250e+00, 1.50920104980468750e+00, 1.51148986816406250e+00, + 1.51376342773437500e+00, 1.51603698730468750e+00, 1.51829528808593750e+00, + 1.52055358886718750e+00, 1.52279663085937500e+00, 1.52503967285156250e+00, + 1.52728271484375000e+00, 1.52951049804687500e+00, 1.53173828125000000e+00, + 1.53395080566406250e+00, 1.53616333007812500e+00, 1.53836059570312500e+00, + 1.54055786132812500e+00, 1.54275512695312500e+00, 1.54493713378906250e+00, + 1.54711914062500000e+00, 1.54928588867187500e+00, 1.55145263671875000e+00, + 1.55361938476562500e+00, 1.55577087402343750e+00, 1.55792236328125000e+00, + 1.56005859375000000e+00, 1.56219482421875000e+00, 1.56433105468750000e+00, + 1.56645202636718750e+00, 1.56857299804687500e+00, 1.57069396972656250e+00, + 1.57279968261718750e+00, 1.57490539550781250e+00, 1.57699584960937500e+00, + 1.57908630371093750e+00, 1.58117675781250000e+00, 1.58325195312500000e+00, + 1.58532714843750000e+00, 1.58740234375000000e+00, 1.59152221679687500e+00, + 1.59562683105468750e+00, 1.59970092773437500e+00, 1.60375976562500000e+00, + 1.60780334472656250e+00, 1.61183166503906250e+00, 1.61582946777343750e+00, + 1.61981201171875000e+00, 1.62376403808593750e+00, 1.62770080566406250e+00, + 1.63162231445312500e+00, 1.63552856445312500e+00, 1.63941955566406250e+00, + 1.64328002929687500e+00, 1.64714050292968750e+00, 1.65097045898437500e+00, + 1.65476989746093750e+00, 1.65856933593750000e+00, 1.66235351562500000e+00, + 1.66610717773437500e+00, 1.66986083984375000e+00, 1.67358398437500000e+00, + 1.67729187011718750e+00, 1.68098449707031250e+00, 
1.68466186523437500e+00, + 1.68832397460937500e+00, 1.69197082519531250e+00, 1.69560241699218750e+00, + 1.69921875000000000e+00, 1.70281982421875000e+00, 1.70640563964843750e+00, + 1.70997619628906250e+00, 1.71353149414062500e+00, 1.71707153320312500e+00, + 1.72059631347656250e+00, 1.72410583496093750e+00, 1.72760009765625000e+00, + 1.73109436035156250e+00, 1.73455810546875000e+00, 1.73800659179687500e+00, + 1.74145507812500000e+00, 1.74488830566406250e+00, 1.74829101562500000e+00, + 1.75169372558593750e+00, 1.75508117675781250e+00, 1.75846862792968750e+00, + 1.76182556152343750e+00, 1.76516723632812500e+00, 1.76850891113281250e+00, + 1.77183532714843750e+00, 1.77514648437500000e+00, 1.77844238281250000e+00, + 1.78173828125000000e+00, 1.78500366210937500e+00, 1.78826904296875000e+00, + 1.79151916503906250e+00, 1.79476928710937500e+00, 1.79798889160156250e+00, + 1.80120849609375000e+00, 1.80441284179687500e+00, 1.80760192871093750e+00, + 1.81079101562500000e+00, 1.81396484375000000e+00, 1.81712341308593750e+00, + 1.82026672363281250e+00, 1.82341003417968750e+00, 1.82653808593750000e+00, + 1.82965087890625000e+00, 1.83276367187500000e+00, 1.83586120605468750e+00, + 1.83894348144531250e+00, 1.84201049804687500e+00, 1.84507751464843750e+00, + 1.84812927246093750e+00, 1.85118103027343750e+00, 1.85421752929687500e+00, + 1.85723876953125000e+00, 1.86026000976562500e+00, 1.86326599121093750e+00, + 1.86625671386718750e+00, 1.86924743652343750e+00, 1.87222290039062500e+00, + 1.87518310546875000e+00, 1.87814331054687500e+00, 1.88108825683593750e+00, + 1.88403320312500000e+00, 1.88696289062500000e+00, 1.88987731933593750e+00, + 1.89279174804687500e+00, 1.89569091796875000e+00, 1.89859008789062500e+00, + 1.90147399902343750e+00, 1.90435791015625000e+00, 1.90722656250000000e+00, + 1.91007995605468750e+00, 1.91293334960937500e+00, 1.91577148437500000e+00, + 1.91860961914062500e+00, 1.92143249511718750e+00, 1.92425537109375000e+00, + 1.92706298828125000e+00, 
1.92985534667968750e+00, 1.93264770507812500e+00,
+	1.93544006347656250e+00, 1.93821716308593750e+00, 1.94097900390625000e+00,
+	1.94374084472656250e+00, 1.94650268554687500e+00, 1.94924926757812500e+00,
+	1.95198059082031250e+00, 1.95471191406250000e+00, 1.95742797851562500e+00,
+	1.96014404296875000e+00, 1.96286010742187500e+00, 1.96556091308593750e+00,
+	1.96824645996093750e+00, 1.97093200683593750e+00, 1.97361755371093750e+00,
+	1.97628784179687500e+00, 1.97894287109375000e+00, 1.98159790039062500e+00,
+	1.98425292968750000e+00, 1.98689270019531250e+00, 1.98953247070312500e+00,
+	1.99215698242187500e+00, 1.99478149414062500e+00, 1.99739074707031250e+00,
+	2.00000000000000000e+00,
+};
+
+/*
+ * The polynomial p(x) := p1*x + p2*x^2 + ... + p6*x^6 satisfies
+ *
+ * |(1+x)^(1/3) - 1 - p(x)| < 2^-63 for |x| < 0.003914
+ *
+ * C[0..5] hold p1..p6; C[6] is 2^54, the factor used to bring
+ * subnormal arguments into the normal range.
+ */
+static const double C[] = {
+	3.33333333333333340735623180707664400321413178600e-0001,	/* p1 */
+	-1.11111111111111111992797989129069515334791432304e-0001,	/* p2 */
+	6.17283950578506695710302115234720605072083379082e-0002,	/* p3 */
+	-4.11522633731005164138964638666647311514892319010e-0002,	/* p4 */
+	3.01788343105268728151735586597807324859173704847e-0002,	/* p5 */
+	-2.34723340038386971009665073968507263074215090751e-0002,	/* p6 */
+	18014398509481984.0						/* two54 = 2^54 */
+};
+
+#define p1 C[0]
+#define p2 C[1]
+#define p3 C[2]
+#define p4 C[3]
+#define p5 C[4]
+#define p6 C[5]
+#define two54 C[6]
+
+/* INDENT ON */
+
+#if defined(__sparc)
+
+#define HIWORD 0
+#define LOWORD 1
+
+#elif defined(__i386)
+
+#define HIWORD 1
+#define LOWORD 0
+
+#else
+#error Unknown architecture
+#endif
+
+#pragma weak cbrt = __cbrt
+/*
+ * __cbrt(x): double precision cube root of x (weak alias: cbrt).
+ *
+ * The sign is split off and the biased exponent is divided by three
+ * with integer shifts and adds.  What remains of the high word is
+ * rebased to 0x3ff00000 to form the reduced argument, whose cube root
+ * is taken from __libm_TBL_cbrt and refined by the polynomial p1..p6.
+ *
+ * inf and NaN return x + x (or x itself for a quiet NaN when
+ * FPADD_TRAPS_INCOMPLETE_ON_NAN is defined), +/-0 returns x, and
+ * subnormal x is first multiplied by two54 with a smaller exponent
+ * offset used for the result to compensate.
+ */
+double __cbrt(double x)
+{
+	union {
+		unsigned int i[2];
+		double d;
+	} xx, yy;	/* xx: argument / reduced argument; yy: power-of-two scale */
+	double t, u, w;
+	unsigned int hx, sx, ex, j, offset;
+
+	xx.d = x;
+	hx = xx.i[HIWORD] & ~0x80000000;	/* high word with the sign bit cleared */
+	sx = xx.i[HIWORD] & 0x80000000;		/* sign bit only */
+
+	/* handle special cases */
+	if (hx >= 0x7ff00000)	/* x is inf or nan */
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		return hx >= 0x7ff80000 ? x : x + x;
+		/* assumes sparc-like QNaN */
+#else
+		return x + x;
+#endif
+
+	if (hx < 0x00100000) {	/* x is subnormal or zero */
+		if ((hx | xx.i[LOWORD]) == 0)
+			return x;
+
+		/* scale x to normal range */
+		xx.d = x * two54;
+		hx = xx.i[HIWORD] & ~0x80000000;
+		offset = 0x29800000;	/* 0x2aa00000 - (18 << 20): undoes the 2^54 scaling */
+	}
+	else
+		offset = 0x2aa00000;	/* 682 << 20: re-bias after dividing the exponent by 3 */
+
+	ex = hx & 0x7ff00000;			/* biased exponent field */
+	j = (ex >> 2) + (ex >> 4) + (ex >> 6);	/* ex * (1/4 + 1/16 + 1/64) */
+	j = j + (j >> 6);			/* ... * (1 + 1/64), close to ex/3 */
+	j = 0x7ff00000 & (j + 0x2aa00);	/* j is ex/3, kept in the exponent field */
+	hx -= (j + j + j);			/* strip three times that exponent */
+	xx.i[HIWORD] = 0x3ff00000 + hx;		/* rebase: xx.d is now the reduced argument */
+
+	u = __libm_TBL_cbrt[(hx + 0x1000) >> 13];	/* nearest entry; table step is 1 << 13 */
+	w = u * u * u;
+	t = (xx.d - w) / w;			/* relative distance of the argument from u^3 */
+
+	yy.i[HIWORD] = sx | (j + offset);	/* signed power of two carrying the exponent */
+	yy.i[LOWORD] = 0;
+
+	w = t * t;
+	return yy.d * (u + u * (t * (p1 + t * p2 + w * p3) +
+		(w * w) * (p4 + t * p5 + w * p6)));
+}
diff --git a/usr/src/libm/src/C/ceil.c b/usr/src/libm/src/C/ceil.c
new file mode 100644
index 0000000..ea8d6b6
--- /dev/null
+++ b/usr/src/libm/src/C/ceil.c
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "@(#)ceil.c 1.8 06/01/23 SMI" + +#pragma weak ceil = __ceil + +/* + * ceil(x) returns the least integral value bigger than or equal to x. + * NOTE: ceil(x) returns result with the same sign as x's, including 0. + * + * Modified 8/4/04 for performance. + */ + +#include "libm.h" + +static const double + zero = 0.0, + one = 1.0, + two52 = 4503599627370496.0; + +double +ceil(double x) { + double t, w; + int hx, lx, ix; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + if (ix >= 0x43300000) /* return x if |x| >= 2^52, or x is NaN */ + return (x * one); + t = (hx >= 0)? two52 : -two52; + w = x + t; + t = w - t; + if (ix < 0x3ff00000) { + if ((ix | lx) == 0) + return (x); + else + return ((hx < 0)? -zero : one); + } + return ((t >= x)? t : t + one); +} diff --git a/usr/src/libm/src/C/copysign.c b/usr/src/libm/src/C/copysign.c new file mode 100644 index 0000000..7478fcb --- /dev/null +++ b/usr/src/libm/src/C/copysign.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)copysign.c 1.8 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak copysign = __copysign +#endif + +#include "libm.h" + +double +copysign(double x, double y) { + int hx, hy; + + hx = ((int *) &x)[HIWORD]; + hy = ((int *) &y)[HIWORD]; + return (hx ^ hy) >= 0 ? (x) : (-x); +} diff --git a/usr/src/libm/src/C/cos.c b/usr/src/libm/src/C/cos.c new file mode 100644 index 0000000..6c184ab --- /dev/null +++ b/usr/src/libm/src/C/cos.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cos.c 1.13 06/01/23 SMI" + +#pragma weak cos = __cos + +/* INDENT OFF */ +/* + * cos(x) + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. 
+ *
+ * Algorithm: see sincos.c
+ */
+
+#include "libm.h"
+
+static const double sc[] = {
+/* ONE = */ 1.0,
+/* NONE = */ -1.0,	/* sc[1]; no macro in this file refers to it */
+/*
+ * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008
+ */
+/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567,
+/* PP2 = */ .008333315652997472323564894248466758248475374977974017927,
+/*
+ * |(sin(x) - (x+p1*x^3+...+p4*x^9)|
+ * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125
+ * |                 x             |
+ */
+/* P1 = */ -1.666666666666629669805215138920301589656e-0001,
+/* P2 = */ 8.333333332390951295683993455280336376663e-0003,
+/* P3 = */ -1.984126237997976692791551778230098403960e-0004,
+/* P4 = */ 2.753403624854277237649987622848330351110e-0006,
+/*
+ * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d)
+ */
+/* QQ1 = */ -0.4999999999975492381842911981948418542742729,
+/* QQ2 = */ 0.041666542904352059294545209158357640398771740,
+/* Q1 = */ -0.5,
+/* Q2 = */ 4.166666666500350703680945520860748617445e-0002,
+/* Q3 = */ -1.388888596436972210694266290577848696006e-0003,
+/* Q4 = */ 2.478563078858589473679519517892953492192e-0005,
+/* PIO2_H = */ 1.570796326794896557999,
+/* PIO2_L = */ 6.123233995736765886130e-17,
+/* PIO2_L0 = */ 6.123233995727922165564e-17,
+/* PIO2_L1 = */ 8.843720566135701120255e-29,
+/* PI3O2_H = */ 4.712388980384689673997,
+/* PI3O2_L = */ 1.836970198721029765839e-16,
+/* PI3O2_L0 = */ 1.836970198720396133587e-16,
+/* PI3O2_L1 = */ 6.336322524749201142226e-29,
+/* PI5O2_H = */ 7.853981633974482789995,
+/* PI5O2_L = */ 3.061616997868382943065e-16,
+/* PI5O2_L0 = */ 3.061616997861941598865e-16,
+/* PI5O2_L1 = */ 6.441344200433640781982e-28,
+};
+/* INDENT ON */
+
+#define ONE sc[0]
+#define PP1 sc[2]
+#define PP2 sc[3]
+#define P1 sc[4]
+#define P2 sc[5]
+#define P3 sc[6]
+#define P4 sc[7]
+#define QQ1 sc[8]
+#define QQ2 sc[9]
+#define Q1 sc[10]
+#define Q2 sc[11]
+#define Q3 sc[12]
+#define Q4 sc[13]
+#define PIO2_H sc[14]
+#define PIO2_L sc[15]
+#define PIO2_L0 sc[16]
+#define PIO2_L1 sc[17]
+#define PI3O2_H sc[18]
+#define PI3O2_L sc[19]
+#define PI3O2_L0 sc[20]
+#define PI3O2_L1 sc[21]
+#define PI5O2_H sc[22]
+#define PI5O2_L sc[23]
+#define PI5O2_L0 sc[24]
+#define PI5O2_L1 sc[25]
+
+extern const double _TBL_sincos[], _TBL_sincosx[];
+/*
+ * cos(x): cosine of x, chosen by the magnitude of the high word.
+ *
+ *   high word <= 0x3fc50000  polynomial in x*x (short form below 0.008)
+ *   |x| < 8                  table lookup on floor(64*|x|) plus short
+ *                            polynomials in the offset from the table
+ *                            point, with separate branches near pi/2,
+ *                            3pi/2 and 5pi/2 where cos(x) is evaluated
+ *                            as a sine of the distance to that point
+ *   Inf or NaN               x / x
+ *   otherwise                __rem_pio2() reduction, then __k_cos() or
+ *                            __k_sin() selected by the quadrant n & 3
+ *
+ * _TBL_sincosx[], _TBL_sincos[], __rem_pio2(), __k_sin() and __k_cos()
+ * are defined outside this file.
+ */
+double
+cos(double x) {
+	double z, y[2], w, s, v, p, q;
+	int i, j, n, hx, ix, lx;
+
+	hx = ((int *)&x)[HIWORD];
+	lx = ((int *)&x)[LOWORD];
+	ix = hx & ~0x80000000;	/* high word of |x| */
+
+	if (ix <= 0x3fc50000) {	/* |x| < 10.5/64 = 0.164062500 */
+		if (ix < 0x3e400000) {	/* |x| < 2**-27 */
+			if ((int)x == 0)
+				return (ONE);
+		}
+		z = x * x;
+		if (ix < 0x3f800000)	/* |x| < 0.008 */
+			w = z * (QQ1 + z * QQ2);
+		else
+			w = z * ((Q1 + z * Q2) + (z * z) * (Q3 + z * Q4));
+		return (ONE + w);
+	}
+
+	/* for 0.164062500 < x < M, */
+	n = ix >> 20;	/* biased exponent of |x| */
+	if (n < 0x402) {	/* x < 8 */
+		i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n);	/* floor(64*|x|) */
+		j = i - 10;	/* table index; i == 10 maps to entry 0 */
+		x = fabs(x);
+		v = x - _TBL_sincosx[j];	/* offset of |x| from the table point */
+		if (((j - 81) ^ (j - 101)) < 0) {	/* 81 <= j <= 100 */
+			/* near pi/2, cos(x)=sin(pi/2-x) */
+			p = PIO2_H - x;
+			i = ix - 0x3ff921fb;
+			x = p + PIO2_L;
+			if ((i | ((lx - 0x54442D00) & 0xffffff00)) == 0) {
+				/* very close to pi/2 */
+				x = p + PIO2_L0;
+				return (x + PIO2_L1);
+			}
+			z = x * x;
+			if (((ix - 0x3ff92000) >> 12) == 0) {
+				/* |pi/2-x|<2**-8 */
+				w = PIO2_L + (z * x) * (PP1 + z * PP2);
+			} else {
+				w = PIO2_L + (z * x) * ((P1 + z * P2) +
+				    (z * z) * (P3 + z * P4));
+			}
+			return (p + w);
+		}
+		s = v * v;
+		if (((j - 282) ^ (j - 302)) < 0) {	/* 282 <= j <= 301 */
+			/* near 3/2pi, cos(x)=sin(x-3/2pi) */
+			p = x - PI3O2_H;
+			i = ix - 0x4012D97C;
+			x = p - PI3O2_L;
+			if ((i | ((lx - 0x7f332100) & 0xffffff00)) == 0) {
+				/* very close to 3/2pi */
+				x = p - PI3O2_L0;
+				return (x - PI3O2_L1);
+			}
+			z = x * x;
+			if (((ix - 0x4012D800) >> 9) == 0) {
+				/* |x-3/2pi|<2**-8 */
+				w = (z * x) * (PP1 + z * PP2) - PI3O2_L;
+			} else {
+				w = (z * x) * ((P1 + z * P2) + (z * z)
+				    * (P3 + z * P4)) - PI3O2_L;
+			}
+			return (p + w);
+		}
+		if (((j - 483) ^ (j - 503)) < 0) {	/* 483 <= j <= 502 */
+			/* near 5pi/2, cos(x)=sin(5pi/2-x) */
+			p = PI5O2_H - x;
+			i = ix - 0x401F6A7A;
+			x = p + PI5O2_L;
+			if ((i | ((lx - 0x29553800) & 0xffffff00)) == 0) {
+				/* very close to 5pi/2 */
+				x = p + PI5O2_L0;
+				return (x + PI5O2_L1);
+			}
+			z = x * x;
+			if (((ix - 0x401F6A7A) >> 7) == 0) {
+				/* |5pi/2-x|<2**-8 */
+				w = PI5O2_L + (z * x) * (PP1 + z * PP2);
+			} else {
+				w = PI5O2_L + (z * x) * ((P1 + z * P2) +
+				    (z * z) * (P3 + z * P4));
+			}
+			return (p + w);
+		}
+		/*
+		 * General case: p ~ sin(v) and q ~ cos(v) - 1, so the value
+		 * returned is z*cos(v) - w*sin(v).  NOTE(review): this is the
+		 * angle-addition formula if w and z are the sine and cosine
+		 * of the table point -- confirm against the table source.
+		 */
+		j <<= 1;
+		w = _TBL_sincos[j];
+		z = _TBL_sincos[j+1];
+		p = v + (v * s) * (PP1 + s * PP2);
+		q = s * (QQ1 + s * QQ2);
+		return (z - (w * p - z * q));
+	}
+
+	if (ix >= 0x7ff00000)	/* cos(Inf or NaN) is NaN */
+		return (x / x);
+
+	/* argument reduction needed */
+	n = __rem_pio2(x, y);	/* y[0], y[1] are then passed to the kernels */
+	switch (n & 3) {
+	case 0:
+		return (__k_cos(y[0], y[1]));
+	case 1:
+		return (-__k_sin(y[0], y[1]));
+	case 2:
+		return (-__k_cos(y[0], y[1]));
+	default:
+		return (__k_sin(y[0], y[1]));
+	}
+}
diff --git a/usr/src/libm/src/C/cosh.c b/usr/src/libm/src/C/cosh.c
new file mode 100644
index 0000000..f4d0532
--- /dev/null
+++ b/usr/src/libm/src/C/cosh.c
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.
All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cosh.c 1.17 06/01/23 SMI" + +#pragma weak cosh = __cosh + +/* INDENT OFF */ +/* + * cosh(x) + * Code originated from 4.3bsd. + * Modified by K.C. Ng for SUN 4.0 libm. + * Method : + * 1. Replace x by |x| (cosh(x) = cosh(-x)). + * 2. + * [ exp(x) - 1 ]^2 + * 0 <= x <= 0.3465 : cosh(x) := 1 + ------------------- + * 2*exp(x) + * + * exp(x) + 1/exp(x) + * 0.3465 <= x <= 22 : cosh(x) := ------------------- + * 2 + * 22 <= x <= lnovft : cosh(x) := exp(x)/2 + * lnovft <= x < INF : cosh(x) := scalbn(exp(x-1024*ln2),1023) + * + * Note: .3465 is a number near one half of ln2. + * + * Special cases: + * cosh(x) is |x| if x is +INF, -INF, or NaN. + * only cosh(0)=1 is exact for finite x. + */ +/* INDENT ON */ + +#include "libm.h" + +static const double + ln2 = 6.93147180559945286227e-01, + ln2hi = 6.93147180369123816490e-01, + ln2lo = 1.90821492927058770002e-10, + lnovft = 7.09782712893383973096e+02; + +double +cosh(double x) { + double t, w; + + w = fabs(x); + if (!finite(w)) + return (w * w); + if (w < 0.3465) { + t = expm1(w); + w = 1.0 + t; + if (w != 1.0) + w = 1.0 + (t * t) / (w + w); + return (w); + } else if (w < 22.0) { + t = exp(w); + return (0.5 * (t + 1.0 / t)); + } else if (w <= lnovft) { + return (0.5 * exp(w)); + } else { + w = (w - 1024 * ln2hi) - 1024 * ln2lo; + if (w >= ln2) + return (_SVID_libm_err(x, x, 5)); + else + return (scalbn(exp(w), 1023)); + } +} diff --git a/usr/src/libm/src/C/erf.c b/usr/src/libm/src/C/erf.c new file mode 100644 index 0000000..f4c680d --- /dev/null +++ b/usr/src/libm/src/C/erf.c @@ -0,0 +1,435 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)erf.c 1.17 06/01/31 SMI" + +#pragma weak erf = __erf +#pragma weak erfc = __erfc + +/* INDENT OFF */ +/* double erf(double x) + * double erfc(double x) + * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * Note that + * erf(-x) = -erf(x) + * erfc(-x) = 2 - erfc(x) + * + * Method: + * 1. For |x| in [0, 0.84375] + * erf(x) = x + x*R(x^2) + * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] + * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] + * where R = P/Q where P is an odd poly of degree 8 and + * Q is an odd poly of degree 10. + * -57.90 + * | R - (erf(x)-x)/x | <= 2 + * + * + * Remark. The formula is derived by noting + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) + * and that + * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 + * is close to one. The interval is chosen because the fix + * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is + * near 0.6174), and by some experiment, 0.84375 is chosen to + * guarantee the error is less than one ulp for erf. + * + * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = sign(x) * (c + P1(s)/Q1(s)) + * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 + * 1+(c+P1(s)/Q1(s)) if x < 0 + * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 + * Remark: here we use the taylor series expansion at x=1. 
+ *		erf(1+s) = erf(1) + s*Poly(s)
+ *			 = 0.845.. + P1(s)/Q1(s)
+ *	   That is, we use rational approximation to approximate
+ *			erf(1+s) - (c = (single)0.84506291151)
+ *	   Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
+ *	   where
+ *		P1(s) = degree 6 poly in s
+ *		Q1(s) = degree 6 poly in s
+ *
+ *	3. For x in [1.25,1/0.35(~2.857143)],
+ *		erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1)
+ *		erf(x)  = 1 - erfc(x)
+ *	   where
+ *		R1(z) = degree 7 poly in z, (z=1/x^2)
+ *		S1(z) = degree 8 poly in z
+ *
+ *	4. For x in [1/0.35,28]
+ *		erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
+ *			= 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0
+ *			= 2.0 - tiny		(if x <= -6)
+ *		erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6, else
+ *		erf(x)  = sign(x)*(1.0 - tiny)
+ *	   where
+ *		R2(z) = degree 6 poly in z, (z=1/x^2)
+ *		S2(z) = degree 7 poly in z
+ *
+ *	Note1:
+ *	   To compute exp(-x*x-0.5625+R/S), let s be a single
+ *	   precision number and s := x; then
+ *		-x*x = -s*s + (s-x)*(s+x)
+ *		exp(-x*x-0.5625+R/S) =
+ *			exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
+ *	Note2:
+ *	   Here 4 and 5 make use of the asymptotic series
+ *			  exp(-x*x)
+ *		erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
+ *			  x*sqrt(pi)
+ *	   We use rational approximation to approximate
+ *		g(s)=f(1/x^2) = log(erfc(x)*x) - x*x + 0.5625
+ *	   Here is the error bound for R1/S1 and R2/S2
+ *		|R1/S1 - f(x)|  < 2**(-62.57)
+ *		|R2/S2 - f(x)|  < 2**(-61.52)
+ *
+ *	5. For inf > x >= 28
+ *		erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
+ *		erfc(x) = tiny*tiny (raise underflow) if x > 0
+ *			= 2 - tiny if x<0
+ *
+ *	7. Special case:
+ *		erf(0)  = 0, erf(inf)  = 1, erf(-inf)  = -1,
+ *		erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
+ *		erfc/erf(NaN) is NaN
+ */
+/* INDENT ON */
+
+#include "libm_synonyms.h" /* __erf, __erfc, __exp */
+#include <math.h>
+
+static const double xxx[] = {
+/* tiny */ 1e-300,
+/* half */ 5.00000000000000000000e-01, /* 3FE00000, 00000000 */
+/* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */
+/* two */ 2.00000000000000000000e+00, /* 40000000, 00000000 */
+/* erx */ 8.45062911510467529297e-01, /* 3FEB0AC1, 60000000 */
+/*
+ * Coefficients for approximation to erf on [0,0.84375]
+ */
+/* efx */ 1.28379167095512586316e-01, /* 3FC06EBA, 8214DB69 */
+/* efx8 */ 1.02703333676410069053e+00, /* 3FF06EBA, 8214DB69 */
+/* pp0 */ 1.28379167095512558561e-01, /* 3FC06EBA, 8214DB68 */
+/* pp1 */ -3.25042107247001499370e-01, /* BFD4CD7D, 691CB913 */
+/* pp2 */ -2.84817495755985104766e-02, /* BF9D2A51, DBD7194F */
+/* pp3 */ -5.77027029648944159157e-03, /* BF77A291, 236668E4 */
+/* pp4 */ -2.37630166566501626084e-05, /* BEF8EAD6, 120016AC */
+/* qq1 */ 3.97917223959155352819e-01, /* 3FD97779, CDDADC09 */
+/* qq2 */ 6.50222499887672944485e-02, /* 3FB0A54C, 5536CEBA */
+/* qq3 */ 5.08130628187576562776e-03, /* 3F74D022, C4D36B0F */
+/* qq4 */ 1.32494738004321644526e-04, /* 3F215DC9, 221C1A10 */ +/* qq5 */ -3.96022827877536812320e-06, /* BED09C43, 42A26120 */ +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +/* pa0 */ -2.36211856075265944077e-03, /* BF6359B8, BEF77538 */ +/* pa1 */ 4.14856118683748331666e-01, /* 3FDA8D00, AD92B34D */ +/* pa2 */ -3.72207876035701323847e-01, /* BFD7D240, FBB8C3F1 */ +/* pa3 */ 3.18346619901161753674e-01, /* 3FD45FCA, 805120E4 */ +/* pa4 */ -1.10894694282396677476e-01, /* BFBC6398, 3D3E28EC */ +/* pa5 */ 3.54783043256182359371e-02, /* 3FA22A36, 599795EB */ +/* pa6 */ -2.16637559486879084300e-03, /* BF61BF38, 0A96073F */ +/* qa1 */ 1.06420880400844228286e-01, /* 3FBB3E66, 18EEE323 */ +/* qa2 */ 5.40397917702171048937e-01, /* 3FE14AF0, 92EB6F33 */ +/* qa3 */ 7.18286544141962662868e-02, /* 3FB2635C, D99FE9A7 */ +/* qa4 */ 1.26171219808761642112e-01, /* 3FC02660, E763351F */ +/* qa5 */ 1.36370839120290507362e-02, /* 3F8BEDC2, 6B51DD1C */ +/* qa6 */ 1.19844998467991074170e-02, /* 3F888B54, 5735151D */ +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +/* ra0 */ -9.86494403484714822705e-03, /* BF843412, 600D6435 */ +/* ra1 */ -6.93858572707181764372e-01, /* BFE63416, E4BA7360 */ +/* ra2 */ -1.05586262253232909814e+01, /* C0251E04, 41B0E726 */ +/* ra3 */ -6.23753324503260060396e+01, /* C04F300A, E4CBA38D */ +/* ra4 */ -1.62396669462573470355e+02, /* C0644CB1, 84282266 */ +/* ra5 */ -1.84605092906711035994e+02, /* C067135C, EBCCABB2 */ +/* ra6 */ -8.12874355063065934246e+01, /* C0545265, 57E4D2F2 */ +/* ra7 */ -9.81432934416914548592e+00, /* C023A0EF, C69AC25C */ +/* sa1 */ 1.96512716674392571292e+01, /* 4033A6B9, BD707687 */ +/* sa2 */ 1.37657754143519042600e+02, /* 4061350C, 526AE721 */ +/* sa3 */ 4.34565877475229228821e+02, /* 407B290D, D58A1A71 */ +/* sa4 */ 6.45387271733267880336e+02, /* 40842B19, 21EC2868 */ +/* sa5 */ 4.29008140027567833386e+02, /* 407AD021, 57700314 */ +/* sa6 */ 1.08635005541779435134e+02, /* 405B28A3, 
EE48AE2C */ +/* sa7 */ 6.57024977031928170135e+00, /* 401A47EF, 8E484A93 */ +/* sa8 */ -6.04244152148580987438e-02, /* BFAEEFF2, EE749A62 */ +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +/* rb0 */ -9.86494292470009928597e-03, /* BF843412, 39E86F4A */ +/* rb1 */ -7.99283237680523006574e-01, /* BFE993BA, 70C285DE */ +/* rb2 */ -1.77579549177547519889e+01, /* C031C209, 555F995A */ +/* rb3 */ -1.60636384855821916062e+02, /* C064145D, 43C5ED98 */ +/* rb4 */ -6.37566443368389627722e+02, /* C083EC88, 1375F228 */ +/* rb5 */ -1.02509513161107724954e+03, /* C0900461, 6A2E5992 */ +/* rb6 */ -4.83519191608651397019e+02, /* C07E384E, 9BDC383F */ +/* sb1 */ 3.03380607434824582924e+01, /* 403E568B, 261D5190 */ +/* sb2 */ 3.25792512996573918826e+02, /* 40745CAE, 221B9F0A */ +/* sb3 */ 1.53672958608443695994e+03, /* 409802EB, 189D5118 */ +/* sb4 */ 3.19985821950859553908e+03, /* 40A8FFB7, 688C246A */ +/* sb5 */ 2.55305040643316442583e+03, /* 40A3F219, CEDF3BE6 */ +/* sb6 */ 4.74528541206955367215e+02, /* 407DA874, E79FE763 */ +/* sb7 */ -2.24409524465858183362e+01 /* C03670E2, 42712D62 */ +}; + +#define tiny xxx[0] +#define half xxx[1] +#define one xxx[2] +#define two xxx[3] +#define erx xxx[4] +/* + * Coefficients for approximation to erf on [0,0.84375] + */ +#define efx xxx[5] +#define efx8 xxx[6] +#define pp0 xxx[7] +#define pp1 xxx[8] +#define pp2 xxx[9] +#define pp3 xxx[10] +#define pp4 xxx[11] +#define qq1 xxx[12] +#define qq2 xxx[13] +#define qq3 xxx[14] +#define qq4 xxx[15] +#define qq5 xxx[16] +/* + * Coefficients for approximation to erf in [0.84375,1.25] + */ +#define pa0 xxx[17] +#define pa1 xxx[18] +#define pa2 xxx[19] +#define pa3 xxx[20] +#define pa4 xxx[21] +#define pa5 xxx[22] +#define pa6 xxx[23] +#define qa1 xxx[24] +#define qa2 xxx[25] +#define qa3 xxx[26] +#define qa4 xxx[27] +#define qa5 xxx[28] +#define qa6 xxx[29] +/* + * Coefficients for approximation to erfc in [1.25,1/0.35] + */ +#define ra0 xxx[30] +#define ra1 xxx[31] +#define ra2 
xxx[32] +#define ra3 xxx[33] +#define ra4 xxx[34] +#define ra5 xxx[35] +#define ra6 xxx[36] +#define ra7 xxx[37] +#define sa1 xxx[38] +#define sa2 xxx[39] +#define sa3 xxx[40] +#define sa4 xxx[41] +#define sa5 xxx[42] +#define sa6 xxx[43] +#define sa7 xxx[44] +#define sa8 xxx[45] +/* + * Coefficients for approximation to erfc in [1/.35,28] + */ +#define rb0 xxx[46] +#define rb1 xxx[47] +#define rb2 xxx[48] +#define rb3 xxx[49] +#define rb4 xxx[50] +#define rb5 xxx[51] +#define rb6 xxx[52] +#define sb1 xxx[53] +#define sb2 xxx[54] +#define sb3 xxx[55] +#define sb4 xxx[56] +#define sb5 xxx[57] +#define sb6 xxx[58] +#define sb7 xxx[59] +/* HIWORD/LOWORD: int[2] index of the most/least significant half of a double */ +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif +/* erf(x): error function of x, evaluated piecewise in |x| from the xxx[] coefficients; odd in x */ +double +erf(double x) { + int hx, ix, i; + double R, S, P, Q, s, y, z, r; + + hx = ((int *) &x)[HIWORD]; /* high word of x */ + ix = hx & 0x7fffffff; /* high word of |x| */ + if (ix >= 0x7ff00000) { /* x is inf or nan; erf(nan)=nan */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (ix >= 0x7ff80000) /* assumes sparc-like QNaN */ + return x; +#endif + i = ((unsigned) hx >> 31) << 1; /* 2 if the sign bit of x is set, else 0 */ + return (double) (1 - i) + one / x; /* erf(+-inf)=+-1 */ + } + + if (ix < 0x3feb0000) { /* |x|<0.84375 */ + if (ix < 0x3e300000) { /* |x|<2**-28 */ + if (ix < 0x00800000) /* avoid underflow */ + return 0.125 * (8.0 * x + efx8 * x); + return x + efx * x; + } + z = x * x; + r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4))); + s = one + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + y = r / s; + return x + x * y; + } + if (ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x) - one; + P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + + s * (pa5 + s * pa6))))); + Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + + s * (qa5 + s * qa6))))); + if (hx >= 0) + return erx + P / Q; + else + return -erx - P / Q; + } + if (ix >= 0x40180000) { /* inf > |x| >= 6 */ + if (hx >= 0) + return one - tiny; + else + return tiny - one; + } + x =
fabs(x); + s = one / (x * x); + if (ix < 0x4006DB6E) { /* |x| < 1/0.35 */ + R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + + s * (ra5 + s * (ra6 + s * ra7)))))); + S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + + s * (sa5 + s * (sa6 + s * (sa7 + s * sa8))))))); + } + else { /* |x| >= 1/0.35 */ + R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + + s * (rb5 + s * rb6))))); + S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + + s * (sb5 + s * (sb6 + s * sb7)))))); + } + z = x; + ((int *) &z)[LOWORD] = 0; /* z = x with its low 32 bits cleared */ + r = exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S); + if (hx >= 0) + return one - r / x; + else + return r / x - one; +} +/* erfc(x): complementary error function of x, using the same xxx[] coefficients as erf() */ +double +erfc(double x) { + int hx, ix; + double R, S, P, Q, s, y, z, r; + + hx = ((int *) &x)[HIWORD]; /* high word of x */ + ix = hx & 0x7fffffff; /* high word of |x| */ + if (ix >= 0x7ff00000) { /* x is inf or nan; erfc(nan)=nan */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (ix >= 0x7ff80000) /* assumes sparc-like QNaN */ + return x; +#endif + /* erfc(+-inf)=0,2 */ + return (double) (((unsigned) hx >> 31) << 1) + one / x; + } + + if (ix < 0x3feb0000) { /* |x| < 0.84375 */ + if (ix < 0x3c700000) /* |x| < 2**-56 */ + return one - x; + z = x * x; + r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4))); + s = one + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + y = r / s; + if (hx < 0x3fd00000) { /* x < 1/4 (signed compare, so every negative x too) */ + return one - (x + x * y); + } + else { + r = x * y; + r += (x - half); + return half - r; + } + } + if (ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */ + s = fabs(x) - one; + P = pa0 + s * (pa1 + s * (pa2 + s * (pa3 + s * (pa4 + + s * (pa5 + s * pa6))))); + Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * (qa4 + + s * (qa5 + s * qa6))))); + if (hx >= 0) { + z = one - erx; + return z - P / Q; + } + else { + z = erx + P / Q; + return one + z; + } + } + if (ix < 0x403c0000) { /* |x|<28 */ + x = fabs(x); + s = one / (x * x); + if (ix < 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143; NOTE(review): erf() tests 0x4006DB6E for this cutoff */ + R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + + s * (ra5 + s * (ra6 + s *
ra7)))))); + S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + + s * (sa5 + s * (sa6 + s * (sa7 + s * sa8))))))); + } + else { /* |x| >= 1/.35 ~ 2.857143 */ + if (hx < 0 && ix >= 0x40180000) + return two - tiny; /* x < -6 */ + R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + + s * (rb5 + s * rb6))))); + S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + + s * (sb5 + s * (sb6 + s * sb7)))))); + } + z = x; + ((int *) &z)[LOWORD] = 0; /* z = x with its low 32 bits cleared */ + r = exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S); + if (hx > 0) + return r / x; + else + return two - r / x; + } + else { + if (hx > 0) + return tiny * tiny; + else + return two - tiny; + } +} diff --git a/usr/src/libm/src/C/exp.c b/usr/src/libm/src/C/exp.c new file mode 100644 index 0000000..f090009 --- /dev/null +++ b/usr/src/libm/src/C/exp.c @@ -0,0 +1,356 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)exp.c 1.25 06/01/24 SMI" + +#pragma weak exp = __exp + +/* + * exp(x) + * Hybrid algorithm of Peter Tang's Table driven method (for large + * arguments) and an accurate table (for small arguments).
+ * Written by K.C. Ng, November 1988. + * Method (large arguments): + * 1. Argument Reduction: given the input x, find r and integer k + * and j such that + * x = (k+j/32)*(ln2) + r, |r| <= (1/64)*ln2 + * + * 2. exp(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r)) + * a. expm1(r) is approximated by a polynomial: + * expm1(r) ~ r + t1*r^2 + t2*r^3 + ... + t5*r^6 + * Here t1 = 1/2 exactly. + * b. 2^(j/32) is represented to twice double precision + * as TBL[2j]+TBL[2j+1]. + * + * Note: If divide were fast enough, we could use another approximation + * in 2.a: + * expm1(r) ~ (2r)/(2-R), R = r - r^2*(t1 + t2*r^2) + * (for the same t1 and t2 as above) + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF)= 0; + * for finite argument, only exp(0)=1 is exact. + * + * Accuracy: + * According to an error analysis, the error is always less than + * an ulp (unit in the last place). The largest errors observed + * are less than 0.55 ulp for normal results and less than 0.75 ulp + * for subnormal results. + * + * Misc. info. 
+ * For IEEE double + * if x > 7.09782712893383973096e+02 then exp(x) overflow + * if x < -7.45133219101941108420e+02 then exp(x) underflow + */ + +#include "libm.h" + +static const double TBL[] = { + 1.00000000000000000000e+00, 0.00000000000000000000e+00, + 1.02189714865411662714e+00, 5.10922502897344389359e-17, + 1.04427378242741375480e+00, 8.55188970553796365958e-17, + 1.06714040067682369717e+00, -7.89985396684158212226e-17, + 1.09050773266525768967e+00, -3.04678207981247114697e-17, + 1.11438674259589243221e+00, 1.04102784568455709549e-16, + 1.13878863475669156458e+00, 8.91281267602540777782e-17, + 1.16372485877757747552e+00, 3.82920483692409349872e-17, + 1.18920711500272102690e+00, 3.98201523146564611098e-17, + 1.21524735998046895524e+00, -7.71263069268148813091e-17, + 1.24185781207348400201e+00, 4.65802759183693679123e-17, + 1.26905095719173321989e+00, 2.66793213134218609523e-18, + 1.29683955465100964055e+00, 2.53825027948883149593e-17, + 1.32523664315974132322e+00, -2.85873121003886075697e-17, + 1.35425554693689265129e+00, 7.70094837980298946162e-17, + 1.38390988196383202258e+00, -6.77051165879478628716e-17, + 1.41421356237309514547e+00, -9.66729331345291345105e-17, + 1.44518080697704665027e+00, -3.02375813499398731940e-17, + 1.47682614593949934623e+00, -3.48399455689279579579e-17, + 1.50916442759342284141e+00, -1.01645532775429503911e-16, + 1.54221082540794074411e+00, 7.94983480969762085616e-17, + 1.57598084510788649659e+00, -1.01369164712783039808e-17, + 1.61049033194925428347e+00, 2.47071925697978878522e-17, + 1.64575547815396494578e+00, -1.01256799136747726038e-16, + 1.68179283050742900407e+00, 8.19901002058149652013e-17, + 1.71861929812247793414e+00, -1.85138041826311098821e-17, + 1.75625216037329945351e+00, 2.96014069544887330703e-17, + 1.79470907500310716820e+00, 1.82274584279120867698e-17, + 1.83400808640934243066e+00, 3.28310722424562658722e-17, + 1.87416763411029996256e+00, -6.12276341300414256164e-17, + 1.91520656139714740007e+00, 
-1.06199460561959626376e-16, + 1.95714412417540017941e+00, 8.96076779103666776760e-17, +}; + +/* + * For i = 0, ..., 66, + * TBL2[2*i] is a double precision number near (i+1)*2^-6, and + * TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less + * than 2^-60. + * + * For i = 67, ..., 133, + * TBL2[2*i] is a double precision number near -(i-66)*2^-6, and + * TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less + * than 2^-60. + */ +static const double TBL2[] = { + 1.56249999999984491572e-02, 1.01574770858668417262e+00, + 3.12499999999998716305e-02, 1.03174340749910253834e+00, + 4.68750000000011102230e-02, 1.04799100201663386578e+00, + 6.24999999999990632493e-02, 1.06449445891785843266e+00, + 7.81249999999999444888e-02, 1.08125780744903954300e+00, + 9.37500000000013322676e-02, 1.09828514030782731226e+00, + 1.09375000000001346145e-01, 1.11558061464248226002e+00, + 1.24999999999999417133e-01, 1.13314845306682565607e+00, + 1.40624999999995337063e-01, 1.15099294469117108264e+00, + 1.56249999999996141975e-01, 1.16911844616949989195e+00, + 1.71874999999992894573e-01, 1.18752938276309216725e+00, + 1.87500000000000888178e-01, 1.20623024942098178158e+00, + 2.03124999999361649516e-01, 1.22522561187652545556e+00, + 2.18750000000000416334e-01, 1.24452010776609567344e+00, + 2.34375000000003524958e-01, 1.26411844775347081971e+00, + 2.50000000000006328271e-01, 1.28402541668774961003e+00, + 2.65624999999982791543e-01, 1.30424587476761533189e+00, + 2.81249999999993727240e-01, 1.32478475872885725906e+00, + 2.96875000000003275158e-01, 1.34564708304941493822e+00, + 3.12500000000002886580e-01, 1.36683794117380030819e+00, + 3.28124999999993394173e-01, 1.38836250675661765364e+00, + 3.43749999999998612221e-01, 1.41022603492570874906e+00, + 3.59374999999992450483e-01, 1.43243386356506730017e+00, + 3.74999999999991395772e-01, 1.45499141461818881638e+00, + 3.90624999999997613020e-01, 1.47790419541173490003e+00, + 4.06249999999991895372e-01, 1.50117780000011058483e+00, +
4.21874999999996613820e-01, 1.52481791053132154090e+00, + 4.37500000000004607426e-01, 1.54883029863414023453e+00, + 4.53125000000004274359e-01, 1.57322082682725961078e+00, + 4.68750000000008326673e-01, 1.59799544995064657371e+00, + 4.84374999999985456078e-01, 1.62316021661928200359e+00, + 4.99999999999997335465e-01, 1.64872127070012375327e+00, + 5.15625000000000222045e-01, 1.67468485281178436352e+00, + 5.31250000000003441691e-01, 1.70105730184840653330e+00, + 5.46874999999999111822e-01, 1.72784505652716169344e+00, + 5.62499999999999333866e-01, 1.75505465696029738787e+00, + 5.78124999999993338662e-01, 1.78269274625180318417e+00, + 5.93749999999999666933e-01, 1.81076607211938656050e+00, + 6.09375000000003441691e-01, 1.83928148854178719063e+00, + 6.24999999999995559108e-01, 1.86824595743221411048e+00, + 6.40625000000009103829e-01, 1.89766655033813602671e+00, + 6.56249999999993782751e-01, 1.92755045016753268072e+00, + 6.71875000000002109424e-01, 1.95790495294292221651e+00, + 6.87499999999992450483e-01, 1.98873746958227681780e+00, + 7.03125000000004996004e-01, 2.02005552770870666635e+00, + 7.18750000000007105427e-01, 2.05186677348799140219e+00, + 7.34375000000008770762e-01, 2.08417897349558689513e+00, + 7.49999999999983901766e-01, 2.11700001661264058939e+00, + 7.65624999999997002398e-01, 2.15033791595229351046e+00, + 7.81250000000005884182e-01, 2.18420081081563077774e+00, + 7.96874999999991451283e-01, 2.21859696867912603579e+00, + 8.12500000000000000000e-01, 2.25353478721320854561e+00, + 8.28125000000008215650e-01, 2.28902279633221983346e+00, + 8.43749999999997890576e-01, 2.32506966027711614586e+00, + 8.59374999999999444888e-01, 2.36168417973090827289e+00, + 8.75000000000003219647e-01, 2.39887529396710563745e+00, + 8.90625000000013433699e-01, 2.43665208303232461162e+00, + 9.06249999999980571097e-01, 2.47502376996297712708e+00, + 9.21874999999984456878e-01, 2.51399972303748420188e+00, + 9.37500000000001887379e-01, 2.55358945806293169412e+00, + 9.53125000000003330669e-01, 
2.59380264069854327147e+00, + 9.68749999999989119814e-01, 2.63464908881560244680e+00, + 9.84374999999997890576e-01, 2.67613877489447116176e+00, + 1.00000000000001154632e+00, 2.71828182845907662113e+00, + 1.01562499999999333866e+00, 2.76108853855008318234e+00, + 1.03124999999995980993e+00, 2.80456935623711389738e+00, + 1.04687499999999933387e+00, 2.84873489717039740654e+00, + -1.56249999999999514277e-02, 9.84496437005408453480e-01, + -3.12499999999955972718e-02, 9.69233234476348348707e-01, + -4.68749999999993824384e-02, 9.54206665969188905230e-01, + -6.24999999999976130205e-02, 9.39413062813478028090e-01, + -7.81249999999989314103e-02, 9.24848813216205822840e-01, + -9.37499999999995975442e-02, 9.10510361380034494161e-01, + -1.09374999999998584466e-01, 8.96394206635151680196e-01, + -1.24999999999998556710e-01, 8.82496902584596676355e-01, + -1.40624999999999361622e-01, 8.68815056262843721235e-01, + -1.56249999999999111822e-01, 8.55345327307423297647e-01, + -1.71874999999924144012e-01, 8.42084427143446223596e-01, + -1.87499999999996752598e-01, 8.29029118180403035154e-01, + -2.03124999999988037347e-01, 8.16176213022349550386e-01, + -2.18749999999995947686e-01, 8.03522573689063990265e-01, + -2.34374999999996419531e-01, 7.91065110850298847112e-01, + -2.49999999999996280753e-01, 7.78800783071407765057e-01, + -2.65624999999999888978e-01, 7.66726596070820165529e-01, + -2.81249999999989397370e-01, 7.54839601989015340777e-01, + -2.96874999999996114219e-01, 7.43136898668761203268e-01, + -3.12499999999999555911e-01, 7.31615628946642115871e-01, + -3.28124999999993782751e-01, 7.20272979955444259126e-01, + -3.43749999999997946087e-01, 7.09106182437399867879e-01, + -3.59374999999994337863e-01, 6.98112510068129799023e-01, + -3.74999999999994615418e-01, 6.87289278790975899369e-01, + -3.90624999999999000799e-01, 6.76633846161729612945e-01, + -4.06249999999947264406e-01, 6.66143610703522903727e-01, + -4.21874999999988453681e-01, 6.55816011271509125002e-01, + -4.37499999999999111822e-01, 
6.45648526427892610613e-01, + -4.53124999999999278355e-01, 6.35638673826052436056e-01, + -4.68749999999999278355e-01, 6.25784009604591573428e-01, + -4.84374999999992894573e-01, 6.16082127790682609891e-01, + -4.99999999999998168132e-01, 6.06530659712634534486e-01, + -5.15625000000000000000e-01, 5.97127273421627413619e-01, + -5.31249999999989785948e-01, 5.87869673122352498496e-01, + -5.46874999999972688514e-01, 5.78755598612500032907e-01, + -5.62500000000000000000e-01, 5.69782824730923009859e-01, + -5.78124999999992339461e-01, 5.60949160814475100700e-01, + -5.93749999999948707696e-01, 5.52252450163048691500e-01, + -6.09374999999552580121e-01, 5.43690569513243682209e-01, + -6.24999999999984789945e-01, 5.35261428518998383375e-01, + -6.40624999999983457677e-01, 5.26962969243379708573e-01, + -6.56249999999998334665e-01, 5.18793165653890220312e-01, + -6.71874999999943378626e-01, 5.10750023129039609771e-01, + -6.87499999999997002398e-01, 5.02831577970942467104e-01, + -7.03124999999991118216e-01, 4.95035896926202978463e-01, + -7.18749999999991340260e-01, 4.87361076713623331269e-01, + -7.34374999999985678123e-01, 4.79805243559684402310e-01, + -7.49999999999997335465e-01, 4.72366552741015965911e-01, + -7.65624999999993782751e-01, 4.65043188134059204408e-01, + -7.81249999999863220523e-01, 4.57833361771676883301e-01, + -7.96874999999998112621e-01, 4.50735313406363247157e-01, + -8.12499999999990119015e-01, 4.43747310081084256339e-01, + -8.28124999999996003197e-01, 4.36867645705559026759e-01, + -8.43749999999988120614e-01, 4.30094640640067360504e-01, + -8.59374999999994115818e-01, 4.23426641285265303871e-01, + -8.74999999999977129406e-01, 4.16862019678517936594e-01, + -8.90624999999983346655e-01, 4.10399173096376801428e-01, + -9.06249999999991784350e-01, 4.04036523663345414903e-01, + -9.21874999999994004796e-01, 3.97772517966614058693e-01, + -9.37499999999994337863e-01, 3.91605626676801210628e-01, + -9.53124999999999444888e-01, 3.85534344174578935682e-01, + 
-9.68749999999986677324e-01, 3.79557188183094640355e-01, + -9.84374999999992339461e-01, 3.73672699406045860648e-01, + -9.99999999999995892175e-01, 3.67879441171443832825e-01, + -1.01562499999994315658e+00, 3.62175999080846300338e-01, + -1.03124999999991096011e+00, 3.56560980663978732697e-01, + -1.04687499999999067413e+00, 3.51033015038813400732e-01, +}; + +static const double C[] = { + 0.5, + 4.61662413084468283841e+01, /* 32/ln2: 0x40471547, 0x652b82fe */ + 2.16608493865351192653e-02, /* ln2/32 high: 0x3f962e42, 0xfee00000 */ + 5.96317165397058656257e-12, /* ln2/32 low: 0x3d9a39ef, 0x35793c76 */ + 1.6666666666526086527e-1, /* t2: 3fc5555555548f7c */ + 4.1666666666226079285e-2, /* t3: 3fa5555555545d4e */ + 8.3333679843421958056e-3, /* t4: 3f811115b7aa905e */ + 1.3888949086377719040e-3, /* t5: 3f56c1728d739765 */ + 1.0, + 0.0, + 7.09782712893383973096e+02, /* overflow threshold: 0x40862E42, 0xFEFA39EF */ + 7.45133219101941108420e+02, /* underflow threshold (magnitude): 0x40874910, 0xD52D3051 */ + 5.55111512312578270212e-17, /* 2^-54: 0x3c900000, 0x00000000 */ +}; + +#define half C[0] +#define invln2_32 C[1] +#define ln2_32hi C[2] +#define ln2_32lo C[3] +#define t2 C[4] +#define t3 C[5] +#define t4 C[6] +#define t5 C[7] +#define one C[8] +#define zero C[9] +#define threshold1 C[10] +#define threshold2 C[11] +#define twom54 C[12] +/* exp(x): e**x; polynomial or TBL2 lookup for |x| < 3/2 ln 2, otherwise reduction by multiples of ln2/32 and TBL */ +double +exp(double x) { + double y, z, t; + int hx, ix, k, j, m; + + ix = ((int *)&x)[HIWORD]; /* high word of x */ + hx = ix & ~0x80000000; /* high word of |x| */ + + if (hx < 0x3ff0a2b2) { /* |x| < 3/2 ln 2 */ + if (hx < 0x3f862e42) { /* |x| < 1/64 ln 2 */ + if (hx < 0x3ed00000) { /* |x| < 2^-18 */ + volatile int dummy; + + dummy = (int)x; /* raise inexact if x != 0 */ +#ifdef lint + dummy = dummy; +#endif + if (hx < 0x3e300000) /* |x| < 2^-28 */ + return (one + x); + return (one + x * (one + half * x)); + } + t = x * x; + y = x + (t * (half + x * t2) + + (t * t) * (t3 + x * t4 + t * t5)); + return (one + y); + } + + /* find the multiple of 2^-6 nearest x */ + k = hx >> 20; /* biased exponent of |x| */ + j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k); /* j = floor(|x| * 2^7) */ + j = (j - 1) & ~1; /* j = 2*(n-1), n = nearest integer to |x| * 2^6 */ + if (ix < 0) + j += 134; /* negative x: entries for negative arguments start at TBL2[134] */ + z = x - TBL2[j]; + t = z * z;
+ y = z + (t * (half + z * t2) + + (t * t) * (t3 + z * t4 + t * t5)); + return (TBL2[j+1] + TBL2[j+1] * y); /* exp(TBL2[j]) * (1 + expm1(z)) */ + } + + if (hx >= 0x40862e42) { /* x is large, infinite, or nan */ + if (hx >= 0x7ff00000) { + if (ix == 0xfff00000 && ((int *)&x)[LOWORD] == 0) + return (zero); /* exp(-inf) = 0 */ + return (x * x); /* +inf or nan */ + } + if (x > threshold1) + return (_SVID_libm_err(x, x, 6)); /* overflow; code 6 is interpreted by _SVID_libm_err (not shown here) */ + if (-x > threshold2) + return (_SVID_libm_err(x, x, 7)); /* underflow; code 7 is interpreted by _SVID_libm_err (not shown here) */ + } + + t = invln2_32 * x; + if (ix < 0) + t -= half; + else + t += half; + k = (int)t; /* k = nearest integer to x * 32/ln2 */ + j = (k & 0x1f) << 1; /* index of the TBL pair for 2^((k mod 32)/32) */ + m = k >> 5; /* power of two applied to the result below */ + z = (x - k * ln2_32hi) - k * ln2_32lo; + + /* z is now in primary range */ + t = z * z; + y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5)); + y = TBL[j] + (TBL[j+1] + TBL[j] * y); + if (m < -1021) { /* result may be subnormal: apply 2^(m+54), then multiply by 2^-54 */ + ((int *)&y)[HIWORD] += (m + 54) << 20; + return (twom54 * y); + } + ((int *)&y)[HIWORD] += m << 20; /* y *= 2^m by adding m to the exponent field */ + return (y); +} diff --git a/usr/src/libm/src/C/exp10.c b/usr/src/libm/src/C/exp10.c new file mode 100644 index 0000000..e7fecb1 --- /dev/null +++ b/usr/src/libm/src/C/exp10.c @@ -0,0 +1,109 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)exp10.c 1.15 06/01/23 SMI" + +#pragma weak exp10 = __exp10 + +/* INDENT OFF */ +/* + * exp10(x) + * Code by K.C. Ng for SUN 4.0 libm. + * Method : + * n = nint(x*(log10/log2)); + * exp10(x) = 10**x = exp(x*ln(10)) = exp(n*ln2+(x*ln10-n*ln2)) + * = 2**n*exp(ln10*(x-n*log2/log10)) + * If x is a non-negative integer < 23 then use repeated multiplication, + * since 10**22 is the largest power of 10 that is exactly representable. + */ +/* INDENT ON */ + +#include "libm.h" + +static const double C[] = { + 3.3219280948736234787, /* log(10)/log(2) */ + 2.3025850929940456840, /* log(10) */ + 3.0102999565860955045E-1, /* log(2)/log(10) high */ + 5.3716447674669983622E-12, /* log(2)/log(10) low */ + 0.0, + 0.5, + 1.0, + 10.0, + 1.0e300, + 1.0e-300, +}; + +#define lg10 C[0] +#define ln10 C[1] +#define logt2hi C[2] +#define logt2lo C[3] +#define zero C[4] +#define half C[5] +#define one C[6] +#define ten C[7] +#define huge C[8] +#define tiny C[9] + +double +exp10(double x) { + double t, pt; + int ix, hx, k; + + ix = ((int *)&x)[HIWORD]; /* high word of x */ + hx = ix & ~0x80000000; /* high word of |x| */ + + if (hx >= 0x4074a000) { /* |x| >= 330 or x is nan */ + if (hx >= 0x7ff00000) { /* x is inf or nan */ + if (ix == 0xfff00000 && ((int *)&x)[LOWORD] == 0) + return (zero); /* exp10(-inf) = 0 */ + return (x * x); /* +inf or nan */ + } + t = (ix < 0)? tiny : huge; + return (t * t); /* tiny*tiny underflows, huge*huge overflows */ + } + + if (hx < 0x3c000000) /* |x| < 2**-63 */ + return (one + x); + + k = (int)x; + if (0 <= k && k < 23 && (double)k == x) { + /* x is a small non-negative integer */ + t = one; + pt = ten; + if (k & 1) + t = ten; + k >>= 1; + while (k) { /* square-and-multiply over the remaining bits of k */ + pt *= pt; + if (k & 1) + t *= pt; + k >>= 1; + } + return (t); + } + t = x * lg10; + k = (int)((ix < 0)?
t - half : t + half); /* k = nearest integer to x*log(10)/log(2) */ + return (scalbn(exp(ln10 * ((x - k * logt2hi) - k * logt2lo)), k)); +} diff --git a/usr/src/libm/src/C/exp2.c b/usr/src/libm/src/C/exp2.c new file mode 100644 index 0000000..b85dbf6 --- /dev/null +++ b/usr/src/libm/src/C/exp2.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)exp2.c 1.17 06/01/23 SMI" + +#pragma weak exp2 = __exp2 + +/* INDENT OFF */ +/* + * exp2(x) + * Code by K.C. Ng for SUN 4.0 libm.
+ * Method : + * exp2(x) = 2**x = 2**((x-anint(x))+anint(x)) + * = 2**anint(x)*2**(x-anint(x)) + * = 2**anint(x)*exp((x-anint(x))*ln2) + */ +/* INDENT ON */ + +#include "libm.h" + +static const double C[] = { + 0.0, + 1.0, + 0.5, + 6.93147180559945286227e-01, /* ln(2) */ + 1.0e300, + 1.0e-300, +}; + +#define zero C[0] +#define one C[1] +#define half C[2] +#define ln2 C[3] +#define huge C[4] +#define tiny C[5] + +double +exp2(double x) { + int ix, hx, k; + double t; + + ix = ((int *)&x)[HIWORD]; /* high word of x */ + hx = ix & ~0x80000000; /* high word of |x| */ + + if (hx >= 0x4090e000) { /* |x| >= 1080 or x is nan */ + if (hx >= 0x7ff00000) { /* x is inf or nan */ + if (ix == 0xfff00000 && ((int *)&x)[LOWORD] == 0) + return (zero); /* exp2(-inf) = 0 */ + return (x * x); /* +inf or nan */ + } + t = (ix < 0)? tiny : huge; + return (t * t); /* tiny*tiny underflows, huge*huge overflows */ + } + + if (hx < 0x3fe00000) { /* |x| < 0.5 */ + if (hx < 0x3c000000) /* |x| < 2**-63 */ + return (one + x); + return (exp(ln2 * x)); + } + + k = (int)x; + if (x != (double)k) /* x is not an integer: round to the nearest one */ + k = (int)((ix < 0)? x - half : x + half); + return (scalbn(exp(ln2 * (x - (double)k)), k)); /* 2**k * exp((x-k)*ln2) */ +} diff --git a/usr/src/libm/src/C/expm1.c b/usr/src/libm/src/C/expm1.c new file mode 100644 index 0000000..f8daddb --- /dev/null +++ b/usr/src/libm/src/C/expm1.c @@ -0,0 +1,270 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)expm1.c 2.5 06/01/31 SMI" + +#pragma weak expm1 = __expm1 + +/* INDENT OFF */ +/* expm1(x) + * Returns exp(x)-1, the exponential of x minus 1. + * + * Method + * 1. Argument reduction: + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 + * + * Here a correction term c will be computed to compensate for + * the error in r when rounded to a floating-point number. + * + * 2. Approximating expm1(r) by a special rational function on + * the interval [0,0.34658]: + * Since + * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ... + * we define R1(r*r) by + * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r) + * That is, + * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) + * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) + * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ... + * We use a special Remez algorithm on [0,0.347] to generate + * a polynomial of degree 5 in r*r to approximate R1. The + * maximum error of this polynomial approximation is bounded + * by 2**-61.
In other words, + * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 + * where Q1 = -1.6666666666666567384E-2, + * Q2 = 3.9682539681370365873E-4, + * Q3 = -9.9206344733435987357E-6, + * Q4 = 2.5051361420808517002E-7, + * Q5 = -6.2843505682382617102E-9; + * (where z=r*r, and the values of Q1 to Q5 are listed below) + * with error bounded by + * + * | 1.0+Q1*z+...+Q5*z**5 - R1(z) | <= 2**-61 + * + * + * expm1(r) = exp(r)-1 is then computed in the following + * specific way, which minimizes the accumulated rounding error: + * + * expm1(r) = r + r^2/2 + (r^3/2)*[3 - (R1 + R1*r/2)]/[6 - r*(3 - R1*r/2)] + * + * + * + * To compensate for the error in the argument reduction, we use + * expm1(r+c) = expm1(r) + c + expm1(r)*c + * ~ expm1(r) + c + r*c + * Thus c+r*c will be added in as the correction terms for + * expm1(r+c). Now rearrange the terms to avoid optimization + * screw-ups. Writing T for the rational term + * (r^2/2) * [R1 - (3 - R1*r/2)] / [6 - r*(3 - R1*r/2)], + * + * expm1(r+c) ~ r - ({r*(T - c) - c} - r^2/2) + * + * = r - E + * + * + * 3. Scale back to obtain expm1(x): + * From step 1, we have + * expm1(x) = either 2^k*[expm1(r)+1] - 1 + * = or 2^k*[expm1(r) + (1-2^-k)] + * 4. Implementation notes: + * (A). To save one multiplication, we scale the coefficient Qi + * to Qi*2^i, and replace z by (x^2)/2. + * (B). To achieve maximum accuracy, we compute expm1(x) by + * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) + * (ii) if k=0, return r-E + * (iii) if k=-1, return 0.5*(r-E)-0.5 + * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) + * else return 1.0+2.0*(r-E); + * (v) if (k<=-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1) + * (vi) if k < 20, return 2^k((1-2^-k)-(E-r)), else + * (vii) return 2^k(1-((E+2^-k)-r)) + * + * Special cases: + * expm1(INF) is INF, expm1(NaN) is NaN; + * expm1(-INF) is -1, and + * for finite argument, only expm1(0)=0 is exact.
+ * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 7.09782712893383973096e+02 then expm1(x) overflow + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + */ +/* INDENT ON */ + +#include "libm_synonyms.h" /* __expm1 */ +#include + +static const double xxx[] = { +/* one */ 1.0, +/* huge */ 1.0e+300, +/* tiny */ 1.0e-300, +/* o_threshold */ 7.09782712893383973096e+02, /* 40862E42 FEFA39EF */ +/* ln2_hi */ 6.93147180369123816490e-01, /* 3FE62E42 FEE00000 */ +/* ln2_lo */ 1.90821492927058770002e-10, /* 3DEA39EF 35793C76 */ +/* invln2 */ 1.44269504088896338700e+00, /* 3FF71547 652B82FE */ +/* scaled coefficients related to expm1 */ +/* Q1 */ -3.33333333333331316428e-02, /* BFA11111 111110F4 */ +/* Q2 */ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ +/* Q3 */ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ +/* Q4 */ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ +/* Q5 */ -2.01099218183624371326e-07 /* BE8AFDB7 6E09C32D */ +}; +#define one xxx[0] +#define huge xxx[1] +#define tiny xxx[2] +#define o_threshold xxx[3] +#define ln2_hi xxx[4] +#define ln2_lo xxx[5] +#define invln2 xxx[6] +#define Q1 xxx[7] +#define Q2 xxx[8] +#define Q3 xxx[9] +#define Q4 xxx[10] +#define Q5 xxx[11] + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif + +double +expm1(double x) { + double y, hi, lo, c, t, e, hxs, hfx, r1; + int k, xsb; + unsigned hx; + + hx = ((unsigned *) &x)[HIWORD]; /* high word of x */ + xsb = hx & 0x80000000; /* sign bit of x */ + if (xsb == 0) + y = x; + else + y = -x; /* y = |x| */ + hx &= 0x7fffffff; /* high word of |x| 
*/ + + /* filter out huge and non-finite arugment */ + if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */ + if (hx >= 0x40862E42) { /* if |x|>=709.78... */ + if (hx >= 0x7ff00000) { + if (((hx & 0xfffff) | ((int *) &x)[LOWORD]) + != 0) + return x * x; /* + -> * for Cheetah */ + else + return xsb == 0 ? x : -1.0; /* exp(+-inf)={inf,-1} */ + } + if (x > o_threshold) + return huge * huge; /* overflow */ + } + if (xsb != 0) { /* x < -56*ln2, return -1.0 w/inexact */ + if (x + tiny < 0.0) /* raise inexact */ + return tiny - one; /* return -1 */ + } + } + + /* argument reduction */ + if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ + if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ + if (xsb == 0) { + hi = x - ln2_hi; + lo = ln2_lo; + k = 1; + } + else { + hi = x + ln2_hi; + lo = -ln2_lo; + k = -1; + } + } + else { + k = (int) (invln2 * x + (xsb == 0 ? 0.5 : -0.5)); + t = k; + hi = x - t * ln2_hi; /* t*ln2_hi is exact here */ + lo = t * ln2_lo; + } + x = hi - lo; + c = (hi - x) - lo; + } + else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */ + t = huge + x; /* return x w/inexact when x != 0 */ + return x - (t - (huge + x)); + } + else + k = 0; + + /* x is now in primary range */ + hfx = 0.5 * x; + hxs = x * hfx; + r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); + t = 3.0 - r1 * hfx; + e = hxs * ((r1 - t) / (6.0 - x * t)); + if (k == 0) + return x - (x * e - hxs); /* c is 0 */ + else { + e = (x * (e - c) - c); + e -= hxs; + if (k == -1) + return 0.5 * (x - e) - 0.5; + if (k == 1) + if (x < -0.25) + return -2.0 * (e - (x + 0.5)); + else + return one + 2.0 * (x - e); + if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */ + y = one - (e - x); + ((int *) &y)[HIWORD] += k << 20; + return y - one; + } + t = one; + if (k < 20) { + ((int *) &t)[HIWORD] = 0x3ff00000 - (0x200000 >> k); + /* t = 1 - 2^-k */ + y = t - (e - x); + ((int *) &y)[HIWORD] += k << 20; + } + else { + ((int *) &t)[HIWORD] = (0x3ff - k) << 20; /* 2^-k */ + y = x - (e + t); + y += one; 
/*
 * fabs(x) returns x with its sign bit cleared.
 *
 * Only bit 63 of the 64-bit image of x is changed, so every other bit
 * (exponent, fraction, NaN payload) is returned exactly as passed in;
 * fabs(-0.0) is therefore +0.0 and fabs(-Inf) is +Inf.
 *
 * The bits are reached through a union instead of an (int *) cast.  The
 * cast version broke the strict-aliasing rule and needed to know which
 * 32-bit half holds the sign (HIWORD), which made the file fail to build
 * on any architecture other than sparc and i386.  The union form has
 * neither problem.
 *
 * Assumes double and unsigned long long are both 64 bits wide and share
 * the same byte order, as the 64-bit integer code in fmod.c and isnan.c
 * already does.
 */
double
fabs(double x) {
	union {
		double d;
		unsigned long long bits;
	} u;

	u.d = x;
	u.bits &= ~(1ULL << 63);	/* clear the sign bit only */
	return u.d;
}
static const double
	zero = 0.0,
	one = 1.0,
	two52 = 4503599627370496.0;	/* 2^52 */

/*
 * floor(x) returns the biggest integral value less than or equal to x.
 * NOTE: floor(x) returns result with the same sign as x's, including 0.
 *
 * Method:
 *   - |x| >= 2^52, Inf or NaN: x is returned (as x * one).
 *   - otherwise t = (x +- 2^52) -+ 2^52 is x rounded to an integer in the
 *     current rounding mode; this is computed before the |x| < 1 test,
 *     exactly as in the original statement order.
 *   - |x| < 1: +-0 is returned unchanged, any other value gives 0.0 for
 *     positive x and -1.0 for negative x.
 *   - else t is returned, lowered by one when the rounding went above x.
 *
 * The words of x are read through a union with a 64-bit integer rather
 * than through (int *) casts indexed by HIWORD/LOWORD.  That avoids the
 * strict-aliasing violation and the dependence on word order, and the
 * sign is taken from bit 63 directly instead of from the sign of an
 * int-converted high word.
 *
 * Assumes double and unsigned long long are both 64 bits wide.
 */
double
floor(double x) {
	union {
		double d;
		unsigned long long bits;
	} u;
	double t, w;
	unsigned ix, lx;
	int neg;

	u.d = x;
	neg = (int)(u.bits >> 63);			/* 1 when the sign bit is set */
	ix = (unsigned)(u.bits >> 32) & 0x7fffffff;	/* high word of |x| */
	lx = (unsigned)(u.bits & 0xffffffff);		/* low word of x */

	if (ix >= 0x43300000)	/* return x if |x| >= 2^52, or x is NaN */
		return (x * one);
	t = neg ? -two52 : two52;
	w = x + t;
	t = w - t;
	if (ix < 0x3ff00000) {	/* |x| < 1 */
		if ((ix | lx) == 0)
			return (x);	/* +-0 keeps its sign */
		else
			return (neg ? -one : zero);
	}
	return ((t <= x)? t : t - one);
}
static const double zero = 0.0;

/*
 * fmod(x, y): remainder of x / y with the sign of x, computed exactly by
 * fixed-point subtraction on the 64-bit images of |x| and |y|.
 *
 * Special cases, in the order they are tested:
 *   y == +-0                 -> _SVID_libm_err(x, y, 27)
 *   x is Inf or NaN, y NaN   -> (x * y) * zero
 *   |x| <  |y|               -> x
 *   |x| == |y|               -> x * zero (a zero with the sign of x)
 *
 * The following implementation assumes fast 64-bit integer arith-
 * metic.  This is fine for sparc because we build libm in v8plus
 * mode.  It's also fine for sparcv9 and amd64, although we have
 * assembly code on amd64.  For x86, it would be better to use
 * 32-bit code, but we have assembly for x86, too.
 *
 * The bit images are read and written through a union; the earlier
 * *(long long *)&x casts violated the strict-aliasing rule.  All of the
 * integer arithmetic is unchanged.
 */
double
fmod(double x, double y) {
	union {
		double d;
		long long ll;
		unsigned long long ull;
	} ux, uy, uw;
	long long hx, ix, iy, iz;
	int nd, k, ny;

	ux.d = x;
	uy.d = y;
	hx = ux.ll;				/* raw bits of x, sign included */
	ix = hx & ~0x8000000000000000ull;	/* bits of |x| */
	iy = uy.ll & ~0x8000000000000000ull;	/* bits of |y| */

	/* handle special cases */
	if (iy == 0ll)
		return (_SVID_libm_err(x, y, 27));

	if (ix >= 0x7ff0000000000000ll || iy > 0x7ff0000000000000ll)
		return ((x * y) * zero);

	if (ix <= iy)
		return ((ix < iy)? x : x * zero);

	/*
	 * Set:
	 *	ny = true exponent of y
	 *	nd = true exponent of x minus true exponent of y
	 *	ix = normalized significand of x
	 *	iy = normalized significand of y
	 */
	ny = iy >> 52;
	k = ix >> 52;
	if (ny == 0) {
		/* y is subnormal, x could be normal or subnormal */
		ny = 1;
		while (iy < 0x0010000000000000ll) {
			ny -= 1;
			iy += iy;
		}
		nd = k - ny;
		if (k == 0) {
			nd += 1;
			while (ix < 0x0010000000000000ll) {
				nd -= 1;
				ix += ix;
			}
		} else {
			ix = 0x0010000000000000ll | (ix & 0x000fffffffffffffll);
		}
	} else {
		/* both x and y are normal */
		nd = k - ny;
		ix = 0x0010000000000000ll | (ix & 0x000fffffffffffffll);
		iy = 0x0010000000000000ll | (iy & 0x000fffffffffffffll);
	}

	/* perform fixed point mod */
	while (nd--) {
		iz = ix - iy;
		if (iz >= 0)
			ix = iz;
		ix += ix;
	}
	iz = ix - iy;
	if (iz >= 0)
		ix = iz;

	/* convert back to floating point and restore the sign */
	if (ix == 0ll)
		return (x * zero);
	while (ix < 0x0010000000000000ll) {
		ix += ix;
		ny -= 1;
	}
	while (ix > 0x0020000000000000ll) { /* XXX can this ever happen? */
		ny += 1;
		ix >>= 1;
	}
	if (ny <= 0) {
		/* result is subnormal */
		k = -ny + 1;
		ix >>= k;
		uw.ull = (hx & 0x8000000000000000ull) | ix;
		return (uw.d);
	}
	uw.ull = (hx & 0x8000000000000000ull) |
	    ((long long)ny << 52) | (ix & 0x000fffffffffffffll);
	return (uw.d);
}
extern int signgam;

/*
 * gamma(x): wrapper around __k_lgamma() that adds argument screening and
 * SVID error reporting.
 *
 *   - x is Inf or NaN: returns x * x.
 *   - x is an integer <= 0: sets signgam to 1 and returns
 *     _SVID_libm_err(x, x, 41).
 *   - otherwise returns __k_lgamma(x, &signgam); if that value is not
 *     finite it is replaced by _SVID_libm_err(x, x, 40).
 *
 * NOTE(review): the meaning of error codes 40 and 41 lives in
 * _SVID_error.c; presumably overflow and singularity - confirm there.
 */
double
gamma(double x) {
	double nearest, result;

	if (finite(x) == 0)
		return (x * x);

	/* rint() is always called, as before, ahead of the pole test */
	nearest = rint(x);
	if (nearest == x && x <= 0.0) {
		signgam = 1;
		return (_SVID_libm_err(x, x, 41));
	}

	result = __k_lgamma(x, &signgam);
	if (finite(result) == 0)
		result = _SVID_libm_err(x, x, 40);
	return (result);
}
/*
 * gamma_r(x, signgamp): reentrant entry point that simply forwards to
 * lgamma_r() with the same arguments and returns its result unchanged.
 */
double
gamma_r(double x, int *signgamp) {
	double result;

	result = lgamma_r(x, signgamp);
	return (result);
}
static const double
	zero = 0.0,
	onep1u = 1.00000000000000022204e+00,	/* 0x3ff00000 1 = 1+2**-52 */
	twom53 = 1.11022302462515654042e-16,	/* 0x3ca00000 0 = 2**-53 */
	twom768 = 6.441148769597133308e-232,	/* 2^-768 */
	two768 = 1.552518092300708935e+231;	/* 2^768 */

/* INDENT ON */

/*
 * hypot(x, y): sqrt(x*x + y*y) evaluated without spurious intermediate
 * overflow or underflow.
 *
 * Outline of the code below:
 *   1. Take the high/low words of x and y and order the operands so that
 *      ax = larger magnitude, ay = smaller magnitude.
 *   2. Large ax (exponent field >= 0x5f3): handle Inf/NaN, or scale both
 *      operands by 2^-768 (iscale = 2).
 *   3. Small ay (exponent field < 0x23d): handle zeros, or scale both
 *      operands by 2^768 (iscale = 1).
 *   4. In every range, when the exponents differ by more than 32 the
 *      result is formed from ax + ay directly.
 *   5. Otherwise evaluate one of three formulas, chosen by the low-word
 *      test and by a run-time probe of the rounding mode.
 *   6. Undo the scaling; an infinite result after scaling back up is
 *      passed to _SVID_libm_err(x, y, 4).
 *
 * NOTE(review): the ((int *) &v)[HIWORD/LOWORD] accesses rely on the
 * word-order macros from libm.h (not visible here) and on the compiler
 * tolerating this kind of type punning.
 */
double
hypot(double x, double y) {
	double xh, yh, w, ax, ay;
	int i, j, nx, ny, ix, iy, iscale = 0;
	unsigned lx, ly;

	/* ix/iy: high words with the sign bit cleared; lx/ly: low words */
	ix = ((int *) &x)[HIWORD] & ~0x80000000;
	lx = ((int *) &x)[LOWORD];
	iy = ((int *) &y)[HIWORD] & ~0x80000000;
	ly = ((int *) &y)[LOWORD];
/*
 * Force ax = |x| ~>~ ay = |y|
 */
	if (iy > ix) {
		ax = fabs(y);
		ay = fabs(x);
		/* swap the word copies too, so ix/lx always describe ax */
		i = ix;
		ix = iy;
		iy = i;
		i = lx;
		lx = ly;
		ly = i;
	} else {
		ax = fabs(x);
		ay = fabs(y);
	}
	nx = ix >> 20;		/* biased exponent of ax */
	ny = iy >> 20;		/* biased exponent of ay */
	j = nx - ny;		/* exponent gap, >= 0 after the swap */
/*
 * x >= 2^500 (x*x or y*y may overflow)
 */
	if (nx >= 0x5f3) {
		if (nx == 0x7ff) {	/* inf or NaN, signal of sNaN */
			/* ax is Inf when its fraction bits are all zero */
			if (((ix - 0x7ff00000) | lx) == 0)
				return (ax == ay ? ay : ax);
			else if (((iy - 0x7ff00000) | ly) == 0)
				return (ay == ax ? ax : ay);
			else
				return (ax * ay);	/* + -> * for Cheetah */
		} else if (j > 32) {	/* x >> y */
			/*
			 * NOTE(review): ay is shrunk by 2^-53 before being
			 * added, presumably so it only influences rounding
			 * and the inexact flag - confirm.
			 */
			if (j <= 53)
				ay *= twom53;
			ax += ay;
			/* the sum became +Inf: report through the SVID hook */
			if (((int *) &ax)[HIWORD] == 0x7ff00000)
				ax = _SVID_libm_err(x, y, 4);
			return (ax);
		}
		/* scale down by 2^-768 and keep the word copies in step */
		ax *= twom768;
		ay *= twom768;
		iscale = 2;
		ix -= 768 << 20;
		iy -= 768 << 20;
	}
/*
 * y < 2^-450 (x*x or y*y may underflow)
 */
	else if (ny < 0x23d) {
		if ((ix | lx) == 0)	/* ax == 0, so both are zero */
			return (ay);
		if ((iy | ly) == 0)	/* ay == 0 */
			return (ax);
		if (j > 53)	/* x >> y */
			return (ax + ay);
		/* scale up by 2^768 */
		iscale = 1;
		ax *= two768;
		ay *= two768;
		if (nx == 0) {
			/* ax was subnormal: re-read its high word after scaling */
			if (ax == zero)	/* guard subnormal flush to zero */
				return (ax);
			ix = ((int *) &ax)[HIWORD];
		} else
			ix += 768 << 20;
		if (ny == 0) {
			/* ay was subnormal: re-read its high word after scaling */
			if (ay == zero)	/* guard subnormal flush to zero */
				return (ax * twom768);
			iy = ((int *) &ay)[HIWORD];
		} else
			iy += 768 << 20;
		/* recompute the gap from the refreshed words */
		j = (ix >> 20) - (iy >> 20);
		if (j > 32) {	/* x >> y */
			if (j <= 53)
				ay *= twom53;
			return ((ax + ay) * twom768);
		}
	} else if (j > 32) {	/* x >> y */
		if (j <= 53)
			ay *= twom53;
		return (ax + ay);
	}
/*
 * Medium range ax and ay with max{|ax/ay|,|ay/ax|} bounded by 2^32
 * First check rounding mode by comparing onep1u*onep1u with onep1u+twom53.
 * Make sure the computation is done at run-time.
 */
	/* both low words have their low 27 bits clear */
	if (((lx | ly) << 5) == 0) {
		ay = ay * ay;
		ax += ay / (ax + sqrt(ax * ax + ay));
	} else
	if (onep1u * onep1u != onep1u + twom53) {
	/* round-to-zero, positive, negative mode */
	/* magic formula with less than an ulp error */
		w = sqrt(ax * ax + ay * ay);
		ax += ay / ((ax + w) / ay);
	} else {
	/* round-to-nearest mode */
		w = ax - ay;
		if (w > ay) {
			/* ax > 2*ay: xh = ax with its low word cleared */
			((int *) &xh)[HIWORD] = ix;
			((int *) &xh)[LOWORD] = 0;
			ay = ay * ay + (ax - xh) * (ax + xh);
			ax = sqrt(xh * xh + ay);
		} else {
			/*
			 * ax <= 2*ay: ax is doubled; xh is built from ix with
			 * the exponent field raised by one (the doubled value,
			 * low word cleared), yh = ay with its low word cleared
			 */
			ax = ax + ax;
			((int *) &xh)[HIWORD] = ix + 0x00100000;
			((int *) &xh)[LOWORD] = 0;
			((int *) &yh)[HIWORD] = iy;
			((int *) &yh)[LOWORD] = 0;
			ay = w * w + ((ax - xh) * yh + (ay - yh) * ax);
			ax = sqrt(xh * yh + ay);
		}
	}
	/* undo the scaling applied above */
	if (iscale > 0) {
		if (iscale == 1)
			ax *= twom768;
		else {
			ax *= two768;	/* must generate side effect here */
			if (((int *) &ax)[HIWORD] == 0x7ff00000)
				ax = _SVID_libm_err(x, y, 4);
		}
	}
	return (ax);
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ilogb.c 1.34 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak ilogb = __ilogb +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ + +#if defined(USE_FPSCALE) || defined(__i386) +static const double two52 = 4503599627370496.0; +#else +/* + * v: high part of a non-zero subnormal |x|; w: low part of |x| + */ +static int +ilogb_subnormal(unsigned v, unsigned w) { + int r = -1022 - 52; + + if (v) + r += 32; + else + v = w; + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +static int +raise_invalid(int v) { /* SUSv3 requires ilogb(0,+/-Inf,NaN) raise invalid */ +#ifndef lint + if ((__xpg6 & _C99SUSv3_ilogb_0InfNaN_raises_invalid) != 0) { + static const double zero = 0.0; + volatile double dummy; + + dummy = zero / zero; + } +#endif + return (v); +} + +int +ilogb(double x) { + int *px = (int *) &x, k = px[HIWORD] & ~0x80000000; + + if (k < 0x00100000) { + if ((px[LOWORD] | k) == 0) + return (raise_invalid(0x80000001)); + else { +#if defined(USE_FPSCALE) || defined(__i386) + x *= two52; + return (((px[HIWORD] & 0x7ff00000) >> 20) - 1075); +#else + return (ilogb_subnormal(k, px[LOWORD])); +#endif + } + } else if (k < 0x7ff00000) + return ((k >> 20) - 1023); + else + return (raise_invalid(0x7fffffff)); +} diff --git a/usr/src/libm/src/C/isnan.c b/usr/src/libm/src/C/isnan.c new file mode 100644 index 0000000..3d3954d --- /dev/null +++ b/usr/src/libm/src/C/isnan.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are 
/*
 * __isnan(x) returns 1 when x is a NaN and 0 otherwise.
 *
 * With the sign bit masked off, the 64-bit image of x is greater than
 * 0x7ff0000000000000 (the image of +Inf) exactly when x is a NaN.  The
 * subtraction below then wraps, which sets bit 63 of the difference, and
 * the shift moves that bit into the result.
 *
 * The following implementation assumes fast 64-bit integer arith-
 * metic.  This is fine for sparc because we build libm in v8plus
 * mode.  It's also fine for sparcv9 and amd64.  For x86, it would
 * be better to use 32-bit code, but we have assembly for x86.
 *
 * The bits are read through a union; the earlier *(long long *)&x cast
 * violated the strict-aliasing rule.  Assumes double and unsigned long
 * long are both 64 bits wide.
 */
int
__isnan(double x) {
	union {
		double d;
		unsigned long long bits;
	} u;
	unsigned long long mag;

	u.d = x;
	mag = u.bits & ~0x8000000000000000ull;	/* bits of |x| */
	return ((int)((0x7ff0000000000000ull - mag) >> 63));
}
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)j0.c 1.15 06/01/31 SMI" + +/* + * Floating point Bessel's function of the first and second kinds + * of order zero: j0(x),y0(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. 
+ */ + +#pragma weak j0 = __j0 +#pragma weak y0 = __y0 + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include +#include + +#define GENERIC double +static const GENERIC +zero = 0.0, +small = 1.0e-5, +tiny = 1.0e-18, +one = 1.0, +eight = 8.0, +invsqrtpi= 5.641895835477562869480794515607725858441e-0001, +tpi = 0.636619772367581343075535053490057448; + +static GENERIC pzero(GENERIC), qzero(GENERIC); +static const GENERIC r0[4] = { /* [1.e-5, 1.28] */ + -2.500000000000003622131880894830476755537e-0001, + 1.095597547334830263234433855932375353303e-0002, + -1.819734750463320921799187258987098087697e-0004, + 9.977001946806131657544212501069893930846e-0007, +}; +static const GENERIC s0[4] = { /* [1.e-5, 1.28] */ + 1.0, + 1.867609810662950169966782360588199673741e-0002, + 1.590389206181565490878430827706972074208e-0004, + 6.520867386742583632375520147714499522721e-0007, +}; +static const GENERIC r1[9] = { /* [1.28,8] */ + 9.999999999999999942156495584397047660949e-0001, + -2.389887722731319130476839836908143731281e-0001, + 1.293359476138939027791270393439493640570e-0002, + -2.770985642343140122168852400228563364082e-0004, + 2.905241575772067678086738389169625218912e-0006, + -1.636846356264052597969042009265043251279e-0008, + 5.072306160724884775085431059052611737827e-0011, + -8.187060730684066824228914775146536139112e-0014, + 5.422219326959949863954297860723723423842e-0017, +}; +static const GENERIC s1[9] = { /* [1.28,8] */ + 1.0, + 1.101122772686807702762104741932076228349e-0002, + 6.140169310641649223411427764669143978228e-0005, + 2.292035877515152097976946119293215705250e-0007, + 6.356910426504644334558832036362219583789e-0010, + 1.366626326900219555045096999553948891401e-0012, + 2.280399586866739522891837985560481180088e-0015, + 2.801559820648939665270492520004836611187e-0018, + 2.073101088320349159764410261466350732968e-0021, +}; + +GENERIC +j0(GENERIC x) { + GENERIC z, s,c,ss,cc,r,u,v,ox; + int i; + + if(isnan(x)) return x*x; /* + -> * for 
Cheetah */ + ox= x; + x = fabs(x); + if(x > 8.0){ + if(!finite(x)) return zero; + s = sin(x); + c = cos(x); + /* j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if(x>8.9e307) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if(signbit(s)!=signbit(c)) { + ss = s - c; + cc = -cos(x+x)/ss; + } else { + cc = s + c; + ss = -cos(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if(x>1.0e40) z= (invsqrtpi*cc)/sqrt(x); + else { + u = pzero(x); v = qzero(x); + z = invsqrtpi*(u*cc-v*ss)/sqrt(x); + } + /* force to pass SVR4 even the result is wrong (sign) */ + if (x > X_TLOSS) + return _SVID_libm_err(ox,z,34); + else + return z; + } + if(x<=small) { + if(x<=tiny) return one-x; + else return one-x*x*0.25; + } + z = x*x; + if(x<=1.28) { + r = r0[0]+z*(r0[1]+z*(r0[2]+z*r0[3])); + s = s0[0]+z*(s0[1]+z*(s0[2]+z*s0[3])); + return one + z*(r/s); + } else { + for(r=r1[8],s=s1[8],i=7;i>=0;i--) { + r = r*z + r1[i]; + s = s*z + s1[i]; + } + return(r/s); + } +} + +static const GENERIC u0[13] = { + -7.380429510868722526754723020704317641941e-0002, + 1.772607102684869924301459663049874294814e-0001, + -1.524370666542713828604078090970799356306e-0002, + 4.650819100693891757143771557629924591915e-0004, + -7.125768872339528975036316108718239946022e-0006, + 6.411017001656104598327565004771515257146e-0008, + -3.694275157433032553021246812379258781665e-0010, + 1.434364544206266624252820889648445263842e-0012, + -3.852064731859936455895036286874139896861e-0015, + 7.182052899726138381739945881914874579696e-0018, + -9.060556574619677567323741194079797987200e-0021, + 
7.124435467408860515265552217131230511455e-0024, + -2.709726774636397615328813121715432044771e-0027, +}; +static const GENERIC v0[5] = { + 1.0, + 4.678678931512549002587702477349214886475e-0003, + 9.486828955529948534822800829497565178985e-0006, + 1.001495929158861646659010844136682454906e-0008, + 4.725338116256021660204443235685358593611e-0012, +}; + +GENERIC +y0(GENERIC x) { + GENERIC z, /* d, */ s,c,ss,cc,u,v; + int i; + + if(isnan(x)) return x*x; /* + -> * for Cheetah */ + if(x <= zero){ + if(x==zero) + /* d= -one/(x-x); */ + return _SVID_libm_err(x,x,8); + else + /* d = zero/(x-x); */ + return _SVID_libm_err(x,x,9); + } + if(x > 8.0){ + if(!finite(x)) return zero; + s = sin(x); + c = cos(x); + /* j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if(x>8.9e307) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if(signbit(s)!=signbit(c)) { + ss = s - c; + cc = -cos(x+x)/ss; + } else { + cc = s + c; + ss = -cos(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi*x) * (P(0,x)*cc - Q(0,x)*ss) + * y0(x) = 1/sqrt(pi*x) * (P(0,x)*ss + Q(0,x)*cc) + */ + if(x>1.0e40) + z = (invsqrtpi*ss)/sqrt(x); + else + z = invsqrtpi*(pzero(x)*ss+qzero(x)*cc)/sqrt(x); + if (x > X_TLOSS) + return _SVID_libm_err(x,z,35); + else + return z; + + } + if(x<=tiny) { + return(u0[0] + tpi*log(x)); + } + z = x*x; + for(u=u0[12],i=11;i>=0;i--) u = u*z + u0[i]; + v = v0[0]+z*(v0[1]+z*(v0[2]+z*(v0[3]+z*v0[4]))); + return(u/v + tpi*(j0(x)*log(x))); +} + +static const GENERIC pr[7] = { /* [8 -- inf] pzero 6550 */ + .4861344183386052721391238447e5, + .1377662549407112278133438945e6, + .1222466364088289731869114004e6, + .4107070084315176135583353374e5, + .5026073801860637125889039915e4, + .1783193659125479654541542419e3, + .88010344055383421691677564e0, +}; +static const GENERIC ps[7] = { /* [8 -- inf] pzero 6550 */ + .4861344183386052721414037058e5, + .1378196632630384670477582699e6, + .1223967185341006542748936787e6, + .4120150243795353639995862617e5, + .5068271181053546392490184353e4, + .1829817905472769960535671664e3, + 1.0, +}; +static const GENERIC huge = 1.0e10; + +static GENERIC +pzero(GENERIC x) { + GENERIC s,r,t,z; + int i; + if(x>huge) return one; + t = eight/x; z = t*t; + r = pr[5]+z*pr[6]; + s = ps[5]+z; + for(i=4;i>=0;i--) { + r = r*z + pr[i]; + s = s*z + ps[i]; + } + return r/s; +} + +static const GENERIC qr[7] = { /* [8 -- inf] qzero 6950 */ + -.1731210995701068539185611951e3, + -.5522559165936166961235240613e3, + -.5604935606637346590614529613e3, + -.2200430300226009379477365011e3, + -.323869355375648849771296746e2, + -.14294979207907956223499258e1, + -.834690374102384988158918e-2, +}; +static const GENERIC qs[7] = { /* [8 -- inf] qzero 6950 */ + .1107975037248683865326709645e5, + .3544581680627082674651471873e5, + 
.3619118937918394132179019059e5, + .1439895563565398007471485822e5, + .2190277023344363955930226234e4, + .106695157020407986137501682e3, + 1.0, +}; + +static GENERIC +qzero(GENERIC x) { + GENERIC s,r,t,z; + int i; + if(x>huge) return -0.125/x; + t = eight/x; z = t*t; + r = qr[5]+z*qr[6]; + s = qs[5]+z; + for(i=4;i>=0;i--) { + r = r*z + qr[i]; + s = s*z + qs[i]; + } + return t*(r/s); +} diff --git a/usr/src/libm/src/C/j1.c b/usr/src/libm/src/C/j1.c new file mode 100644 index 0000000..c88914f --- /dev/null +++ b/usr/src/libm/src/C/j1.c @@ -0,0 +1,329 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)j1.c 1.17 06/01/31 SMI" + +/* + * floating point Bessel's function of the first and second kinds + * of order zero: j1(x),y1(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. 
+ */ + +#pragma weak j1 = __j1 +#pragma weak y1 = __y1 + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include +#include + +#define GENERIC double +static const GENERIC +zero = 0.0, +small = 1.0e-5, +tiny = 1.0e-20, +one = 1.0, +invsqrtpi= 5.641895835477562869480794515607725858441e-0001, +tpi = 0.636619772367581343075535053490057448; + +static GENERIC pone(GENERIC), qone(GENERIC); +static const GENERIC r0[4] = { + -6.250000000000002203053200981413218949548e-0002, + 1.600998455640072901321605101981501263762e-0003, + -1.963888815948313758552511884390162864930e-0005, + 8.263917341093549759781339713418201620998e-0008, +}; +static const GENERIC s0[7] = { + 1.0e0, + 1.605069137643004242395356851797873766927e-0002, + 1.149454623251299996428500249509098499383e-0004, + 3.849701673735260970379681807910852327825e-0007, +}; +static const GENERIC r1[12] = { + 4.999999999999999995517408894340485471724e-0001, + -6.003825028120475684835384519945468075423e-0002, + 2.301719899263321828388344461995355419832e-0003, + -4.208494869238892934859525221654040304068e-0005, + 4.377745135188837783031540029700282443388e-0007, + -2.854106755678624335145364226735677754179e-0009, + 1.234002865443952024332943901323798413689e-0011, + -3.645498437039791058951273508838177134310e-0014, + 7.404320596071797459925377103787837414422e-0017, + -1.009457448277522275262808398517024439084e-0019, + 8.520158355824819796968771418801019930585e-0023, + -3.458159926081163274483854614601091361424e-0026, +}; +static const GENERIC s1[5] = { + 1.0e0, + 4.923499437590484879081138588998986303306e-0003, + 1.054389489212184156499666953501976688452e-0005, + 1.180768373106166527048240364872043816050e-0008, + 5.942665743476099355323245707680648588540e-0012, +}; + +GENERIC +j1(GENERIC x) { + GENERIC z, d, s,c,ss,cc,r; + int i, sgn; + + if(!finite(x)) return one/x; + sgn = signbit(x); + x = fabs(x); + if(x > 8.00){ + s = sin(x); + c = cos(x); + /* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + 
* where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if(x>8.9e307) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if(signbit(s)!=signbit(c)) { + cc = s - c; + ss = cos(x+x)/cc; + } else { + ss = -s-c; + cc = cos(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if(x>1.0e40) + d = (invsqrtpi*cc)/sqrt(x); + else + d = invsqrtpi*(pone(x)*cc-qone(x)*ss)/sqrt(x); + if (x > X_TLOSS) { + if(sgn!=0) {d = -d; x = -x;} + return _SVID_libm_err(x,d,36); + } else + if(sgn==0) return d; else return -d; + } + if(x<=small) { + if(x<=tiny) d = 0.5*x; + else d = x*(0.5-x*x*0.125); + if(sgn==0) return d; else return -d; + } + z = x*x; + if(x<1.28) { + r = r0[3]; + s = s0[3]; + for(i=2;i>=0;i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + d = x*0.5+x*(z*(r/s)); + } else { + r = r1[11]; + for(i=10;i>=0;i--) r = r*z + r1[i]; + s = s1[0]+z*(s1[1]+z*(s1[2]+z*(s1[3]+z*s1[4]))); + d = x*(r/s); + } + if(sgn==0) return d; else return -d; +} + +static const GENERIC u0[4] = { + -1.960570906462389461018983259589655961560e-0001, + 4.931824118350661953459180060007970291139e-0002, + -1.626975871565393656845930125424683008677e-0003, + 1.359657517926394132692884168082224258360e-0005, +}; +static const GENERIC v0[5] = { + 1.0e0, + 2.565807214838390835108224713630901653793e-0002, + 3.374175208978404268650522752520906231508e-0004, + 2.840368571306070719539936935220728843177e-0006, + 1.396387402048998277638900944415752207592e-0008, +}; +static const GENERIC u1[12] = { + -1.960570906462389473336339614647555351626e-0001, + 5.336268030335074494231369159933012844735e-0002, + -2.684137504382748094149184541866332033280e-0003, + 
5.737671618979185736981543498580051903060e-0005, + -6.642696350686335339171171785557663224892e-0007, + 4.692417922568160354012347591960362101664e-0009, + -2.161728635907789319335231338621412258355e-0011, + 6.727353419738316107197644431844194668702e-0014, + -1.427502986803861372125234355906790573422e-0016, + 2.020392498726806769468143219616642940371e-0019, + -1.761371948595104156753045457888272716340e-0022, + 7.352828391941157905175042420249225115816e-0026, +}; +static const GENERIC v1[5] = { + 1.0e0, + 5.029187436727947764916247076102283399442e-0003, + 1.102693095808242775074856548927801750627e-0005, + 1.268035774543174837829534603830227216291e-0008, + 6.579416271766610825192542295821308730206e-0012, +}; + + +GENERIC +y1(GENERIC x) { + GENERIC z, d, s,c,ss,cc,u,v; + int i; + + if(isnan(x)) return x*x; /* + -> * for Cheetah */ + if(x <= zero){ + if(x==zero) + /* return -one/zero; */ + return _SVID_libm_err(x,x,10); + else + /* return zero/zero; */ + return _SVID_libm_err(x,x,11); + } + if(x > 8.0){ + if(!finite(x)) return zero; + s = sin(x); + c = cos(x); + /* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if(x>8.9e307) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if(signbit(s)!=signbit(c)) { + cc = s - c; + ss = cos(x+x)/cc; + } else { + ss = -s-c; + cc = cos(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if(x>1.0e91) + d = (invsqrtpi*ss)/sqrt(x); + else + d = invsqrtpi*(pone(x)*ss+qone(x)*cc)/sqrt(x); + if (x > X_TLOSS) + return _SVID_libm_err(x,d,37); + else + return d; + } + if(x<=tiny) { + return(-tpi/x); + } + z = x*x; + if(x<1.28) { + u = u0[3]; v = v0[3]+z*v0[4]; + for(i=2;i>=0;i--){ + u = u*z + u0[i]; + v = v*z + v0[i]; + } + } else { + for (u = u1[11], i=10;i>=0;i--) u = u*z+u1[i]; + v = v1[0]+z*(v1[1]+z*(v1[2]+z*(v1[3]+z*v1[4]))); + } + return(x*(u/v) + tpi*(j1(x)*log(x)-one/x)); +} + +static const GENERIC pr0[6] = { + -.4435757816794127857114720794e7, + -.9942246505077641195658377899e7, + -.6603373248364939109255245434e7, + -.1523529351181137383255105722e7, + -.1098240554345934672737413139e6, + -.1611616644324610116477412898e4, +}; +static const GENERIC ps0[6] = { + -.4435757816794127856828016962e7, + -.9934124389934585658967556309e7, + -.6585339479723087072826915069e7, + -.1511809506634160881644546358e7, + -.1072638599110382011903063867e6, + -.1455009440190496182453565068e4, +}; +static const GENERIC huge = 1.0e10; + +static GENERIC +pone(GENERIC x) { + GENERIC s,r,t,z; + int i; + /* assume x > 8 */ + if(x>huge) return one; + t = 8.0/x; z = t*t; + r = pr0[5]; s = ps0[5]+z; + for(i=4;i>=0;i--) { + r = z*r + pr0[i]; + s = z*s + ps0[i]; + } + return r/s; +} + + +static const GENERIC qr0[6] = { + 0.3322091340985722351859704442e5, + 0.8514516067533570196555001171e5, + 0.6617883658127083517939992166e5, + 0.1849426287322386679652009819e5, + 0.1706375429020768002061283546e4, + 0.3526513384663603218592175580e2, +}; +static const GENERIC qs0[6] = { + 0.7087128194102874357377502472e6, + 0.1819458042243997298924553839e7, + 0.1419460669603720892855755253e7, + 
0.4002944358226697511708610813e6, + 0.3789022974577220264142952256e5, + 0.8638367769604990967475517183e3, +}; + +static GENERIC +qone(GENERIC x) { + GENERIC s,r,t,z; + int i; + if(x>huge) return 0.375/x; + t = 8.0/x; z = t*t; + /* assume x > 8 */ + r = qr0[5]; s = qs0[5]+z; + for(i=4;i>=0;i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + return t*(r/s); +} diff --git a/usr/src/libm/src/C/jn.c b/usr/src/libm/src/C/jn.c new file mode 100644 index 0000000..0afa31f --- /dev/null +++ b/usr/src/libm/src/C/jn.c @@ -0,0 +1,279 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)jn.c 1.23 06/01/31 SMI" + +#pragma weak jn = __jn +#pragma weak yn = __yn + +/* + * floating point Bessel's function of the 1st and 2nd kind + * of order n: jn(n,x),yn(n,x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. 
About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for n<=x, forward recursion is used starting + * from values of j0(x) and j1(x). + * for n>x, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + * + */ + +#include "libm.h" +#include /* DBL_MIN */ +#include /* X_TLOSS */ +#include "xpg6.h" /* __xpg6 */ + +#define GENERIC double + +static const GENERIC + invsqrtpi = 5.641895835477562869480794515607725858441e-0001, + two = 2.0, + zero = 0.0, + one = 1.0; + +GENERIC +jn(int n, GENERIC x) { + int i, sgn; + GENERIC a, b, temp; + GENERIC z, w, ox, on; + + /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + ox = x; on = (GENERIC)n; + if(n<0){ + n = -n; + x = -x; + } + if(isnan(x)) return x*x; /* + -> * for Cheetah */ + if (!((int) _lib_version == libm_ieee || + (__xpg6 & _C99SUSv3_math_errexcept) != 0)) { + if(fabs(x) > X_TLOSS) return _SVID_libm_err(on,ox,38); + } + if(n==0) return(j0(x)); + if(n==1) return(j1(x)); + if((n&1)==0) + sgn=0; /* even n */ + else + sgn = signbit(x); /* odd n */ + x = fabs(x); + if(x == zero||!finite(x)) b = zero; + else if((GENERIC)n<=x) { /* Safe to use + J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + */ + if(x>1.0e91) { /* x >> n**2 + Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + Let s=sin(x), c=cos(x), + xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + + n sin(xn)*sqt2 cos(xn)*sqt2 + ---------------------------------- + 0 s-c c+s + 1 -s-c -c+s + 2 -s+c -c-s + 3 s+c c-s + */ + switch(n&3) { + case 0: temp = cos(x)+sin(x); break; + case 1: temp = -cos(x)+sin(x); break; + case 2: temp = -cos(x)-sin(x); break; + case 3: temp = cos(x)-sin(x); break; + } + b = invsqrtpi*temp/sqrt(x); + } else { + a = j0(x); + 
b = j1(x); + for(i=1;i 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quadruple + */ + /* determine k */ + GENERIC t,v; + double q0,q1,h,tmp; int k,m; + w = (n+n)/(double)x; h = 2.0/(double)x; + q0 = w; z = w+h; q1 = w*z - 1.0; k=1; + while(q1<1.0e9) { + k += 1; z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for(t=zero, i = 2*(n+k); i>=m; i -= 2) t = one/(i/x-t); + a = t; + b = one; + /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + hence, if n*(log(2n/x)) > ... + single 8.8722839355e+01 + double 7.09782712893383973096e+02 + long double 1.1356523406294143949491931077970765006170e+04 + then recurrent value may overflow and the result is + likely to underflow to zero + */ + tmp = n; + v = two/x; + tmp = tmp*log(fabs(v*tmp)); + if(tmp<7.09782712893383973096e+02) { + for(i=n-1;i>0;i--){ + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + } + } else { + for(i=n-1;i>0;i--){ + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + if(b>1e100) { + a /= b; + t /= b; + b = 1.0; + } + } + } + b = (t*j0(x)/b); + } + } + if(sgn==1) return -b; else return b; +} + +GENERIC +yn(int n, GENERIC x) { + int i; + int sign; + GENERIC a, b, temp, ox, on; + + ox = x; on = (GENERIC)n; + if(isnan(x)) return x*x; /* + -> * for Cheetah */ + if (x <= zero) + if(x==zero) + /* return -one/zero; */ + return _SVID_libm_err((GENERIC)n,x,12); + else + /* return zero/zero; */ + return _SVID_libm_err((GENERIC)n,x,13); + if (!((int) _lib_version == libm_ieee || + (__xpg6 & _C99SUSv3_math_errexcept) != 0)) { + if(x > X_TLOSS) return _SVID_libm_err(on,ox,39); + } + sign = 1; + if(n<0){ + n = -n; + if((n&1) == 1) sign = -1; + } + if(n==0) return(y0(x)); + if(n==1) return(sign*y1(x)); + if(!finite(x)) return zero; + + if(x>1.0e91) { /* x >> n**2 + Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(x*pi)) + Let s=sin(x), c=cos(x), + xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + + n sin(xn)*sqt2 cos(xn)*sqt2 + ---------------------------------- + 
0 s-c c+s + 1 -s-c -c+s + 2 -s+c -c-s + 3 s+c c-s + */ + switch(n&3) { + case 0: temp = sin(x)-cos(x); break; + case 1: temp = -sin(x)-cos(x); break; + case 2: temp = -sin(x)+cos(x); break; + case 3: temp = sin(x)+cos(x); break; + } + b = invsqrtpi*temp/sqrt(x); + } else { + a = y0(x); + b = y1(x); + /* + * fix 1262058 and take care of non-default rounding + */ + for (i = 1; i < n; i++) { + temp = b; + b *= (GENERIC) (i + i) / x; + if (b <= -DBL_MAX) + break; + b -= a; + a = temp; + } + } + if(sign>0) return b; else return -b; +} diff --git a/usr/src/libm/src/C/lgamma.c b/usr/src/libm/src/C/lgamma.c new file mode 100644 index 0000000..9e61d33 --- /dev/null +++ b/usr/src/libm/src/C/lgamma.c @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)lgamma.c 1.25 06/01/23 SMI" + +#pragma weak lgamma = __lgamma + +#include "libm.h" + +extern int signgam; +/* lgamma(x): front end for __k_lgamma(); passes the global signgam to the kernel and routes the two error cases below through _SVID_libm_err(). */ +double +lgamma(double x) { + double g; + + if (!finite(x)) /* NaN or +-Inf: x * x gives NaN or +Inf; signgam is not written */ + return (x * x); + + g = rint(x); /* x == g below means x is integral */ + if (x == g && x <= 0.0) { /* zero or a negative integer */ + signgam = 1; + return (_SVID_libm_err(x, x, 15)); /* type 15: presumably the lgamma pole case -- confirm in _SVID_error.c */ + } + + g = __k_lgamma(x, &signgam); /* kernel receives &signgam, presumably to store the sign of Gamma(x) */ + if (!finite(g)) + g = _SVID_libm_err(x, x, 14); /* type 14: presumably lgamma overflow -- confirm in _SVID_error.c */ + return (g); +} diff --git a/usr/src/libm/src/C/lgamma_r.c b/usr/src/libm/src/C/lgamma_r.c new file mode 100644 index 0000000..c6e774f --- /dev/null +++ b/usr/src/libm/src/C/lgamma_r.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)lgamma_r.c 1.6 06/01/23 SMI" + +#pragma weak lgamma_r = __lgamma_r + +#include "libm.h" +/* lgamma_r(x, signgamp): reentrant form of lgamma(); the sign indicator goes to *signgamp instead of a global. Non-finite x returns x * x without writing *signgamp. */ +double +lgamma_r(double x, int *signgamp) { + double g; + + if (!finite(x)) /* NaN or +-Inf: without this guard -Inf satisfies x == rint(x) && x <= 0.0 and is reported as a pole */ + return (x * x); + + g = rint(x); /* x == g below means x is integral */ + if (x == g && x <= 0.0) { /* zero or a negative integer */ + *signgamp = 1; + return (_SVID_libm_err(x, x, 15)); /* type 15: presumably the lgamma pole case -- confirm in _SVID_error.c */ + } + + g = __k_lgamma(x, signgamp); /* kernel receives signgamp, presumably to store the sign of Gamma(x) */ + if (!finite(g)) + g = _SVID_libm_err(x, x, 14); /* type 14: presumably lgamma overflow -- confirm in _SVID_error.c */ + return (g); +} diff --git a/usr/src/libm/src/C/libm.h b/usr/src/libm/src/C/libm.h new file mode 100644 index 0000000..18e438c --- /dev/null +++ b/usr/src/libm/src/C/libm.h @@ -0,0 +1,203 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBM_H +#define _LIBM_H + +#pragma ident "@(#)libm.h 1.54 06/01/23 SMI" + +#ifdef _ASM +/* BEGIN CSTYLED */ + +/* + * Disable amd64 assembly code profiling for now. 
+ */ +#if defined(__amd64) +#undef PROF +#endif + +#include + +#define NAME(x) x +#define TEXT .section ".text" +#define DATA .section ".data" +#define RO_DATA .section ".rodata" +#define IDENT(x) .ident x + +#if defined(__sparc) + +#define LIBM_ANSI_PRAGMA_WEAK(sym,stype) \ + .weak sym; \ + .type sym,#stype; \ +sym = __/**/sym + +#ifndef SET_FILE +#define SET_FILE(x) \ + .file x +#endif /* !defined(SET_FILE) */ + +#ifdef PIC +/* + * One should *never* pass o7 to PIC_SETUP. + */ +#define PIC_SETUP(via) \ +9: call 8f; \ + sethi %hi(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ +8: or %via,%lo(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ + add %via,%o7,%via +/* + * Must save/restore %o7 in leaf routines; may *not* use jmpl! + */ +#define PIC_LEAF_SETUP(via) \ + or %g0,%o7,%g1; \ +9: call 8f; \ + sethi %hi(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ +8: or %via,%lo(NAME(_GLOBAL_OFFSET_TABLE_)-(9b-.)),%via; \ + add %via,%o7,%via; \ + or %g0,%g1,%o7 +#ifdef __sparcv9 +#define PIC_SET(via,sym,dst) ldx [%via+sym],%dst +#else /* defined(__sparcv9) */ +#define PIC_SET(via,sym,dst) ld [%via+sym],%dst +#endif /* defined(__sparcv9) */ +#else /* defined(PIC) */ +#define PIC_SETUP(via) +#define PIC_LEAF_SETUP(via) +#ifdef __sparcv9 +/* + * g1 is used as scratch register in V9 mode + */ +#define PIC_SET(via,sym,dst) setx sym,%g1,%dst +#else /* defined(__sparcv9) */ +#define PIC_SET(via,sym,dst) set sym,%dst +#endif /* defined(__sparcv9) */ +#endif /* defined(PIC) */ + +/* + * Workaround for 4337025: MCOUNT in asm_linkage.h does not support __sparcv9 + */ +#if defined(PROF) && defined(__sparcv9) + +#undef MCOUNT_SIZE +#undef MCOUNT + +#if !defined(PIC) +#define MCOUNT_SIZE (9*4) /* 9 instructions */ +#define MCOUNT(x) \ + save %sp, -SA(MINFRAME), %sp; \ + sethi %hh(.L_/**/x/**/1), %o0; \ + sethi %lm(.L_/**/x/**/1), %o1; \ + or %o0, %hm(.L_/**/x/**/1), %o0; \ + or %o1, %lo(.L_/**/x/**/1), %o1; \ + sllx %o0, 32, %o0; \ + call _mcount; \ + or %o0, %o1, %o0; \ + restore; \ + .common 
.L_/**/x/**/1, 8, 8 +#elif defined(PIC32) +#define MCOUNT_SIZE (10*4) /* 10 instructions */ +#define MCOUNT(x) \ + save %sp,-SA(MINFRAME),%sp; \ +1: call .+8; \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + sethi %hi(.L_/**/x/**/1),%o1; \ + add %o0,%lo(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + add %o1,%lo(.L_/**/x/**/1),%o1; \ + add %o0,%o7,%o0; \ + call _mcount; \ + ldx [%o0+%o1],%o0; \ + restore; \ + .common .L_/**/x/**/1,8,8 +#else /* PIC13 */ +#define MCOUNT_SIZE (8*4) /* 8 instructions */ +#define MCOUNT(x) \ + save %sp,-SA(MINFRAME),%sp; \ +1: call .+8; \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + add %o0,%lo(_GLOBAL_OFFSET_TABLE_-(1b-.)),%o0; \ + add %o0,%o7,%o0; \ + call _mcount; \ + ldx [%o0+%lo(.L_/**/x/**/1)],%o0; \ + restore; \ + .common .L_/**/x/**/1,8,8 +#endif /* !defined(PIC) */ +#endif /* defined(PROF) && defined(__sparcv9) */ + +#elif defined(__i386) || defined(__amd64) + +#define LIBM_ANSI_PRAGMA_WEAK(sym,stype) \ + .weak sym; \ + .type sym,@stype; \ +sym = __/**/sym + +#ifdef PIC +#if defined(__amd64) +#define PIC_SETUP(x) +#define PIC_WRAPUP +#define PIC_F(x) x@PLT +#define PIC_G(x) x@GOTPCREL(%rip) +#define PIC_L(x) x(%rip) +#define PIC_G_LOAD(insn,sym,dst) \ + movq PIC_G(sym),%dst; \ + insn (%dst),%dst +#else +#define PIC_SETUP(label) \ + pushl %ebx; \ + call .label; \ +.label: popl %ebx; \ + addl $_GLOBAL_OFFSET_TABLE_+[.-.label],%ebx +#define PIC_WRAPUP popl %ebx +#define PIC_F(x) x@PLT +#define PIC_G(x) x@GOT(%ebx) +#define PIC_L(x) x@GOTOFF(%ebx) +#define PIC_G_LOAD(insn,sym,dst) \ + mov PIC_G(sym),%dst; \ + insn (%dst),%dst +#endif +#else /* defined(PIC) */ +#define PIC_SETUP(x) +#define PIC_WRAPUP +#define PIC_F(x) x +#define PIC_G(x) x +#define PIC_L(x) x +#define PIC_G_LOAD(insn,sym,dst) insn sym,%dst +#endif /* defined(PIC) */ + +#else +#error Unknown architecture +#endif + +/* END CSTYLED */ +#else /* defined(_ASM) */ + +#include "libm_macros.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include +#include + 
+#endif /* defined(_ASM) */ + +#endif /* defined(_LIBM_H) */ diff --git a/usr/src/libm/src/C/libm_macros.h b/usr/src/libm/src/C/libm_macros.h new file mode 100644 index 0000000..1061521 --- /dev/null +++ b/usr/src/libm/src/C/libm_macros.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBM_MACROS_H +#define _LIBM_MACROS_H + +#pragma ident "@(#)libm_macros.h 1.4 06/01/23 SMI" + +#if defined(__sparc) + +#define HIWORD 0 +#define LOWORD 1 +#define HIXWORD 0 /* index of int containing exponent */ +#define XSGNMSK 0x80000000 /* exponent bit mask within the int */ +#define XBIASED_EXP(x) ((((int *)&x)[HIXWORD] & ~0x80000000) >> 16) +#define ISZEROL(x) (((((int *)&x)[0] & ~XSGNMSK) | ((int *)&x)[1] | \ + ((int *)&x)[2] | ((int *)&x)[3]) == 0) + +#elif defined(__i386) || defined(__amd64) + +#define HIWORD 1 +#define LOWORD 0 +#define HIXWORD 2 +#define XSGNMSK 0x8000 +#define XBIASED_EXP(x) (((int *)&x)[HIXWORD] & 0x7fff) +#define ISZEROL(x) (x == 0.0L) + +#define HANDLE_UNSUPPORTED + +/* + * "convert" the high-order 32 bits of a SPARC quad precision + * value ("I") to the sign, exponent, and high-order bits of an + * x86 extended double precision value ("E"); the low-order bits + * in the 12-byte quantity are left intact + */ +#define ITOX(I, E) \ + E[2] = 0xffff & ((I) >> 16); \ + E[1] = (((I) & 0x7fff0000) == 0)? \ + (E[1] & 0x7fff) | (0x7fff8000 & ((I) << 15)) :\ + 0x80000000 | (E[1] & 0x7fff) | (0x7fff8000 & ((I) << 15)) + +/* + * "convert" the sign, exponent, and high-order bits of an x86 + * extended double precision value ("E") to the high-order 32 bits + * of a SPARC quad precision value ("I") + */ +#define XTOI(E, I) \ + I = ((E[2]<<16) | (0xffff & (E[1]>>15))) + +#else +#error Unknown architecture +#endif + +#endif /* !defined(_LIBM_MACROS_H) */ diff --git a/usr/src/libm/src/C/libm_protos.h b/usr/src/libm/src/C/libm_protos.h new file mode 100644 index 0000000..4c77e19 --- /dev/null +++ b/usr/src/libm/src/C/libm_protos.h @@ -0,0 +1,217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)libm_protos.h 1.41 06/01/23 SMI" + +#ifndef _LIBM_PROTOS_H +#define _LIBM_PROTOS_H + +#ifdef LIBMOPT_BUILD +#define _TBL_cos __libmopt_TBL_cos +#define _TBL_exp2_512 __libmopt_TBL_exp2_512 +#define _TBL_ipio2_inf __libmopt_TBL_ipio2_inf +#define _TBL_jlog_n1 __libmopt_TBL_jlog_n1 +#define _TBL_jlog_n2 __libmopt_TBL_jlog_n2 +#define _TBL_jlog_p1 __libmopt_TBL_jlog_p1 +#define _TBL_jlog_p2 __libmopt_TBL_jlog_p2 +#define _TBL_log10 __libmopt_TBL_log10 +#define _TBL_log2_14 __libmopt_TBL_log2_14 +#define _TBL_log2_9 __libmopt_TBL_log2_9 +#define _TBL_sin __libmopt_TBL_sin +#define _TBL_sincosx __libmopt_TBL_sincosx +#define _TBL_xexp __libmopt_TBL_xexp +#define _TBL_xlog __libmopt_TBL_xlog +#define __k_cos_ __libmopt__k_cos_ +#define __k_sin_ __libmopt__k_sin_ +#define __k_sincos_ __libmopt__k_sincos_ +#define __reduction __libmopt__reduction +#define __rem_pio2 __libmopt__rem_pio2 +#define __rem_pio2m __libmopt__rem_pio2m +#else /* defined(LIBMOPT_BUILD) */ +#ifdef LIBM_BUILD +#define _SVID_libm_err __libm_SVID_libm_err /* not used by -lsunmath */ +#define _TBL_atan __libm_TBL_atan +#define _TBL_atan1 __libm_TBL_atan1 +#define _TBL_atan_hi __libm_TBL_atan_hi /* not used by -lsunmath */ +#define _TBL_atan_lo __libm_TBL_atan_lo /* not used 
by -lsunmath */ +#define _TBL_exp2_hi __libm_TBL_exp2_hi /* not used by -lsunmath */ +#define _TBL_exp2_lo __libm_TBL_exp2_lo /* not used by -lsunmath */ +#define _TBL_ipio2_inf __libm_TBL_ipio2_inf +#define _TBL_log __libm_TBL_log +#define _TBL_log2_hi __libm_TBL_log2_hi /* not used by -lsunmath */ +#define _TBL_log2_lo __libm_TBL_log2_lo /* not used by -lsunmath */ +#define _TBL_log_hi __libm_TBL_log_hi /* not used by -lsunmath */ +#define _TBL_log_lo __libm_TBL_log_lo /* not used by -lsunmath */ +#define _TBL_sincos __libm_TBL_sincos +#define _TBL_sincosx __libm_TBL_sincosx +#define _TBL_tan_hi __libm_TBL_tan_hi /* not used by -lsunmath */ +#define _TBL_tan_lo __libm_TBL_tan_lo /* not used by -lsunmath */ +#define __k_cexp __libm__k_cexp /* C99 libm */ +#define __k_cexpl __libm__k_cexpl /* C99 libm */ +#define __k_clog_r __libm__k_clog_r /* C99 libm */ +#define __k_clog_rl __libm__k_clog_rl /* C99 libm */ +#define __k_atan2 __libm__k_atan2 /* C99 libm */ +#define __k_atan2l __libm__k_atan2l /* C99 libm */ +#define __k_cos __libm__k_cos +#define __k_lgamma __libm__k_lgamma +#define __k_sin __libm__k_sin +#define __k_sincos __libm__k_sincos +#define __k_tan __libm__k_tan +#define __reduction __libm__reduction /* i386 only */ +#define __rem_pio2 __libm__rem_pio2 +#define __rem_pio2m __libm__rem_pio2m +#define __k_cosf __libm__k_cosf /* C99 libm */ +#define __k_cosl __libm__k_cosl /* C99 libm */ +#define __k_lgammal __libm__k_lgammal /* C99 libm */ +#define __k_sincosf __libm__k_sincosf /* C99 libm */ +#define __k_sincosl __libm__k_sincosl /* C99 libm */ +#define __k_sinf __libm__k_sinf /* C99 libm */ +#define __k_sinl __libm__k_sinl /* C99 libm */ +#define __k_tanf __libm__k_tanf /* C99 libm */ +#define __k_tanl __libm__k_tanl /* C99 libm */ +#define __poly_libmq __libm__poly_libmq /* C99 libm */ +#define __rem_pio2l __libm__rem_pio2l /* C99 libm */ +#define _TBL_atanl_hi __libm_TBL_atanl_hi /* C99 libm */ +#define _TBL_atanl_lo __libm_TBL_atanl_lo /* C99 libm */ 
+#define _TBL_cosl_hi __libm_TBL_cosl_hi /* C99 libm */ +#define _TBL_cosl_lo __libm_TBL_cosl_lo /* C99 libm */ +#define _TBL_expl_hi __libm_TBL_expl_hi /* C99 libm */ +#define _TBL_expl_lo __libm_TBL_expl_lo /* C99 libm */ +#define _TBL_expm1l __libm_TBL_expm1l /* C99 libm */ +#define _TBL_expm1lx __libm_TBL_expm1lx /* C99 libm */ +#define _TBL_ipio2l_inf __libm_TBL_ipio2l_inf /* C99 libm */ +#define _TBL_logl_hi __libm_TBL_logl_hi /* C99 libm */ +#define _TBL_logl_lo __libm_TBL_logl_lo /* C99 libm */ +#define _TBL_r_atan_hi __libm_TBL_r_atan_hi /* C99 libm */ +#define _TBL_r_atan_lo __libm_TBL_r_atan_lo /* C99 libm */ +#define _TBL_sinl_hi __libm_TBL_sinl_hi /* C99 libm */ +#define _TBL_sinl_lo __libm_TBL_sinl_lo /* C99 libm */ +#define _TBL_tanl_hi __libm_TBL_tanl_hi /* C99 libm */ +#define _TBL_tanl_lo __libm_TBL_tanl_lo /* C99 libm */ +#endif /* defined(LIBM_BUILD) */ +#endif /* defined(LIBMOPT_BUILD) */ + +#ifndef _ASM +#ifdef __STDC__ +#define __P(p) p +#else +#define __P(p) () +#endif + +#include + +extern double _SVID_libm_err __P((double, double, int)); +extern double __k_cos __P((double, double)); +extern double __k_cos_ __P((double *)); +extern double __k_lgamma __P((double, int *)); +extern double __k_sin __P((double, double)); +extern double __k_sin_ __P((double *)); +extern double __k_sincos __P((double, double, double *)); +extern double __k_sincos_ __P((double *, double *)); +extern double __k_tan __P((double, double, int)); +extern double __k_cexp __P((double, int *)); +extern long double __k_cexpl __P((long double, int *)); +extern double __k_clog_r __P((double, double, double *)); +extern long double __k_clog_rl __P((long double, long double, long double *)); +extern double __k_atan2 __P((double, double, double *)); +extern long double __k_atan2l __P((long double, long double, long double *)); +extern int __rem_pio2 __P((double, double *)); +extern int __rem_pio2m __P((double *, double *, int, int, int, const int *)); + +/* + * entry points that 
are in-lined + */ +extern double copysign __P((double, double)); +extern int finite __P((double)); +extern enum fp_class_type fp_class __P((double)); +extern double infinity __P((void)); +extern int isinf __P((double)); +extern int signbit __P((double)); + +/* + * new C99 entry points + */ +extern double fdim __P((double, double)); +extern double fma __P((double, double, double)); +extern double fmax __P((double, double)); +extern double fmin __P((double, double)); +extern double frexp __P((double, int *)); +extern double ldexp __P((double, int)); +extern double modf __P((double, double *)); +extern double nan __P((const char *)); +extern double nearbyint __P((double)); +extern double nexttoward __P((double, long double)); +extern double remquo __P((double, double, int *)); +extern double round __P((double)); +extern double scalbln __P((double, long int)); +extern double tgamma __P((double)); +extern double trunc __P((double)); +extern float fdimf __P((float, float)); +extern float fmaf __P((float, float, float)); +extern float fmaxf __P((float, float)); +extern float fminf __P((float, float)); +extern float frexpf __P((float, int *)); +extern float ldexpf __P((float, int)); +extern float modff __P((float, float *)); +extern float nanf __P((const char *)); +extern float nearbyintf __P((float)); +extern float nextafterf __P((float, float)); +extern float nexttowardf __P((float, long double)); +extern float remquof __P((float, float, int *)); +extern float roundf __P((float)); +extern float scalblnf __P((float, long int)); +extern float tgammaf __P((float)); +extern float truncf __P((float)); +extern long double frexpl(long double, int *); +extern long double fdiml __P((long double, long double)); +extern long double fmal __P((long double, long double, long double)); +extern long double fmaxl __P((long double, long double)); +extern long double fminl __P((long double, long double)); +extern long double ldexpl __P((long double, int)); +extern long double modfl 
__P((long double, long double *)); +extern long double nanl __P((const char *)); +extern long double nearbyintl __P((long double)); +extern long double nextafterl __P((long double, long double)); +extern long double nexttowardl __P((long double, long double)); +extern long double remquol __P((long double, long double, int *)); +extern long double roundl __P((long double)); +extern long double scalblnl __P((long double, long int)); +extern long double tgammal __P((long double)); +extern long double truncl __P((long double)); +extern long int lrint __P((double)); +extern long int lrintf __P((float)); +extern long int lrintl __P((long double)); +extern long int lround __P((double)); +extern long int lroundf __P((float)); +extern long int lroundl __P((long double)); +extern long long int llrint __P((double)); +extern long long int llrintf __P((float)); +extern long long int llrintl __P((long double)); +extern long long int llround __P((double)); +extern long long int llroundf __P((float)); +extern long long int llroundl __P((long double)); +#endif /* !defined(_ASM) */ + +#endif /* !defined(_LIBM_PROTOS_H) */ diff --git a/usr/src/libm/src/C/libm_synonyms.h b/usr/src/libm/src/C/libm_synonyms.h new file mode 100644 index 0000000..2687990 --- /dev/null +++ b/usr/src/libm/src/C/libm_synonyms.h @@ -0,0 +1,748 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBM_SYNONYMS_H +#define _LIBM_SYNONYMS_H + +#pragma ident "@(#)libm_synonyms.h 1.37 06/01/31 SMI" + +#if defined(ELFOBJ) && !defined(lint) + +#define cabs __cabs /* C99 */ +#define cabsf __cabsf /* C99 */ +#define cabsl __cabsl /* C99 */ +#define cacos __cacos /* C99 */ +#define cacosf __cacosf /* C99 */ +#define cacosl __cacosl /* C99 */ +#define cacosh __cacosh /* C99 */ +#define cacoshf __cacoshf /* C99 */ +#define cacoshl __cacoshl /* C99 */ +#define carg __carg /* C99 */ +#define cargf __cargf /* C99 */ +#define cargl __cargl /* C99 */ +#define casin __casin /* C99 */ +#define casinf __casinf /* C99 */ +#define casinl __casinl /* C99 */ +#define casinh __casinh /* C99 */ +#define casinhf __casinhf /* C99 */ +#define casinhl __casinhl /* C99 */ +#define catan __catan /* C99 */ +#define catanf __catanf /* C99 */ +#define catanl __catanl /* C99 */ +#define catanh __catanh /* C99 */ +#define catanhf __catanhf /* C99 */ +#define catanhl __catanhl /* C99 */ +#define ccos __ccos /* C99 */ +#define ccosf __ccosf /* C99 */ +#define ccosl __ccosl /* C99 */ +#define ccosh __ccosh /* C99 */ +#define ccoshf __ccoshf /* C99 */ +#define ccoshl __ccoshl /* C99 */ +#define cexp __cexp /* C99 */ +#define cexpf __cexpf /* C99 */ +#define cexpl __cexpl /* C99 */ +#define cimag __cimag /* C99 */ +#define cimagf __cimagf /* C99 */ +#define cimagl __cimagl /* C99 */ +#define clog __clog /* C99 */ +#define clogf __clogf /* C99 */ +#define clogl __clogl /* C99 */ +#define conj __conj /* C99 */ +#define conjf __conjf /* C99 */ +#define conjl __conjl /* C99 */ +#define cpow __cpow /* C99 */ +#define cpowf __cpowf /* C99 */ 
+#define cpowl __cpowl /* C99 */ +#define cproj __cproj /* C99 */ +#define cprojf __cprojf /* C99 */ +#define cprojl __cprojl /* C99 */ +#define creal __creal /* C99 */ +#define crealf __crealf /* C99 */ +#define creall __creall /* C99 */ +#define csin __csin /* C99 */ +#define csinf __csinf /* C99 */ +#define csinl __csinl /* C99 */ +#define csinh __csinh /* C99 */ +#define csinhf __csinhf /* C99 */ +#define csinhl __csinhl /* C99 */ +#define csqrt __csqrt /* C99 */ +#define csqrtf __csqrtf /* C99 */ +#define csqrtl __csqrtl /* C99 */ +#define ctan __ctan /* C99 */ +#define ctanf __ctanf /* C99 */ +#define ctanl __ctanl /* C99 */ +#define ctanh __ctanh /* C99 */ +#define ctanhf __ctanhf /* C99 */ +#define ctanhl __ctanhl /* C99 */ +#define abrupt_underflow_ __abrupt_underflow_ +#define acos __acos +#define acosd __acosd +#define acosdf __acosdf +#define acosdl __acosdl +#define acosf __acosf +#define acosh __acosh +#define acoshf __acoshf +#define acoshl __acoshl +#define acosl __acosl +#define acosp __acosp +#define acospf __acospf +#define acospi __acospi +#define acospif __acospif +#define acospil __acospil +#define acospl __acospl +#define aint __aint +#define aintf __aintf +#define aintl __aintl +#define anint __anint +#define anintf __anintf +#define anintl __anintl +#define annuity __annuity +#define annuityf __annuityf +#define annuityl __annuityl +#define asin __asin +#define asind __asind +#define asindf __asindf +#define asindl __asindl +#define asinf __asinf +#define asinh __asinh +#define asinhf __asinhf +#define asinhl __asinhl +#define asinl __asinl +#define asinp __asinp +#define asinpf __asinpf +#define asinpi __asinpi +#define asinpif __asinpif +#define asinpil __asinpil +#define asinpl __asinpl +#define atan __atan +#define atan2 __atan2 +#define atan2d __atan2d +#define atan2df __atan2df +#define atan2dl __atan2dl +#define atan2f __atan2f +#define atan2l __atan2l +#define atan2pi __atan2pi +#define atan2pif __atan2pif +#define atan2pil 
__atan2pil +#define atand __atand +#define atandf __atandf +#define atandl __atandl +#define atanf __atanf +#define atanh __atanh +#define atanhf __atanhf +#define atanhl __atanhl +#define atanl __atanl +#define atanp __atanp +#define atanpf __atanpf +#define atanpi __atanpi +#define atanpif __atanpif +#define atanpil __atanpil +#define atanpl __atanpl +#define cbrt __cbrt +#define cbrtf __cbrtf +#define cbrtl __cbrtl +#define ceil __ceil +#define ceilf __ceilf +#define ceill __ceill +#define compound __compound +#define compoundf __compoundf +#define compoundl __compoundl +#define convert_external __convert_external +#define convert_external_ __convert_external_ +#define copysign __copysign +#define copysignf __copysignf +#define copysignl __copysignl +#define cos __cos +#define cosd __cosd +#define cosdf __cosdf +#define cosdl __cosdl +#define cosf __cosf +#define cosh __cosh +#define coshf __coshf +#define coshl __coshl +#define cosl __cosl +#define cosp __cosp +#define cospf __cospf +#define cospi __cospi +#define cospif __cospif +#define cospil __cospil +#define cospl __cospl +#define d_acos_ __d_acos_ +#define d_acosd_ __d_acosd_ +#define d_acosh_ __d_acosh_ +#define d_acosp_ __d_acosp_ +#define d_acospi_ __d_acospi_ +#define d_addran_ __d_addran_ +#define d_addrans_ __d_addrans_ +#define d_aint_ __d_aint_ +#define d_anint_ __d_anint_ +#define d_annuity_ __d_annuity_ +#define d_asin_ __d_asin_ +#define d_asind_ __d_asind_ +#define d_asinh_ __d_asinh_ +#define d_asinp_ __d_asinp_ +#define d_asinpi_ __d_asinpi_ +#define d_atan2_ __d_atan2_ +#define d_atan2d_ __d_atan2d_ +#define d_atan2pi_ __d_atan2pi_ +#define d_atan_ __d_atan_ +#define d_atand_ __d_atand_ +#define d_atanh_ __d_atanh_ +#define d_atanp_ __d_atanp_ +#define d_atanpi_ __d_atanpi_ +#define d_cbrt_ __d_cbrt_ +#define d_ceil_ __d_ceil_ +#define d_compound_ __d_compound_ +#define d_copysign_ __d_copysign_ +#define d_cos_ __d_cos_ +#define d_cosd_ __d_cosd_ +#define d_cosh_ __d_cosh_ +#define d_cosp_ 
__d_cosp_ +#define d_cospi_ __d_cospi_ +#define d_erf_ __d_erf_ +#define d_erfc_ __d_erfc_ +#define d_exp10_ __d_exp10_ +#define d_exp2_ __d_exp2_ +#define d_exp_ __d_exp_ +#define d_expm1_ __d_expm1_ +#define d_fabs_ __d_fabs_ +#define d_floor_ __d_floor_ +#define d_fmod_ __d_fmod_ +#define d_get_addrans_ __d_get_addrans_ +#define d_hypot_ __d_hypot_ +#define d_infinity_ __d_infinity_ +#define d_init_addrans_ __d_init_addrans_ +#define d_j0_ __d_j0_ +#define d_j1_ __d_j1_ +#define d_jn_ __d_jn_ +#define d_lcran_ __d_lcran_ +#define d_lcrans_ __d_lcrans_ +#define d_lgamma_ __d_lgamma_ +#define d_lgamma_r_ __d_lgamma_r_ +#define d_log10_ __d_log10_ +#define d_log1p_ __d_log1p_ +#define d_log2_ __d_log2_ +#define d_log_ __d_log_ +#define d_logb_ __d_logb_ +#define d_max_normal_ __d_max_normal_ +#define d_max_subnormal_ __d_max_subnormal_ +#define d_min_normal_ __d_min_normal_ +#define d_min_subnormal_ __d_min_subnormal_ +#define d_mwcran_ __d_mwcran_ +#define d_mwcrans_ __d_mwcrans_ +#define d_nextafter_ __d_nextafter_ +#define d_pow_ __d_pow_ +#define d_quiet_nan_ __d_quiet_nan_ +#define d_remainder_ __d_remainder_ +#define d_rint_ __d_rint_ +#define d_scalb_ __d_scalb_ +#define d_scalbn_ __d_scalbn_ +#define d_set_addrans_ __d_set_addrans_ +#define d_shufrans_ __d_shufrans_ +#define d_signaling_nan_ __d_signaling_nan_ +#define d_significand_ __d_significand_ +#define d_sin_ __d_sin_ +#define d_sincos_ __d_sincos_ +#define d_sincosd_ __d_sincosd_ +#define d_sincosp_ __d_sincosp_ +#define d_sincospi_ __d_sincospi_ +#define d_sind_ __d_sind_ +#define d_sinh_ __d_sinh_ +#define d_sinp_ __d_sinp_ +#define d_sinpi_ __d_sinpi_ +#define d_sqrt_ __d_sqrt_ +#define d_tan_ __d_tan_ +#define d_tand_ __d_tand_ +#define d_tanh_ __d_tanh_ +#define d_tanp_ __d_tanp_ +#define d_tanpi_ __d_tanpi_ +#define d_y0_ __d_y0_ +#define d_y1_ __d_y1_ +#define d_yn_ __d_yn_ +#define drem __drem +#define erf __erf +#define erfc __erfc +#define erfcf __erfcf +#define erfcl __erfcl +#define erff 
__erff +#define erfl __erfl +#define exp __exp +#define exp10 __exp10 +#define exp10f __exp10f +#define exp10l __exp10l +#define exp2 __exp2 +#define exp2f __exp2f +#define exp2l __exp2l +#define expf __expf +#define expl __expl +#define expm1 __expm1 +#define expm1f __expm1f +#define expm1l __expm1l +#define fabs __fabs +#define fabsf __fabsf +#define fabsl __fabsl +#define fdim __fdim /* C99 */ +#define fdimf __fdimf /* C99 */ +#define fdiml __fdiml /* C99 */ +#define finitef __finitef +#define finitel __finitel +#define floor __floor +#define floorf __floorf +#define floorl __floorl +#define fma __fma /* C99 */ +#define fmaf __fmaf /* C99 */ +#define fmal __fmal /* C99 */ +#define fmax __fmax /* C99 */ +#define fmaxf __fmaxf /* C99 */ +#define fmaxl __fmaxl /* C99 */ +#define fmin __fmin /* C99 */ +#define fminf __fminf /* C99 */ +#define fminl __fminl /* C99 */ +#define fmod __fmod +#define fmodf __fmodf +#define fmodl __fmodl +#define fp_class __fp_class +#define fp_classf __fp_classf +#define fp_classl __fp_classl +#define frexp __frexp /* S10 */ +#define frexpf __frexpf /* S10 */ +#define frexpl __frexpl /* S10 */ +#define gamma __gamma +#define gamma_r __gamma_r +#define gammaf __gammaf +#define gammaf_r __gammaf_r +#define gammal __gammal +#define gammal_r __gammal_r +#define gradual_underflow_ __gradual_underflow_ +#define hypot __hypot +#define hypotf __hypotf +#define hypotl __hypotl +#define i_addran_ __i_addran_ +#define i_addrans_ __i_addrans_ +#define i_get_addrans_ __i_get_addrans_ +#define i_get_lcrans_ __i_get_lcrans_ +#define i_get_mwcrans_ __i_get_mwcrans_ +#define i_init_addrans_ __i_init_addrans_ +#define i_init_lcrans_ __i_init_lcrans_ +#define i_init_mwcrans_ __i_init_mwcrans_ +#define i_lcran_ __i_lcran_ +#define i_lcrans_ __i_lcrans_ +#define i_llmwcran_ __i_llmwcran_ +#define i_llmwcrans_ __i_llmwcrans_ +#define i_mwcran_ __i_mwcran_ +#define i_mwcrans_ __i_mwcrans_ +#define i_set_addrans_ __i_set_addrans_ +#define i_set_lcrans_ 
__i_set_lcrans_ +#define i_set_mwcrans_ __i_set_mwcrans_ +#define i_shufrans_ __i_shufrans_ +#define id_finite_ __id_finite_ +#define id_fp_class_ __id_fp_class_ +#define id_ilogb_ __id_ilogb_ +#define id_irint_ __id_irint_ +#define id_isinf_ __id_isinf_ +#define id_isnan_ __id_isnan_ +#define id_isnormal_ __id_isnormal_ +#define id_issubnormal_ __id_issubnormal_ +#define id_iszero_ __id_iszero_ +#define id_nint_ __id_nint_ +#define id_signbit_ __id_signbit_ +#define ieee_flags __ieee_flags +#define ieee_flags_ __ieee_flags_ +#define ieee_handler __ieee_handler +#define ieee_handler_ __ieee_handler_ +#define ieee_handlers __ieee_handlers +#define ieee_retrospective __ieee_retrospective +#define ieee_retrospective_ __ieee_retrospective_ +#define ilogb __ilogb +#define ilogbf __ilogbf +#define ilogbl __ilogbl +#define infinity __infinity +#define infinityf __infinityf +#define infinityl __infinityl +#define iq_finite_ __iq_finite_ +#define iq_fp_class_ __iq_fp_class_ +#define iq_ilogb_ __iq_ilogb_ +#define iq_isinf_ __iq_isinf_ +#define iq_isnan_ __iq_isnan_ +#define iq_isnormal_ __iq_isnormal_ +#define iq_issubnormal_ __iq_issubnormal_ +#define iq_iszero_ __iq_iszero_ +#define iq_signbit_ __iq_signbit_ +#define ir_finite_ __ir_finite_ +#define ir_fp_class_ __ir_fp_class_ +#define ir_ilogb_ __ir_ilogb_ +#define ir_irint_ __ir_irint_ +#define ir_isinf_ __ir_isinf_ +#define ir_isnan_ __ir_isnan_ +#define ir_isnormal_ __ir_isnormal_ +#define ir_issubnormal_ __ir_issubnormal_ +#define ir_iszero_ __ir_iszero_ +#define ir_nint_ __ir_nint_ +#define ir_signbit_ __ir_signbit_ +#define irint __irint +#define irintf __irintf +#define irintl __irintl +#define isinf __isinf +#define isinff __isinff +#define isinfl __isinfl +#define isnan __isnan +#define isnanf __isnanf +#define isnanl __isnanl +#define isnormal __isnormal +#define isnormalf __isnormalf +#define isnormall __isnormall +#define issubnormal __issubnormal +#define issubnormalf __issubnormalf +#define issubnormall 
__issubnormall +#define iszero __iszero +#define iszerof __iszerof +#define iszerol __iszerol +#define j0 __j0 +#define j0f __j0f +#define j0l __j0l +#define j1 __j1 +#define j1f __j1f +#define j1l __j1l +#define jn __jn +#define jnf __jnf +#define jnl __jnl +#define ldexp __ldexp /* S10 */ +#define ldexpf __ldexpf /* S10 */ +#define ldexpl __ldexpl /* S10 */ +#define lgamma __lgamma +#define lgamma_r __lgamma_r +#define lgammaf __lgammaf +#define lgammaf_r __lgammaf_r +#define lgammal __lgammal +#define lgammal_r __lgammal_r +#define llrint __llrint /* C99 */ +#define llrintf __llrintf /* C99 */ +#define llrintl __llrintl /* C99 */ +#define llround __llround /* C99 */ +#define llroundf __llroundf /* C99 */ +#define llroundl __llroundl /* C99 */ +#define lrint __lrint /* C99 */ +#define lrintf __lrintf /* C99 */ +#define lrintl __lrintl /* C99 */ +#define lround __lround /* C99 */ +#define lroundf __lroundf /* C99 */ +#define lroundl __lroundl /* C99 */ +#define log __log +#define log10 __log10 +#define log10f __log10f +#define log10l __log10l +#define log1p __log1p +#define log1pf __log1pf +#define log1pl __log1pl +#define log2 __log2 +#define log2f __log2f +#define log2l __log2l +#define logb __logb +#define logbf __logbf +#define logbl __logbl +#define logf __logf +#define logl __logl +#define max_normal __max_normal +#define max_normalf __max_normalf +#define max_normall __max_normall +#define max_subnormal __max_subnormal +#define max_subnormalf __max_subnormalf +#define max_subnormall __max_subnormall +#define min_normal __min_normal +#define min_normalf __min_normalf +#define min_normall __min_normall +#define min_subnormal __min_subnormal +#define min_subnormalf __min_subnormalf +#define min_subnormall __min_subnormall +#define modf __modf /* S10 */ +#define modff __modff /* S10 */ +#define modfl __modfl /* S10 */ +#define nan __nan /* C99 */ +#define nanf __nanf /* C99 */ +#define nanl __nanl /* C99 */ +#define nearbyint __nearbyint /* C99 */ +#define 
nearbyintf __nearbyintf /* C99 */ +#define nearbyintl __nearbyintl /* C99 */ +#define nextafter __nextafter +#define nextafterf __nextafterf +#define nextafterl __nextafterl +#define nexttoward __nexttoward /* C99 */ +#define nexttowardf __nexttowardf /* C99 */ +#define nexttowardl __nexttowardl /* C99 */ +#define nint __nint +#define nintf __nintf +#define nintl __nintl +#define nonstandard_arithmetic __nonstandard_arithmetic +#define nonstandard_arithmetic_ __nonstandard_arithmetic_ +#define pow __pow +#define pow_di __pow_di +#define pow_li __pow_li +#define pow_ri __pow_ri +#define powf __powf +#define powl __powl +#define q_copysign_ __q_copysign_ +#define q_fabs_ __q_fabs_ +#define q_fmod_ __q_fmod_ +#define q_infinity_ __q_infinity_ +#define q_max_normal_ __q_max_normal_ +#define q_max_subnormal_ __q_max_subnormal_ +#define q_min_normal_ __q_min_normal_ +#define q_min_subnormal_ __q_min_subnormal_ +#define q_nextafter_ __q_nextafter_ +#define q_quiet_nan_ __q_quiet_nan_ +#define q_remainder_ __q_remainder_ +#define q_scalbn_ __q_scalbn_ +#define q_signaling_nan_ __q_signaling_nan_ +#define quiet_nan __quiet_nan +#define quiet_nanf __quiet_nanf +#define quiet_nanl __quiet_nanl +#define r_acos_ __r_acos_ +#define r_acosd_ __r_acosd_ +#define r_acosh_ __r_acosh_ +#define r_acosp_ __r_acosp_ +#define r_acospi_ __r_acospi_ +#define r_addran_ __r_addran_ +#define r_addrans_ __r_addrans_ +#define r_aint_ __r_aint_ +#define r_anint_ __r_anint_ +#define r_annuity_ __r_annuity_ +#define r_asin_ __r_asin_ +#define r_asind_ __r_asind_ +#define r_asinh_ __r_asinh_ +#define r_asinp_ __r_asinp_ +#define r_asinpi_ __r_asinpi_ +#define r_atan2_ __r_atan2_ +#define r_atan2d_ __r_atan2d_ +#define r_atan2pi_ __r_atan2pi_ +#define r_atan_ __r_atan_ +#define r_atand_ __r_atand_ +#define r_atanh_ __r_atanh_ +#define r_atanp_ __r_atanp_ +#define r_atanpi_ __r_atanpi_ +#define r_cbrt_ __r_cbrt_ +#define r_ceil_ __r_ceil_ +#define r_compound_ __r_compound_ +#define r_copysign_ 
__r_copysign_ +#define r_cos_ __r_cos_ +#define r_cosd_ __r_cosd_ +#define r_cosh_ __r_cosh_ +#define r_cosp_ __r_cosp_ +#define r_cospi_ __r_cospi_ +#define r_erf_ __r_erf_ +#define r_erfc_ __r_erfc_ +#define r_exp10_ __r_exp10_ +#define r_exp2_ __r_exp2_ +#define r_exp_ __r_exp_ +#define r_expm1_ __r_expm1_ +#define r_fabs_ __r_fabs_ +#define r_floor_ __r_floor_ +#define r_fmod_ __r_fmod_ +#define r_get_addrans_ __r_get_addrans_ +#define r_hypot_ __r_hypot_ +#define r_infinity_ __r_infinity_ +#define r_init_addrans_ __r_init_addrans_ +#define r_j0_ __r_j0_ +#define r_j1_ __r_j1_ +#define r_jn_ __r_jn_ +#define r_lcran_ __r_lcran_ +#define r_lcrans_ __r_lcrans_ +#define r_lgamma_ __r_lgamma_ +#define r_lgamma_r_ __r_lgamma_r_ +#define r_log10_ __r_log10_ +#define r_log1p_ __r_log1p_ +#define r_log2_ __r_log2_ +#define r_log_ __r_log_ +#define r_logb_ __r_logb_ +#define r_max_normal_ __r_max_normal_ +#define r_max_subnormal_ __r_max_subnormal_ +#define r_min_normal_ __r_min_normal_ +#define r_min_subnormal_ __r_min_subnormal_ +#define r_mwcran_ __r_mwcran_ +#define r_mwcrans_ __r_mwcrans_ +#define r_nextafter_ __r_nextafter_ +#define r_pow_ __r_pow_ +#define r_quiet_nan_ __r_quiet_nan_ +#define r_remainder_ __r_remainder_ +#define r_rint_ __r_rint_ +#define r_scalb_ __r_scalb_ +#define r_scalbn_ __r_scalbn_ +#define r_set_addrans_ __r_set_addrans_ +#define r_shufrans_ __r_shufrans_ +#define r_signaling_nan_ __r_signaling_nan_ +#define r_significand_ __r_significand_ +#define r_sin_ __r_sin_ +#define r_sincos_ __r_sincos_ +#define r_sincosd_ __r_sincosd_ +#define r_sincosp_ __r_sincosp_ +#define r_sincospi_ __r_sincospi_ +#define r_sind_ __r_sind_ +#define r_sinh_ __r_sinh_ +#define r_sinp_ __r_sinp_ +#define r_sinpi_ __r_sinpi_ +#define r_sqrt_ __r_sqrt_ +#define r_tan_ __r_tan_ +#define r_tand_ __r_tand_ +#define r_tanh_ __r_tanh_ +#define r_tanp_ __r_tanp_ +#define r_tanpi_ __r_tanpi_ +#define r_y0_ __r_y0_ +#define r_y1_ __r_y1_ +#define r_yn_ __r_yn_ +#define 
remainder __remainder +#define remainderf __remainderf +#define remainderl __remainderl +#define remquo __remquo /* C99 */ +#define remquof __remquof /* C99 */ +#define remquol __remquol /* C99 */ +#define rint __rint +#define rintf __rintf +#define rintl __rintl +#define round __round /* C99 */ +#define roundf __roundf /* C99 */ +#define roundl __roundl /* C99 */ +#define scalb __scalb +#define scalbf __scalbf +#define scalbl __scalbl +#define scalbln __scalbln /* C99 */ +#define scalblnf __scalblnf /* C99 */ +#define scalblnl __scalblnl /* C99 */ +#define scalbn __scalbn +#define scalbnf __scalbnf +#define scalbnl __scalbnl +#define sigfpe __sigfpe +#define sigfpe_ __sigfpe_ +#define signaling_nan __signaling_nan +#define signaling_nanf __signaling_nanf +#define signaling_nanl __signaling_nanl +#define signbit __signbit +#define signbitf __signbitf +#define signbitl __signbitl +#define signgam __signgam +#define signgamf __signgamf +#define signgaml __signgaml +#define significand __significand +#define significandf __significandf +#define significandl __significandl +#define sin __sin +#define sincos __sincos +#define sincosd __sincosd +#define sincosdf __sincosdf +#define sincosdl __sincosdl +#define sincosf __sincosf +#define sincosl __sincosl +#define sincosp __sincosp +#define sincospf __sincospf +#define sincospi __sincospi +#define sincospif __sincospif +#define sincospil __sincospil +#define sincospl __sincospl +#define sind __sind +#define sindf __sindf +#define sindl __sindl +#define sinf __sinf +#define sinh __sinh +#define sinhf __sinhf +#define sinhl __sinhl +#define sinl __sinl +#define sinp __sinp +#define sinpf __sinpf +#define sinpi __sinpi +#define sinpif __sinpif +#define sinpil __sinpil +#define sinpl __sinpl +#define smwcran_ __smwcran_ +#define sqrt __sqrt +#define sqrtf __sqrtf +#define sqrtl __sqrtl +#define standard_arithmetic __standard_arithmetic +#define standard_arithmetic_ __standard_arithmetic_ +#define tan __tan +#define tand 
__tand +#define tandf __tandf +#define tandl __tandl +#define tanf __tanf +#define tanh __tanh +#define tanhf __tanhf +#define tanhl __tanhl +#define tanl __tanl +#define tanp __tanp +#define tanpf __tanpf +#define tanpi __tanpi +#define tanpif __tanpif +#define tanpil __tanpil +#define tanpl __tanpl +#define tgamma __tgamma /* C99 */ +#define tgammaf __tgammaf /* C99 */ +#define tgammal __tgammal /* C99 */ +#define trunc __trunc /* C99 */ +#define truncf __truncf /* C99 */ +#define truncl __truncl /* C99 */ +#define u_addrans_ __u_addrans_ +#define u_lcrans_ __u_lcrans_ +#define u_llmwcran_ __u_llmwcran_ +#define u_llmwcrans_ __u_llmwcrans_ +#define u_mwcran_ __u_mwcran_ +#define u_mwcrans_ __u_mwcrans_ +#define u_shufrans_ __u_shufrans_ +#define y0 __y0 +#define y0f __y0f +#define y0l __y0l +#define y1 __y1 +#define y1f __y1f +#define y1l __y1l +#define yn __yn +#define ynf __ynf +#define ynl __ynl + +/* + * these are libdl entry points + */ +#define dlclose _dlclose +#define dlopen _dlopen +#define dlsym _dlsym + +/* + * these are libc entry points + */ +#define finite _finite +#define fpclass _fpclass +#define isnand _isnand +#define sigaction _sigaction +#define sigemptyset _sigemptyset +#define unordered _unordered +#define write _write +#ifdef _REENTRANT +#define mutex_lock _mutex_lock +#define mutex_unlock _mutex_unlock +#define thr_getspecific _thr_getspecific +#define thr_keycreate _thr_keycreate +#define thr_main _thr_main +#define thr_setspecific _thr_setspecific +#endif + +#endif /* defined(ELFOBJ) && !defined(lint) */ + +#endif /* _LIBM_SYNONYMS_H */ diff --git a/usr/src/libm/src/C/libm_thread.h b/usr/src/libm/src/C/libm_thread.h new file mode 100644 index 0000000..ad30648 --- /dev/null +++ b/usr/src/libm/src/C/libm_thread.h @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LIBM_THREAD_H +#define _LIBM_THREAD_H + +#pragma ident "@(#)libm_thread.h 1.6 06/01/31 SMI" + +#include +#include + +/* + * -lthread function(s) not prototyped anywhere + */ +extern int thr_main(void); +/* + * function call(s) local to libsunmath + */ +extern void *__tsd_alloc(thread_key_t *, int, int); +#endif /* _LIBM_THREAD_H */ diff --git a/usr/src/libm/src/C/libmv1.c b/usr/src/libm/src/C/libmv1.c new file mode 100644 index 0000000..16f9fc3 --- /dev/null +++ b/usr/src/libm/src/C/libmv1.c @@ -0,0 +1,661 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)libmv1.c 1.4 06/01/31 SMI" + +#pragma weak _lib_version = __libm_lib_version +#pragma weak acos = __acos +#pragma weak acosh = __acosh +#pragma weak asin = __asin +#pragma weak asinh = __asinh +#pragma weak atan = __atan +#pragma weak atan2 = __atan2 +#pragma weak atanh = __atanh +#pragma weak cbrt = __cbrt +#pragma weak ceil = __ceil +#pragma weak copysign = __copysign +#pragma weak cos = __cos +#pragma weak cosh = __cosh +#pragma weak erf = __erf +#pragma weak erfc = __erfc +#pragma weak exp = __exp +#pragma weak expm1 = __expm1 +#pragma weak fabs = __fabs +#pragma weak floor = __floor +#pragma weak fmod = __fmod +#pragma weak gamma = __gamma +#pragma weak gamma_r = __gamma_r +#pragma weak hypot = __hypot +#pragma weak ilogb = __ilogb +#pragma weak isnan = __isnan +#pragma weak j0 = __j0 +#pragma weak j1 = __j1 +#pragma weak jn = __jn +#pragma weak lgamma = __lgamma +#pragma weak lgamma_r = __lgamma_r +#pragma weak log = __log +#pragma weak log10 = __log10 +#pragma weak log1p = __log1p +#pragma weak logb = __logb +#pragma weak nextafter = __nextafter +#pragma weak pow = __pow +#pragma weak remainder = __remainder +#pragma weak rint = __rint +#pragma weak scalb = __scalb +#pragma weak scalbn = __scalbn +#pragma weak signgam = __signgam +#pragma weak significand = __significand +#pragma weak sin = __sin +#pragma weak sinh = __sinh +#pragma weak sqrt = __sqrt +#pragma weak tan = __tan +#pragma weak tanh = __tanh +#pragma weak y0 = __y0 +#pragma weak y1 = __y1 +#pragma weak yn = __yn + +#include + +const enum version __libm_lib_version = libm_ieee; +int __signgam = 0; + +#if !defined(__sparcv9) && 
!defined(__amd64) +/* ARGSUSED */ +int * +__libm_errno(void) { + return (0); +} +#endif + +/* ARGSUSED */ +int +__libm__rem_pio2(double x, double *y) { + return (0); +} + +/* ARGSUSED */ +int +__libm__rem_pio2m(double *x, double *y, int e0, int nx, int p, const int *ip) { + return (0); +} + +/* ARGSUSED */ +double +__acos(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__acosh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__asin(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__asinh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__atan(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__atan2(double y, double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__atanh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__cbrt(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__ceil(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__copysign(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__cos(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__cosh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__erf(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__erfc(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__exp(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__expm1(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__fabs(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__floor(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__fmod(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__gamma(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__gamma_r(double x, int *signgamp) { + return (0.0); +} + +/* ARGSUSED */ +double +__hypot(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +int +__ilogb(double x) { + return (0); +} + +/* ARGSUSED */ +int +__isnan(double x) { + return (0); +} + +/* ARGSUSED */ +double +__j0(double x) { + return (0.0); +} + +/* ARGSUSED */ +double 
+__j1(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__jn(int n, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__lgamma(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__lgamma_r(double x, int *signgamp) { + return (0.0); +} + +/* ARGSUSED */ +double +__log(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__log10(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__log1p(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__logb(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__nextafter(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__pow(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__remainder(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__rint(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__scalb(double x, double y) { + return (0.0); +} + +/* ARGSUSED */ +double +__scalbn(double x, int n) { + return (0.0); +} + +/* ARGSUSED */ +double +__significand(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__sin(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__sinh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__sqrt(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__tan(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__tanh(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__y0(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__y1(double x) { + return (0.0); +} + +/* ARGSUSED */ +double +__yn(int n, double x) { + return (0.0); +} + +/* ARGSUSED */ +int +matherr(struct exception *excep) { + return (0); +} + +/* ARGSUSED */ +float +__acosf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__asinf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__atanf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__atan2f(float y, float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__ceilf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float 
+__cosf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__coshf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__expf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__fabsf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__floorf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__fmodf(float x, float y) { + return (0.0F); +} + +/* ARGSUSED */ +float +__frexpf(float x, int *e) { + return (0.0F); +} + +/* ARGSUSED */ +float +__ldexpf(float x, int n) { + return (0.0F); +} + +/* ARGSUSED */ +float +__logf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__log10f(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__modff(float x, float *iptr) { + return (0.0F); +} + +/* ARGSUSED */ +float +__powf(float x, float y) { + return (0.0F); +} + +/* ARGSUSED */ +float +__sinf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__sinhf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__sqrtf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__tanf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +float +__tanhf(float x) { + return (0.0F); +} + +/* ARGSUSED */ +long double +__acosl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__asinl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__atanl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__atan2l(long double y, long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__ceill(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__cosl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__coshl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__expl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__fabsl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__floorl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__fmodl(long double x, long double y) { + return (0.0L); 
+} + +/* ARGSUSED */ +long double +__frexpl(long double x, int *e) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__ldexpl(long double x, int n) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__logl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__log10l(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__modfl(long double x, long double *iptr) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__powl(long double x, long double y) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__sinl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__sinhl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__sqrtl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__tanl(long double x) { + return (0.0L); +} + +/* ARGSUSED */ +long double +__tanhl(long double x) { + return (0.0L); +} diff --git a/usr/src/libm/src/C/log.c b/usr/src/libm/src/C/log.c new file mode 100644 index 0000000..ddedf57 --- /dev/null +++ b/usr/src/libm/src/C/log.c @@ -0,0 +1,219 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "@(#)log.c 1.29 06/01/23 SMI" + +#pragma weak log = __log + +/* INDENT OFF */ +/* + * log(x) + * Table look-up algorithm with product polynomial approximation. + * By K.C. Ng, Oct 23, 2004. Updated Oct 18, 2005. + * + * (a). For x in [1-0.125, 1+0.1328125], using a special approximation: + * Let f = x - 1 and z = f*f. + * return f + ((a1*z) * + * ((a2 + (a3*f)*(a4+f)) + (f*z)*(a5+f))) * + * (((a6 + f*(a7+f)) + (f*z)*(a8+f)) * + * ((a9 + (a10*f)*(a11+f)) + (f*z)*(a12+f))) + * a1 -6.88821452420390473170286327331268694251775741577e-0002, + * a2 1.97493380704769294631262255279580131173133850098e+0000, + * a3 2.24963218866067560242072431719861924648284912109e+0000, + * a4 -9.02975906958474405783476868236903101205825805664e-0001, + * a5 -1.47391630715542865104339398385491222143173217773e+0000, + * a6 1.86846544648220058704168877738993614912033081055e+0000, + * a7 1.82277370459347465292410106485476717352867126465e+0000, + * a8 1.25295479915214102994980294170090928673744201660e+0000, + * a9 1.96709676945198275177517643896862864494323730469e+0000, + * a10 -4.00127989749189894030934055990655906498432159424e-0001, + * a11 3.01675528558798333733648178167641162872314453125e+0000, + * a12 -9.52325445049240770778453679668018594384193420410e-0001, + * + * with remez error |(log(1+f) - P(f))/f| <= 2**-56.81 and + * + * (b). For 0.09375 <= x < 24 + * Use an 8-bit table look-up (3-bit for exponent and 5 bit for + * significand): + * Let ix stands for the high part of x in IEEE double format. + * Since 0.09375 <= x < 24, we have + * 0x3fb80000 <= ix < 0x40380000. + * Let j = (ix - 0x3fb80000) >> 15. Then 0 <= j < 256. Choose + * a Y[j] such that HIWORD(Y[j]) ~ 0x3fb8400 + (j<<15) (the middle + * number between 0x3fb80000 + (j<<15) and 3fb80000 + ((j+1)<<15)), + * and at the same time 1/Y[j] as well as log(Y[j]) are very close + * to 53-bits floating point numbers. 
+ *	A table of Y[j], 1/Y[j], and log(Y[j]) are pre-computed and thus
+ *		log(x) = log(Y[j]) + log(1 + (x-Y[j])*(1/Y[j]))
+ *		       = log(Y[j]) + log(1 + s)
+ *	where
+ *		s = (x-Y[j])*(1/Y[j])
+ *	We compute max (x-Y[j])*(1/Y[j]) for the chosen Y[j] and obtain
+ *	|s| < 0.0154. By applying the Remez algorithm with Product Polynomial
+ *	Approximation, we find the following approximation of log(1+s)
+ *		(b1*s)*(b2+s*(b3+s))*((b4+s*b5)+(s*s)*(b6+s))*(b7+s*(b8+s))
+ *	with remez error |log(1+s) - P(s)| <= 2**-63.5
+ *
+ * (c). Otherwise, get "n", the exponent of x, and then normalize x to
+ *	z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5
+ *	significant bits. Then
+ *		log(x) = n*ln2 + log(Y[i]) + log(z/Y[i]).
+ *
+ * Special cases:
+ *	log(x) is NaN with signal if x < 0 (including -INF) ;
+ *	log(+INF) is +INF; log(0) is -INF with signal;
+ *	log(NaN) is that NaN with no signal.
+ *
+ * Maximum error observed: less than 0.90 ulp
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following constants.
+ * The decimal values may be used, provided that the compiler will convert
+ * from decimal to binary accurately enough to produce the hexadecimal values
+ * shown.
+ */
+/* INDENT ON */
+
+#include "libm.h"
+
+extern const double _TBL_log[];	/* triples used below as tb[0], tb[1], tb[2] */
+
+static const double P[] = {	/* constants, accessed through the macros that follow */
+/* ONE   */ 1.0,
+/* TWO52 */ 4503599627370496.0,	/* 2**52, used to normalize subnormals */
+/* LN2HI */ 6.93147180369123816490e-01,	/* 3fe62e42, fee00000 */
+/* LN2LO */ 1.90821492927058770002e-10,	/* 3dea39ef, 35793c76 */
+/* A1    */ -6.88821452420390473170286327331268694251775741577e-0002,
+/* A2    */ 1.97493380704769294631262255279580131173133850098e+0000,
+/* A3    */ 2.24963218866067560242072431719861924648284912109e+0000,
+/* A4    */ -9.02975906958474405783476868236903101205825805664e-0001,
+/* A5    */ -1.47391630715542865104339398385491222143173217773e+0000,
+/* A6    */ 1.86846544648220058704168877738993614912033081055e+0000,
+/* A7    */ 1.82277370459347465292410106485476717352867126465e+0000,
+/* A8    */ 1.25295479915214102994980294170090928673744201660e+0000,
+/* A9    */ 1.96709676945198275177517643896862864494323730469e+0000,
+/* A10   */ -4.00127989749189894030934055990655906498432159424e-0001,
+/* A11   */ 3.01675528558798333733648178167641162872314453125e+0000,
+/* A12   */ -9.52325445049240770778453679668018594384193420410e-0001,
+/* B1    */ -1.25041641589283658575482149899471551179885864258e-0001,
+/* B2    */ 1.87161713283355151891381127914642725337613123482e+0000,
+/* B3    */ -1.89082956295731507978530316904652863740921020508e+0000,
+/* B4    */ -2.50562891673640253387134180229622870683670043945e+0000,
+/* B5    */ 1.64822828085258366037635369139024987816810607910e+0000,
+/* B6    */ -1.24409107065868340669112512841820716857910156250e+0000,
+/* B7    */ 1.70534231658220414296067701798165217041969299316e+0000,
+/* B8    */ 1.99196833784655646937267192697618156671524047852e+0000,
+};
+
+#define	ONE	P[0]
+#define	TWO52	P[1]
+#define	LN2HI	P[2]
+#define	LN2LO	P[3]
+#define	A1	P[4]
+#define	A2	P[5]
+#define	A3	P[6]
+#define	A4	P[7]
+#define	A5	P[8]
+#define	A6	P[9]
+#define	A7	P[10]
+#define	A8	P[11]
+#define	A9	P[12]
+#define	A10	P[13]
+#define	A11	P[14]
+#define	A12	P[15]
+#define	B1	P[16]
+#define	B2	P[17]
+#define	B3	P[18]
+#define	B4	P[19]
+#define	B5	P[20]
+#define	B6	P[21]
+#define	B7	P[22]
+#define	B8	P[23]
+
+double
+log(double x) {	/* natural logarithm of x; zero and negative x are reported via _SVID_libm_err */
+	double	*tb, dn, dn1, s, z, r, w;
+	int	i, hx, ix, n, lx;
+
+	n = 0;	/* exponent correction; becomes -52 when a subnormal is rescaled */
+	hx = ((int *)&x)[HIWORD];	/* high word of x, sign included */
+	ix = hx & 0x7fffffff;	/* high word with the sign bit cleared */
+	lx = ((int *)&x)[LOWORD];	/* low word of x */
+
+	/* subnormal,0,negative,inf,nan: hx < 0x100000, or hx >= 0x7ff00000 where the sum goes negative (NOTE(review): relies on int wraparound) */
+	if ((hx + 0x100000) < 0x200000) {
+		if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0)) /* nan */
+			return (x * x);
+		if (((hx << 1) | lx) == 0)	/* zero (either sign) */
+			return (_SVID_libm_err(x, x, 16));
+		if (hx < 0)	/* negative, including -inf */
+			return (_SVID_libm_err(x, x, 17));
+		if (((hx - 0x7ff00000) | lx) == 0)	/* +inf */
+			return (x);
+
+		/* x must be positive and subnormal */
+		x *= TWO52;	/* scale by 2**52 so x becomes normal */
+		n = -52;	/* remember the scaling for the exponent below */
+		ix = ((int *)&x)[HIWORD];	/* re-read the words of the scaled x */
+		lx = ((int *)&x)[LOWORD];
+	}
+
+	i = ix >> 19;	/* exponent field plus the top significand bit */
+	if (i >= 0x7f7 && i <= 0x806) {
+		/* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */
+		if (ix >= 0x3fec0000 && ix < 0x3ff22000) {
+			/* 0.875 <= x < 1.1328125 (0x3ff22000) */
+			s = x - ONE;
+			z = s * s;
+			if (((ix - 0x3ff00000) | lx) == 0)	/* x = 1: s and z are zero */
+				return (z);
+			r = (A10 * s) * (A11 + s);
+			w = z * s;	/* s cubed */
+			return (s + ((A1 * z) *
+			    (A2 + ((A3 * s) * (A4 + s) + w * (A5 + s)))) *
+			    ((A6 + (s * (A7 + s) + w * (A8 + s))) *
+			    (A9 + (r + w * (A12 + s)))));
+		} else {
+			i = (ix - 0x3fb80000) >> 15;	/* table index j of case (b), 0 <= j < 256 */
+			tb = (double *)_TBL_log + (i + i + i);	/* three doubles per entry: Y[j], 1/Y[j], log(Y[j]) */
+			s = (x - tb[0]) * tb[1];	/* s = (x - Y[j]) * (1/Y[j]) */
+			return (tb[2] + ((B1 * s) * (B2 + s * (B3 + s))) *
+			    (((B4 + s * B5) + (s * s) * (B6 + s)) *
+			    (B7 + s * (B8 + s))));
+		}
+	} else {
+		dn = (double)(n + ((ix >> 20) - 0x3ff));	/* unbiased exponent of the original x */
+		dn1 = dn * LN2HI;	/* high part of n*ln2 */
+		i = (ix & 0x000fffff) | 0x3ff00000;	/* scale x to [1,2) */
+		((int *)&x)[HIWORD] = i;
+		i = (i - 0x3fb80000) >> 15;	/* table index for the scaled x */
+		tb = (double *)_TBL_log + (i + i + i);
+		s = (x - tb[0]) * tb[1];
+		dn = dn * LN2LO + tb[2];	/* low part of n*ln2 plus log(Y[i]) */
+		return (dn1 + (dn + ((B1 * s) * (B2 + s * (B3 + s))) *
+		    (((B4 + s * B5) + (s * s) * (B6 + s)) *
+		    (B7 + s * (B8 + s)))));
+	}
+}
diff --git a/usr/src/libm/src/C/log10.c b/usr/src/libm/src/C/log10.c
new file mode 100644
index 0000000..29bf07b
--- /dev/null
+++ b/usr/src/libm/src/C/log10.c @@ -0,0 +1,217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)log10.c 1.23 06/01/23 SMI" + +#pragma weak log10 = __log10 + +/* INDENT OFF */ +/* + * log10(x) = log(x)/log10 + * + * Base on Table look-up algorithm with product polynomial + * approximation for log(x). + * + * By K.C. Ng, Nov 29, 2004 + * + * (a). For x in [1-0.125, 1+0.125], from log.c we have + * log(x) = f + ((a1*f^2) * + * ((a2 + (a3*f)*(a4+f)) + (f^3)*(a5+f))) * + * (((a6 + f*(a7+f)) + (f^3)*(a8+f)) * + * ((a9 + (a10*f)*(a11+f)) + (f^3)*(a12+f))) + * where f = x - 1. + * (i) modify a1 <- a1 / log10 + * (ii) 1/log10 = 0.4342944819... + * = 0.4375 - 0.003205518... (7 bit shift) + * Let lgv = 0.4375 - 1/log10, then + * lgv = 0.003205518096748172348871081083395..., + * (iii) f*0.4375 is exact because f has 3 trailing zero. + * (iv) Thus, log10(x) = f*0.4375 - (lgv*f - PPoly) + * + * (b). For 0.09375 <= x < 24 + * Let j = (ix - 0x3fb80000) >> 15. Look up Y[j], 1/Y[j], and log(Y[j]) + * from _TBL_log.c. 
Then + * log10(x) = log10(Y[j]) + log10(1 + (x-Y[j])*(1/Y[j])) + * = log(Y[j])(1/log10) + log10(1 + s) + * where + * s = (x-Y[j])*(1/Y[j]) + * From log.c, we have log(1+s) = + * 2 2 2 + * (b s) (b + b s + s ) [b + b s + s (b + s)] (b + b s + s ) + * 1 2 3 4 5 6 7 8 + * + * By setting b1 <- b1/log10, we have + * log10(x) = 0.4375 * T - (lgv * T - POLY(s)) + * + * (c). Otherwise, get "n", the exponent of x, and then normalize x to + * z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5 + * significant bits. Then + * log(x) = n*ln2 + log(Y[i]) + log(z/Y[i]). + * log10(x) = n*(ln2/ln10) + log10(z). + * + * Special cases: + * log10(x) is NaN with signal if x < 0 (including -INF) ; + * log10(+INF) is +INF; log10(0) is -INF with signal; + * log10(NaN) is that NaN with no signal. + * + * Maximum error observed: less than 0.89 ulp + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
+ */
+/* INDENT ON */
+
+#include "libm.h"
+
+extern const double _TBL_log[];	/* triples used below as tb[0], tb[1], tb[2] */
+
+static const double P[] = {	/* constants, accessed through the macros that follow */
+/* ONE   */ 1.0,
+/* TWO52 */ 4503599627370496.0,	/* 2**52, used to normalize subnormals */
+/* LNAHI */ 3.01029995607677847147e-01,	/* 3FD34413 50900000 */
+/* LNALO */ 5.63033480667509769841e-11,	/* 3DCEF3FD E623E256 */
+/* A1    */ -2.9142521960136582507385480707044582802184e-02,
+/* A2    */ 1.99628461483039965074226529395673424005508422852e+0000,
+/* A3    */ 2.26812367662950720159642514772713184356689453125e+0000,
+/* A4    */ -9.05030639084976384900471657601883634924888610840e-0001,
+/* A5    */ -1.48275767132434044270894446526654064655303955078e+0000,
+/* A6    */ 1.88158320939722756293122074566781520843505859375e+0000,
+/* A7    */ 1.83309386046986411145098827546462416648864746094e+0000,
+/* A8    */ 1.24847063988317086291601754055591300129890441895e+0000,
+/* A9    */ 1.98372421445537705508854742220137268304824829102e+0000,
+/* A10   */ -3.94711735767898475035764249696512706577777862549e-0001,
+/* A11   */ 3.07890395362954372160402272129431366920471191406e+0000,
+/* A12   */ -9.60099585275022149311041630426188930869102478027e-0001,
+/* B1    */ -5.4304894950350052960838096752491540286689e-02,
+/* B2    */ 1.87161713283355151891381127914642725337613123482e+0000,
+/* B3    */ -1.89082956295731507978530316904652863740921020508e+0000,
+/* B4    */ -2.50562891673640253387134180229622870683670043945e+0000,
+/* B5    */ 1.64822828085258366037635369139024987816810607910e+0000,
+/* B6    */ -1.24409107065868340669112512841820716857910156250e+0000,
+/* B7    */ 1.70534231658220414296067701798165217041969299316e+0000,
+/* B8    */ 1.99196833784655646937267192697618156671524047852e+0000,
+/* LGH   */ 0.4375,	/* head of 1/ln(10); LGH - LGL = 1/ln(10) */
+/* LGL   */ 0.003205518096748172348871081083395,	/* "lgv" of the file header */
+/* LG10V */ 0.43429448190325182765112891891660509576226,	/* 1/ln(10) */
+};
+
+#define	ONE	P[0]
+#define	TWO52	P[1]
+#define	LNAHI	P[2]
+#define	LNALO	P[3]
+#define	A1	P[4]
+#define	A2	P[5]
+#define	A3	P[6]
+#define	A4	P[7]
+#define	A5	P[8]
+#define	A6	P[9]
+#define	A7	P[10]
+#define	A8	P[11]
+#define	A9	P[12]
+#define	A10	P[13]
+#define	A11	P[14]
+#define	A12	P[15]
+#define	B1	P[16]
+#define	B2	P[17]
+#define	B3	P[18]
+#define	B4	P[19]
+#define	B5	P[20]
+#define	B6	P[21]
+#define	B7	P[22]
+#define	B8	P[23]
+#define	LGH	P[24]
+#define	LGL	P[25]
+#define	LG10V	P[26]
+
+double
+log10(double x) {	/* base-10 logarithm of x; zero and negative x are reported via _SVID_libm_err */
+	double	*tb, dn, dn1, s, z, r, w;
+	int	i, hx, ix, n, lx;
+
+	n = 0;	/* exponent correction; becomes -52 when a subnormal is rescaled */
+	hx = ((int *)&x)[HIWORD];	/* high word of x, sign included */
+	ix = hx & 0x7fffffff;	/* high word with the sign bit cleared */
+	lx = ((int *)&x)[LOWORD];	/* low word of x */
+
+	/* subnormal,0,negative,inf,nan: hx < 0x100000, or hx >= 0x7ff00000 where the sum goes negative (NOTE(review): relies on int wraparound) */
+	if ((hx + 0x100000) < 0x200000) {
+		if (ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0)) /* nan */
+			return (x * x);
+		if (((hx << 1) | lx) == 0)	/* zero (either sign) */
+			return (_SVID_libm_err(x, x, 18));
+		if (hx < 0)	/* negative, including -inf */
+			return (_SVID_libm_err(x, x, 19));
+		if (((hx - 0x7ff00000) | lx) == 0)	/* +inf */
+			return (x);
+
+		/* x must be positive and subnormal */
+		x *= TWO52;	/* scale by 2**52 so x becomes normal */
+		n = -52;	/* remember the scaling for the exponent below */
+		ix = ((int *)&x)[HIWORD];	/* re-read the words of the scaled x */
+		lx = ((int *)&x)[LOWORD];
+	}
+
+	i = ix >> 19;	/* exponent field plus the top significand bit */
+	if (i >= 0x7f7 && i <= 0x806) {
+		/* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */
+		if (ix >= 0x3fec0000 && ix < 0x3ff20000) {
+			/* 0.875 <= x < 1.125 */
+			s = x - ONE;
+			z = s * s;
+			if (((ix - 0x3ff00000) | lx) == 0)	/* x = 1: s and z are zero */
+				return (z);
+			r = (A10 * s) * (A11 + s);
+			w = z * s;	/* s cubed */
+			return (LGH * s - (LGL * s - ((A1 * z) *
+			    ((A2 + (A3 * s) * (A4 + s)) + w * (A5 + s))) *
+			    (((A6 + s * (A7 + s)) + w * (A8 + s)) *
+			    ((A9 + r) + w * (A12 + s)))));
+		} else {
+			i = (ix - 0x3fb80000) >> 15;	/* table index j of case (b) */
+			tb = (double *)_TBL_log + (i + i + i);	/* three doubles per entry: Y[j], 1/Y[j], log(Y[j]) */
+			s = (x - tb[0]) * tb[1];	/* s = (x - Y[j]) * (1/Y[j]) */
+			return (LGH * tb[2] - (LGL * tb[2] - ((B1 * s) *
+			    (B2 + s * (B3 + s))) *
+			    (((B4 + s * B5) + (s * s) * (B6 + s)) *
+			    (B7 + s * (B8 + s)))));
+		}
+	} else {
+		dn = (double)(n + ((ix >> 20) - 0x3ff));	/* unbiased exponent of the original x */
+		dn1 = dn * LNAHI;	/* high part of the exponent term */
+		i = (ix & 0x000fffff) | 0x3ff00000;	/* scale x to [1,2) */
+		((int *)&x)[HIWORD] = i;
+		i = (i - 0x3fb80000) >> 15;	/* table index for the scaled x */
+		tb = (double *)_TBL_log + (i + i + i);
+		s = (x - tb[0]) * tb[1];
+		dn = dn * LNALO + tb[2] * LG10V;	/* low exponent term plus log(Y[i])/ln(10) */
+		return (dn1 + (dn + ((B1 * s) *
+		    (B2 + s * (B3 + s))) *
+		    (((B4 + s * B5) + (s * s) * (B6 + s)) *
+		    (B7 + s * (B8 + s)))));
+	}
+}
diff --git a/usr/src/libm/src/C/log1p.c b/usr/src/libm/src/C/log1p.c
new file mode 100644
index 0000000..845fd0e
--- /dev/null
+++ b/usr/src/libm/src/C/log1p.c
@@ -0,0 +1,201 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)log1p.c 1.23 06/01/23 SMI"
+
+#pragma weak log1p = __log1p
+
+/* INDENT OFF */
+/*
+ * Method :
+ * 1. Argument Reduction: find k and f such that
+ * 1+x = 2^k * (1+f),
+ * where sqrt(2)/2 < 1+f < sqrt(2) .
+ *
+ * Note. If k=0, then f=x is exact. However, if k!=0, then f
+ * may not be representable exactly. In that case, a correction
+ * term is need. Let u=1+x rounded. Let c = (1+x)-u, then
+ * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
+ * and add back the correction term c/u.
+ * (Note: when x > 2**53, one can simply return log(x))
+ *
+ * 2. Approximation of log1p(f).
+ * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) + * = 2s + 2/3 s**3 + 2/5 s**5 + ....., + * = 2s + s*R + * We use a special Reme algorithm on [0,0.1716] to generate + * a polynomial of degree 14 to approximate R The maximum error + * of this polynomial approximation is bounded by 2**-58.45. In + * other words, + * 2 4 6 8 10 12 14 + * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s + * (the values of Lp1 to Lp7 are listed in the program) + * and + * | 2 14 | -58.45 + * | Lp1*s +...+Lp7*s - R(z) | <= 2 + * | | + * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. + * In order to guarantee error in log below 1ulp, we compute log + * by + * log1p(f) = f - (hfsq - s*(hfsq+R)). + * + * 3. Finally, log1p(x) = k*ln2 + log1p(f). + * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) + * Here ln2 is splitted into two floating point number: + * ln2_hi + ln2_lo, + * where n*ln2_hi is always exact for |n| < 2000. + * + * Special cases: + * log1p(x) is NaN with signal if x < -1 (including -INF) ; + * log1p(+INF) is +INF; log1p(-1) is -INF with signal; + * log1p(NaN) is that NaN with no signal. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Constants: + * The hexadecimal values are the intended ones for the following + * constants. The decimal values may be used, provided that the + * compiler will convert from decimal to binary accurately enough + * to produce the hexadecimal values shown. + * + * Note: Assuming log() return accurate answer, the following + * algorithm can be used to compute log1p(x) to within a few ULP: + * + * u = 1+x; + * if(u==1.0) return x ; else + * return log(u)*(x/(u-1.0)); + * + * See HP-15C Advanced Functions Handbook, p.193. 
+ */ +/* INDENT ON */ + +#include "libm.h" + +static const double xxx[] = { +/* ln2_hi */ 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ +/* ln2_lo */ 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ +/* two54 */ 1.80143985094819840000e+16, /* 43500000 00000000 */ +/* Lp1 */ 6.666666666666735130e-01, /* 3FE55555 55555593 */ +/* Lp2 */ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ +/* Lp3 */ 2.857142874366239149e-01, /* 3FD24924 94229359 */ +/* Lp4 */ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ +/* Lp5 */ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ +/* Lp6 */ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ +/* Lp7 */ 1.479819860511658591e-01, /* 3FC2F112 DF3E5244 */ +/* zero */ 0.0 +}; +#define ln2_hi xxx[0] +#define ln2_lo xxx[1] +#define two54 xxx[2] +#define Lp1 xxx[3] +#define Lp2 xxx[4] +#define Lp3 xxx[5] +#define Lp4 xxx[6] +#define Lp5 xxx[7] +#define Lp6 xxx[8] +#define Lp7 xxx[9] +#define zero xxx[10] + +double +log1p(double x) { + double hfsq, f, c, s, z, R, u; + int k, hx, hu, ax; + + hx = ((int *)&x)[HIWORD]; /* high word of x */ + ax = hx & 0x7fffffff; + + if (ax >= 0x7ff00000) { /* x is inf or nan */ + if (((hx - 0xfff00000) | ((int *)&x)[LOWORD]) == 0) /* -inf */ + return (_SVID_libm_err(x, x, 44)); + return (x * x); + } + + k = 1; + if (hx < 0x3FDA827A) { /* x < 0.41422 */ + if (ax >= 0x3ff00000) /* x <= -1.0 */ + return (_SVID_libm_err(x, x, x == -1.0 ? 43 : 44)); + if (ax < 0x3e200000) { /* |x| < 2**-29 */ + if (two54 + x > zero && /* raise inexact */ + ax < 0x3c900000) /* |x| < 2**-54 */ + return (x); + else + return (x - x * x * 0.5); + } + if (hx > 0 || hx <= (int)0xbfd2bec3) { /* -0.2929> 20) - 1023; + /* + * correction term + */ + c = k > 0 ? 
1.0 - (u - x) : x - (u - 1.0); + c /= u; + } else { + u = x; + hu = ((int *)&u)[HIWORD]; /* high word of u */ + k = (hu >> 20) - 1023; + c = 0; + } + hu &= 0x000fffff; + if (hu < 0x6a09e) { /* normalize u */ + ((int *)&u)[HIWORD] = hu | 0x3ff00000; + } else { /* normalize u/2 */ + k += 1; + ((int *)&u)[HIWORD] = hu | 0x3fe00000; + hu = (0x00100000 - hu) >> 2; + } + f = u - 1.0; + } + hfsq = 0.5 * f * f; + if (hu == 0) { /* |f| < 2**-20 */ + if (f == zero) { + if (k == 0) + return (zero); + c += k * ln2_lo; + return (k * ln2_hi + c); + } + R = hfsq * (1.0 - 0.66666666666666666 * f); + if (k == 0) + return (f - R); + return (k * ln2_hi - ((R - (k * ln2_lo + c)) - f)); + } + s = f / (2.0 + f); + z = s * s; + R = z * (Lp1 + z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + + z * (Lp6 + z * Lp7)))))); + if (k == 0) + return (f - (hfsq - s * (hfsq + R))); + return (k * ln2_hi - ((hfsq - (s * (hfsq + R) + + (k * ln2_lo + c))) - f)); +} diff --git a/usr/src/libm/src/C/log2.c b/usr/src/libm/src/C/log2.c new file mode 100644 index 0000000..8bdfe82 --- /dev/null +++ b/usr/src/libm/src/C/log2.c @@ -0,0 +1,226 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)log2.c 1.16 06/01/31 SMI" + +#pragma weak log2 = __log2 + +/* INDENT OFF */ +/* + * log2(x) = log(x)/log2 + * + * Base on Table look-up algorithm with product polynomial + * approximation for log(x). + * + * By K.C. Ng, Nov 29, 2004 + * + * (a). For x in [1-0.125, 1+0.125], from log.c we have + * log(x) = f + ((a1*f^2) * + * ((a2 + (a3*f)*(a4+f)) + (f^3)*(a5+f))) * + * (((a6 + f*(a7+f)) + (f^3)*(a8+f)) * + * ((a9 + (a10*f)*(a11+f)) + (f^3)*(a12+f))) + * where f = x - 1. + * (i) modify a1 <- a1 / log2 + * (ii) 1/log2 = 1.4426950408889634... + * = 1.5 - 0.057304959... (4 bit shift) + * Let lv = 1.5 - 1/log2, then + * lv = 0.057304959111036592640075318998107956665325, + * (iii) f*1.5 is exact because f has 3 trailing zero. + * (iv) Thus, log2(x) = f*1.5 - (lv*f - PPoly) + * + * (b). For 0.09375 <= x < 24 + * Let j = (ix - 0x3fb80000) >> 15. Look up Y[j], 1/Y[j], and log(Y[j]) + * from _TBL_log.c. Then + * log2(x) = log2(Y[j]) + log2(1 + (x-Y[j])*(1/Y[j])) + * = log(Y[j])(1/log2) + log2(1 + s) + * where + * s = (x-Y[j])*(1/Y[j]) + * From log.c, we have log(1+s) = + * 2 2 2 + * (b s) (b + b s + s ) [b + b s + s (b + s)] (b + b s + s ) + * 1 2 3 4 5 6 7 8 + * + * By setting b1 <- b1/log2, we have + * log2(x) = 1.5 * T - (lv * T - POLY(s)) + * + * (c). Otherwise, get "n", the exponent of x, and then normalize x to + * z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5 + * significant bits. Then + * log2(x) = n + log2(z). + * + * Special cases: + * log2(x) is NaN with signal if x < 0 (including -INF) ; + * log2(+INF) is +INF; log2(0) is -INF with signal; + * log2(NaN) is that NaN with no signal. + * + * Maximum error observed: less than 0.84 ulp + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. 
+ * The decimal values may be used, provided that the compiler will convert
+ * from decimal to binary accurately enough to produce the hexadecimal values
+ * shown.
+ */
+/* INDENT ON */
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+extern const double _TBL_log[];	/* triples used below as tb[0], tb[1], tb[2] */
+
+static const double P[] = {	/* constants, accessed through the macros that follow */
+/* ONE   */ 1.0,
+/* TWO52 */ 4503599627370496.0,	/* 2**52, used to normalize subnormals */
+/* LN10V */ 1.4426950408889634073599246810018920433347,	/* 1/ln(2), despite the name */
+/* ZERO  */ 0.0,
+/* A1    */ -9.6809362455249638217841932228967194640116e-02,
+/* A2    */ 1.99628461483039965074226529395673424005508422852e+0000,
+/* A3    */ 2.26812367662950720159642514772713184356689453125e+0000,
+/* A4    */ -9.05030639084976384900471657601883634924888610840e-0001,
+/* A5    */ -1.48275767132434044270894446526654064655303955078e+0000,
+/* A6    */ 1.88158320939722756293122074566781520843505859375e+0000,
+/* A7    */ 1.83309386046986411145098827546462416648864746094e+0000,
+/* A8    */ 1.24847063988317086291601754055591300129890441895e+0000,
+/* A9    */ 1.98372421445537705508854742220137268304824829102e+0000,
+/* A10   */ -3.94711735767898475035764249696512706577777862549e-0001,
+/* A11   */ 3.07890395362954372160402272129431366920471191406e+0000,
+/* A12   */ -9.60099585275022149311041630426188930869102478027e-0001,
+/* B1    */ -1.8039695622547469514898963204616532885451e-01,
+/* B2    */ 1.87161713283355151891381127914642725337613123482e+0000,
+/* B3    */ -1.89082956295731507978530316904652863740921020508e+0000,
+/* B4    */ -2.50562891673640253387134180229622870683670043945e+0000,
+/* B5    */ 1.64822828085258366037635369139024987816810607910e+0000,
+/* B6    */ -1.24409107065868340669112512841820716857910156250e+0000,
+/* B7    */ 1.70534231658220414296067701798165217041969299316e+0000,
+/* B8    */ 1.99196833784655646937267192697618156671524047852e+0000,
+/* LGH   */ 1.5,	/* head of 1/ln(2); LGH - LGL = 1/ln(2) */
+/* LGL   */ 0.057304959111036592640075318998107956665325,	/* "lv" of the file header */
+};
+
+#define	ONE	P[0]
+#define	TWO52	P[1]
+#define	LN10V	P[2]
+#define	ZERO	P[3]
+#define	A1	P[4]
+#define	A2	P[5]
+#define	A3	P[6]
+#define	A4	P[7]
+#define	A5	P[8]
+#define	A6	P[9]
+#define	A7	P[10]
+#define	A8	P[11]
+#define	A9	P[12]
+#define	A10	P[13]
+#define	A11	P[14]
+#define	A12	P[15]
+#define	B1	P[16]
+#define	B2	P[17]
+#define	B3	P[18]
+#define	B4	P[19]
+#define	B5	P[20]
+#define	B6	P[21]
+#define	B7	P[22]
+#define	B8	P[23]
+#define	LGH	P[24]
+#define	LGL	P[25]
+
+double
+log2(double x) {	/* base-2 logarithm of x; special cases are produced arithmetically, not via _SVID_libm_err */
+	int i, hx, ix, n, lx;
+
+	n = 0;	/* exponent correction; becomes -52 when a subnormal is rescaled */
+	hx = ((int *) &x)[HIWORD]; ix = hx & 0x7fffffff;	/* high word, with and without sign */
+	lx = ((int *) &x)[LOWORD];	/* low word of x */
+
+	/* subnormal,0,negative,inf,nan: hx < 0x100000, or hx >= 0x7ff00000 where the sum goes negative (NOTE(review): relies on int wraparound) */
+	if ((hx + 0x100000) < 0x200000) {
+#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
+		if (ix >= 0x7ff80000)	/* assumes sparc-like QNaN */
+			return (x);	/* for Cheetah when x is QNaN */
+#endif
+		if (((hx << 1) | lx) == 0)	/* log(0.0) = -inf */
+			return (A5 / fabs(x));	/* A5 is only used as a negative constant: negative / +0 */
+		if (hx < 0) {	/* x < 0 */
+			if (ix >= 0x7ff00000)
+				return (x - x);	/* x is -inf or NaN */
+			else
+				return (ZERO / (x - x));	/* finite negative x: 0/0 */
+		}
+		if (((hx - 0x7ff00000) | lx) == 0)	/* log(inf) = inf */
+			return (x);
+		if (ix >= 0x7ff00000)	/* log(NaN) = NaN */
+			return (x - x);
+		x *= TWO52;	/* remaining case is a positive subnormal: scale by 2**52 */
+		n = -52;	/* remember the scaling for the exponent below */
+		hx = ((int *) &x)[HIWORD]; ix = hx & 0x7fffffff;
+		lx = ((int *) &x)[LOWORD];
+	}
+
+	/* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */
+	i = ix >> 19;	/* exponent field plus the top significand bit */
+	if (i >= 0x7f7 && i <= 0x806) {
+		/* 0.875 <= x < 1.125 */
+		if (ix >= 0x3fec0000 && ix < 0x3ff20000) {
+			double s, z, r, w;
+			s = x - ONE; z = s * s; r = (A10 * s) * (A11 + s);
+			w = z * s;	/* s cubed */
+			if (((ix << 12) | lx) == 0)	/* significand is zero, i.e. x = 1 in this range */
+				return (z);
+			else
+				return (LGH * s - (LGL * s - ((A1 * z) *
+				    ((A2 + (A3 * s) * (A4 + s)) + w * (A5 + s))) *
+				    (((A6 + s * (A7 + s)) + w * (A8 + s)) *
+				    ((A9 + r) + w * (A12 + s)))));
+		} else {
+			double *tb, s;
+			i = (ix - 0x3fb80000) >> 15;	/* table index j of case (b) */
+			tb = (double *) _TBL_log + (i + i + i);	/* three doubles per entry: Y[j], 1/Y[j], log(Y[j]) */
+			if (((ix << 12) | lx) == 0)	/* 2's power */
+				return ((double) ((ix >> 20) - 0x3ff));	/* the unbiased exponent is the exact answer */
+			s = (x - tb[0]) * tb[1];	/* s = (x - Y[j]) * (1/Y[j]) */
+			return (LGH * tb[2] - (LGL * tb[2] - ((B1 * s) *
+			    (B2 + s * (B3 + s))) *
+			    (((B4 + s * B5) + (s * s) * (B6 + s)) *
+			    (B7 + s * (B8 + s)))));
+		}
+	} else {
+		double *tb, dn, s;
+		dn = (double) (n + ((ix >> 20) - 0x3ff));	/* unbiased exponent of the original x */
+		ix <<= 12;	/* drop the exponent, keep the high significand bits */
+		if ((ix | lx) == 0)	/* power of two: the exponent is the exact answer */
+			return (dn);
+		i = ((unsigned) ix >> 12) | 0x3ff00000;	/* scale x to [1,2) */
+		((int *) &x)[HIWORD] = i;
+		i = (i - 0x3fb80000) >> 15;	/* table index for the scaled x */
+		tb = (double *) _TBL_log + (i + i + i);
+		s = (x - tb[0]) * tb[1];
+		return (dn + (tb[2] * LN10V + ((B1 * s) *
+		    (B2 + s * (B3 + s))) *
+		    (((B4 + s * B5) + (s * s) * (B6 + s)) *
+		    (B7 + s * (B8 + s)))));
+	}
+}
diff --git a/usr/src/libm/src/C/logb.c b/usr/src/libm/src/C/logb.c
new file mode 100644
index 0000000..83f0022
--- /dev/null
+++ b/usr/src/libm/src/C/logb.c
@@ -0,0 +1,84 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)logb.c 1.16 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak logb = __logb
+#pragma weak _logb = __logb
+#endif
+
+#include "libm.h"
+#include "xpg6.h"	/* __xpg6 */
+#define	_C99SUSv3_logb	_C99SUSv3_logb_subnormal_is_like_ilogb
+
+#if defined(USE_FPSCALE) || defined(__i386)
+static const double two52 = 4503599627370496.0;	/* 2**52, scales a subnormal into the normal range */
+#else
+/*
+ * v: high part of a non-zero subnormal |x|; w: low part of |x|
+ */
+static int
+ilogb_subnormal(unsigned v, unsigned w) {	/* returns r plus the index of the highest set bit of v:w */
+	int r = -1022 - 52;	/* starting value; the bit index found below is added to it */
+
+	if (v)
+		r += 32;	/* highest set bit is in the high word */
+	else
+		v = w;	/* otherwise continue the search in the low word */
+	if (v & 0xffff0000)
+		r += 16, v >>= 16;
+	if (v & 0xff00)
+		r += 8, v >>= 8;
+	if (v & 0xf0)
+		r += 4, v >>= 4;
+	v <<= 1;	/* v is now in 1..15; 0xffffaa50 holds floor(log2(v)) in 2-bit fields */
+	return (r + ((0xffffaa50 >> v) & 0x3));
+}
+#endif	/* defined(USE_FPSCALE) || defined(__i386) */
+
+double
+logb(double x) {	/* unbiased exponent of x as a double; zero is reported via _SVID_libm_err */
+	int *px = (int *) &x, k = px[HIWORD] & ~0x80000000;	/* k: high word with the sign bit cleared */
+
+	if (k < 0x00100000) {	/* exponent field is zero: x is zero or subnormal */
+		if ((px[LOWORD] | k) == 0)	/* x is +-0 */
+			return (_SVID_libm_err(x, x, 45));
+		else if ((__xpg6 & _C99SUSv3_logb) != 0) {	/* subnormals get their true exponent */
+#if defined(USE_FPSCALE) || defined(__i386)
+			x *= two52;	/* normalize, then undo the 2**52 scaling in the bias (1023 + 52) */
+			return ((double) (((px[HIWORD] & 0x7ff00000) >> 20)
+				- 1075));
+#else
+			return ((double) ilogb_subnormal(k, px[LOWORD]));
+#endif
+		} else
+			return (-1022.0);	/* otherwise every subnormal reports -1022 */
+	} else if (k < 0x7ff00000)	/* finite normal number */
+		return ((double) ((k >> 20) - 1023));
+	else	/* inf or NaN */
+		return (x * x);
+}
diff --git a/usr/src/libm/src/C/matherr.c b/usr/src/libm/src/C/matherr.c
new file mode 100644
index 0000000..29fa363
--- /dev/null
+++ b/usr/src/libm/src/C/matherr.c
@@ -0,0 +1,37 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)matherr.c 1.13 06/01/31 SMI"
+
+#pragma weak matherr = __matherr
+
+#include "libm.h"
+
+/* ARGSUSED0 */
+int
+__matherr(struct exception *x) {	/* exported as the weak symbol matherr; ignores its argument */
+	return 0;	/* always 0; presumably the SVID "not handled" value -- confirm against _SVID_libm_err */
+}
diff --git a/usr/src/libm/src/C/nextafter.c b/usr/src/libm/src/C/nextafter.c
new file mode 100644
index 0000000..1540f25
--- /dev/null
+++ b/usr/src/libm/src/C/nextafter.c
@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "@(#)nextafter.c 1.21 06/01/23 SMI" + +#pragma weak nextafter = __nextafter +#pragma weak _nextafter = __nextafter + +#include "libm.h" +#include <float.h> /* DBL_MIN */ +/* + * nextafter(x, y): return the double adjacent to x in the direction of y. + * x == y returns y; a NaN operand returns x * y. + * From +-0 the result is the smallest subnormal carrying the sign of y. + * Otherwise the 64-bit pattern of x is stepped by one unit in the low + * word, with a carry or borrow into the high word. + * A result whose exponent field is 0x7ff is handed to _SVID_libm_err + * (code 46); a result whose exponent field is 0 evaluates + * DBL_MIN * copysign(DBL_MIN, x) into a volatile, the path the source + * labels "underflow". + */ +double +nextafter(double x, double y) { + int hx, hy, k; + double ans; + unsigned lx; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + hy = ((int *)&y)[HIWORD]; + k = (hx & ~0x80000000) | lx; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); + if (k == 0) { /* x = 0 */ + k = hy & 0x80000000; + ((int *)&ans)[HIWORD] = k; + ((int *)&ans)[LOWORD] = 1; + } else if (hx >= 0) { + if (x > y) { /* positive x, step toward zero */ + ((int *)&ans)[LOWORD] = lx - 1; + k = (lx == 0)? hx - 1 : hx; + ((int *)&ans)[HIWORD] = k; + } else { /* positive x, step away from zero */ + ((int *)&ans)[LOWORD] = lx + 1; + k = (lx == 0xffffffff)? hx + 1 : hx; + ((int *)&ans)[HIWORD] = k; + } + } else { + if (x < y) { /* negative x, step toward zero */ + ((int *)&ans)[LOWORD] = lx - 1; + k = (lx == 0)? hx - 1 : hx; + ((int *)&ans)[HIWORD] = k; + } else { /* negative x, step away from zero */ + ((int *)&ans)[LOWORD] = lx + 1; + k = (lx == 0xffffffff)? hx + 1 : hx; + ((int *)&ans)[HIWORD] = k; + } + } + k = (k >> 20) & 0x7ff; /* exponent field of the result */ + if (k == 0x7ff) { + /* overflow */ + return (_SVID_libm_err(x, y, 46)); +#if !defined(__lint) + } else if (k == 0) { + /* underflow */ + volatile double dummy = DBL_MIN * copysign(DBL_MIN, x); +#endif + } + return (ans); +} diff --git a/usr/src/libm/src/C/pow.c b/usr/src/libm/src/C/pow.c new file mode 100644 index 0000000..208a741 --- /dev/null +++ b/usr/src/libm/src/C/pow.c @@ -0,0 +1,342 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)pow.c 1.44 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak pow = __pow +#endif + +/* + * pow(x,y) return x**y + * n + * Method: Let x = 2 * (1+f) + * 1. Compute and return log2(x) in two pieces: + * log2(x) = w1 + w2, + * where w1 has 24 bits trailing zero. + * 2. Perform y*log2(x) by simulating multi-precision arithmetic + * 3. Return x**y = exp2(y*log2(x)) + * + * Special cases: + * 1. (anything) ** +-0 is 1 + * 1'. 1 ** (anything) is 1 (C99; 1 ** +-INF/NAN used to be NAN) + * 2. (anything) ** 1 is itself + * 3. (anything except 1) ** NAN is NAN ("except 1" is C99) + * 4. NAN ** (anything except 0) is NAN + * 5. +-(|x| > 1) ** +INF is +INF + * 6. +-(|x| > 1) ** -INF is +0 + * 7. +-(|x| < 1) ** +INF is +0 + * 8. +-(|x| < 1) ** -INF is +INF + * 9. -1 ** +-INF is 1 (C99; -1 ** +-INF used to be NAN) + * 10. +0 ** (+anything except 0, NAN) is +0 + * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 + * 12. +0 ** (-anything except 0, NAN) is +INF + * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF + * 14. -0 ** (odd integer) = -( +0 ** (odd integer) ) + * 15. +INF ** (+anything except 0,NAN) is +INF + * 16. +INF ** (-anything except 0,NAN) is +0 + * 17. -INF ** (anything) = -0 ** (-anything) + * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) + * 19. (-anything except 0 and inf) ** (non-integer) is NAN + * + * Accuracy: + * pow(x,y) returns x**y nearly rounded.
In particular + * pow(integer,integer) + * always returns the correct integer provided it is representable. + */ + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_pow _C99SUSv3_pow_treats_Inf_as_an_even_int + +static const double zero = 0.0, one = 1.0, two = 2.0; + +extern const double _TBL_log2_hi[], _TBL_log2_lo[]; +static const double + two53 = 9007199254740992.0, + A1_hi = 2.8853900432586669921875, + A1_lo = 3.8519259825035041963606002e-8, + A1 = 2.885390081777926817222541963606002026086e+0000, + A2 = 9.617966939207270828380543979852286255862e-0001, + A3 = 5.770807680887875964868853124873696201995e-0001, + B0_hi = 2.8853900432586669921875, + B0_lo = 3.8519259822532793056374320585e-8, + B0 = 2.885390081777926814720293056374320585689e+0000, + B1 = 9.617966939259755138949202350396200257632e-0001, + B2 = 5.770780163585687000782112776448797953382e-0001, + B3 = 4.121985488948771523290174512461778354953e-0001, + B4 = 3.207590534812432970433641789022666850193e-0001; +/* + * log2_x(x, w): return the leading part of log2(x) and store the trailing + * part in *w; the returned value is rounded through float. + * x within (63/65, 65/63) uses the B* polynomial around 1, with log2(1) + * returned as an exact +0. Any other x is scaled into [1,2] (a subnormal + * is first multiplied by 2**53) and evaluated from _TBL_log2_hi and + * _TBL_log2_lo with the A* coefficients. + */ +static double +log2_x(double x, double *w) { + double f, s, z, qn, h, t; + int *px = (int *) &x; + int *pz = (int *) &z; + int i, j, ix, n; + + n = 0; + ix = px[HIWORD]; + if (ix >= 0x3fef03f1 && ix < 0x3ff08208) { /* 65/63 > x > 63/65 */ + double f1, v; + f = x - one; + if (((ix - 0x3ff00000) | px[LOWORD]) == 0) { + *w = zero; + return (zero); /* log2(1)= +0 */ + } + qn = one / (two + f); + s = f * qn; /* |s|<2**-6 */ + v = s * s; + h = (double) ((float) s); + f1 = (double) ((float) f); + t = qn * (((f - two * h) - h * f1) - h * (f - f1)); + /* s = h+t */ + f1 = h * B0_lo + s * (v * (B1 + v * (B2 + v * (B3 + v * B4)))); + t = f1 + t * B0; + h *= B0_hi; + s = (double) ((float) (h + t)); + *w = t - (s - h); + return (s); + } + if (ix < 0x00100000) { /* subnormal x */ + x *= two53; + n = -53; + ix = px[HIWORD]; + } + /* LARGE N */ + n += ((ix + 0x1000) >> 20) - 0x3ff; + ix = (ix & 0x000fffff) | 0x3ff00000; /* scale x to [1,2] */ + px[HIWORD] = ix; + i = ix + 0x1000; + pz[HIWORD] = i &
0xffffe000; + pz[LOWORD] = 0; + qn = one / (x + z); + f = x - z; + s = f * qn; + h = (double) ((float) s); + t = qn * ((f - (h + h) * z) - h * f); + j = (i >> 13) & 0x7f; + f = s * s; + t = t * A1 + h * A1_lo; + t += (s * f) * (A2 + f * A3); + qn = h * A1_hi; + s = n + _TBL_log2_hi[j]; + h = qn + s; + t += _TBL_log2_lo[j] - ((h - s) - qn); + f = (double) ((float) (h + t)); + *w = t - (f - h); + return (f); +} + +extern const double _TBL_exp2_hi[], _TBL_exp2_lo[]; +static const double /* poly app of 2^x-1 on [-1e-10,2^-7+1e-10] */ + E1 = 6.931471805599453100674958533810346197328e-0001, + E2 = 2.402265069587779347846769151717493815979e-0001, + E3 = 5.550410866475410512631124892773937864699e-0002, + E4 = 9.618143209991026824853712740162451423355e-0003, + E5 = 1.333357676549940345096774122231849082991e-0003; +/* + * pow(x, y): return x**y. + * Special operands (y zero, NaN, infinite, +-1, 2 or 0.5; x zero, infinite + * or +-1) are dispatched first. Otherwise log2(|x|) is obtained from + * log2_x() in two pieces, multiplied by y, and the result is rebuilt as a + * power of two from _TBL_exp2_hi/_TBL_exp2_lo and the E* polynomial. + * The result is negated when x < 0 and y is an odd integer. + * Error conditions are reported through _SVID_libm_err; the inline + * comments label codes 21 and 22 as overflow and underflow. + */ +double +pow(double x, double y) { + double z, ax; + double y1, y2, w1, w2; + int sbx, sby, j, k, yisint; + int hx, hy, ahx, ahy; + unsigned lx, ly; + int *pz = (int *) &z; + + hx = ((int *) &x)[HIWORD]; + lx = ((unsigned *) &x)[LOWORD]; + hy = ((int *) &y)[HIWORD]; + ly = ((unsigned *) &y)[LOWORD]; + ahx = hx & ~0x80000000; + ahy = hy & ~0x80000000; + if ((ahy | ly) == 0) { /* y==zero */ + if ((ahx | lx) == 0) + z = _SVID_libm_err(x, y, 20); /* +-0**+-0 */ + else if ((ahx | (((lx | -lx) >> 31) & 1)) > 0x7ff00000) + z = _SVID_libm_err(x, y, 42); /* NaN**+-0 */ + else + z = one; /* x**+-0 = 1 */ + return (z); + } else if (hx == 0x3ff00000 && lx == 0 && + (__xpg6 & _C99SUSv3_pow) != 0) + return (one); /* C99: 1**anything = 1 */ + else if (ahx > 0x7ff00000 || (ahx == 0x7ff00000 && lx != 0) || + ahy > 0x7ff00000 || (ahy == 0x7ff00000 && ly != 0)) + return (x * y); /* +-NaN return x*y; + -> * for Cheetah */ + /* includes Sun: 1**NaN = NaN */ + sbx = (unsigned) hx >> 31; + sby = (unsigned) hy >> 31; + ax = fabs(x); + + /* + * determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ...
y is an even int + */ + yisint = 0; + if (sbx) { + if (ahy >= 0x43400000) + yisint = 2; /* even integer y */ + else if (ahy >= 0x3ff00000) { + k = (ahy >> 20) - 0x3ff; /* exponent */ + if (k > 20) { + j = ly >> (52 - k); + if ((j << (52 - k)) == ly) + yisint = 2 - (j & 1); + } else if (ly == 0) { + j = ahy >> (20 - k); + if ((j << (20 - k)) == ahy) + yisint = 2 - (j & 1); + } + } + } + /* special value of y */ + if (ly == 0) { + if (ahy == 0x7ff00000) { /* y is +-inf */ + if (((ahx - 0x3ff00000) | lx) == 0) { + if ((__xpg6 & _C99SUSv3_pow) != 0) + return (one); + /* C99: (-1)**+-inf = 1 */ + else + return (y - y); + /* Sun: (+-1)**+-inf = NaN */ + } else if (ahx >= 0x3ff00000) + /* (|x|>1)**+,-inf = inf,0 */ + return (sby == 0 ? y : zero); + else /* (|x|<1)**-,+inf = inf,0 */ + return (sby != 0 ? -y : zero); + } + if (ahy == 0x3ff00000) { /* y is +-1 */ + if (sby != 0) { /* y is -1 */ + if (x == zero) /* divided by zero */ + return (_SVID_libm_err(x, y, 23)); + else if (ahx < 0x40000 || ((ahx - 0x40000) | + lx) == 0) /* overflow */ + return (_SVID_libm_err(x, y, 21)); + else + return (one / x); + } else + return (x); + } + if (hy == 0x40000000) { /* y is 2 */ + if (ahx >= 0x5ff00000 && ahx < 0x7ff00000) + return (_SVID_libm_err(x, y, 21)); + /* x*x overflow */ + else if (ahx < 0x1e56a09e && (ahx | lx) != 0 || + ahx == 0x1e56a09e && lx < 0x667f3bcd) + return (_SVID_libm_err(x, y, 22)); + /* x*x underflow */ + else + return (x * x); + } + if (hy == 0x3fe00000) { + if (!((ahx | lx) == 0 || ((ahx - 0x7ff00000) | lx) == + 0 || sbx == 1)) + return (sqrt(x)); /* y is 0.5 and x > 0 */ + } + } + /* special value of x */ + if (lx == 0) { + if (ahx == 0x7ff00000 || ahx == 0 || ahx == 0x3ff00000) { + /* x is +-0,+-inf,-1 */ + z = ax; + if (sby == 1) { + z = one / z; /* z = |x|**y */ + if (ahx == 0) + return (_SVID_libm_err(x, y, 23)); + } + if (sbx == 1) { + if (ahx == 0x3ff00000 && yisint == 0) + z = _SVID_libm_err(x, y, 24); + /* neg**non-integral is NaN + invalid */ + else
if (yisint == 1) + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + return (z); + } + } + /* (x<0)**(non-int) is NaN */ + if (sbx == 1 && yisint == 0) + return (_SVID_libm_err(x, y, 24)); + /* Now ax is finite, y is finite */ + /* first compute log2(ax) = w1+w2, with 24 bits w1 */ + w1 = log2_x(ax, &w2); + + /* split up y into y1+y2 and compute (y1+y2)*(w1+w2) */ + if (((ly & 0x07ffffff) == 0) || ahy >= 0x47e00000 || + ahy <= 0x38100000) { + /* no need to split if y is short or too large or too small */ + y1 = y * w1; + y2 = y * w2; + } else { + y1 = (double) ((float) y); + y2 = (y - y1) * w1 + y * w2; + y1 *= w1; + } + z = y1 + y2; + j = pz[HIWORD]; + if (j >= 0x40900000) { /* z >= 1024 */ + if (!(j == 0x40900000 && pz[LOWORD] == 0)) /* z > 1024 */ + return (_SVID_libm_err(x, y, 21)); /* overflow */ + else { + w2 = y1 - z; + w2 += y2; + /* rounded to inf */ + if (w2 >= -8.008566259537296567160e-17) + return (_SVID_libm_err(x, y, 21)); + /* overflow */ + } + } else if ((j & ~0x80000000) >= 0x4090cc00) { /* z <= -1075 */ + if (!(j == 0xc090cc00 && pz[LOWORD] == 0)) /* z < -1075 */ + return (_SVID_libm_err(x, y, 22)); /* underflow */ + else { + w2 = y1 - z; + w2 += y2; + if (w2 <= zero) /* underflow */ + return (_SVID_libm_err(x, y, 22)); + } + } + /* + * compute 2**(k+f[j]+g) + */ + k = (int) (z * 64.0 + (((hy ^ (ahx - 0x3ff00000)) > 0) ?
0.5 : -0.5)); + j = k & 63; + w1 = y2 - ((double) k * 0.015625 - y1); + w2 = _TBL_exp2_hi[j]; + z = _TBL_exp2_lo[j] + (w2 * w1) * (E1 + w1 * (E2 + w1 * (E3 + w1 * + (E4 + w1 * E5)))); + z += w2; + k >>= 6; + if (k < -1021) + z = scalbn(z, k); + else /* subnormal output -- NOTE(review): this remark appears to describe the scalbn branch above; this branch adds k directly to the exponent field */ + pz[HIWORD] += k << 20; + if (sbx == 1 && yisint == 1) + z = -z; /* (-ve)**(odd int) */ + return (z); +} diff --git a/usr/src/libm/src/C/remainder.c b/usr/src/libm/src/C/remainder.c new file mode 100644 index 0000000..72a15c8 --- /dev/null +++ b/usr/src/libm/src/C/remainder.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)remainder.c 1.24 06/01/25 SMI" + +#pragma weak remainder = __remainder + +/* + * remainder(x,p) + * Code originated from 4.3bsd. + * Modified by K.C. Ng for SUN 4.0 libm. + * Return : + * returns x REM p = x - [x/p]*p as if in infinitely precise arithmetic, + * where [x/p] is the (infinite bit) integer nearest x/p (in half way + * case choose the even one). + * Method : + * Based on fmod() return x-[x/p]chopped*p exactly.
+ */ + +#include "libm.h" + +static const double zero = 0.0, half = 0.5; +/* + * remainder(x, p): a NaN operand returns x * p; p == 0 or an infinite x is + * handed to _SVID_libm_err (code 28). + * Otherwise x is first reduced with fmod(x, p + p) when |p| < 2**1023 + * (hp < 0x7fe00000), and |x| is then brought toward zero by at most two + * subtractions of p. The sign of the original x is applied on return. + */ +double +remainder(double x, double p) { + double halfp; + int ix, hx, hp; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + hp = ((int *)&p)[HIWORD] & ~0x80000000; + + if (hp > 0x7ff00000 || (hp == 0x7ff00000 && ((int *)&p)[LOWORD] != 0)) /* p is NaN */ + return (x * p); + if (hx > 0x7ff00000 || (hx == 0x7ff00000 && ((int *)&x)[LOWORD] != 0)) /* x is NaN */ + return (x * p); + + if ((hp | ((int *)&p)[LOWORD]) == 0 || hx == 0x7ff00000) /* p is 0 or x is Inf */ + return (_SVID_libm_err(x, p, 28)); + + p = fabs(p); + if (hp < 0x7fe00000) + x = fmod(x, p + p); + x = fabs(x); + if (hp < 0x00200000) { /* tiny p: compare x + x with p instead of forming p/2 (presumably so half * p cannot lose bits -- confirm) */ + if (x + x > p) { + if (x == p) /* avoid x-x=-0 in RM mode */ + return ((ix < 0)? -zero : zero); + x -= p; + if (x + x >= p) + x -= p; + } + } else { + halfp = half * p; + if (x > halfp) { + if (x == p) /* avoid x-x=-0 in RM mode */ + return ((ix < 0)? -zero : zero); + x -= p; + if (x >= halfp) + x -= p; + } + } + return ((ix < 0)? -x : x); +} diff --git a/usr/src/libm/src/C/rint.c b/usr/src/libm/src/C/rint.c new file mode 100644 index 0000000..c600c2b --- /dev/null +++ b/usr/src/libm/src/C/rint.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)rint.c 1.17 06/01/23 SMI" + +#pragma weak rint = __rint + +/* + * rint(x) return x rounded to integral according to the rounding direction + * rint(x) returns result with the same sign as x's, including 0.0. + */ + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) && (!defined(__FLT_EVAL_METHOD__) || \ + __FLT_EVAL_METHOD__ != 0) +extern enum fp_precision_type __swapRP(enum fp_precision_type); +#define DECLRP(x) enum fp_precision_type x; +#define SWAPRP(new, x) x = __swapRP(new); +#define RESTRP(x) (void) __swapRP(x); +#else +#define DECLRP(x) +#define SWAPRP(new, x) +#define RESTRP(x) +#endif + +static const double + two52 = 4503599627370496.0, + zero = 0.0, + one = 1.0; +/* + * rint(x): when the high word of |x| is at least 0x43300000 (|x| >= 2**52, + * Inf or NaN) the result is x * one. Otherwise sign(x) * 2**52 is added + * and then subtracted, so the addition performs the rounding. A sum equal + * to the added constant yields a zero carrying the sign of x. + * DECLRP/SWAPRP/RESTRP expand to __swapRP calls only in the i386 + * configuration selected above, and to nothing elsewhere. + */ +double +rint(double x) { + DECLRP(rp) + double t, w; + int ix, hx; + + ix = ((int *)&x)[HIWORD]; + hx = ix & ~0x80000000; + + if (hx >= 0x43300000) + return (x * one); + t = (ix < 0)? -two52 : two52; + SWAPRP(fp_double, rp) /* set precision mode to double */ + w = x + t; /* x+sign(x)*2**52 rounded */ + RESTRP(rp) /* restore precision mode */ + if (w == t) + return ((ix < 0)? -zero : zero); + return (w - t); +} diff --git a/usr/src/libm/src/C/scalb.c b/usr/src/libm/src/C/scalb.c new file mode 100644 index 0000000..8240c76 --- /dev/null +++ b/usr/src/libm/src/C/scalb.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)scalb.c 1.20 06/01/23 SMI" + +#pragma weak scalb = __scalb +#pragma weak _scalb = __scalb + +#include "libm.h" +/* + * scalb(x, fn): a NaN operand returns x * fn; an infinite fn is handed to + * _SVID_libm_err (code 47); fn == 0 returns x; any other non-integral fn + * returns (x - x) / (x - x). + * An integral fn is converted to int n ((int)fn when |fn| < 2**21, + * otherwise +-65000) and the result is scalbn(x, n). When that result + * differs from x and is zero or non-finite, it is reported through + * _SVID_libm_err with code 33 or 32 respectively. + */ +double +scalb(double x, double fn) { + int hn, in, n; + double z; + + if (isnan(x) || isnan(fn)) + return (x * fn); + + in = ((int *)&fn)[HIWORD]; + hn = in & ~0x80000000; + if (hn == 0x7ff00000) /* fn is inf */ + return (_SVID_libm_err(x, fn, 47)); + + /* see if fn is an integer without raising inexact */ + if (hn >= 0x43300000) { + /* |fn| >= 2^52, so it must be an integer */ + n = (in < 0)? -65000 : 65000; + } else if (hn < 0x3ff00000) { + /* |fn| < 1, so it must be zero or non-integer */ + return ((fn == 0.0)? x : (x - x) / (x - x)); + } else if (hn < 0x41400000) { + /* |fn| < 2^21 */ + if ((hn & ((1 << (0x413 - (hn >> 20))) - 1)) + | ((int *)&fn)[LOWORD]) + return ((x - x) / (x - x)); + n = (int)fn; + } else { + if (((int *)&fn)[LOWORD] & ((1 << (0x433 - (hn >> 20))) - 1)) + return ((x - x) / (x - x)); + n = (in < 0)?
-65000 : 65000; + } + z = scalbn(x, n); + if (z != x) { + if (z == 0.0) + return (_SVID_libm_err(x, fn, 33)); + if (!finite(z)) + return (_SVID_libm_err(x, fn, 32)); + } + return (z); +} diff --git a/usr/src/libm/src/C/scalbn.c b/usr/src/libm/src/C/scalbn.c new file mode 100644 index 0000000..23a30a6 --- /dev/null +++ b/usr/src/libm/src/C/scalbn.c @@ -0,0 +1,119 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)scalbn.c 1.10 06/01/26 SMI" + +#pragma weak scalbn = __scalbn + +#include "libm.h" +#include <limits.h> /* INT_MAX, INT_MIN */ + +static const double + one = 1.0, + huge = 1.0e300, + tiny = 1.0e-300, + twom54 = 5.5511151231257827021181583404541015625e-17; + +#if defined(USE_FPSCALE) || defined(__i386) +static const double two52 = 4503599627370496.0; +#else +/* + * Normalize non-zero subnormal x and return biased exponent of x in [-51,0] + * + * px points at the two 32-bit words of x. The shift count s is found by + * locating the highest set bit (window narrowed by 32, 16, 8, 4 bits, then + * a 2-bit table lookup in 0xffffaa50); the significand is shifted left by + * s in place and the sign bit is preserved. + */ +static int +ilogb_biased(unsigned *px) { + int s = 52; + unsigned v = px[HIWORD] & ~0x80000000, w = px[LOWORD], t = v; + + if (t) + s -= 32; + else + t = w; + if (t & 0xffff0000) + s -= 16, t >>= 16; + if (t & 0xff00) + s -= 8, t >>= 8; + if (t & 0xf0) + s -= 4, t >>= 4; + t <<= 1; + s -= (0xffffaa50 >> t) & 0x3; + if (s < 32) { + v = (v << s) | w >> (32 - s); + w <<= s; + } else { + v = w << (s - 32); + w = 0; + } + px[HIWORD] = (px[HIWORD] & 0x80000000) | v; + px[LOWORD] = w; + return (1 - s); +} +#endif /* defined(USE_FPSCALE) */ +/* + * scalbn(x, n): return x * 2**n by adjusting the exponent field of x. + * Inf and NaN return x * one; +-0, or n == 0 with a subnormal x, returns x + * unchanged. A subnormal x is normalized first. + * A resulting biased exponent above 0x7fe returns huge * +-huge, one at or + * below -54 returns tiny * +-tiny, and one in between but below 1 is + * formed with exponent k + 54 and multiplied by twom54 (2**-54). + * The exponent sum is range-checked before the addition, so the function + * does not rely on signed integer wraparound. + */ +double +scalbn(double x, int n) { + int *px, ix, hx, k; + + px = (int *)&x; + ix = px[HIWORD]; + hx = ix & ~0x80000000; + k = hx >> 20; + + if (k == 0x7ff) /* x is inf or NaN */ + return (x * one); + + if (k == 0) { + if ((hx | px[LOWORD]) == 0 || n == 0) + return (x); +#if defined(USE_FPSCALE) || defined(__i386) + x *= two52; + ix = px[HIWORD]; + k = ((ix & ~0x80000000) >> 20) - 52; +#else + k = ilogb_biased((unsigned *)px); + ix = px[HIWORD]; +#endif + /* now k is in the range -51..0 */ + if (n < INT_MIN - k) /* k + n would overflow int */ + k = -100; + else + k += n; + } else { + /* k is in the range 1..0x7fe */ + if (n > INT_MAX - k) /* k + n would overflow int */ + k = 0x7ff; + else + k += n; + } + + if (k > 0x7fe) + return (huge * ((ix < 0)? -huge : huge)); + if (k < 1) { + if (k <= -54) + return (tiny * ((ix < 0)?
-tiny : tiny)); + k += 54; + px[HIWORD] = (ix & ~0x7ff00000) | (k << 20); + return (x * twom54); + } + px[HIWORD] = (ix & ~0x7ff00000) | (k << 20); + return (x); +} diff --git a/usr/src/libm/src/C/signgam.c b/usr/src/libm/src/C/signgam.c new file mode 100644 index 0000000..06e0711 --- /dev/null +++ b/usr/src/libm/src/C/signgam.c @@ -0,0 +1,34 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)signgam.c 1.8 06/01/31 SMI" + +#pragma weak signgam = __signgam + +#include "libm_synonyms.h" +#include <math.h> /* NOTE(review): header name was missing here; <math.h> is the header that declares signgam -- confirm against upstream */ + +int signgam = 0; diff --git a/usr/src/libm/src/C/significand.c b/usr/src/libm/src/C/significand.c new file mode 100644 index 0000000..cc82693 --- /dev/null +++ b/usr/src/libm/src/C/significand.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)significand.c 1.12 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak significand = __significand +#endif + +#include "libm.h" +/* + * significand(x): for finite non-zero x, return scalbn(x, -ilogb(x)). + * Zero, Inf and NaN are returned as x + x without calling ilogb. When + * FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, an operand whose high word + * has bit 0x80000 set is returned as x itself instead. + */ +double +significand(double x) { + int ix = ((int *) &x)[HIWORD] & ~0x80000000; + + /* weed out 0/+-Inf/NaN because C99 ilogb raises invalid on them */ + if ((ix | ((int *) &x)[LOWORD]) == 0 || ix >= 0x7ff00000) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ((ix & 0x80000) != 0 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + else + return (scalbn(x, -ilogb(x))); +} diff --git a/usr/src/libm/src/C/sin.c b/usr/src/libm/src/C/sin.c new file mode 100644 index 0000000..c153e22 --- /dev/null +++ b/usr/src/libm/src/C/sin.c @@ -0,0 +1,188 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sin.c 1.12 06/01/23 SMI" + +#pragma weak sin = __sin + +/* INDENT OFF */ +/* + * sin(x) + * Accurate Table look-up algorithm by K.C. Ng, May, 1995. + * + * Algorithm: see sincos.c + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* PI_H = */ 3.1415926535897931159979634685, +/* PI_L = */ 1.22464679914735317722606593227425e-16, +/* PI_L0 = */ 1.22464679914558443311283879205095e-16, +/* PI_L1 = */ 1.768744113227140223300005233735517376e-28, +/* PI2_H = */ 6.2831853071795862319959269370, +/* PI2_L = */ 2.44929359829470635445213186454850e-16, +/* PI2_L0 = */ 2.44929359829116886622567758410190e-16, +/* PI2_L1 = */ 3.537488226454280446600010467471034752e-28, +}; +/* INDENT ON */ + +#define ONEA sc +#define ONE sc[0] +#define NONE sc[1] +#define PP1 sc[2] +#define PP2 sc[3]
+#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define PI_H sc[10] +#define PI_L sc[11] +#define PI_L0 sc[12] +#define PI_L1 sc[13] +#define PI2_H sc[14] +#define PI2_L sc[15] +#define PI2_L0 sc[16] +#define PI2_L1 sc[17] + +extern const double _TBL_sincos[], _TBL_sincosx[]; +/* + * sin(x): three argument ranges are handled. + * - high word of |x| <= 0x3fc50000: an odd polynomial in x (PP* or P*); + * for |x| < 2**-27, x itself is returned when (int)x == 0. + * - |x| < 8: table lookup in _TBL_sincosx and _TBL_sincos combined with + * the PP and QQ polynomials, with separate paths when the table index + * j lies in [181,200] (near pi) or [382,401] (near 2pi); the sign of x + * is applied on return. + * - otherwise: Inf and NaN return x / x; all other arguments are reduced + * by __rem_pio2 and passed to __k_sin or __k_cos according to n & 3. + */ +double +sin(double x) { + double z, y[2], w, s, v, p, q; + int i, j, n, hx, ix, lx; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < .1640625 */ + if (ix < 0x3e400000) /* |x| < 2**-27 */ + if ((int)x == 0) + return (x); + z = x * x; + if (ix < 0x3f800000) /* |x| < 2**-8 */ + w = (z * x) * (PP1 + z * PP2); + else + w = (x * z) * ((P1 + z * P2) + (z * z) * (P3 + z * P4)); + return (x + w); + } + + /* for .1640625 < x < M, */ + n = ix >> 20; + if (n < 0x402) { /* x < 8 */ + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + x = fabs(x); + v = x - _TBL_sincosx[j]; + if (((j - 181) ^ (j - 201)) < 0) { /* 181 <= j <= 200 */ + /* near pi, sin(x) = sin(pi-x) */ + p = PI_H - x; + i = ix - 0x400921fb; + x = p + PI_L; + if ((i | ((lx - 0x54442D00) & 0xffffff00)) == 0) { + /* very close to pi */ + x = p + PI_L0; + return ((hx >= 0)? x + PI_L1 : -(x + PI_L1)); + } + z = x * x; + if (((ix - 0x40092000) >> 11) == 0) { + /* |pi-x|<2**-8 */ + w = PI_L + (z * x) * (PP1 + z * PP2); + } else { + w = PI_L + (z * x) * ((P1 + z * P2) + + (z * z) * (P3 + z * P4)); + } + return ((hx >= 0)? p + w : -p - w); + } + s = v * v; + if (((j - 382) ^ (j - 402)) < 0) { /* 382 <= j <= 401 */ + /* near 2pi, sin(x) = sin(x-2pi) */ + p = x - PI2_H; + i = ix - 0x401921fb; + x = p - PI2_L; + if ((i | ((lx - 0x54442D00) & 0xffffff00)) == 0) { + /* very close to 2pi */ + x = p - PI2_L0; + return ((hx >= 0)?
x - PI2_L1 : -(x - PI2_L1)); + } + z = x * x; + if (((ix - 0x40192000) >> 10) == 0) { + /* |x-2pi|<2**-8 */ + w = (z * x) * (PP1 + z * PP2) - PI2_L; + } else { + w = (z * x) * ((P1 + z * P2) + + (z * z) * (P3 + z * P4)) - PI2_L; + } + return ((hx >= 0)? p + w : -p - w); + } + j <<= 1; /* _TBL_sincos is read in pairs: [j] and [j+1] */ + w = _TBL_sincos[j+1]; + z = _TBL_sincos[j]; + p = v + (v * s) * (PP1 + s * PP2); + q = s * (QQ1 + s * QQ2); + v = w * p + z * q; + return ((hx >= 0)? z + v : -z - v); + } + + if (ix >= 0x7ff00000) /* sin(Inf or NaN) is NaN */ + return (x / x); + + /* argument reduction needed */ + n = __rem_pio2(x, y); + switch (n & 3) { + case 0: + return (__k_sin(y[0], y[1])); + case 1: + return (__k_cos(y[0], y[1])); + case 2: + return (-__k_sin(y[0], y[1])); + default: + return (-__k_cos(y[0], y[1])); + } +} diff --git a/usr/src/libm/src/C/sincos.c b/usr/src/libm/src/C/sincos.c new file mode 100644 index 0000000..0143a54 --- /dev/null +++ b/usr/src/libm/src/C/sincos.c @@ -0,0 +1,367 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)sincos.c 1.13 06/01/23 SMI" + +#pragma weak sincos = __sincos + +/* INDENT OFF */ +/* + * sincos(x,s,c) + * Accurate Table look-up algorithm by K.C. Ng, 2000. + * + * 1. Reduce x to x>0 by cos(-x)=cos(x), sin(-x)=-sin(x). + * 2. For 0<= x < 8, let i = (64*x chopped)-10. Let d = x - a[i], where + * a[i] is a double that is close to (i+10.5)/64 (and hence |d|< 10.5/64) + * and such that sin(a[i]) and cos(a[i]) are close to a double (with error + * less than 2**-8 ulp). Then + * + * cos(x) = cos(a[i]+d) = cos(a[i])cos(d) - sin(a[i])*sin(d) + * = TBL_cos_a[i]*(1+QQ1*d^2+QQ2*d^4) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_cos_a[i] + (TBL_cos_a[i]*d^2*(QQ1+QQ2*d^2) - + * TBL_sin_a[i]*(d+PP1*d^3+PP2*d^5)) + * + * sin(x) = sin(a[i]+d) = sin(a[i])cos(d) + cos(a[i])*sin(d) + * = TBL_sin_a[i]*(1+QQ1*d^2+QQ2*d^4) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5) + * = TBL_sin_a[i] + (TBL_sin_a[i]*d^2*(QQ1+QQ2*d^2) + + * TBL_cos_a[i]*(d+PP1*d^3+PP2*d^5)) + * + * Note: for x close to n*pi/2, special treatment is needed for either + * sin or cos: + * i in [81, 100] ( pi/2 +-10.5/64 => tiny cos(x) = sin(pi/2-x) + * i in [181,200] ( pi +-10.5/64 => tiny sin(x) = sin(pi-x) + * i in [282,301] ( 3pi/2+-10.5/64 => tiny cos(x) = sin(x-3pi/2) + * i in [382,401] ( 2pi +-10.5/64 => tiny sin(x) = sin(x-2pi) + * i in [483,502] ( 5pi/2+-10.5/64 => tiny cos(x) = sin(5pi/2-x) + * + * 3. For x >= 8.0, use kernel function __rem_pio2 to perform argument + * reduction and call __k_sincos_ to compute sin and cos. + * + * kernel function: + * __rem_pio2 ... argument reduction routine + * __k_sincos_ ... sine and cosine function on [-pi/4,pi/4] + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2).
Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * TRIG(x) returns trig(x) nearly rounded (less than 1 ulp) + */ + +#include "libm.h" + +static const double sc[] = { +/* ONE = */ 1.0, +/* NONE = */ -1.0, +/* + * |sin(x) - (x+pp1*x^3+pp2*x^5)| <= 2^-58.79 for |x| < 0.008 + */ +/* PP1 = */ -0.166666666666316558867252052378889521480627858683055567, +/* PP2 = */ .008333315652997472323564894248466758248475374977974017927, +/* + * |(sin(x) - (x+p1*x^3+...+p4*x^9)| + * |------------------------------ | <= 2^-57.63 for |x| < 0.1953125 + * | x | + */ +/* P1 = */ -1.666666666666629669805215138920301589656e-0001, +/* P2 = */ 8.333333332390951295683993455280336376663e-0003, +/* P3 = */ -1.984126237997976692791551778230098403960e-0004, +/* P4 = */ 2.753403624854277237649987622848330351110e-0006, +/* + * |cos(x) - (1+qq1*x^2+qq2*x^4)| <= 2^-55.99 for |x| <= 0.008 (0x3f80624d) + */ +/* QQ1 = */ -0.4999999999975492381842911981948418542742729, +/* QQ2 = */ 0.041666542904352059294545209158357640398771740, +/* Q1 = */ -0.5, +/* Q2 = */ 4.166666666500350703680945520860748617445e-0002, +/* Q3 = */ -1.388888596436972210694266290577848696006e-0003, +/* Q4 = */ 2.478563078858589473679519517892953492192e-0005, +/* PIO2_H = */ 1.570796326794896557999, +/* PIO2_L = */ 6.123233995736765886130e-17, +/* PIO2_L0 = */ 6.123233995727922165564e-17, +/* PIO2_L1 = */ 8.843720566135701120255e-29, +/* PI_H = */ 3.1415926535897931159979634685, +/* PI_L = */ 1.22464679914735317722606593227425e-16, +/* PI_L0 = */ 1.22464679914558443311283879205095e-16, +/* PI_L1 = */ 1.768744113227140223300005233735517376e-28, +/* PI3O2_H = */ 4.712388980384689673997, +/* PI3O2_L = */ 
1.836970198721029765839e-16, +/* PI3O2_L0 = */ 1.836970198720396133587e-16, +/* PI3O2_L1 = */ 6.336322524749201142226e-29, +/* PI2_H = */ 6.2831853071795862319959269370, +/* PI2_L = */ 2.44929359829470635445213186454850e-16, +/* PI2_L0 = */ 2.44929359829116886622567758410190e-16, +/* PI2_L1 = */ 3.537488226454280446600010467471034752e-28, +/* PI5O2_H = */ 7.853981633974482789995, +/* PI5O2_L = */ 3.061616997868382943065e-16, +/* PI5O2_L0 = */ 3.061616997861941598865e-16, +/* PI5O2_L1 = */ 6.441344200433640781982e-28, +}; +/* INDENT ON */ + +#define ONE sc[0] +#define PP1 sc[2] +#define PP2 sc[3] +#define P1 sc[4] +#define P2 sc[5] +#define P3 sc[6] +#define P4 sc[7] +#define QQ1 sc[8] +#define QQ2 sc[9] +#define Q1 sc[10] +#define Q2 sc[11] +#define Q3 sc[12] +#define Q4 sc[13] +#define PIO2_H sc[14] +#define PIO2_L sc[15] +#define PIO2_L0 sc[16] +#define PIO2_L1 sc[17] +#define PI_H sc[18] +#define PI_L sc[19] +#define PI_L0 sc[20] +#define PI_L1 sc[21] +#define PI3O2_H sc[22] +#define PI3O2_L sc[23] +#define PI3O2_L0 sc[24] +#define PI3O2_L1 sc[25] +#define PI2_H sc[26] +#define PI2_L sc[27] +#define PI2_L0 sc[28] +#define PI2_L1 sc[29] +#define PI5O2_H sc[30] +#define PI5O2_L sc[31] +#define PI5O2_L0 sc[32] +#define PI5O2_L1 sc[33] +#define PoS(x, z) ((x * z) * (PP1 + z * PP2)) +#define PoL(x, z) ((x * z) * ((P1 + z * P2) + (z * z) * (P3 + z * P4))) + +extern const double _TBL_sincos[], _TBL_sincosx[]; + +void +sincos(double x, double *s, double *c) { + double z, y[2], w, t, v, p, q; + int i, j, n, hx, ix, lx; + + hx = ((int *)&x)[HIWORD]; + lx = ((int *)&x)[LOWORD]; + ix = hx & ~0x80000000; + + if (ix <= 0x3fc50000) { /* |x| < 10.5/64 = 0.164062500 */ + if (ix < 0x3e400000) { /* |x| < 2**-27 */ + if ((int)x == 0) + *c = ONE; + *s = x; + } else { + z = x * x; + if (ix < 0x3f800000) { /* |x| < 0.008 */ + q = z * (QQ1 + z * QQ2); + p = PoS(x, z); + } else { + q = z * ((Q1 + z * Q2) + (z * z) * + (Q3 + z * Q4)); + p = PoL(x, z); + } + *c = ONE + q; + *s = x + p; + 
} + return; + } + + n = ix >> 20; + i = (((ix >> 12) & 0xff) | 0x100) >> (0x401 - n); + j = i - 10; + if (n < 0x402) { /* |x| < 8 */ + x = fabs(x); + v = x - _TBL_sincosx[j]; + t = v * v; + w = _TBL_sincos[(j<<1)]; + z = _TBL_sincos[(j<<1)+1]; + p = v + PoS(v, t); + q = t * (QQ1 + t * QQ2); + if ((((j - 81) ^ (j - 101)) | + ((j - 282) ^ (j - 302)) | + ((j - 483) ^ (j - 503)) | + ((j - 181) ^ (j - 201)) | + ((j - 382) ^ (j - 402))) < 0) { + if (j <= 101) { + /* near pi/2, cos(x) = sin(pi/2-x) */ + t = w * q + z * p; + *s = (hx >= 0)? w + t : -w - t; + p = PIO2_H - x; + i = ix - 0x3ff921fb; + x = p + PIO2_L; + if ((i | ((lx - 0x54442D00) & + 0xffffff00)) == 0) { + /* very close to pi/2 */ + x = p + PIO2_L0; + *c = x + PIO2_L1; + } else { + z = x * x; + if (((ix - 0x3ff92000) >> 12) == 0) { + /* |pi/2-x|<2**-8 */ + w = PIO2_L + PoS(x, z); + } else { + w = PIO2_L + PoL(x, z); + } + *c = p + w; + } + } else if (j <= 201) { + /* near pi, sin(x) = sin(pi-x) */ + *c = z - (w * p - z * q); + p = PI_H - x; + i = ix - 0x400921fb; + x = p + PI_L; + if ((i | ((lx - 0x54442D00) & + 0xffffff00)) == 0) { + /* very close to pi */ + x = p + PI_L0; + *s = (hx >= 0)? x + PI_L1 : + -(x + PI_L1); + } else { + z = x * x; + if (((ix - 0x40092000) >> 11) == 0) { + /* |pi-x|<2**-8 */ + w = PI_L + PoS(x, z); + } else { + w = PI_L + PoL(x, z); + } + *s = (hx >= 0)? p + w : -p - w; + } + } else if (j <= 302) { + /* near 3/2pi, cos(x)=sin(x-3/2pi) */ + t = w * q + z * p; + *s = (hx >= 0)? 
w + t : -w - t; + p = x - PI3O2_H; + i = ix - 0x4012D97C; + x = p - PI3O2_L; + if ((i | ((lx - 0x7f332100) & + 0xffffff00)) == 0) { + /* very close to 3/2pi */ + x = p - PI3O2_L0; + *c = x - PI3O2_L1; + } else { + z = x * x; + if (((ix - 0x4012D800) >> 9) == 0) { + /* |3/2pi-x|<2**-8 */ + w = PoS(x, z) - PI3O2_L; + } else { + w = PoL(x, z) - PI3O2_L; + } + *c = p + w; + } + } else if (j <= 402) { + /* near 2pi, sin(x)=sin(x-2pi) */ + *c = z - (w * p - z * q); + p = x - PI2_H; + i = ix - 0x401921fb; + x = p - PI2_L; + if ((i | ((lx - 0x54442D00) & + 0xffffff00)) == 0) { + /* very close to 2pi */ + x = p - PI2_L0; + *s = (hx >= 0)? x - PI2_L1 : + -(x - PI2_L1); + } else { + z = x * x; + if (((ix - 0x40192000) >> 10) == 0) { + /* |x-2pi|<2**-8 */ + w = PoS(x, z) - PI2_L; + } else { + w = PoL(x, z) - PI2_L; + } + *s = (hx >= 0)? p + w : -p - w; + } + } else { + /* near 5pi/2, cos(x) = sin(5pi/2-x) */ + t = w * q + z * p; + *s = (hx >= 0)? w + t : -w - t; + p = PI5O2_H - x; + i = ix - 0x401F6A7A; + x = p + PI5O2_L; + if ((i | ((lx - 0x29553800) & + 0xffffff00)) == 0) { + /* very close to pi/2 */ + x = p + PI5O2_L0; + *c = x + PI5O2_L1; + } else { + z = x * x; + if (((ix - 0x401F6A7A) >> 7) == 0) { + /* |5pi/2-x|<2**-8 */ + w = PI5O2_L + PoS(x, z); + } else { + w = PI5O2_L + PoL(x, z); + } + *c = p + w; + } + } + } else { + *c = z - (w * p - z * q); + t = w * q + z * p; + *s = (hx >= 0)? 
w + t : -w - t; + } + return; + } + + if (ix >= 0x7ff00000) { + *s = *c = x / x; + return; + } + + /* argument reduction needed */ + n = __rem_pio2(x, y); + switch (n & 3) { + case 0: + *s = __k_sincos(y[0], y[1], c); + break; + case 1: + *c = -__k_sincos(y[0], y[1], s); + break; + case 2: + *s = -__k_sincos(y[0], y[1], c); + *c = -*c; + break; + default: + *c = __k_sincos(y[0], y[1], s); + *s = -*s; + } +} diff --git a/usr/src/libm/src/C/sincospi.c b/usr/src/libm/src/C/sincospi.c new file mode 100644 index 0000000..c3c19ca --- /dev/null +++ b/usr/src/libm/src/C/sincospi.c @@ -0,0 +1,197 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sincospi.c 1.17 06/01/31 SMI" + +#pragma weak sincospi = __sincospi + +/* INDENT OFF */ +/* + * void sincospi(double x, double *s, double *c) + * *s = sin(pi*x); *c = cos(pi*x); + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y==z, then x is a multiple of pi/4. 
Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program compute sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tip_h, tip_lo) + * instead. 
+ */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include +#include + +static const double + pi = 3.14159265358979323846, /* 400921FB,54442D18 */ + sqrth_h = 0.70710678118654757273731092936941422522068023681640625, + sqrth_l = -4.8336466567264565185935844299127932213411660131004e-17; +/* INDENT ON */ + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif + +void +sincospi(double x, double *s, double *c) { + double y, z, t; + int n, ix, k; + int hx = ((int *) &x)[HIWORD]; + unsigned h, lx = ((unsigned *) &x)[LOWORD]; + + ix = hx & ~0x80000000; + n = (ix >> 20) - 0x3ff; + if (n >= 51) { /* |x| >= 2**51 */ + if (n >= 1024) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + *s = *c = ix >= 0x7ff80000 ? x : x - x; + /* assumes sparc-like QNaN */ +#else + *s = *c = x - x; +#endif + else { + if (n >= 53) { + *s = 0.0; + *c = 1.0; + } + else if (n == 52) { + if ((lx & 1) == 0) { + *s = 0.0; + *c = 1.0; + } + else { + *s = -0.0; + *c = -1.0; + } + } + else { /* n == 51 */ + if ((lx & 1) == 0) { + *s = 0.0; + *c = 1.0; + } + else { + *s = 1.0; + *c = 0.0; + } + if ((lx & 2) != 0) { + *s = -*s; + *c = -*c; + } + } + } + } + else if (n < -2) /* |x| < 0.25 */ + *s = __k_sincos(pi * fabs(x), 0.0, c); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + if (ix < 0x41C00000) { /* |x| < 2**29 */ + y = 4.0 * fabs(x); + n = (int) y; /* exact */ + z = (double) n; + k = z == y; + t = (y - z) * 0.25; + } + else { /* 2**29 <= |x| < 2**51 */ + y = fabs(x); + k = 50 - n; + n = lx >> k; + h = n << k; + ((unsigned *) &z)[LOWORD] = h; + ((int *) &z)[HIWORD] = ix; + k = h == lx; + t = y - z; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + *s = *c = sqrth_h + sqrth_l; + else + if ((n & 2) == 0) { + *s = 0.0; + *c = 1.0; + } + else { + *s = 1.0; + *c = 0.0; + } + y = (n & 2) == 0 ? 
0.0 : 1.0; + if ((n & 4) != 0) + *s = -*s; + if (((n + 1) & 4) != 0) + *c = -*c; + } + else { + if ((n & 1) != 0) + t = 0.25 - t; + if (((n + (n & 1)) & 2) == 0) + *s = __k_sincos(pi * t, 0.0, c); + else + *c = __k_sincos(pi * t, 0.0, s); + if ((n & 4) != 0) + *s = -*s; + if (((n + 2) & 4) != 0) + *c = -*c; + } + } + if (hx < 0) + *s = -*s; +} diff --git a/usr/src/libm/src/C/sinh.c b/usr/src/libm/src/C/sinh.c new file mode 100644 index 0000000..24953a3 --- /dev/null +++ b/usr/src/libm/src/C/sinh.c @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinh.c 1.19 06/01/23 SMI" + +#pragma weak sinh = __sinh + +/* INDENT OFF */ +/* + * sinh(x) + * Code originated from 4.3bsd. + * Modified by K.C. Ng for SUN 4.0 libm. + * Method : + * 1. reduce x to non-negative by sinh(-x) = - sinh(x). + * 2. + * + * expm1(x) + expm1(x)/(expm1(x)+1) + * 0 <= x <= lnovft : sinh(x) := -------------------------------- + * 2 + * lnovft <= x < INF : sinh(x) := exp(x-1024*ln2)*2**1023 + * + * + * Special cases: + * sinh(x) is x if x is +INF, -INF, or NaN. 
+ * only sinh(0)=0 is exact for finite argument. + * + */ +/* INDENT ON */ + +#include "libm.h" + +static const double + ln2hi = 6.93147180369123816490e-01, + ln2lo = 1.90821492927058770002e-10, + lnovft = 7.09782712893383973096e+02; + +double +sinh(double x) { + double ox, r, t; + + ox = x; + r = fabs(x); + if (!finite(x)) + return (x * r); + if (r < lnovft) { + t = expm1(r); + r = copysign((t + t / (1.0 + t)) * 0.5, x); + } else { + if (r < 1000.0) + x = copysign(exp((r - 1024 * ln2hi) - 1024 * ln2lo), x); + r = scalbn(x, 1023); + } + if (!finite(r)) + r = _SVID_libm_err(ox, ox, 25); + return (r); +} diff --git a/usr/src/libm/src/C/sqrt.c b/usr/src/libm/src/C/sqrt.c new file mode 100644 index 0000000..e16ee33 --- /dev/null +++ b/usr/src/libm/src/C/sqrt.c @@ -0,0 +1,149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)sqrt.c 1.20 06/01/23 SMI" + +#pragma weak sqrt = __sqrt + +#include "libm.h" + +#ifdef __INLINE + +extern double __inline_sqrt(double); + +double +sqrt(double x) { + double z = __inline_sqrt(x); + + if (isnan(x)) + return (z); + return ((x < 0.0)? _SVID_libm_err(x, x, 26) : z); +} + +#else /* defined(__INLINE) */ + +/* + * Warning: This correctly rounded sqrt is extremely slow because it computes + * the sqrt bit by bit using integer arithmetic. + */ + +static const double big = 1.0e30, small = 1.0e-30; + +double +sqrt(double x) +{ + double z; + unsigned r, t1, s1, ix1, q1; + int ix0, s0, j, q, m, n, t; + int *px = (int *)&x, *pz = (int *)&z; + + ix0 = px[HIWORD]; + ix1 = px[LOWORD]; + if ((ix0 & 0x7ff00000) == 0x7ff00000) { /* x is inf or NaN */ + if (ix0 == 0xfff00000 && ix1 == 0) + return (_SVID_libm_err(x, x, 26)); + return (x + x); + } + if (((ix0 & 0x7fffffff) | ix1) == 0) /* x is zero */ + return (x); + + /* extract exponent and significand */ + m = ilogb(x); + z = scalbn(x, -m); + ix0 = (pz[HIWORD] & 0x000fffff) | 0x00100000; + ix1 = pz[LOWORD]; + n = m >> 1; + if (n + n != m) { + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + m -= 1; + } + + /* generate sqrt(x) bit by bit */ + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + q = q1 = s0 = s1 = 0; + r = 0x00200000; + + for (j = 1; j <= 22; j++) { + t = s0 + r; + if (t <= ix0) { + s0 = t + r; + ix0 -= t; + q += r; + } + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + r >>= 1; + } + + r = 0x80000000; + for (j = 1; j <= 32; j++) { + t1 = s1 + r; + t = s0; + if (t < ix0 || (t == ix0 && t1 <= ix1)) { + s1 = t1 + r; + if ((t1 & 0x80000000) == 0x80000000 && + (s1 & 0x80000000) == 0) + s0 += 1; + ix0 -= t; + if (ix1 < t1) + ix0 -= 1; + ix1 -= t1; + q1 += r; + } + ix0 = (ix0 << 1) | (ix1 >> 31); + ix1 <<= 1; + r >>= 1; + } + + /* round */ + if ((ix0 | ix1) == 0) + goto done; + z = big - small; /* trigger inexact flag */ + if (z < big) + goto done; + if (q1 == 0xffffffff) { + q1 = 0; + q += 1; 
+ goto done; + } + z = big + small; + if (z > big) { + if (q1 == 0xfffffffe) + q += 1; + q1 += 2; + goto done; + } + q1 += (q1 & 1); +done: + pz[HIWORD] = (q >> 1) + 0x3fe00000; + pz[LOWORD] = q1 >> 1; + if ((q & 1) == 1) + pz[LOWORD] |= 0x80000000; + return (scalbn(z, n)); +} + +#endif /* defined(__INLINE) */ diff --git a/usr/src/libm/src/C/tan.c b/usr/src/libm/src/C/tan.c new file mode 100644 index 0000000..568d473 --- /dev/null +++ b/usr/src/libm/src/C/tan.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tan.c 1.17 06/01/31 SMI" + +#pragma weak tan = __tan + +/* INDENT OFF */ +/* + * tan(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_tan ... tangent function on [-pi/4,pi/4] + * __rem_pio2 ... 
argument reduction routine + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include + +double +tan(double x) { + double y[2], z = 0.0; + int n, ix; + + /* high word of x */ + ix = ((int *) &x)[HIWORD]; + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3fe921fb) + return (__k_tan(x, z, 0)); + + /* tan(Inf or NaN) is NaN */ + else if (ix >= 0x7ff00000) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? x : x - x); /* NaN */ + /* assumes sparc-like QNaN */ +#else + return (x - x); /* NaN */ +#endif + } + + /* argument reduction needed */ + else { + n = __rem_pio2(x, y); + return (__k_tan(y[0], y[1], n & 1)); + } +} diff --git a/usr/src/libm/src/C/tanh.c b/usr/src/libm/src/C/tanh.c new file mode 100644 index 0000000..9d8b6ba --- /dev/null +++ b/usr/src/libm/src/C/tanh.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)tanh.c 1.18 06/01/31 SMI" + +#pragma weak tanh = __tanh + +/* INDENT OFF */ +/* TANH(X) + * RETURN THE HYPERBOLIC TANGENT OF X + * code based on 4.3bsd + * Modified by K.C. Ng for sun 4.0, Jan 31, 1987 + * + * Method : + * 1. reduce x to non-negative by tanh(-x) = - tanh(x). + * 2. + * 0 < x <= 1.e-10 : tanh(x) := x + * -expm1(-2x) + * 1.e-10 < x <= 1 : tanh(x) := -------------- + * expm1(-2x) + 2 + * 2 + * 1 <= x <= 22.0 : tanh(x) := 1 - --------------- + * expm1(2x) + 2 + * 22.0 < x <= INF : tanh(x) := 1. + * + * Note: 22 was chosen so that fl(1.0+2/(expm1(2*22)+2)) == 1. + * + * Special cases: + * tanh(NaN) is NaN; + * only tanh(0)=0 is exact for finite argument. + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include + +static const double + one = 1.0, + two = 2.0, + small = 1.0e-10, + big = 1.0e10; +/* INDENT ON */ + +double +tanh(double x) { + double t, y, z; + int signx; + volatile double dummy; + + if (isnan(x)) + return x * x; /* + -> * for Cheetah */ + signx = signbit(x); + t = fabs(x); + z = one; + if (t <= 22.0) { + if (t > one) + z = one - two / (expm1(t + t) + two); + else if (t > small) { + y = expm1(-t - t); + z = -y / (y + two); + } + else { /* raise the INEXACT flag for non-zero t */ + dummy = t + big; +#ifdef lint + dummy = dummy; +#endif + return x; + } + } + else if (!finite(t)) + return copysign(1.0, x); + else + return signx == 1 ? -z + small * small : z - small * small; + + return signx == 1 ? -z : z; +} diff --git a/usr/src/libm/src/C/xpg6.h b/usr/src/libm/src/C/xpg6.h new file mode 100644 index 0000000..df646ed --- /dev/null +++ b/usr/src/libm/src/C/xpg6.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _XPG6_H +#define _XPG6_H + +#pragma ident "@(#)xpg6.h 1.8 06/01/31 SMI" + +/* + * The bits in lib/libc/inc/xpg6.h fpgroup may use as per PSARC/2003/486. + */ + +/* + * If set, math library entry points present in SUSv2 deal with exceptional + * cases as per SUSv3 spec where math_errhandling is set to MATH_ERREXCEPT; + * otherwise they behave as per SUSv2 spec. + */ +#define _C99SUSv3_math_errexcept 0x00000400 +/* + * If set, pow(+/-1,+/-Inf) & pow(1,NaN) return 1; otherwise NaN is returned. + * Analogous comment applies to powf and powl. + */ +#define _C99SUSv3_pow_treats_Inf_as_an_even_int 0x00000080 +/* + * If set, logb(subnormal) returns (double) ilogb(subnormal); otherwise + * logb(subnormal) returns logb(DBL_MIN). Analogous comment applies to + * logbf and logbl. + */ +#define _C99SUSv3_logb_subnormal_is_like_ilogb 0x00000040 +/* + * If set, ilogb(0/+Inf/-Inf/NaN) raises FE_INVALID as per SUSv3; otherwise + * no exception is raised. Analogous comment applies to ilogbf and ilogbl. + */ +#define _C99SUSv3_ilogb_0InfNaN_raises_invalid 0x00000020 + +/* + * __xpg6 = _C99SUSv3_mode_OFF disables C99/SUSv3 standards conformance mode. 
+ */ +#define _C99SUSv3_mode_OFF 0xFFFF0000 + +#if !defined(_ASM) +extern unsigned int __xpg6; +#endif + +#endif /* _XPG6_H */ diff --git a/usr/src/libm/src/LD/_TBL_cosl.c b/usr/src/libm/src/LD/_TBL_cosl.c new file mode 100644 index 0000000..128dc8a --- /dev/null +++ b/usr/src/libm/src/LD/_TBL_cosl.c @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_cosl.c 1.9 06/01/31 SMI" + +/* + * For i = 0L, ..., 75 let x(i) be the extended precision number + * whose exponent is given by 0x3ffc + ((i + 8) >> 5) and whose + * five most significant fraction bits are given by (i + 8) & 0x1f. + * (The remaining fraction bits are zero and the integer bit is 1.) + * Then _TBL_cosl_hi[i] := cos(x(i)) rounded to extended precisionL, + * and _TBL_cosl_lo[i] ~ cos(x(i)) - _TBL_cosl_hi[i]. 
+ */ + +#include "libm.h" + +const long double _TBL_cosl_hi[] = { + 9.8781778381647194407734133e-01L, + 9.8720237785483049041453801e-01L, + 9.8657190839949758873065125e-01L, + 9.8592638507066143575569700e-01L, + 9.8526581771821381618451860e-01L, + 9.8459021642159980601798769e-01L, + 9.8389959148966397219646454e-01L, + 9.8319395346049307253706584e-01L, + 9.8247331310125525749262290e-01L, + 9.8173768140803577633441562e-01L, + 9.8098706960566919046918752e-01L, + 9.8022148914756809622147657e-01L, + 9.7944095171554836000860772e-01L, + 9.7864546921965086785991095e-01L, + 9.7783505379795979334592304e-01L, + 9.7700971781641738478493484e-01L, + 9.7616947386863527671421389e-01L, + 9.7531433477570232649326437e-01L, + 9.7444431358598898037593275e-01L, + 9.7355942357494817143660423e-01L, + 9.7265967824491275265730642e-01L, + 9.7174509132488946761517512e-01L, + 9.7081567677034946294446077e-01L, + 9.6987144876301534501253018e-01L, + 9.6891242171064478417089050e-01L, + 9.6695002923067782202260975e-01L, + 9.6492861910477100957986285e-01L, + 9.6284831470937969988364152e-01L, + 9.6070924301556190306409372e-01L, + 9.5851153458122862729886421e-01L, + 9.5625532354317529696403552e-01L, + 9.5394074760889473397129298e-01L, + 9.5156794804817220216272555e-01L, + 9.4913706968446302764510006e-01L, + 9.4664826088605332182323443e-01L, + 9.4410167355700434565568893e-01L, + 9.4149746312788106861798448e-01L, + 9.3883578854626548865214275e-01L, + 9.3611681226705529027757452e-01L, + 9.3334070024254843565662820e-01L, + 9.3050762191231429116015580e-01L, + 9.2761775019285190965094914e-01L, + 9.2467126146703609851492875e-01L, + 9.2166833557335191816090730e-01L, + 9.1860915579491826785281383e-01L, + 9.1549390884830122858606058e-01L, + 9.1232278487211784648910212e-01L, + 9.0909597741543105166956915e-01L, + 9.0581368342593642076004609e-01L, + 9.0247610323794150491687888e-01L, + 8.9908344056013845619268129e-01L, + 8.9563590246317069891836618e-01L, + 8.9213369936699440471096142e-01L, + 
8.8857704502803554333020819e-01L, + 8.8496615652614329169001889e-01L, + 8.8130125425134059916022419e-01L, + 8.7758256189037271613028607e-01L, + 8.6998471805841738884335773e-01L, + 8.6217447993488050434493855e-01L, + 8.5415375427738538514389754e-01L, + 8.4592449923106795446874767e-01L, + 8.3748872385052368529220410e-01L, + 8.2884848760932573481351876e-01L, + 8.2000589989723400824016969e-01L, + 8.1096311950521790220310775e-01L, + 8.0172235409841845058843968e-01L, + 7.9228585967717854313466241e-01L, + 7.8265594002627279692635431e-01L, + 7.7283494615247154478458735e-01L, + 7.6282527571057625053081719e-01L, + 7.5262937241806647606931838e-01L, + 7.4224972545850130697074609e-01L, + 7.3168886887382088632511210e-01L, + 7.2094938094569641805946583e-01L, + 7.1003388356607967499180972e-01L, +}; + +const long double _TBL_cosl_lo[] = { + 2.3161701550475222913914987e-20L, + -1.8449479910096732184579231e-20L, + 2.6686158961121436032543157e-20L, + -8.6377467693509323999412576e-21L, + 1.9776110020628332806497627e-20L, + -3.5925805070704800589322274e-21L, + -1.8155190558460064943241466e-20L, + -9.1900782344860461108346151e-21L, + -5.2952188498928572418662889e-21L, + 1.8052490350294447403358175e-22L, + 1.4237809112451219388907461e-22L, + 2.6375298402937478119012648e-20L, + -1.0076765547845230197228052e-20L, + 2.4356732099577389276048253e-20L, + -1.3951467830437376437362152e-20L, + 1.7110854885636746562043992e-20L, + 9.7751412348794551526570426e-21L, + -1.5984515732024779414075399e-20L, + -2.6221693743524256098098490e-20L, + 2.1708281645344702813143892e-20L, + 1.3606643184793342931047312e-20L, + 4.0913737251026449191179388e-21L, + 3.0297735892921952471510043e-21L, + -2.0186136916357220892889611e-20L, + -2.6295048282251297741856903e-20L, + -1.4268128384616571293099177e-20L, + 1.2118148575499258442724515e-21L, + 1.6059569963428104840244296e-20L, + 2.5656322072743666174102425e-21L, + 3.1051993049709377435678279e-21L, + 1.1564422287617245178214769e-20L, + 1.0031811944878086819339264e-20L, 
+ -1.7237335190163247756143591e-20L, + 2.0747363423904458194504323e-20L, + 2.2865077385189808827392339e-20L, + -2.5671240384658541701793951e-20L, + 2.6526752505060021072717663e-20L, + -1.9564443985440576261207264e-20L, + 1.6662891366649668957364366e-20L, + -1.3289734577249155895809888e-21L, + -1.0679012486769670465318810e-20L, + -2.2918344926389240849631303e-20L, + -1.2815734598986502345856155e-20L, + 1.4504064768242345767590746e-20L, + -1.4988853557132440148049946e-20L, + -2.2142847270523120702212966e-20L, + 2.9274200155749021994272015e-21L, + -1.9187410072234352245854903e-20L, + -1.5529430996486684056198058e-20L, + 8.3043961792850937525987774e-21L, + 2.3863634821654097616646090e-20L, + -1.7796180005854437467836689e-20L, + 1.2938828814644961764053094e-20L, + -1.2599167110905505919738134e-20L, + 7.2776486597245992496949283e-21L, + -2.0062284600282808092832087e-20L, + -1.4004485599673539406695080e-20L, + -1.4442131618989703782137918e-20L, + 2.2223959244287650022010583e-20L, + -4.4575975223558432505505015e-22L, + -9.0245930394257121787744934e-21L, + 2.3149253152495269264191463e-20L, + -3.3469699832521350974745777e-21L, + 1.5380944635427999356502468e-20L, + -1.3572945384913555811651506e-20L, + 1.9052929123346841342486920e-20L, + 6.8389097769442269862154625e-21L, + 4.4331336879906155675581769e-21L, + 2.6264491975559389159451170e-20L, + -2.3718434730140290189643472e-20L, + -1.4777051948748214572130603e-20L, + 2.0601161465229389031848878e-20L, + -1.3273342027649427778913402e-20L, + -1.5653047869359238584973515e-20L, + -1.7688078635602856653655125e-20L, +}; diff --git a/usr/src/libm/src/LD/_TBL_ipio2l.c b/usr/src/libm/src/LD/_TBL_ipio2l.c new file mode 100644 index 0000000..72ce06c --- /dev/null +++ b/usr/src/libm/src/LD/_TBL_ipio2l.c @@ -0,0 +1,503 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_ipio2l.c 1.10 06/01/31 SMI" + +/* + * Table of constants for 2/pi, used in __rem_pio2l (trigl) function. + * By K.C. Ng, April 25, 1989 + */ + +#include "libm.h" + +const int _TBL_ipio2l_inf[] = { /* by DHBailey MP package */ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, + 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, + 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 
0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, + 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, + 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, + 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, + 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, + 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, + 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, + 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, + 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, + 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, + 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, + 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, + 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, + 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, + 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, + 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, + 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, + 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, + 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, + 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, + 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, + 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, + 0xAF806C, 0x4BFFDB, 0x0F9038, 
0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, + 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, + 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, + 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, + 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, + 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, + 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, + 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, + 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, + 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, + 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, + 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, + 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, + 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, + 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, + 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, + 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, + 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, + 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, + 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, + 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, + 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 
0x3B06A3, 0x3540C0, + 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, + 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, + 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, + 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, + 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, + 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, + 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, + 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, + 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, + 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, + 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, + 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, + 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, + 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, + 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, + 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, + 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, + 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, + 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, + 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, + 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 
0x2A9DB5, + 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, + 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, + 0xE13F89, 0xB295F3, 0x64A8F1, 0xAEA74B, 0x38FC4C, 0xEAB2BB, + 0x47270B, 0xABC3A7, 0x34BA60, 0x52DD34, 0xF8563A, 0xEB7E8A, + 0x31BB36, 0x5895B7, 0x47F7A9, 0x94C3AA, 0xD39225, 0x1E7F3E, + 0xD8974E, 0xBBA94F, 0xD8AE01, 0xE661B4, 0x393D8E, 0xA523AA, + 0x33068E, 0x1633B5, 0x3BB188, 0x1D3A9D, 0x4013D0, 0xCC1BE5, + 0xF862E7, 0x3BF28F, 0x39B5BF, 0x0BC235, 0x22747E, 0xA247C0, + 0xD52D1F, 0x19ADD3, 0x9094DF, 0x9311D0, 0xB42B25, 0x496DB2, + 0xE264B2, 0x5EF135, 0x3BC6A4, 0x1A4AD0, 0xAAC92E, 0x64E886, + 0x573091, 0x982CFB, 0x311B1A, 0x08728B, 0xBDCEE1, 0x60E142, + 0xEB641D, 0xD0BBA3, 0xE559D4, 0x597B8C, 0x2A4483, 0xF332BA, + 0xF84867, 0x2C8D1B, 0x2FA9B0, 0x50F3DD, 0xF9F573, 0xDB61B4, + 0xFE233E, 0x6C41A6, 0xEEA318, 0x775A26, 0xBC5E5C, 0xCEA708, + 0x94DC57, 0xE20196, 0xF1E839, 0xBE4851, 0x5D2D2F, 0x4E9555, + 0xD96EC2, 0xE7D755, 0x6304E0, 0xC02E0E, 0xFC40A0, 0xBBF9B3, + 0x7125A7, 0x222DFB, 0xF619D8, 0x838C1C, 0x6619E6, 0xB20D55, + 0xBB5137, 0x79E809, 0xAF9149, 0x0D73DE, 0x0B0DA5, 0xCE7F58, + 0xAC1934, 0x724667, 0x7A1A13, 0x9E26BC, 0x4555E7, 0x585CB5, + 0x711D14, 0x486991, 0x480D60, 0x56ADAB, 0xD62F64, 0x96EE0C, + 0x212FF3, 0x5D6D88, 0xA67684, 0x95651E, 0xAB9E0A, 0x4DDEFE, + 0x571010, 0x836A39, 0xF8EA31, 0x9E381D, 0xEAC8B1, 0xCAC96B, + 0x37F21E, 0xD505E9, 0x984743, 0x9FC56C, 0x0331B7, 0x3B8BF8, + 0x86E56A, 0x8DC343, 0x6230E7, 0x93CFD5, 0x6A8F2D, 0x733005, + 0x1AF021, 0xA09FCB, 0x7415A1, 0xD56B23, 0x6FF725, 0x2F4BC7, + 0xB8A591, 0x7FAC59, 0x5C55DE, 0x212C38, 0xB13296, 0x5CFF50, + 0x366262, 0xFA7B16, 0xF4D9A6, 0x2ACFE7, 0xF07403, 0xD4D604, + 0x6FD916, 0x31B1BF, 0xCBB450, 0x5BD7C8, 0x0CE194, 0x6BD643, + 0x4FD91C, 0xDF4543, 0x5F3453, 0xE2B5AA, 0xC9AEC8, 0x131485, + 0xF9D2BF, 0xBADB9E, 0x76F5B9, 0xAF15CF, 0xCA3182, 0x14B56D, + 0xE9FE4D, 0x50FC35, 0xF5AED5, 0xA2D0C1, 0xC96057, 0x192EB6, + 
0xE91D92, 0x07D144, 0xAEA3C6, 0x343566, 0x26D5B4, 0x3161E2, + 0x37F1A2, 0x209EFF, 0x958E23, 0x493798, 0x35F4A6, 0x4BDC02, + 0xC2BE13, 0xBE80A0, 0x0B72A3, 0x115C5F, 0x1E1BD1, 0x0DB4D3, + 0x869E85, 0x96976B, 0x2AC91F, 0x8A26C2, 0x3070F0, 0x041412, + 0xFC9FA5, 0xF72A38, 0x9C6878, 0xE2AA76, 0x50CFE1, 0x559274, + 0x934E38, 0x0A92F7, 0x5533F0, 0xA63DB4, 0x399971, 0xE2B755, + 0xA98A7C, 0x008F19, 0xAC54D2, 0x2EA0B4, 0xF5F3E0, 0x60C849, + 0xFFD269, 0xAE52CE, 0x7A5FDD, 0xE9CE06, 0xFB0AE8, 0xA50CCE, + 0xEA9D3E, 0x3766DD, 0xB834F5, 0x0DA090, +}; + +#if 0 +const int _TBL_ipio2l_66[] = { + 0xA2F983, 0x6E4E44, 0x152A00, 0x062BC4, 0x0DA276, 0xBED4C1, + 0xFDF905, 0x5CD5BA, 0x767CEC, 0x1F80D6, 0xC26053, 0x3A0070, + 0x107C2A, 0xF68EE9, 0x687B7A, 0xB990AA, 0x38DE4B, 0x96CFF3, + 0x92735E, 0x8B34F6, 0x195BFC, 0x27F88E, 0xA93EC5, 0x3958A5, + 0x3E5D13, 0x1C55A8, 0x5B4A8B, 0xA42E04, 0x12D105, 0x35580D, + 0xF62347, 0x450900, 0xB98BCA, 0xF7E8A4, 0xA2E5D5, 0x69BC52, + 0xF0381D, 0x1A0A88, 0xFE8714, 0x7F6735, 0xBB7D4D, 0xC6F642, + 0xB27E80, 0x6191BF, 0xB6B750, 0x52776E, 0xD60FD0, 0x607DCC, + 0x68BFAF, 0xED69FC, 0x6EB305, 0xD2557D, 0x25BDFB, 0x3E4AA1, + 0x84472D, 0x8B0376, 0xF77740, 0xD290DF, 0x15EC8C, 0x45A5C3, + 0x6181EF, 0xC5E7E8, 0xD8909C, 0xF62144, 0x298428, 0x6E5D9D, + 0xF9A9B4, 0xCDBD2F, 0xC083E7, 0x0D3957, 0xECA3B2, 0x96223C, + 0xC1080D, 0x087D47, 0x7D7576, 0xA614B1, 0x42A4B6, 0xAA173C, + 0xE217E5, 0xFDCD34, 0x279D5F, 0x39AACA, 0x1CA8DF, 0x8B6633, + 0x5C49E4, 0xB56803, 0x1E7938, 0x741FDC, 0x4CB19B, 0xCECC3B, + 0x921EB7, 0x7C0FC3, 0x361F23, 0xF9EE22, 0xBA4235, 0xA5FCA3, + 0xBD4680, 0xFCDF65, 0xFC96AD, 0x31C90C, 0x919EEB, 0xFE0FB7, + 0x75B4B0, 0x693961, 0x75BCAA, 0xEB6F39, 0xA343C0, 0xD16FF2, + 0x33DAD0, 0xC1E095, 0x053182, 0x11E4A1, 0x40F943, 0x32D314, + 0xAF1B98, 0xE1B05A, 0xE5F3AD, 0x6E633F, 0x363D14, 0xA3777C, + 0xC8C6EE, 0x001E18, 0x0D180C, 0xAA1369, 0xEDFBA2, 0x998A9D, + 0x16E799, 0x693B75, 0x90EF50, 0x938DD4, 0xFB7ACD, 0x67CEEB, + 0x249DE3, 0x9B9B52, 0xD8CDAC, 0xC31A54, 0x855FBF, 
0x848591, + 0x0954B0, 0x946B8C, 0xA4C7B4, 0x9A9E51, 0xF20425, 0xAA2637, + 0xFC6657, 0x7D8625, 0x620B74, 0x8B578D, 0xEC9A05, 0xDEF24F, + 0x7F19B0, 0xFC2544, 0x1DA0F1, 0x23790C, 0xC4294D, 0x6D3C32, + 0x66FE56, 0xD45562, 0x66264F, 0xA24162, 0x13E930, 0xB0E7C0, + 0xFA0E97, 0xBFC62C, 0x0E663F, 0x90F33B, 0x55E73C, 0xD791F7, + 0xD3F00D, 0xAB01C7, 0x40CF8F, 0xA593BA, 0xE627D5, 0x4A8308, + 0x32DC06, 0x80C876, 0x1C3DB5, 0xB5489F, 0x632CDF, 0xB02517, + 0xD17EFA, 0x92570F, 0xFAED44, 0x8F8536, 0x27069B, 0xC014DC, + 0x997D48, 0x961D61, 0x7A960B, 0x31B622, 0xD3C425, 0xA69520, + 0x98D29E, 0xF1C973, 0x5483D7, 0x99611E, 0xEAFF5F, 0x7DEFF1, + 0x98475C, 0x91C787, 0x537E17, 0x068C65, 0xF05E52, 0x942F04, + 0x37CF92, 0xEF4223, 0xC4C52F, 0x521DAA, 0xBAAF97, 0x972236, + 0xA2B3D3, 0x62C921, 0x8D3A8B, 0x2B3302, 0x6061B9, 0x0CBE94, + 0x75F451, 0xBD06DE, 0x86042D, 0xFB61ED, 0x4C8869, 0x590232, + 0x479963, 0x23518D, 0xAF5D28, 0x60C9DE, 0x473DB0, 0x9DE009, + 0xD8FC4C, 0xE96991, 0x9CA455, 0x800BC8, 0x977CE0, 0xDCBFA6, + 0x19D249, 0xA0F76D, 0x5F9B2F, 0x452BB3, 0x77E091, 0xB6383A, + 0x7BE9C2, 0x4BF7C1, 0x8A5EBF, 0xEB0D55, 0x9AF4DC, 0x275CA0, + 0xED09D0, 0xE50A7F, 0xBEF42C, 0x4803AF, 0x56139F, 0xD58848, + 0x797D96, 0xB8352E, 0x49D90D, 0x7607E0, 0xC99256, 0x75F530, + 0xB72237, 0x1AF080, 0xC2E813, 0x06CFA9, 0xB9DF8E, 0x919C38, + 0x89D97E, 0x0464D5, 0xB12EEF, 0xD14165, 0x365A72, 0x550D35, + 0x3772D8, 0xF41B58, 0x0378A7, 0x2D5D7D, 0xD6E433, 0xDD2018, + 0x139FD7, 0x1B5621, 0x94E046, 0x97A323, 0x693176, 0x28DF59, + 0xD24273, 0x0E4E26, 0xA9A8F6, 0xF15B41, 0x450EE3, 0x57EA61, + 0x7DADA6, 0xF21086, 0x394BEE, 0x8F4813, 0x3FDEE9, 0xF3A53D, + 0xAB2F40, 0x8B1E2B, 0xA07FD4, 0x992CC4, 0x63532D, 0x9F35A2, + 0x6FA290, 0x0094DE, 0xD2A24D, 0x755B81, 0x79F9E1, 0xFE1D35, + 0xFEE8CC, 0x9224C5, 0x54E2CE, 0x41F31C, 0xF45138, 0xED6D10, + 0x6B439D, 0xD2BE46, 0xC327D4, 0x68BFB0, 0x46D5A5, 0x79B285, + 0x776D7C, 0xE18647, 0x00E32F, 0xEBB7F2, 0x5DE307, 0x5A8EA0, + 0x06CEFE, 0x20923C, 0x354CE1, 0xAD09C5, 0x56996D, 0xCFB124, + 
0xEF7BC1, 0x76BF72, 0xF20753, 0x5BBAFA, 0xB8A2B2, 0x5914F2, + 0x5D834F, 0xE64A08, 0x14C3AB, 0x07796B, 0xF2212D, 0xC74049, + 0xB61C6A, 0x282CFC, 0x25070C, 0x315BF1, 0x6FEAD3, 0x2CD2E5, + 0xD10F9C, 0x1972BB, 0x908073, 0x0F368C, 0x69BE97, 0xA242B0, + 0x722DFE, 0xAFE6A2, 0x143D8B, 0x5C5699, 0x48232B, 0xFF49AC, + 0xB5FA62, 0x6AD778, 0x7A844D, 0x258AA0, 0x8EDE3D, 0x9A9496, + 0x49924E, 0xA33E97, 0x4F43FA, 0xC40741, 0x2F764A, 0x8EB2B1, + 0x8E67D3, 0x9FF324, 0x51B11B, 0x5D6E09, 0xE9AD3E, 0xFFA902, + 0xF48653, 0x0845D3, 0xDED33E, 0x32D30E, 0x6247CA, 0x7C586D, + 0x2EAF9E, 0x323A35, 0xAD11FB, 0x0F420C, 0x0E0685, 0x401B60, + 0xBB3D43, 0xF4D489, 0xBCDC4C, 0x40FFBA, 0x18AB08, 0x7AC72D, + 0x5E76DB, 0xE8344E, 0x3975A2, 0xF9611B, 0x1121F3, 0x3A429C, + 0x9B18EC, 0xF298B1, 0x8AEC78, 0x1C248B, 0x69108F, 0xDB2D37, + 0xA1A613, 0x910359, 0x521451, 0xD4441F, 0x0BB3B6, 0x50D9DB, + 0xBD589F, 0x62A62E, 0xA9B903, 0x935F63, 0x058BEC, 0x78BCB5, + 0x2CB460, 0x3A9037, 0x0291C4, 0x1FABC1, 0xBE7D05, 0xF948E7, + 0x6BA5CD, 0xF62A0A, 0x9AEA19, 0x2257AB, 0x2E0D7D, 0x9EB93F, + 0x5E3F77, 0xD4A13F, 0x08E3DB, 0xDFD689, 0x2B9B4E, 0xB58427, + 0x25424B, 0x1197FD, 0xCF298A, 0x314008, 0xD5687F, 0x0F0EAC, + 0x13C485, 0xF684B2, 0xED7EC7, 0x6E636D, 0x28C933, 0xE19058, + 0x688B6A, 0xC88905, 0xFB2F31, 0x61304C, 0xC19765, 0x60D81A, + 0x57F276, 0xC6EFC4, 0x048954, 0x303470, 0xDA6F6F, 0x93901A, + 0x911439, 0x363D12, 0x59E72B, 0x6F9F1E, 0x57C584, 0xDF0D23, + 0xBB743F, 0xADE99C, 0x546097, 0xFCC820, 0xCBB968, 0xDA9B5F, + 0x0DC271, 0x563337, 0x9ED662, 0xE7C44F, 0x3129F8, 0xF5EAF9, + 0xDAF7F2, 0xCD09FF, 0xA92535, 0x441C29, 0x7DF436, 0xE2B00A, + 0x36746F, 0xF1DC61, 0x9D3C9C, 0x63AB71, 0xB8F3BB, 0x1C80F6, + 0x62FF65, 0x5FFE5F, 0x3B2814, 0xBADE27, 0x1B384B, 0x268AA9, + 0xBD91EF, 0xCA436B, 0xABE107, 0x88DCA6, 0xC3AFC0, 0x85D155, + 0x464A48, 0xBFDAEB, 0xC6F389, 0x907C11, 0x0D3E41, 0xCD2197, + 0x549008, 0x817E4E, 0x8C7154, 0x1DC37F, 0x5E897E, 0xA9A2FE, + 0xEC6060, 0xCC0728, 0x430D3B, 0x62471C, 0xD3A4D3, 0x2BA57B, + 0xE5D15A, 
0xD632F3, 0xF2B76F, 0xEC8498, 0xAE41C2, 0xAAF413, + 0xEAF5C0, 0xDD1B07, 0xB9A2A0, 0x59F230, 0xA3F61B, 0x8F8643, + 0x05DE6B, 0x1B5B8E, 0x63ECC5, 0xBFF01D, 0x8F1440, 0x3F8ADF, + 0x2E6539, 0xF3DB7A, 0x293FE5, 0x7EE714, 0x88E6D8, 0x2B2A6A, + 0xDF6E34, 0x8D4604, 0x4F6594, 0x639063, 0x6B51CC, 0x0D05CD, + 0x009607, 0xE7BF70, 0xC9A0EA, 0x0D80DD, 0xA1A065, 0x0DCB8F, + 0xA48430, 0x715934, 0x6FC8E4, 0x6FFC52, 0xEF8B05, 0xDE506A, + 0xE62BBC, 0x31480F, 0xEA64EA, 0x51E6FB, 0x9AE773, 0x21C54D, + 0xBFA080, 0x273D1E, 0x9FFD4E, 0x0C2CA8, 0x0690A5, 0xF8773B, + 0x4B2680, 0x6E3F56, 0xC8B89F, 0x0B7BD0, 0x71C8BF, 0x5AABD3, + 0x2BA93E, 0x9D2EE1, 0xCDF2FA, 0xEE57BE, 0x84A116, 0xDA756D, + 0x8FD6C0, 0x927153, 0xFF5EF3, 0x9F8331, 0x713411, 0xF945F3, + 0x0382B2, 0x8BAE30, 0xBC45A4, 0x630101, 0x5C9C3A, 0x643CFD, + 0x48115C, 0x17F03E, 0xB5F55E, 0x288DAF, 0x725660, 0xFB58E0, + 0xFC189E, 0x1ECA69, 0xFB19A6, 0xFA7A92, 0x7CC48E, 0x869372, + 0x58089A, 0x16DB5C, 0xADC0CD, 0x09D3D4, 0xD1108E, 0xDC64ED, + 0x3A999C, 0xAA8716, 0x5A3D8E, 0x7037FB, 0x1976AD, 0xE477D7, + 0x23782B, 0xC51F39, 0x4A5E9A, 0xDAD9DA, 0xE5B559, 0x08EF06, + 0x76E24F, 0x7361AD, 0x5F42A3, 0x9B70E5, 0xCE96C4, 0x552E99, + 0x6D7A6F, 0x804474, 0x4FA45B, 0x1D115B, 0x6D109E, 0x0A1A63, + 0x1084A6, 0xE18E5D, 0x2D8589, 0x203345, 0x4851AF, 0xA71EDC, + 0x03B6B1, 0x267970, 0xDEC908, 0x795BED, 0x7099B9, 0x209321, + 0x7FC2E7, 0x0F3E5E, 0xC7A4F4, 0x088129, 0x59AE63, 0x4E3251, + 0x344268, 0x79285D, 0x2B9494, 0xF1E2A2, 0xF7DA20, 0xDF6756, + 0xCA3BA3, 0x422489, 0xA2239C, 0x38724D, 0x2AC767, 0x601E9D, + 0xB47C6C, 0xA22481, 0xBBB655, 0x1EC0C4, 0xD84A97, 0xD449EE, + 0x162C9D, 0x782F29, 0xCEB4FA, 0xE317BC, 0x2FFDBD, 0xB342D2, + 0xB2CB19, 0x323AB9, 0x1AFF93, 0x13A8DF, 0x86B5A5, 0x5741D6, + 0xC54342, 0x3CAC29, 0xF7517C, 0x129A7A, 0xB2B8B4, 0x9B709F, + 0x3923C5, 0xEAFA6E, 0xDB9077, 0x29EEA0, 0x702D8C, 0x4DC14F, + 0xE46933, 0xA764E4, 0x754266, 0xFA4F98, 0x643DA5, 0xCA775C, + 0x7F1632, 0xE671A3, 0x4BF4C6, 0xA82378, 0xEFD317, 0xE62D38, + 0xD461C9, 0x8EEC80, 
0xC89882, 0x4CC73C, 0x830F3F, 0xE4B200, + 0x582615, 0x6CD558, 0xA66727, 0xEF7975, 0xFEA5CE, 0x147A40, + 0x4796E4, 0xC07761, 0xF5D5B3, 0x6B65FB, 0xE4F14D, 0xA837CA, + 0x9A152A, 0x554E94, 0x83EC5F, 0xA62174, 0x85E2ED, 0xCCE71C, + 0x3540FF, 0x088A84, 0xBA2816, 0x293610, 0x4C3EE7, 0x8E55A9, + 0x49E5E5, 0x782178, 0x45D2AA, 0x9BB449, 0x00D282, 0xF61E67, + 0xE2F7DE, 0xCC6AA1, 0xCD1979, 0x52FEDB, 0x9A8776, 0x70A018, + 0x500271, 0x1273BA, 0xDE648E, 0x7AC7F7, 0x767725, 0xD0A457, + 0xF17250, 0xBC578C, 0x2DFD3A, 0x97F988, 0xA576C8, 0x8129BB, + 0x22D9C3, 0x0436ED, 0x650791, 0xA314EC, 0x42A0B3, 0x37A521, + 0x4BFB2B, 0x8C1B7F, 0x115E17, 0xF7C27F, 0xC1D5EB, 0x060487, + 0x8A28D6, 0x41330F, 0xBFAE67, 0x7774E8, 0x4CCC3C, 0x6B2F80, + 0x628BF2, 0x1E41A6, 0x8D0B22, 0xBC85BA, 0xCCF461, 0xBEC69C, + 0xDF8A10, 0x3C5E71, 0x2F8D5F, 0x63D3DA, 0x5934D1, 0x2CA35D, + 0xC687A2, 0x24E9B4, 0x1843D3, 0x5C9B97, 0x9B580C, 0x780B2C, + 0x59943D, 0x0744D0, 0x8DA6E3, 0x07AAF6, 0x2214D0, 0x72E8D7, + 0x54151B, 0x514DE9, 0x8DCC3B, 0x0CEB00, 0x2C4DE3, 0x5012AE, + 0xD7B72E, 0xB7DE9A, 0x641B2F, 0xF9CF17, 0x8BD282, 0x9F31A3, + 0xDED846, 0x467E05, 0x26CCEA, 0xF8E404, 0x65572E, 0x82C594, + 0xE572A9, 0x895653, 0xA1AA94, 0x8DD876, 0x5E9A61, 0x69EB1C, + 0x0385A9, 0x5BC844, 0x95B2DF, 0x6678F6, 0xFA7033, 0xE4F434, + 0x5584A9, 0x32C099, 0x9AD846, 0xB3FFD1, 0xA81C56, 0x4E54EF, + 0x54D173, 0xF191B4, 0x49B2A2, 0xB309D9, 0x546D8D, 0xC0A51E, + 0xCAFFC0, 0x785400, 0x05F69D, 0x894056, 0xC33098, 0xDFF6C2, + 0x908D97, 0x05CC96, 0x46484B, 0xBD7B9D, 0xB152F5, 0x5A7461, + 0x59CA20, 0x8F8EF5, 0xC9FF05, 0xF6F398, 0x856C97, 0x81E07C, + 0xAE5EDA, 0x51BDC9, 0xF26437, 0xBBC8CE, 0x091B52, 0x68B6A5, + 0x90750E, 0x925EF9, 0x3D9CB3, 0x46EA96, 0x97D648, 0x78BCC7, + 0xF4B488, 0x05275E, 0x6619DF, 0x56D4A0, 0x8C5C41, 0xDB345A, + 0x0B79DA, 0x496369, 0x96109B, 0x667664, 0xC40CF9, 0x91D7CA, + 0x119F1A, 0xA99272, 0xCBB529, 0xBB033E, 0x8F91C0, 0x570045, + 0xB845C2, 0x2B8E52, 0x687AFB, 0x0D0AA3, 0x200863, 0x043B83, + 0xF129DE, 0x49C2D6, 0x9641D2, 
0xC4747C, 0x220804, 0x503F05, + 0x7E274F, 0xCA83D9, 0x9D6495, 0x0E5039, +}; +const int _TBL_ipio2l_53[] = { + 0xA2F983, 0x6E4E44, 0x16F3C4, 0xEA69B5, 0xD3E131, 0x60E1D2, + 0xD7982A, 0xC031F5, 0xD67BCC, 0xFD1375, 0x60919B, 0x3FA0BB, + 0x612ABB, 0x714F9B, 0x03DA8A, 0xC05948, 0xD023F4, 0x5AFA37, + 0x51631D, 0xCD7A90, 0xC0474A, 0xF6A6F3, 0x1A52E1, 0x5C3927, + 0x3ADA45, 0x4E2DB5, 0x64E8C4, 0x274A5B, 0xB74ADC, 0x1E6591, + 0x2822BE, 0x4771F5, 0x12A63F, 0x83BD35, 0x2488CA, 0x1FE1BE, + 0x42C21A, 0x682569, 0x2AFB91, 0x68ADE1, 0x4A42E5, 0x9BE357, + 0xB79675, 0xCE998A, 0x83AF8B, 0xE645E6, 0xDF0789, 0x9E9747, + 0xAA15FF, 0x358C3F, 0xAF3141, 0x72A3F7, 0x2BF1D4, 0xF3AD96, + 0x7D759F, 0x257FCE, 0x29FB69, 0xB1B42C, 0xC32DE1, 0x8C0BBD, + 0x31EC2F, 0x942026, 0x85DCE7, 0x653FF3, 0x136FA7, 0x0D7A5F, + 0x93FC61, 0x035287, 0xC77FCA, 0x73530A, 0xC6BC15, 0x0E4B0F, + 0x568FCE, 0x2D3456, 0x4D7FE1, 0xA12CD1, 0xB2CEA2, 0x531C62, + 0x70B4D2, 0x1BCE9A, 0x87704D, 0x6B83D7, 0xAA8121, 0x2530EA, + 0x2074BF, 0x28A071, 0x9D69C3, 0x406DD8, 0xF58783, 0x115D89, + 0x5E85F3, 0xAACDCC, 0x8C0B57, 0xD7DFFE, 0x550D96, 0xC43EB4, + 0x89ABA7, 0x94F595, 0x56F260, 0x06A4CD, 0x7FD2E2, 0x6FDFA8, + 0x3E9C98, 0xBFD682, 0xAD3A12, 0x23A8A6, 0x173A89, 0x5DE9BD, + 0x95A978, 0x28E484, 0x5964F3, 0x496AF0, 0x4B1DA9, 0x989061, + 0xBD2BF2, 0xE01A90, 0x0905B7, 0xAC39AC, 0x52D5B7, 0x109F25, + 0x3AE1DC, 0xF90A7C, 0x33F4E5, 0xF5DFDF, 0x1522D0, 0x562CE6, + 0x392CFF, 0xEB9032, 0x10A08E, 0x0B1D7F, 0x42B80A, 0x366DD2, + 0xC24F89, 0x02222E, 0x21494C, 0x985287, 0x87FD07, 0x2EE361, + 0xAD8D68, 0xE72273, 0x9E8D59, 0xD09999, 0x10F4A1, 0x1079A3, + 0xE9BEAF, 0x9C0887, 0x09C622, 0xEBCF06, 0x974532, 0x086A8F, + 0x6CEA05, 0x388C00, 0x74969E, 0xC85B16, 0x385A38, 0x9A2F35, + 0x670531, 0xABA6D0, 0xEFD3C1, 0x27AD92, 0xF4203E, 0x3D619F, + 0x4D05F4, 0x9AE7CC, 0x03B592, 0x41FF55, 0xCAFCA5, 0x1A0987, + 0x88AB79, 0x3627D4, 0x25B12A, 0x52594A, 0xA2BEB0, 0x25C3F2, + 0x4489DA, 0x7959A7, 0xEAEC89, 0xB34714, 0x960196, 0x1FC33A, + 0x7F0275, 0x32EF92, 0x0111CE, 
0x8E4685, 0x6F5B34, 0xF6123A, + 0x5543B2, 0xE9A02A, 0x74E03F, 0x54D5A8, 0x086A2C, 0x4A9CD3, + 0x921191, 0x229764, 0x0A1A84, 0x9B45AE, 0xC653A5, 0xB15F33, + 0x100FD1, 0x7DD740, 0xB20CD3, 0x0A0786, 0xF506C3, 0x25EBF4, + 0x3AB39E, 0xE3BB24, 0x27646F, 0xEECE57, 0x706BFE, 0xC7A869, + 0x57ED51, 0x118C82, 0x2B0FF5, 0xC8E545, 0xC43D80, 0x2A3183, + 0x4C1BB9, 0xBC108A, 0x099779, 0xF9ECC8, 0x2A1063, 0x5D2F6A, + 0x8F2675, 0x12FF6D, 0x32EED9, 0xE4A245, 0x7392CF, 0x5C240B, + 0xC476FF, 0x97AFC7, 0xB76131, 0x665E05, 0x67BD57, 0x19E998, + 0x3A5863, 0x23B8AA, 0x5B5608, 0x8A66C6, 0x5F2AD3, 0x78BAFA, + 0x3516CE, 0xCBEA16, 0x6E40D4, 0xB463D4, 0xA6C12F, 0xABD3D7, + 0x32650A, 0x579D10, 0x3CB9E2, 0x1A02A7, 0xDF2FFA, 0x28C991, + 0xB2264C, 0x027870, 0x47BDD4, 0xF243B1, 0x39AE2C, 0x282EA4, + 0xAF1D98, 0x2AFD16, 0xABE7AF, 0x17CB67, 0x8FF93E, 0x793167, + 0x435F6B, 0x48058B, 0x417DA0, 0xE01217, 0x085A69, 0xB50E36, + 0x79A4CD, 0xD74907, 0x26C4B5, 0xB90054, 0x06C3AD, 0x5AB38F, + 0x585E91, 0xD04E4F, 0x2938CE, 0xD4EAA7, 0xA06DE5, 0x40BFE5, + 0xDE6849, 0xEF65F0, 0xF1D4BB, 0x94C21E, 0x66E978, 0x1B9B94, + 0x961043, 0x5961B8, 0xBAAA74, 0xD662EE, 0x9DABF6, 0x0AFE28, + 0x9587A4, 0xA632BC, 0x09149F, 0xDEA996, 0x2CAFD7, 0xBDE29B, + 0x7159E6, 0x1F7C49, 0xF2E2ED, 0xBFA992, 0x7C77EF, 0xC245D0, + 0xB2D129, 0x993E75, 0xAB4C0C, 0x5C84B6, 0x17F542, 0x45314E, + 0x1DEF1B, 0xE3BDCC, 0xB3AE86, 0x24522F, 0x918FC6, 0x2138D5, + 0x883646, 0x6858B6, 0x032762, 0x5170F8, 0x4974EA, 0x76BF77, + 0xECDA8A, 0x9EADDD, 0x2404EF, 0xC52A5D, 0xF2E858, 0xC42D60, + 0xD18C08, 0xDE59B2, 0x4CC3A6, 0x94D888, 0x4C4AF0, 0xCF1F8C, + 0xBF2F6F, 0x7B4535, 0x98B0DB, 0x2BE0CF, 0x4616A7, 0xA8D9FB, + 0x88CA7A, 0x5087E1, 0x18DD8A, 0x1A9F4F, 0x1DCECE, 0xF8609E, + 0xE2F0C8, 0x9AD7D4, 0xE3CDFE, 0xC6FDD5, 0x8FF3CD, 0x7D45AA, + 0xD34957, 0x7C1963, 0x6CE098, 0xB70215, 0x326BBF, 0x47B3A6, + 0xF9235D, 0x6F66F5, 0xC6E40C, 0xE7F50B, 0xFF2FDD, 0x5A1251, + 0xE95EF1, 0xDE8E67, 0xECEE9B, 0xC9F98E, 0x722224, 0x6DF750, + 0x81D08F, 0x2BFCF0, 0xDDC10D, 0x775314, 
0xDB1D87, 0x41626B, + 0x9EDF31, 0x7738D9, 0x8D9EB4, 0x4F1C2A, 0xF3E795, 0xB69699, + 0xD9A56D, 0x31BB1B, 0x542975, 0xAB917B, 0x63927C, 0x9BB764, + 0x84A598, 0x0A0C51, 0x5E48C4, 0x7780E3, 0x87E156, 0x155972, + 0xE406F8, 0x48AB9E, 0x3CCDDA, 0x010F87, 0x683B70, 0x400CAD, + 0x5DE5C5, 0x7262FA, 0xFA248D, 0x013AF2, 0xE2E8B5, 0x995F7D, + 0x7F8C4B, 0x0E8B59, 0x1006F1, 0x40B6E9, 0x760654, 0xCBCC8C, + 0x086F40, 0xDC7F6F, 0xFCD0D4, 0xA47ADE, 0x5204FA, 0xF38A9D, + 0xE76C7C, 0x575207, 0x499BF1, 0x0DB01C, 0x09098E, 0x957A71, + 0xD53E0E, 0x61DF1D, 0xE6EF34, 0x5821EC, 0x96BCC0, 0xDC96CE, + 0xA9C0AE, 0x130B2C, 0xCCC589, 0x829BB9, 0x2A75BA, 0x97611C, + 0x0CEAB8, 0x165D9D, 0x35AD41, 0x82A805, 0x975628, 0x5601A6, + 0x074F08, 0x80A27D, 0xEFA64E, 0xD7BB4B, 0x5E6397, 0xC92FFC, + 0x4F3F7A, 0xBEA764, 0x0C9B7D, 0xC5DC74, 0xEAD216, 0x6DBBC0, + 0x913E3E, 0xABF50B, 0x95B24A, 0x3FC9C5, 0xE7BA15, 0x8C7F70, + 0xF81358, 0x774606, 0xCE8C0D, 0xB6B268, 0xB85BA6, 0xAC9B2E, + 0x1AAB05, 0x0C6C82, 0x6EC2AE, 0x606874, 0x8F60BF, 0x1FBC7B, + 0x58C97A, 0x448794, 0xBA48A0, 0x72E882, 0x6D3568, 0xE131FD, + 0x4745D0, 0x0BFA1E, 0x07B01D, 0x474D43, 0x59387E, 0x5B0AD5, + 0xC37A8C, 0x0474E8, 0x13D99D, 0x68A13C, 0xB69118, 0x89228C, + 0x6F7D83, 0x86D665, 0x5C7744, 0xDD183E, 0x1C2E17, 0x712F5E, + 0x4AACCB, 0xB69B68, 0xA1201F, 0x743C2B, 0xF6AD70, 0x92E024, + 0xF34FD8, 0x33712E, 0xFE1D73, 0x4471F0, 0x7D0526, 0x58AF47, + 0x7B11FE, 0x1FCE4F, 0x1356C9, 0x9CE3CA, 0xA843C0, 0x8EEA3C, + 0xABEEE4, 0xA5D495, 0xA407A4, 0x31BB4B, 0x0AA1E3, 0x518E7C, + 0xAA4A66, 0xD82CD8, 0x6EF8D2, 0x6F32E6, 0x1DC26B, 0x17AE59, + 0x4B683B, 0x8D48F7, 0xF4FBD8, 0xD4FE0A, 0xE961DE, 0x87BD37, + 0xE6CCD6, 0xCBD76D, 0x3E99DE, 0xB72E21, 0x54EB90, 0x6AB45D, + 0x600AFB, 0xA17B2F, 0xDA0421, 0xE6CA95, 0x35AAA2, 0x7D8FB1, + 0x3207BB, 0xBF82EE, 0x71F55F, 0xC661CB, 0xBD72A1, 0xBF5A64, + 0x6E39E8, 0x6C6DE2, 0x2BD178, 0xAF62A5, 0xA7D86E, 0xE7D0FE, + 0x84DB03, 0x67FDA2, 0x2D6809, 0x0F8B8F, 0x1B50E3, 0x234EF5, + 0x7325ED, 0x8F8F4C, 0xC1E426, 0x3066AD, 0x0759A4, 
0xE03390, + 0x70CC9A, 0x524F77, 0xCDD489, 0x97DD24, 0xA81858, 0xF24513, + 0xA9C18E, 0x2A2F82, 0xC2C014, 0xB8E7F0, 0x934036, 0xD36E51, + 0xD9A089, 0xDBC587, 0xB30418, 0x969192, 0x0A5213, 0xE21841, + 0x2881EC, 0x9A293F, 0x0DF705, 0x85B497, 0xE430B9, 0xE90ECF, + 0xC15FDC, 0x9E8A7E, 0xC5472D, 0xB54FBD, 0x456AF2, 0xCA80B6, + 0xAE25FE, 0xA03B46, 0x6C6CFD, 0x78382A, 0x0E7877, 0x7F2D31, + 0x03C827, 0x61CF52, 0x339A2F, 0x2286A9, 0xE41DF0, 0x640F5C, + 0xBEF364, 0x010506, 0x6D2C21, 0x841EFF, 0x7F3B5D, 0xD98DC8, + 0x0F9421, 0xA25B0C, 0x4C2C44, 0x922392, 0xB98A8A, 0x6179B9, + 0xF7B419, 0x289AAF, 0xE92F47, 0x5E47A2, 0x82927F, 0xC7290E, + 0x6C925C, 0xBA5A3C, 0x8FB7F6, 0x9C4BEE, 0x02C529, 0x0CFCD7, + 0x5EBD8C, 0x7196E0, 0x4B917E, 0x6B9780, 0x6A1731, 0xA617FF, + 0x27A20D, 0x5A56A3, 0x43C4DB, 0xC62EA4, 0x637A84, 0x1C46F9, + 0x33C780, 0x61A278, 0x4915C9, 0xD6C776, 0x6A7C66, 0xD8DD0C, + 0xF87EB1, 0x124C43, 0x5B87E7, 0x097456, 0x3C2FA7, 0x307C4A, + 0x54267A, 0x30E34E, 0xC0CF98, 0xD75B19, 0xFADEDB, 0x12CBE8, + 0x29F24C, 0x579C7E, 0xBF3682, 0xDCB460, 0xAE08B3, 0xA524BC, + 0xC181C2, 0x5DAB90, 0x466602, 0x55345B, 0xA13941, 0x47D820, + 0x278066, 0x81B089, 0x165EFB, 0x4D27FD, 0x2BF9F4, 0x2E2FFB, + 0x6106B5, 0xE76806, 0x445A84, 0x0BDA0D, 0x49D7A4, 0x72650D, + 0xCDC55B, 0x3E16BC, 0x132F6F, 0x29E8FD, 0xE58428, 0x621E41, + 0x7D2AC4, 0xAB5697, 0xAC61EB, 0xE5DAF0, 0x654ED6, 0x8E77E3, + 0x0B2FBC, 0x2E63A3, 0xC8296A, 0x8B631F, 0x4ECCA6, 0x91859C, + 0x9E3E45, 0x0E3CC7, 0xC12454, 0xCCBCB6, 0x17979E, 0xD0D374, + 0xA489A2, 0xC6258F, 0xE8EF9E, 0x12EE26, 0xC614C2, 0x62E23E, + 0xCA8C5C, 0x409AC9, 0x511D05, 0xA88CE0, 0x195500, 0xF7144F, + 0x913BB7, 0x17D064, 0xF6C9CE, 0xAC5D11, 0xD0C313, 0xBCCCB6, + 0xAAD4FC, 0xE47B2C, 0xFE4362, 0xF2E712, 0x2D5EFF, 0x833822, + 0x58A1D7, 0x68377C, 0xE49B25, 0x22B179, 0x048796, 0x069400, + 0xE670D3, 0xD2CB85, 0x55FBE6, 0x67F281, 0xFE2DE0, 0x8CFAF2, + 0x9865BC, 0x210CD3, 0x86DD70, 0x43D00F, 0x55E279, 0x679252, + 0x8D4F58, 0xE17AC5, 0x6A6127, 0x1B0876, 0x5D8ED0, 0x701330, + 
0xD5BD25, 0xC9A126, 0x57C571, 0xDC5C3F, 0xB6D34E, 0xB72383, + 0x001A9E, 0x7D36C0, 0x8151F6, 0x65D7C1, 0xE1F513, 0xCD372A, + 0xE69B0C, 0xD02685, 0x23C3EB, 0x3544CB, 0xF0BE31, 0x83F399, + 0xCB93F8, 0xFFC693, 0x908EC6, 0x8E5DE1, 0x315B7E, 0x67CE7B, + 0x40AAF7, 0x7FD285, 0x069B36, 0x03C00A, 0x13C7D5, 0x0DA14C, + 0x1EAAD4, 0x2B777F, 0x8E05C1, 0x5AD1AE, 0x60C398, 0xA4EA59, + 0x10BEED, 0x88F2FA, 0x69B941, 0xA54E70, 0xA817C3, 0xB96246, + 0xE8EEDC, 0x56D570, 0xBBEBB5, 0xD8F235, 0x201AB9, 0x9CC747, + 0x5BC2FB, 0xC877F3, 0x428CF6, 0x4EEF84, 0xBF85FD, 0xEE6D34, + 0x84C2DE, 0xC42F4C, 0x1A513B, 0x9AC41F, 0x87FFFA, 0x1CA431, + 0x714252, 0xC73FB9, 0x662D89, 0x3D83BA, 0xBDF046, 0x2E4F62, + 0x76B7C0, 0x81336C, 0xBE80A9, 0x4C9D72, 0x739A15, 0x47972C, + 0xA36A1B, 0xD31731, 0x54BA46, 0x2E8C72, 0xFEA5A5, 0x9A7E5F, + 0xC359ED, 0x8F0FFB, 0x1270DA, 0x5E9B08, 0xB0BFCB, 0x36974C, + 0x6CD8F9, 0xD02E1F, 0x1C3F2F, 0xFCF8F0, 0x4C2C6D, 0x0B2169, + 0x48B9CE, 0x42737D, 0xA8E974, 0x64062D, 0xA86C59, 0xEEC419, + 0x047C83, 0x996A23, 0xF2A4C8, 0x4BE1B8, 0x348286, 0xE84240, + 0x8337CB, 0xE55A2F, 0xC17750, 0xA4DA06, 0x64347F, 0x59A5A1, + 0xDFF53D, 0x62A571, 0xEECF3A, 0x886700, 0xC06DAF, 0x4E161F, + 0x12670E, 0xBDFE1A, 0xA72B38, 0x5BA22C, 0xFED227, 0x3FC814, + 0x150E5A, 0xE99B3A, 0x8EE9FC, 0xBC1845, 0x32373A, 0xBDA476, + 0xCEB88F, 0x7FAED3, 0xDB9116, 0x31CF72, 0x1A5136, 0xC4F362, + 0xDE4799, 0x768043, 0x386207, 0x8E5497, 0xB0EF6D, 0x6C57FB, + 0xF56664, 0xD24F05, 0xE0F702, 0x8A41EF, 0xA2EC53, 0x09731C, + 0x6157FE, 0xC5731C, 0xEF1A2E, 0x60EC10, 0xA67EFE, 0x486A73, + 0x8004F6, 0xC3F482, 0x63BA28, 0x107282, +}; +#endif diff --git a/usr/src/libm/src/LD/_TBL_sinl.c b/usr/src/libm/src/LD/_TBL_sinl.c new file mode 100644 index 0000000..4eb95a8 --- /dev/null +++ b/usr/src/libm/src/LD/_TBL_sinl.c @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_sinl.c 1.9 06/01/31 SMI" + +/* + * For i = 0, ..., 75 let x(i) be the extended precision number + * whose exponent is given by 0x3ffc + ((i + 8) >> 5) and whose + * five most significant fraction bits are given by (i + 8) & 0x1f. + * (The remaining fraction bits are zero and the integer bit is 1.) + * Then _TBL_sinl_hi[i] := sin(x(i)) rounded to extended precision, + * and _TBL_sinl_lo[i] ~ sin(x(i)) - _TBL_sinl_hi[i]. 
+ */ + +#include "libm.h" + +const long double _TBL_sinl_hi[] = { + 1.5561499277355604121432509e-01L, + 1.5947245893184341994353297e-01L, + 1.6332749173661285085207024e-01L, + 1.6718003236480673437500555e-01L, + 1.7103002203139501927501524e-01L, + 1.7487740199027218989302670e-01L, + 1.7872211353515365937804412e-01L, + 1.8256409800047155539783929e-01L, + 1.8640329676226988454758749e-01L, + 1.9023965123909906176839606e-01L, + 1.9407310289290979115543571e-01L, + 1.9790359322994628465735775e-01L, + 2.0173106380163880472144652e-01L, + 2.0555545620549551765724079e-01L, + 2.0937671208599364370531084e-01L, + 2.1319477313546989061102989e-01L, + 2.1700958109501015675778940e-01L, + 2.2082107775533849055107655e-01L, + 2.2462920495770529235180901e-01L, + 2.2843390459477474541995223e-01L, + 2.3223511861151146241076006e-01L, + 2.3603278900606633373558587e-01L, + 2.3982685783066156443802536e-01L, + 2.4361726719247488600575847e-01L, + 2.4740395925452292959266856e-01L, + 2.5496596041587846749013231e-01L, + 2.6251239976915328146124702e-01L, + 2.7004281671858503154006088e-01L, + 2.7755675164633632592044860e-01L, + 2.8505374594054742458945975e-01L, + 2.9253334202332754361585744e-01L, + 2.9999508337868305117438275e-01L, + 3.0743851458038085066887951e-01L, + 3.1486318131974525087106269e-01L, + 3.2226863043338662567511427e-01L, + 3.2965440993086017192298214e-01L, + 3.3702006902225307624892253e-01L, + 3.4436515814569840820730424e-01L, + 3.5168922899481405922451731e-01L, + 3.5899183454606505366498749e-01L, + 3.6627252908604756136416898e-01L, + 3.7353086823869294642950362e-01L, + 3.8076640899239019207055991e-01L, + 3.8797870972702504604426484e-01L, + 3.9516733024093423623426119e-01L, + 4.0233183177777311122311904e-01L, + 4.0947177705329506611003562e-01L, + 4.1658673028204111924766885e-01L, + 4.2367625720393801036934428e-01L, + 4.3073992511080319721861361e-01L, + 4.3777730287275513286178799e-01L, + 4.4478796096452721142060563e-01L, + 4.5177147149168377657582618e-01L, + 
4.5872740821673659236961014e-01L, + 4.6565534658516018269211988e-01L, + 4.7255486375130445115036980e-01L, + 4.7942553860420300028150759e-01L, + 4.9307868575392305727079882e-01L, + 5.0661145481425736764773474e-01L, + 5.2002054195372700474845132e-01L, + 5.3330267353602017331871271e-01L, + 5.4645460691920356440616155e-01L, + 5.5947313124736687740433047e-01L, + 5.7235506823450724037203458e-01L, + 5.8509727294046215482874185e-01L, + 5.9769663453870153121657086e-01L, + 6.1015007707579137127265265e-01L, + 6.2245456022234368301943030e-01L, + 6.3460708001526929683284300e-01L, + 6.4660466959115237050095826e-01L, + 6.5844439991056754159573505e-01L, + 6.7012338047316289465094724e-01L, + 6.8163876002333416675559724e-01L, + 6.9298772724631791026551897e-01L, + 7.0416751145453367277888060e-01L, +}; + +const long double _TBL_sinl_lo[] = { + -4.4044420388485708604352042e-21L, + -9.3658505779466794663857779e-22L, + -5.2040678607071393508410817e-21L, + -4.0395267481940078256007650e-21L, + 6.3327332576496468315469778e-21L, + 2.6586707822142093837984364e-21L, + -2.6878787450050744237345282e-21L, + 1.7063635662305595250654237e-21L, + 4.7924921282538555045455343e-21L, + -4.4101691066939302183010470e-21L, + 6.1948600915447822830980496e-22L, + -4.9638413649749502251618971e-21L, + 3.5916271597651546227926473e-21L, + -4.0777150323673712797756569e-22L, + 6.5799136599779898603647660e-21L, + 5.0431441802236271279596547e-21L, + 2.7886967636804383702412094e-21L, + 1.7797941915507094664564119e-21L, + -1.3804554392939635583829251e-21L, + 4.7855981187615466625152631e-21L, + 3.1708211390406997503004900e-21L, + -1.5157834044725652569873263e-22L, + 3.3438946731684019204631903e-21L, + 6.4724798056855877111175401e-21L, + 4.1801428671953314697839700e-21L, + -2.5757365367012227482016023e-21L, + -1.0297394515771810295074032e-20L, + 1.2694179637735656722464528e-20L, + 1.5748512781011179565308673e-21L, + -1.6967184859202905792705521e-21L, + 8.8448858652331336251731737e-21L, + -1.1134468969040340293241825e-20L, 
+ 1.6234471791025321420471804e-21L, + -6.0263738196054484651751291e-21L, + 1.2631652295822646843414172e-20L, + -8.6644101687582762853694906e-21L, + 1.2359222174923859397271358e-20L, + -1.3219821587241831508027981e-22L, + -1.9324110998995296922101291e-21L, + 1.2722808830089214240915385e-20L, + 8.7403704479785940299212466e-21L, + -1.2663863629342751015966219e-20L, + -1.3359206065200525634208487e-20L, + 6.8148547822187652382727319e-21L, + 1.0571450573402892191582257e-20L, + -6.0134413552023063129130024e-21L, + 1.2658405457632407447211937e-20L, + 1.1443598275137284797608912e-20L, + -7.6602922503647693246330011e-21L, + -2.2920876394624080624512678e-21L, + -1.6901640257671788285214336e-22L, + 1.2450383440926973698285013e-20L, + 5.8625687909310643361252782e-21L, + 7.6848891207540014891539434e-21L, + -1.0920363727912466924531705e-20L, + -3.8184802762435242280438906e-21L, + -8.2196498741416868399433703e-21L, + -5.6622687407305065056015130e-21L, + -5.4387357437209102117877930e-21L, + 1.1762381857741709383097597e-20L, + 1.0418391756080576218864700e-20L, + -2.8119958331524728369894697e-21L, + -1.9486464776808433575964276e-20L, + 1.2919131320458122775352322e-20L, + -2.3342533395278737494836457e-20L, + 2.2076763147253802020227787e-20L, + 1.0897468372542621634126622e-21L, + 2.2496400209117994020651730e-20L, + 1.7466909662624346932394383e-20L, + 2.3083902445127091336067492e-20L, + -6.1510978111621596519832919e-21L, + 3.5843424075843715436394953e-21L, + -2.2355288181001597796661994e-20L, + 1.6296521874464521140945741e-20L, + 1.1789113655896899561477559e-21L, +}; diff --git a/usr/src/libm/src/LD/_TBL_tanl.c b/usr/src/libm/src/LD/_TBL_tanl.c new file mode 100644 index 0000000..1104207 --- /dev/null +++ b/usr/src/libm/src/LD/_TBL_tanl.c @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_tanl.c 1.9 06/01/31 SMI" + +/* + * For i = 0, ..., 75 let x(i) be the extended precision number + * whose exponent is given by 0x3ffc + ((i + 8) >> 5) and whose + * five most significant fraction bits are given by (i + 8) & 0x1f. + * (The remaining fraction bits are zero and the integer bit is 1.) + * Then _TBL_tanl_hi[i] := tan(x(i)) rounded to extended precision, + * and _TBL_tanl_lo[i] ~ tan(x(i)) - _TBL_tanl_hi[i].
+ */ + +#include "libm.h" + +const long double _TBL_tanl_hi[] = { + 1.5753410732527161068790289e-01L, + 1.6153978404952147631388516e-01L, + 1.6555051927393397620861225e-01L, + 1.6956644521976651014845677e-01L, + 1.7358769476798152084980487e-01L, + 1.7761440147744672763405801e-01L, + 1.8164669960332142765752766e-01L, + 1.8568472411563441162006289e-01L, + 1.8972861071805913288790962e-01L, + 1.9377849586689186352228293e-01L, + 1.9783451679023866881187727e-01L, + 2.0189681150741713288741981e-01L, + 2.0596551884857887210688535e-01L, + 2.1004077847455898084587031e-01L, + 2.1412273089695866488913964e-01L, + 2.1821151749846743250413339e-01L, + 2.2230728055343133087249762e-01L, + 2.2641016324867383747423879e-01L, + 2.3052030970457614146129199e-01L, + 2.3463786499642367899603687e-01L, + 2.3876297517602592026663300e-01L, + 2.4289578729361654240565243e-01L, + 2.4703644942004126466383960e-01L, + 2.5118511066924076739260464e-01L, + 2.5534192122103626651019939e-01L, + 2.6368059641999679984405817e-01L, + 2.7205369865877088343545168e-01L, + 2.8046247014525140317325012e-01L, + 2.8890817244051472599780488e-01L, + 2.9739208726902458947518627e-01L, + 3.0591551735305926411887835e-01L, + 3.1447978727257151616261872e-01L, + 3.2308624435174552010563084e-01L, + 3.3173625957357276734381764e-01L, + 3.4043122852383038743446717e-01L, + 3.4917257236591035224446307e-01L, + 3.5796173884801699838350761e-01L, + 3.6680020334432342273152904e-01L, + 3.7568946993175484041940608e-01L, + 3.8463107250414922303567364e-01L, + 3.9362657592563275821902387e-01L, + 4.0267757722514021178576021e-01L, + 4.1178570683410847577655099e-01L, + 4.2095262986947582208789413e-01L, + 4.3018004746423004901363651e-01L, + 4.3946969814786624047050871e-01L, + 4.4882335927923970884728319e-01L, + 4.5824284853443236696884759e-01L, + 4.6773002545239179993303603e-01L, + 4.7728679304125226171028919e-01L, + 4.8691509944840632450355038e-01L, + 4.9661693969756562569970761e-01L, + 5.0639435749622981205141092e-01L, + 
5.1624944711717514451250130e-01L, + 5.2618435535777914417981255e-01L, + 5.3620128358121603136601796e-01L, + 5.4630248984379051326943158e-01L, + 5.6676706558058644568054429e-01L, + 5.8759736759144322142123240e-01L, + 6.0881374032438072139072557e-01L, + 6.3043767383588476685765678e-01L, + 6.5249189792880799270563541e-01L, + 6.7500048514424290766085257e-01L, + 6.9798896362359925515245207e-01L, + 7.2148444099090441996918396e-01L, + 7.4551574055939199512374818e-01L, + 7.7011355134420870501661335e-01L, + 7.9531059356867418562312202e-01L, + 8.2114180158989412189243090e-01L, + 8.4764452644655265410892839e-01L, + 8.7485876055448234952464232e-01L, + 9.0282738745267350217570818e-01L, + 9.3159645994407246116005700e-01L, + 9.6121551049437041616208335e-01L, + 9.9173789836326868026407724e-01L, +}; + +const long double _TBL_tanl_lo[] = { + -2.6771159409105731701405510e-21L, + -4.6099226789741262900210606e-21L, + 5.3186644140375322820802458e-21L, + 2.5138405830938633735686839e-21L, + -5.1314617057806432706999694e-21L, + -2.3150818458524320771936317e-21L, + 7.4823150688409589857878346e-22L, + 6.5983384951777057330962451e-21L, + 3.1737465070309238679637904e-21L, + -6.2605330413009742107992404e-21L, + -3.4708968895421512574248288e-21L, + -3.3508177722855547163047103e-21L, + 1.8539761255947162282442845e-21L, + -4.3527863815358994574071238e-21L, + -3.0729582373746958079080308e-21L, + 1.7486583794617176080777995e-21L, + -2.0880427643688559927261666e-22L, + 3.4326156341633317484064051e-21L, + -5.8444712515543005993510667e-21L, + 3.3308393583864583403400180e-21L, + 1.5180609545016167494014088e-21L, + -4.5664864992230118395870971e-21L, + -3.4486635382887607253671356e-22L, + 6.4992471510018586950169590e-21L, + -5.7171552644357921603079772e-21L, + 1.0767820312749142840542796e-20L, + -8.8873094864264944929118678e-21L, + -3.6458345495736833933253427e-21L, + 3.7835691968285101289024150e-21L, + -7.9922577212991920007926665e-21L, + -1.1639426061963512311797196e-20L, + 
1.0819496381458482697046145e-20L, + 1.2669812351932848585361942e-20L, + -4.3879352642165387665557942e-21L, + -6.2397232294970361376981025e-21L, + 1.0249894624181563425318369e-20L, + 4.8883545518509990780582976e-21L, + -1.0924217224719888561366811e-20L, + -1.0160304466598813882209781e-20L, + 5.1826415091471411711448075e-21L, + 1.0389918683332972349077236e-20L, + -7.1664776574714262163862363e-21L, + 1.2298884220333748071625466e-20L, + -1.3099990378137383497651040e-20L, + 6.6930911371536844477108605e-21L, + 1.3154437144468699485999317e-20L, + -6.7276672708135125503950130e-21L, + -9.6583948799780933132703713e-21L, + -1.1693327591353762422287158e-20L, + 1.2115072030396340314945014e-20L, + -4.8328734014430698289025015e-21L, + 1.0852973061445293626693228e-20L, + 1.9411831283588255256712679e-20L, + -2.0725962316575506668083850e-20L, + -9.1991091819589918968351350e-21L, + -1.8439030785497371079388971e-20L, + -1.4252114398617735096821730e-20L, + -3.6634999903039053547935623e-22L, + -2.7073538111310219812185487e-20L, + 2.1768400635771833866020006e-20L, + -5.0453509036808273670769239e-21L, + 1.8262326404957249986102613e-20L, + 2.3253788272891224529527726e-21L, + -2.6863465601726641017825874e-21L, + 2.0333919445169836552474035e-20L, + 1.2381983326738354735338055e-20L, + -1.0629693225258909983165405e-20L, + 2.2479666845586239075466463e-20L, + 1.8993064919061156630226362e-20L, + -1.8140078592138587341953739e-20L, + 1.5029592868184122759494625e-20L, + 2.0466189644006868146496769e-20L, + 5.1457594757697525471406575e-21L, + 2.3217272240793119168128789e-20L, + -7.2198528398134119662230907e-21L, +}; diff --git a/usr/src/libm/src/LD/__cosl.c b/usr/src/libm/src/LD/__cosl.c new file mode 100644 index 0000000..ddd9f50 --- /dev/null +++ b/usr/src/libm/src/LD/__cosl.c @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__cosl.c 1.7 06/01/31 SMI" + +/* INDENT OFF */ +/* + * __k_cosl( long double x; long double y ) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Table look up algorithm + * 1. by cos(-x) = cos(x), we may replace x by |x| + * 2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000 0), return 1.0 with inexact if x!= 0 + * z = x*x; + * if x <= 1/128 = 2**-7 = 0.0078125 + * cos(x)=1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + * else + * cos(x)=1.0+z*(q1+ ... z*q8) + * 3.
else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7 + * By + * cos(t+x') + * = cos(t)cos(x')-sin(t)sin(x') + * = cos(t)(1+z*(qq1+z*qq2))-[sin(t)]*x*(1+z*(pp1+z*pp2)) + * = cos(t) + [cos(t)]*(z*(qq1+z*qq2))- + * [sin(t)]*x*(1+z*(pp1+z*pp2)) + * + * Thus, + * let a= _TBL_cos_hi[i], b = _TBL_cos_lo[i], c= _TBL_sin_hi[i], + * x = (x-t)+y + * z = x*x; + * cos(t+x) = a+(b+ (-c*x*(1+z*(pp1+z*pp2))+a*(z*(qq1+z*qq2))) + */ + +#include "libm.h" + +extern const long double _TBL_cosl_hi[], _TBL_cosl_lo[], _TBL_sinl_hi[]; +static const long double +one = 1.0, +/* + * |sin(x) - (x+pp1*x^3+...+ pp5*x^11 )| <= 2^-122.32 for |x|<1/64 + */ +pp1 = -1.666666666666666666666666666586782940810e-0001L, +pp2 = 8.333333333333333333333003723660929317540e-0003L, +pp3 = -1.984126984126984076045903483778337804470e-0004L, +pp4 = 2.755731922361906641319723106210900949413e-0006L, +pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * + * |cos(x) - (1+q1*x^2+...+q8*x^16)| <= 2^-117.11 for |x|<= 0.15625 + */ +q1 = -4.999999999999999999999999999999756416975e-0001L, +q2 = 4.166666666666666666666666664006066577258e-0002L, +q3 = -1.388888888888888888888877700363937169637e-0003L, +q4 = 2.480158730158730158494468463031814083559e-0005L, +q5 = -2.755731922398586276322819250356005542871e-0007L, +q6 = 2.087675698767424261441959760729854017855e-0009L, +q7 = -1.147074481239662089072452129010790774761e-0011L, +q8 = 4.777761647399651599730663422263531034782e-0014L, +/* + * + * |cos(x) - (1+qq1*x^2+...+ qq5*x^10)| <= 2^-123.84 for |x|<=1/128 + */ +qq1 = -4.999999999999999999999999999999378373641e-0001L, +qq2 = 4.166666666666666666666665478399327703130e-0002L, +qq3 = -1.388888888888888888058211230618051613494e-0003L, +qq4 = 2.480158730156105377771585658905303111866e-0005L, +qq5 = -2.755728099762526325736488376695157008736e-0007L; +/* INDENT ON */ /* __k_cosl(x, y): cosine kernel for an argument given as head x and tail y. The tail y is only used on the table-lookup path; the polynomial-only path for small |x| evaluates in x alone. */ +long double +__k_cosl(long double x, long double y) { + long
double a, t, z, w; + int *pt = (int *) &t, *px = (int *) &x; /* int views used to read the leading word of x and to patch the leading word of t */ + int i, j, hx, ix; + + t = 1.0; /* all lower words of t are zero; its leading word is replaced below (pt[0] or ITOX) to turn t into the breakpoint */ +#if !defined(__i386) && !defined(__amd64) + hx = px[0]; /* leading word of x */ +#else + XTOI(px, hx); /* NOTE(review): XTOI is defined outside this file; presumably fills hx with the equivalent leading word - confirm */ +#endif + ix = hx & 0x7fffffff; /* clear the sign bit */ + if (ix < 0x3ffc4000) { /* below the first breakpoint: polynomial only, no table lookup */ + if (ix < 0x3fc60000) + if ((i = (int) x) == 0) + return (one); /* generate inexact */ + z = x * x; + + if (ix < 0x3ff80000) /* 0.0078125 */ + return (one + z * (qq1 + z * (qq2 + z * (qq3 + z * + (qq4 + z * qq5))))); + else + return (one + z * (q1 + z * (q2 + z * (q3 + z * (q4 + + z * (q5 + z * (q6 + z * (q7 + z * q8)))))))); + } + j = (ix + 0x400) & 0x7ffff800; /* round ix to the nearest multiple of 0x800: leading word of the breakpoint */ + i = (j - 0x3ffc4000) >> 11; /* table index of that breakpoint */ +#if !defined(__i386) && !defined(__amd64) + pt[0] = j; +#else + ITOX(j, pt); /* NOTE(review): ITOX is defined outside this file; presumably the x86 counterpart of pt[0] = j - confirm */ +#endif + if (hx > 0) + x = y - (t - x); /* offset from the breakpoint: (x - t) plus the tail y */ + else + x = (-y) - (t + x); /* same offset computed for the negated argument: (-x - t) minus y */ + a = _TBL_cosl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * + pp5))))); + t = _TBL_cosl_lo[i] - (_TBL_sinl_hi[i] * w - a * t); /* all small terms are collected in t; the leading table value a is added last */ + return (a + t); +} diff --git a/usr/src/libm/src/LD/__lgammal.c b/usr/src/libm/src/LD/__lgammal.c new file mode 100644 index 0000000..ce59cfe --- /dev/null +++ b/usr/src/libm/src/LD/__lgammal.c @@ -0,0 +1,395 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__lgammal.c 1.4 06/01/31 SMI" + +/* long double __k_lgammal(long double x, int *signgamlp); + * K.C. Ng, August, 1989. + * + * We choose [1.5,2.5] to be the primary interval. Our algorithms + * are mainly derived from + * + * + * lgamma(2+s) = s*(1-euler) + ((zeta(2)-1)/2)*s^2 + * - ((zeta(3)-1)/3)*s^3 + ... + * + * + * + * Note 1. Since gamma(1+s)=s*gamma(s), hence + * lgamma(1+s) = log(s) + lgamma(s), or + * lgamma(s) = lgamma(1+s) - log(s). + * When s is really tiny (like roundoff), lgamma(1+s) ~ s(1-euler) + * Hence lgamma(s) ~ -log(s) for tiny s + * + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +static long double neg(long double, int *); +static long double poly(long double, const long double *, int); +static long double polytail(long double); +static long double primary(long double); + +static const long double +c0 = 0.0L, +ch = 0.5L, +c1 = 1.0L, +c2 = 2.0L, +c3 = 3.0L, +c4 = 4.0L, +c5 = 5.0L, +c6 = 6.0L, +pi = 3.1415926535897932384626433832795028841971L, +tiny = 1.0e-40L; +/* __k_lgammal(x, signgamlp): log-gamma kernel. Non-finite x returns x*x without writing *signgamlp. Otherwise *signgamlp is set to 1 and x with the sign bit set is handed to neg(x, signgamlp). For x < 8 the argument is moved to the primary interval with the recurrence in Note 1; for 8 <= x < 1e40 logl and polytail(1/x) are used; above that x*(logl(x)-1). */ +long double +__k_lgammal(long double x, int *signgamlp) { + long double t,y; + int i; + + /* purge off +-inf, NaN and negative arguments */ + if(!finitel(x)) return x*x; /* returns before *signgamlp is written */ + *signgamlp = 1; + if(signbitl(x)) return(neg(x,signgamlp)); + + /* for x < 8.0 */ + if(x<8.0L) { + y = anintl(x); /* integer nearest to x; x-y is the offset passed to primary() */ + i = (int) y; + switch(i) { + case 0: + if(x<1.0e-40L) return -logl(x); else /* tiny x: lgamma(x) ~ -log(x), see Note 1 */ + return (primary(x)-log1pl(x))-logl(x); /* primary(x) minus log(1+x) minus log(x) */ + case 1: + return primary(x-y)-logl(x); + case 2: + return primary(x-y); /* x already lies in the primary interval */ + case 3: /* cases 3-8: add the log of the product (x-1)*...*(x-(i-2)) */ + return primary(x-y)+logl(x-c1); + case 4: + return primary(x-y)+logl((x-c1)*(x-c2)); +
case 5: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)); + case 6: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)*(x-c4)); + case 7: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)*(x-c4)*(x-c5)); + case 8: + return primary(x-y)+ + logl((x-c1)*(x-c2)*(x-c3)*(x-c4)*(x-c5)*(x-c6)); + } + } + + /* 8.0 <= x < 1.0e40 */ + if (x < 1.0e40L) { + t = logl(x); + return x*(t-c1)-(ch*t-polytail(c1/x)); /* x*(log(x)-1) minus log(x)/2 plus polytail(1/x) */ + } + + /* 1.0e40 <= x <= inf */ + return x*(logl(x)-c1); +} + +static const long double an1[] = { /* 20 terms */ + -0.0772156649015328606065120900824024309741L, + 3.224670334241132182362075833230130289059e-0001L, + -6.735230105319809513324605383668929964120e-0002L, + 2.058080842778454787900092432928910226297e-0002L, + -7.385551028673985266273054086081102125704e-0003L, + 2.890510330741523285758867304409628648727e-0003L, + -1.192753911703260976581414338096267498555e-0003L, + 5.096695247430424562831956662855697824035e-0004L, + -2.231547584535777978926798502084300123638e-0004L, + 9.945751278186384670278268034322157947635e-0005L, + -4.492623673665547726647838474125147631082e-0005L, + 2.050721280617796810096993154281561168706e-0005L, + -9.439487785617396552092393234044767313568e-0006L, + 4.374872903516051510689234173139793159340e-0006L, + -2.039156676413643091040459825776029327487e-0006L, + 9.555777181318621470466563543806211523634e-0007L, + -4.468344919709630637558538313482398989638e-0007L, + 2.216738086090045781773004477831059444178e-0007L, + -7.472783403418388455860445842543843485916e-0008L, + 8.777317930927149922056782132706238921648e-0008L, +}; + +static const long double an2[] = { /* 20 terms */ + -.0772156649015328606062692723698127607018L, + 3.224670334241132182635552349060279118047e-0001L, + -6.735230105319809367555642883133994818325e-0002L, + 2.058080842778459676880822202762143671813e-0002L, + -7.385551028672828216011343150077846918930e-0003L, + 2.890510330762060607399561536905727853178e-0003L, + -1.192753911419623262328187532759756368041e-0003L,
5.096695278636456678258091134532258618614e-0004L, + -2.231547306817535743052975194022893369135e-0004L, + 9.945771461633313282744264853986643877087e-0005L, + -4.492503279458972037926876061257489481619e-0005L, + 2.051311416812082875492678651369394595613e-0005L, + -9.415778282365955203915850761537462941165e-0006L, + 4.452428829045147098722932981088650055919e-0006L, + -1.835024727987632579886951760650722695781e-0006L, + 1.379783080658545009579060714946381462565e-0006L, + 2.282637532109775156769736768748402175238e-0007L, + 1.002577375515900191362119718128149880168e-0006L, + 5.177028794262638311939991106423220002463e-0007L, + 3.127947245174847104122426445937830555755e-0007L, +}; + +static const long double an3[] = { /* 20 terms */ + -.0772156649015328227870646417729220690875L, + 3.224670334241156699881788955959915250365e-0001L, + -6.735230105312273571375431059744975563170e-0002L, + 2.058080842924464587662846071337083809005e-0002L, + -7.385551008677271654723604653956131791619e-0003L, + 2.890510536479782086197110272583833176602e-0003L, + -1.192752262076857692740571567808259138697e-0003L, + 5.096800771149805289371135155128380707889e-0004L, + -2.231000836682831335505058492409860123647e-0004L, + 9.968912171073936803871803966360595275047e-0005L, + -4.412020779327746243544387946167256187258e-0005L, + 2.281374113541454151067016632998630209049e-0005L, + -4.028361291428629491824694655287954266830e-0006L, + 1.470694920619518924598956849226530750139e-0005L, + 1.381686137617987197975289545582377713772e-0005L, + 2.012493539265777728944759982054970441601e-0005L, + 1.723917864208965490251560644681933675799e-0005L, + 1.202954035243788300138608765425123713395e-0005L, + 5.079851887558623092776296577030850938146e-0006L, + 1.220657945824153751555138592006604026282e-0006L, +}; + +static const long double an4[] = { /* 21 terms */ + -.0772156649015732285350261816697540392371L, + 3.224670334221752060691751340365212226097e-0001L, + -6.735230109744009693977755991488196368279e-0002L, + 
2.058080778913037626909954141611580783216e-0002L, + -7.385557567931505621170483708950557506819e-0003L, + 2.890459838416254326340844289785254883436e-0003L, + -1.193059036207136762877351596966718455737e-0003L, + 5.081914708100372836613371356529568937869e-0004L, + -2.289855016133600313131553005982542045338e-0004L, + 8.053454537980585879620331053833498511491e-0005L, + -9.574620532104845821243493405855672438998e-0005L, + -9.269085628207107155601445001196317715686e-0005L, + -2.183276779859490461716196344776208220180e-0004L, + -3.134834305597571096452454999737269668868e-0004L, + -3.973878894951937437018305986901392888619e-0004L, + -3.953352414899222799161275564386488057119e-0004L, + -3.136740932204038779362660900621212816511e-0004L, + -1.884502253819634073946130825196078627664e-0004L, + -8.192655799958926853585332542123631379301e-0005L, + -2.292183750010571062891605074281744854436e-0005L, + -3.223980628729716864927724265781406614294e-0006L, +}; + +static const long double ap1[] = { /* 19 terms */ + -0.0772156649015328606065120900824024296961L, + 3.224670334241132182362075833230047956465e-0001L, + -6.735230105319809513324605382963943777301e-0002L, + 2.058080842778454787900092126606252375465e-0002L, + -7.385551028673985266272518231365020063941e-0003L, + 2.890510330741523285681704570797770736423e-0003L, + -1.192753911703260971285304221165990244515e-0003L, + 5.096695247430420878696018188830886972245e-0004L, + -2.231547584535654004647639737841526025095e-0004L, + 9.945751278137201960636098805852315982919e-0005L, + -4.492623672777606053587919463929044226280e-0005L, + 2.050721258703289487603702670753053765201e-0005L, + -9.439485626565616989352750672499008021041e-0006L, + 4.374838162403994645138200419356844574219e-0006L, + -2.038979492862555348577006944451002161496e-0006L, + 9.536763152382263548086981191378885102802e-0007L, + -4.426111214332434049863595231916564014913e-0007L, + 1.911148847512947464234633846270287546882e-0007L, + -5.788673944861923038157839080272303519671e-0008L, 
+}; + +static const long double ap2[] = { /* 19 terms */ + -0.077215664901532860606428624449354836087L, + 3.224670334241132182271948744265855440139e-0001L, + -6.735230105319809467356126599005051676203e-0002L, + 2.058080842778453315716389815213496002588e-0002L, + -7.385551028673653323064118422580096222959e-0003L, + 2.890510330735923572088003424849289006039e-0003L, + -1.192753911629952368606185543945790688144e-0003L, + 5.096695239806718875364547587043220998766e-0004L, + -2.231547520600616108991867127392089144886e-0004L, + 9.945746913898151120612322833059416008973e-0005L, + -4.492599307461977003570224943054585729684e-0005L, + 2.050609891889165453592046505651759999090e-0005L, + -9.435329866734193796540515247917165988579e-0006L, + 4.362267138522223236241016136585565144581e-0006L, + -2.008556356653246579300491601497510230557e-0006L, + 8.961498103387207161105347118042844354395e-0007L, + -3.614187228330216282235692806488341157741e-0007L, + 1.136978988247816860500420915014777753153e-0007L, + -2.000532786387196664019286514899782691776e-0008L, +}; + +static const long double ap3[] = { /* 19 terms */ + -0.077215664901532859888521470795348856446L, + 3.224670334241131733364048614484228443077e-0001L, + -6.735230105319676541660495145259038151576e-0002L, + 2.058080842775975461837768839015444273830e-0002L, + -7.385551028347615729728618066663566606906e-0003L, + 2.890510327517954083379032008643080256676e-0003L, + -1.192753886919470728001821137439430882603e-0003L, + 5.096693728898932234814903769146577482912e-0004L, + -2.231540055048827662528594010961874258037e-0004L, + 9.945446210018649311491619999438833843723e-0005L, + -4.491608206598064519190236245753867697750e-0005L, + 2.047939071322271016498065052853746466669e-0005L, + -9.376824046522786006677541036631536790762e-0006L, + 4.259329829498149111582277209189150127347e-0006L, + -1.866064770421594266702176289764212873428e-0006L, + 7.462066721137579592928128104534957135669e-0007L, + -2.483546217529077735074007138457678727371e-0007L, + 
5.915166576378161473299324673649144297574e-0008L, + -7.334139641706988966966252333759604701905e-0009L, +}; + +static const long double ap4[] = { /* 19 terms */ + -0.0772156649015326785569313252637238673675L, + 3.224670334241051435008842685722468344822e-0001L, + -6.735230105302832007479431772160948499254e-0002L, + 2.058080842553481183648529360967441889912e-0002L, + -7.385551007602909242024706804659879199244e-0003L, + 2.890510182473907253939821312248303471206e-0003L, + -1.192753098427856770847894497586825614450e-0003L, + 5.096659636418811568063339214203693550804e-0004L, + -2.231421144004355691166194259675004483639e-0004L, + 9.942073842343832132754332881883387625136e-0005L, + -4.483809261973204531263252655050701205397e-0005L, + 2.033260142610284888319116654931994447173e-0005L, + -9.153539544026646699870528191410440585796e-0006L, + 3.988460469925482725894144688699584997971e-0006L, + -1.609692980087029172567957221850825977621e-0006L, + 5.634916377249975825399706694496688803488e-0007L, + -1.560065465929518563549083208482591437696e-0007L, + 2.961350193868935325526962209019387821584e-0008L, + -2.834602215195368130104649234505033159842e-0009L, +}; +/* primary(s): returns ch*s+s*poly(s, table, n), where the coefficient table is chosen by which eighth of [-0.5, 0.5) contains s. NOTE(review): s == 0.5 exactly gives i == 8, which no case handles, so control would reach the end of the function without a return value; confirm the callers (x - anintl(x), or x when anintl(x) is 0) can never pass exactly 0.5. */ +static long double +primary(long double s) { /* assume |s|<=0.5 */ + int i; + + i = (int) (8.0L * (s + 0.5L)); /* 0..7 for s in [-0.5, 0.5) */ + switch(i) { + case 0: return ch*s+s*poly(s,an4,21); + case 1: return ch*s+s*poly(s,an3,20); + case 2: return ch*s+s*poly(s,an2,20); + case 3: return ch*s+s*poly(s,an1,20); + case 4: return ch*s+s*poly(s,ap1,19); + case 5: return ch*s+s*poly(s,ap2,19); + case 6: return ch*s+s*poly(s,ap3,19); + case 7: return ch*s+s*poly(s,ap4,19); + } + /* NOTREACHED */ +} +/* poly(s, p, n): Horner evaluation of the degree n-1 polynomial with coefficients p[0..n-1] at s; p[n-1] is read unconditionally, so n must be at least 1. */ +static long double +poly(long double s, const long double *p, int n) { + long double y; + int i; + y = p[n-1]; + for (i=n-2;i>=0;i--) y = p[i]+s*y; + return y; +} + +static const long double pt[] = { + 9.189385332046727417803297364056176804663e-0001L, + 8.333333333333333333333333333331286969123e-0002L, + -2.777777777777777777777777553194796036402e-0003L,
7.936507936507936507927283071433584248176e-0004L, + -5.952380952380952362351042163192634108297e-0004L, + 8.417508417508395661774286645578379460131e-0004L, + -1.917526917525263651186066417934685675649e-0003L, + 6.410256409395203164659292973142293199083e-0003L, + -2.955065327248303301763594514012418438188e-0002L, + 1.796442830099067542945998615411893822886e-0001L, + -1.392413465829723742489974310411118662919e+0000L, + 1.339984238037267658352656597960492029261e+0001L, + -1.564707657605373662425785904278645727813e+0002L, + 2.156323807499211356127813962223067079300e+0003L, + -3.330486427626223184647299834137041307569e+0004L, + 5.235535072011889213611369254140123518699e+0005L, + -7.258160984602220710491988573430212593080e+0006L, + 7.316526934569686459641438882340322673357e+0007L, + -3.806450279064900548836571789284896711473e+0008L, +}; + +static long double +polytail(long double s) { + long double t,z; + int i; + z = s*s; + t = pt[18]; + for (i=17;i>=1;i--) t = pt[i]+z*t; + return pt[0]+s*t; +} + +static long double +neg(long double z, int *signgamlp) { + long double t,p; + + /* + * written by K.C. Ng, Feb 2, 1989. + * + * Since + * -z*G(-z)*G(z) = pi/sin(pi*z), + * we have + * G(-z) = -pi/(sin(pi*z)*G(z)*z) + * = pi/(sin(pi*(-z))*G(z)*z) + * Algorithm + * z = |z| + * t = sinpi(z); ...note that when z>2**112, z is an int + * and hence t=0. 
+ * + * if(t==0.0) return 1.0/0.0; + * if(t< 0.0) *signgamlp = -1; else t= -t; + * if(z<1.0e-40) ...tiny z + * return -log(z); + * else + * return log(pi/(t*z))-lgamma(z); + * + */ + + t = sinpil(z); /* t := sin(pi*z) */ + if (t==c0) /* return 1.0/0.0 = +INF */ + return c1/c0; + + z = -z; + if(z<=tiny) + p = -logl(z); + else + p = logl(pi/(fabsl(t)*z))-__k_lgammal(z,signgamlp); + if(t=0;i--) t = p[i] + x*t; + return t; +} diff --git a/usr/src/libm/src/LD/__rem_pio2l.c b/usr/src/libm/src/LD/__rem_pio2l.c new file mode 100644 index 0000000..9d61169 --- /dev/null +++ b/usr/src/libm/src/LD/__rem_pio2l.c @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__rem_pio2l.c 1.10 06/01/31 SMI" + +/* __rem_pio2l(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] + * by calling __rem_pio2m + */ + +#include "libm.h" + +extern const int _TBL_ipio2l_inf[]; + +static const long double + two24l = 16777216.0L, + pio4 = 0.7853981633974483096156608458198757210495L; +/* Returns 0 with y[0] = x and y[1] = 0 when |x| <= pio4. Otherwise |x| is split into three integer-valued chunks, passed to __rem_pio2m, and the two values it leaves in v[0], v[1] are stored as a head/tail pair in y[0], y[1]; for negative x both parts and the returned value are negated. */ +int +__rem_pio2l(long double x, long double *y) +{ + long double z, w; + double t[3], v[5]; + int e0, i, nx, n, sign; + + sign = signbitl(x); + z = fabsl(x); + if (z <= pio4) { + y[0] = x; + y[1] = 0; + return (0); + } + e0 = ilogbl(z) - 23; /* exponent removed by the scaling below and passed on to __rem_pio2m */ + z = scalbnl(z, -e0); /* presumably leaves z in [2^23, 2^24), given the usual ilogbl meaning - confirm */ + for (i = 0; i < 3; i++) { + t[i] = (double)((int)(z)); /* peel off the integer part as the next chunk */ + z = (z - (long double)t[i]) * two24l; /* move the remaining fraction up by 2^24 */ + } + nx = 3; + while (t[nx-1] == 0.0) + nx--; /* omit trailing zeros */ + n = __rem_pio2m(t, v, e0, nx, 2, _TBL_ipio2l_inf); /* NOTE(review): __rem_pio2m is defined elsewhere; confirm the meaning of its arguments there */ + z = (long double)v[1]; + w = (long double)v[0]; + y[0] = z + w; + y[1] = z - (y[0] - w); /* the part of z that did not fit into y[0] */ + if (sign == 1) { /* NOTE(review): relies on signbitl() returning exactly 1 for negative x - confirm */ + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); +} diff --git a/usr/src/libm/src/LD/__sincosl.c b/usr/src/libm/src/LD/__sincosl.c new file mode 100644 index 0000000..7960081 --- /dev/null +++ b/usr/src/libm/src/LD/__sincosl.c @@ -0,0 +1,151 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__sincosl.c 1.7 06/01/31 SMI"
+
+/* INDENT OFF */
+/*
+ * long double __k_sincosl( long double x, long double y, long double *c )
+ * kernel sincosl function on [-pi/4, pi/4], pi/4 ~ 0.785398164
+ * Input x is assumed to be bounded by ~pi/4 in magnitude.
+ * Input y is the tail of x.
+ * return sinl(x) with *c = cosl(x)
+ *
+ * Table look up algorithm
+ * see __k_sinl() and __k_cosl()
+ *
+ * Outline of the code below (hx = word read from x, ix = hx & 0x7fffffff):
+ *	1. ix < 0x3ffc4000: polynomials in z = x*x.
+ *	     ix < 0x3fc60000 and (int)x == 0: return x with *c = 1
+ *	     ix < 0x3ff80000: *c from qq1..qq5, sine from p1..p6
+ *	     otherwise:       *c from q1..q8,  sine from p1..p8
+ *	   The sine is returned as x + (y + x*poly); *c does not use y.
+ *	2. otherwise: j = (ix + 0x400) & 0x7ffff800 is written into t
+ *	   (pt[0] = j, or ITOX on x86), i = (j - 0x3ffc4000) >> 11 indexes
+ *	   the tables, x is replaced by (|x| - t) plus the sign-adjusted
+ *	   tail, and with a1 = _TBL_sinl_hi[i], a2 = _TBL_cosl_hi[i],
+ *	   w = pp polynomial times x, t = qq polynomial:
+ *	     sine   = a1 + (a2*w + a1*t + _TBL_sinl_lo[i]), negated if hx < 0
+ *	     *c     = a2 + (_TBL_cosl_lo[i] - (a1*w - a2*t))
+ */
+
+#include "libm.h"
+
+extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], _TBL_cosl_hi[],
+	_TBL_cosl_lo[];
+static const long double
+one = 1.0,
+/*
+ * |sin(x) - (x+pp1*x^3+...+pp5*x^11)| <= 2^-122.32 for |x|<1/64
+ */
+pp1 = -1.666666666666666666666666666586782940810e-0001L,
+pp2 = 8.333333333333333333333003723660929317540e-0003L,
+pp3 = -1.984126984126984076045903483778337804470e-0004L,
+pp4 = 2.755731922361906641319723106210900949413e-0006L,
+pp5 = -2.505198398570947019093998469135012057673e-0008L,
+/*
+ * |(sin(x) - (x+p1*x^3+...+p8*x^17)|
+ * |------------------------------- | <= 2^-116.17 for |x|<0.1953125
+ * |                 x              |
+ */
+p1 = -1.666666666666666666666666666666211262297e-0001L,
+p2 = 8.333333333333333333333333301497876908541e-0003L,
+p3 = -1.984126984126984126984041302881180621922e-0004L,
+p4 = 2.755731922398589064100587351307269621093e-0006L,
+p5 = -2.505210838544163129378906953765595393873e-0008L,
+p6 = 1.605904383643244375050998243778534074273e-0010L,
+p7 = -7.647162722800685516901456114270824622699e-0013L,
+p8 = 2.810046428661902961725428841068844462603e-0015L,
+/*
+ *
+ * |cos(x) - (1+qq1*x^2+...+ qq5*x^10)| <= 2^-123.84 for |x|<=1/128
+ */
+qq1 = -4.999999999999999999999999999999378373641e-0001L,
+qq2 = 4.166666666666666666666665478399327703130e-0002L,
+qq3 = -1.388888888888888888058211230618051613494e-0003L,
+qq4 = 2.480158730156105377771585658905303111866e-0005L,
+qq5 = -2.755728099762526325736488376695157008736e-0007L,
+/*
+ *
+ * |cos(x) - (1+q1*x^2+...+ q8*x^16)| <= 2^-117.11 for |x|<= 0.15625
+ */
+q1 = -4.999999999999999999999999999999756416975e-0001L,
+q2 = 4.166666666666666666666666664006066577258e-0002L,
+q3 = -1.388888888888888888888877700363937169637e-0003L,
+q4 = 2.480158730158730158494468463031814083559e-0005L,
+q5 = -2.755731922398586276322819250356005542871e-0007L,
+q6 = 2.087675698767424261441959760729854017855e-0009L,
+q7 = -1.147074481239662089072452129010790774761e-0011L,
+q8 = 4.777761647399651599730663422263531034782e-0014L;
+/* INDENT ON */
+long double
+__k_sincosl(long double x, long double y, long double *c) {
+	long double a1, a2, t, t1, t2, z, w;
+	int *pt = (int *) &t, *px = (int *) &x;
+	int i, j, hx, ix;
+
+	t = 1.0;	/* j is written into this value further down */
+#if !defined(__i386) && !defined(__amd64)
+	hx = px[0];
+#else
+	XTOI(px, hx);
+#endif
+	ix = hx & 0x7fffffff;	/* drop the sign bit */
+	if (ix < 0x3ffc4000) {
+		if (ix < 0x3fc60000)
+			if (((int) x) == 0) {
+				*c = one;
+				return (x);
+			}	/* generate inexact */
+		z = x * x;
+
+		if (ix < 0x3ff80000) {
+			*c = one + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 +
+				z * qq5))));
+			t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 +
+				z * p6)))));
+		} else {
+			*c = one + z * (q1 + z * (q2 + z * (q3 + z * (q4 + z *
+				(q5 + z * (q6 + z * (q7 + z * q8)))))));
+			t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 +
+				z * (p6 + z * (p7 + z * p8)))))));
+		}
+
+		t = y + x * t;	/* fold in the tail of x */
+		return (x + t);
+	}
+	j = (ix + 0x400) & 0x7ffff800;	/* rounded break point */
+	i = (j - 0x3ffc4000) >> 11;	/* table index */
+#if !defined(__i386) && !defined(__amd64)
+	pt[0] = j;
+#else
+	ITOX(j, pt);
+#endif
+	if (hx > 0)
+		x = y - (t - x);	/* (x - t) + y */
+	else
+		x = (-y) - (t + x);	/* (-x - t) - y */
+	a1 = _TBL_sinl_hi[i];
+	z = x * x;
+	t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
+	w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z *
+		pp5)))));
+	a2 = _TBL_cosl_hi[i];
+	t2 = _TBL_cosl_lo[i] - (a1 * w - a2 * t);
+	*c = a2 + t2;
+	t1 = a2 * w + a1 * t;
+	t1 += _TBL_sinl_lo[i];
+	if (hx < 0)
+		return (-a1 - t1);
+	else
+		return (a1 + t1);
+}
diff --git a/usr/src/libm/src/LD/__sinl.c b/usr/src/libm/src/LD/__sinl.c
new file mode 100644
index 0000000..375aca0
--- /dev/null
+++ b/usr/src/libm/src/LD/__sinl.c
@@ -0,0 +1,145 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__sinl.c 1.8 06/01/31 SMI"
+
+/* INDENT OFF */
+/*
+ * __k_sinl( long double x; long double y )
+ * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164
+ * Input x is assumed to be bounded by ~pi/4 in magnitude.
+ * Input y is the tail of x.
+ *
+ * Table look up algorithm
+ *	1. by sin(-x) = -sin(x), need only to consider positive x
+ *	2.
if x < 25/128 = [0x3ffc9000,0,0,0] = 0.1953125 , then
+ *	     if x < 2^-57 (hx < 0x3fc60000,0,0,0), return x (inexact if x!= 0)
+ *	     z = x*x;
+ *	     sin(x) = x + (y+x*(z*(p1 + z*(p2 + ... + z*p8))))
+ *	     (the code below uses this one polynomial for the whole range;
+ *	     it has no separate x <= 1/64 case)
+ *	3. else
+ *	     ht = (hx + 0x400)&0x7ffff800	(round x to a break point t)
+ *	     lt = 0
+ *	     i = (ht-0x3ffc4000)>>11;	(i<=64)
+ *	     x' = (x - t)+y		(|x'| ~<= 2^-7)
+ *	   By
+ *		sin(t+x')
+ *		  = sin(t)cos(x')+cos(t)sin(x')
+ *		  = sin(t)(1+z*(qq1+z*qq2))+[cos(t)]*x*(1+z*(pp1+z*pp2))
+ *		  = sin(t) + [sin(t)]*(z*(qq1+z*qq2))+
+ *				[cos(t)]*x*(1+z*(pp1+z*pp2))
+ *	   (shown with two terms each; the code evaluates qq1..qq5 and
+ *	   pp1..pp5)
+ *
+ *	   Thus,
+ *		let a= _TBL_sinl_hi[i], b = _TBL_sinl_lo[i], c= _TBL_cosl_hi[i],
+ *		x = (x-t)+y
+ *		z = x*x;
+ *		sin(t+x) = a+(b+ ((c*x)*(1+z*(pp1+z*pp2))+a*(z*(qq1+z*qq2))))
+ */
+
+#include "libm.h"
+
+extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], _TBL_cosl_hi[];
+static const long double
+one = 1.0,
+/*
+ * |sin(x) - (x+pp1*x^3+...+ pp5*x^11)| <= 2^-122.32 for |x|<1/64
+ */
+pp1 = -1.666666666666666666666666666586782940810e-0001L,
+pp2 = 8.333333333333333333333003723660929317540e-0003L,
+pp3 = -1.984126984126984076045903483778337804470e-0004L,
+pp4 = 2.755731922361906641319723106210900949413e-0006L,
+pp5 = -2.505198398570947019093998469135012057673e-0008L,
+/*
+ * |(sin(x) - (x+p1*x^3+...+p8*x^17)|
+ * |------------------------------- | <= 2^-116.17 for |x|<0.1953125
+ * |                 x              |
+ */
+p1 = -1.666666666666666666666666666666211262297e-0001L,
+p2 = 8.333333333333333333333333301497876908541e-0003L,
+p3 = -1.984126984126984126984041302881180621922e-0004L,
+p4 = 2.755731922398589064100587351307269621093e-0006L,
+p5 = -2.505210838544163129378906953765595393873e-0008L,
+p6 = 1.605904383643244375050998243778534074273e-0010L,
+p7 = -7.647162722800685516901456114270824622699e-0013L,
+p8 = 2.810046428661902961725428841068844462603e-0015L,
+/*
+ * |cos(x) - (1+qq1*x^2+...+ qq5*x^10)| <= 2^-123.84 for |x|<=1/128
+ */
+qq1 = -4.999999999999999999999999999999378373641e-0001L,
+qq2 = 4.166666666666666666666665478399327703130e-0002L,
+qq3 = -1.388888888888888888058211230618051613494e-0003L,
+qq4 = 2.480158730156105377771585658905303111866e-0005L,
+qq5 = -2.755728099762526325736488376695157008736e-0007L;
+/* INDENT ON */
+long double
+__k_sinl(long double x, long double y) {
+	long double a, t, z, w;
+	int *pt = (int *) &t, *px = (int *) &x;
+	int i, j, hx, ix;
+
+	t = 1.0L;	/* j is written into this value further down */
+#if !defined(__i386) && !defined(__amd64)
+	hx = px[0];
+#else
+	XTOI(px, hx);
+#endif
+	ix = hx & 0x7fffffff;	/* drop the sign bit */
+	if (ix < 0x3ffc9000) {
+		if (ix < 0x3fc60000)
+			if (((int) x) == 0)
+				return (x);	/* generate inexact */
+		z = x * x;
+		t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 + z *
+			(p6 + z * (p7 + z * p8)))))));
+		t = y + x * t;	/* fold in the tail of x */
+		return (x + t);
+	}
+	j = (ix + 0x400) & 0x7ffff800;	/* rounded break point */
+	i = (j - 0x3ffc4000) >> 11;	/* table index */
+#if !defined(__i386) && !defined(__amd64)
+	pt[0] = j;
+#else
+	ITOX(j, pt);
+#endif
+	if (hx > 0)
+		x = y - (t - x);	/* (x - t) + y */
+	else
+		x = (-y) - (t + x);	/* (-x - t) - y */
+	a = _TBL_sinl_hi[i];
+	z = x * x;
+	t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
+	w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z *
+		pp5)))));
+	t = _TBL_cosl_hi[i] * w + a * t;
+	t += _TBL_sinl_lo[i];
+	if (hx < 0)
+		return (-a - t);
+	else
+		return (a + t);
+}
diff --git a/usr/src/libm/src/LD/__tanl.c b/usr/src/libm/src/LD/__tanl.c
new file mode 100644
index 0000000..d32299c
--- /dev/null
+++ b/usr/src/libm/src/LD/__tanl.c
@@ -0,0 +1,168 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__tanl.c 1.7 06/01/31 SMI"
+
+/* INDENT OFF */
+/*
+ * __k_tanl( long double x; long double y; int k )
+ * kernel tan/cotan function on [-pi/4, pi/4], pi/4 ~ 0.785398164
+ * Input x is assumed to be bounded by ~pi/4 in magnitude.
+ * Input y is the tail of x.
+ * Input k indicate -- tan if k=0; else -1/tan
+ *
+ * Table look up algorithm
+ *	1. by tan(-x) = -tan(x), need only to consider positive x
+ *	2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then
+ *	     if x < 2^-57 (hx < 0x3fc60000 0), set w=x with inexact if x!= 0
+ *	     else
+ *		z = x*x;
+ *		w = x + (y+x*(z*(t1+z*(t2+z*(t3+ ... )))))
+ *		(the code stops at t4 when hx < 0x3ff30000, i.e. x < 2^-12,
+ *		and goes up to t13 otherwise)
+ *	     return (k == 0 ? w : -1/w);
+ *	3. else
+ *	     ht = (hx + 0x400)&0x7ffff800	(round x to a break point t)
+ *	     lt = 0
+ *	     i = (ht-0x3ffc4000)>>11;	(i<=64)
+ *	     x' = (x - t)+y		(|x'| ~<= 2^-7)
+ *	   By
+ *		tan(t+x')
+ *		  = (tan(t)+tan(x'))/(1-tan(x')tan(t))
+ *	   We have
+ *			     sin(x')+tan(t)*(tan(t)*sin(x'))
+ *		  = tan(t) + -------------------------------	for k=0
+ *			        cos(x') - tan(t)*sin(x')
+ *
+ *			      cos(x') - tan(t)*sin(x')
+ *		  = - --------------------------------------	for k!=0
+ *		       tan(t) + tan(t)*(cos(x')-1) + sin(x')
+ *
+ *
+ *	   where tan(t) is from the table,
+ *		sin(x') = x + pp1*x^3 + ...+ pp5*x^11
+ *		cos(x') = 1 + qq1*x^2 + ...+ qq5*x^10
+ */
+
+#include "libm.h"
+
+extern const long double _TBL_tanl_hi[], _TBL_tanl_lo[];
+static const long double
+one = 1.0,
+/*
+ * |sin(x) - (x+pp1*x^3+...+ pp5*x^11)| <= 2^-122.32 for |x|<1/64
+ */
+pp1 = -1.666666666666666666666666666586782940810e-0001L,
+pp2 = 8.333333333333333333333003723660929317540e-0003L,
+pp3 = -1.984126984126984076045903483778337804470e-0004L,
+pp4 = 2.755731922361906641319723106210900949413e-0006L,
+pp5 = -2.505198398570947019093998469135012057673e-0008L,
+/*
+ * |cos(x) - (1+qq1*x^2+...+ qq5*x^10)| <= 2^-123.84 for |x|<=1/128
+ */
+qq1 = -4.999999999999999999999999999999378373641e-0001L,
+qq2 = 4.166666666666666666666665478399327703130e-0002L,
+qq3 = -1.388888888888888888058211230618051613494e-0003L,
+qq4 = 2.480158730156105377771585658905303111866e-0005L,
+qq5 = -2.755728099762526325736488376695157008736e-0007L,
+/*
+ * |tan(x) - (x+t1*x^3+...+t6*x^13)|
+ * |------------------------------ | <= 2^-59.73 for |x|<0.15625
+ * |                 x             |
+ *
+ * NOTE(review): this bound names t6/x^13 but thirteen coefficients
+ * (t1..t13) are defined and used -- confirm the bound against them.
+ */
+t1 = 3.333333333333333333333333333333423342490e-0001L,
+t2 = 1.333333333333333333333333333093838744537e-0001L,
+t3 = 5.396825396825396825396827906318682662250e-0002L,
+t4 = 2.186948853615520282185576976994418486911e-0002L,
+t5 = 8.863235529902196573354554519991152936246e-0003L,
+t6 = 3.592128036572480064652191427543994878790e-0003L,
+t7 = 1.455834387051455257856833807581901305474e-0003L,
+t8 = 5.900274409318599857829983256201725587477e-0004L,
+t9 = 2.391291152117265181501116961901122362937e-0004L,
+t10 = 9.691533169382729742394024173194981882375e-0005L,
+t11 = 3.927994733186415603228178184225780859951e-0005L,
+t12 = 1.588300018848323824227640064883334101288e-0005L,
+t13 = 6.916271223396808311166202285131722231723e-0006L;
+/* INDENT ON */
+long double
+__k_tanl(long double x, long double y, int k) {
+	long double a, t, z, w, s, c;
+	int *pt = (int *) &t, *px = (int *) &x;
+	int i, j, hx, ix;
+
+	t = 1.0;	/* j is written into this value further down */
+#if !defined(__i386) && !defined(__amd64)
+	hx = px[0];
+#else
+	XTOI(px, hx);
+#endif
+	ix = hx & 0x7fffffff;	/* drop the sign bit */
+	if (ix < 0x3ffc4000) {
+		if (ix < 0x3fc60000) {
+			if ((i = (int) x) == 0)	/* generate inexact */
+				w = x;	/* NOTE(review): w is set only here */
+		} else {
+			z = x * x;
+			if (ix < 0x3ff30000)	/* 2**-12 */
+				t = z * (t1 + z * (t2 + z * (t3 + z * t4)));
+			else
+				t = z * (t1 + z * (t2 + z * (t3 + z * (t4 +
+				    z * (t5 + z * (t6 + z * (t7 + z *
+				    (t8 + z * (t9 + z * (t10 + z * (t11 +
+				    z * (t12 + z * t13))))))))))));
+			t = y + x * t;	/* fold in the tail of x */
+			w = x + t;
+		}
+		return (k == 0 ? w : -one / w);
+	}
+	j = (ix + 0x400) & 0x7ffff800;	/* rounded break point */
+	i = (j - 0x3ffc4000) >> 11;	/* table index */
+#if !defined(__i386) && !defined(__amd64)
+	pt[0] = j;
+#else
+	ITOX(j, pt);
+#endif
+	if (hx > 0)
+		x = y - (t - x);	/* (x - t) + y */
+	else
+		x = (-y) - (t + x);	/* (-x - t) - y */
+	a = _TBL_tanl_hi[i];
+	z = x * x;
+	/* cos(x)-1 */
+	t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
+	/* sin(x) */
+	s = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z *
+		pp5)))));
+	if (k == 0) {
+		w = a * s;
+		t = _TBL_tanl_lo[i] + (s + a * w) / (one - (w - t));
+		return (hx < 0 ? -a - t : a + t);
+	} else {
+		w = s + a * t;
+		c = w + _TBL_tanl_lo[i];
+		z = (one - (a * s - t));
+		return (hx >= 0 ? z / (-a - c) : z / (a + c));
+	}
+}
diff --git a/usr/src/libm/src/LD/acoshl.c b/usr/src/libm/src/LD/acoshl.c
new file mode 100644
index 0000000..be53cc4
--- /dev/null
+++ b/usr/src/libm/src/LD/acoshl.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)acoshl.c 1.6 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak acoshl = __acoshl
+#endif
+
+#include "libm.h"
+
+static const long double
+	zero = 0.0L,
+	ln2 = 6.931471805599453094172321214581765680755e-0001L,
+	one = 1.0L,
+	big = 1.e+20L;
+
+/*
+ * acoshl(x)
+ *
+ *	x is NaN	: x + x
+ *	x <  1		: (x - x) / (x - x)
+ *	x == 1		: 0
+ *	1 < x <= big	: log1pl(t * (t + sqrtl(x + 1))), t = sqrtl(x - 1)
+ *	x >  big	: logl(x) + ln2
+ */
+long double
+acoshl(long double x) {
+	long double root;
+
+	if (isnanl(x))
+		return (x + x);
+
+	/* NaN is gone, so the ordered comparisons below cover every x */
+	if (x < one)
+		return ((x - x) / (x - x));
+	if (x == one)
+		return (zero);
+	if (x > big)
+		return (logl(x) + ln2);
+
+	root = sqrtl(x - one);
+	return (log1pl(root * (root + sqrtl(x + one))));
+}
diff --git a/usr/src/libm/src/LD/asinhl.c b/usr/src/libm/src/LD/asinhl.c
new file mode 100644
index 0000000..453bcef
--- /dev/null
+++ b/usr/src/libm/src/LD/asinhl.c
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)asinhl.c 1.6 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak asinhl = __asinhl
+#endif
+
+#include "libm.h"
+
+static const long double
+	ln2 = 6.931471805599453094172321214581765680755e-0001L,
+	one = 1.0L,
+	big = 1.0e+20L,
+	tiny = 1.0e-20L;
+
+/*
+ * asinhl(x), with ax = |x|
+ *
+ *	x is NaN	  : x + x
+ *	ax >= big	  : copysignl(logl(ax) + ln2, x)
+ *	tiny <= ax < big  : copysignl(log1pl(ax + ax/(r + sqrtl(1 + r*r))), x)
+ *			    with r = 1/ax
+ *	ax <  tiny	  : x itself, after evaluating x + big
+ */
+long double
+asinhl(long double x) {
+	long double ax, recip;
+
+	ax = fabsl(x);
+	if (isnanl(x))
+		return (x + x);	/* x is NaN */
+
+	if (ax >= big)
+		return (copysignl(logl(ax) + ln2, x));
+
+	if (ax >= tiny) {
+		recip = one / ax;
+		return (copysignl(log1pl(ax + ax / (recip +
+		    sqrtl(one + recip * recip))), x));
+	}
+
+	/* tiny x */
+	{
+#ifndef lint
+		volatile long double dummy = x + big;	/* inexact if x != 0 */
+#endif
+	}
+	return (x);
+}
diff --git a/usr/src/libm/src/LD/atan2pil.c b/usr/src/libm/src/LD/atan2pil.c
new file mode 100644
index 0000000..1d1c58f
--- /dev/null
+++ b/usr/src/libm/src/LD/atan2pil.c
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)atan2pil.c 1.5 06/01/31 SMI"
+
+#pragma weak atan2pil = __atan2pil
+
+#include "libm.h"
+#include "libm_synonyms.h"
+
+#define GENERIC long double
+#define ATAN2PI atan2pil
+#define ATAN2 atan2l
+
+/*
+ * ATAN2PI(y,x)
+ *
+ * ATAN2PI(y,x) = ATAN2(y,x)/pi
+ *
+ * y, x		the two arguments handed unchanged to ATAN2
+ * returns	ATAN2(y,x) multiplied by invpi, the constant 1/pi below
+ */
+
+/* prototype instead of an empty parameter list, so calls are checked */
+extern GENERIC ATAN2(GENERIC, GENERIC);
+
+static const GENERIC
+invpi = (GENERIC) 3.183098861837906715377675267450287240689e-0001L;
+
+GENERIC
+ATAN2PI(GENERIC y, GENERIC x)
+{
+	return (ATAN2(y, x) * invpi);
+}
diff --git a/usr/src/libm/src/LD/atanhl.c b/usr/src/libm/src/LD/atanhl.c
new file mode 100644
index 0000000..d37bcf5
--- /dev/null
+++ b/usr/src/libm/src/LD/atanhl.c
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)atanhl.c 1.5 06/01/31 SMI"
+
+#pragma weak atanhl = __atanhl
+
+#include "libm.h"
+#include "libm_synonyms.h"
+
+#define GENERIC long double
+#define ATANH atanhl
+
+/*
+ * ATANH(x)
+ *                  1              2x                          x
+ *	ATANH(x) = --- * LOG(1 + -------) = 0.5 * LOG1P(2 * --------)
+ *                  2             1 - x                      1 - x
+ * Note: to guarantee ATANH(-x) = -ATANH(x), we use
+ *                 sign(x)             |x|
+ *	ATANH(x) = ------- * LOG1P(2*-------).
+ *                    2              1 - |x|
+ *
+ * Special cases:
+ *	ATANH(x) is NaN if |x| > 1 with signal;
+ *	ATANH(NaN) is that NaN with no signal;
+ *	ATANH(+-1) is +-INF with signal.
+ *
+ * In the code: |x| == 1 returns x/zero; every other x goes through
+ * t = |x|/(1 - |x|) and COPYSIGN(half,x)*LOG1P(t+t).
+ */
+
+#define FABS fabsl
+#define LOG1P log1pl
+#define COPYSIGN copysignl
+
+/* prototypes instead of empty parameter lists, so calls are checked */
+extern GENERIC FABS(GENERIC);
+extern GENERIC LOG1P(GENERIC);
+extern GENERIC COPYSIGN(GENERIC, GENERIC);
+
+static const GENERIC
+zero = (GENERIC) 0.0,
+half = (GENERIC) 0.5,
+one = (GENERIC) 1.0;
+
+GENERIC
+ATANH(GENERIC x)
+{
+	GENERIC t;
+
+	t = FABS(x);
+	if (t == one)
+		return (x / zero);
+	t = t / (one - t);
+	return (COPYSIGN(half, x) * LOG1P(t + t));
+}
diff --git a/usr/src/libm/src/LD/cbrtl.c b/usr/src/libm/src/LD/cbrtl.c
new file mode 100644
index 0000000..918c9b7
--- /dev/null
+++ b/usr/src/libm/src/LD/cbrtl.c
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)cbrtl.c 1.6 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak cbrtl = __cbrtl
+#endif
+
+#include "libm.h"
+
+static const double d_one = 1.0;
+
+/*
+ * cbrtl(x)
+ *
+ * Non-finite x returns x + x and zero returns x unchanged.  Otherwise
+ * |x| is scaled by 2^(-3m), m = ilogbl(|x|)/3, rounded to double and
+ * passed to cbrt().  That double estimate is bumped by two units in its
+ * low 32-bit word, scaled back by 2^m, and refined by one iteration in
+ * long double.  The sign of x is restored at the end.
+ */
+long double
+cbrtl(long double x) {
+	long double s, t, r, w, y;
+	double dx, dy;
+	/* unsigned words: the += 2 below must wrap, not overflow */
+	unsigned int *py = (unsigned int *) &dy;
+	int n, m, m3, n0, sx;
+
+	if (!finitel(x))
+		return (x + x);
+	if (iszerol(x))
+		return (x);
+	/* n0 indexes the high word of a double: 1 if 1.0 starts with a zero word */
+	n0 = 0;
+	if (*((int *) &d_one) == 0)
+		n0 = 1;
+	sx = signbitl(x);
+	x = fabsl(x);
+	n = ilogbl(x);
+	m = n / 3;
+	m3 = m + m + m;
+	y = scalbnl(x, -m3);
+	dx = (double) y;
+	dy = cbrt(dx);
+	py[1 - n0] += 2;
+	/*
+	 * The low word wrapped if it is now 0 or 1.  Testing only for 0
+	 * would miss a low word of 0xffffffff and drop the carry.
+	 */
+	if (py[1 - n0] < 2)
+		py[n0] += 1;
+
+	/* one step newton iteration to 113 bits with error < 0.667ulps */
+	t = (long double) dy;
+	t = scalbnl(t, m);
+	s = t * t;
+	r = x / s;
+	w = t + t;
+	r = (r - t) / (w + r);
+	t += t * r;
+
+	return (sx == 0 ? t : -t);
+}
diff --git a/usr/src/libm/src/LD/coshl.c b/usr/src/libm/src/LD/coshl.c
new file mode 100644
index 0000000..93604fc
--- /dev/null
+++ b/usr/src/libm/src/LD/coshl.c
@@ -0,0 +1,107 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)coshl.c 1.7 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak coshl = __coshl
+#endif
+
+#include "libm.h"
+
+/*
+ * COSH(X)
+ * RETURN THE HYPERBOLIC COSINE OF X
+ *
+ * Method :
+ *	1. Replace x by |x| (COSH(x) = COSH(-x)).
+ *	2.
+ *	                                         [ EXP(x) - 1 ]^2
+ *	    0      <= x <= 0.3465 : COSH(x) := 1 + -------------------
+ *	                                              2*EXP(x)
+ *
+ *	                                       EXP(x) + 1/EXP(x)
+ *	    0.3465 <= x <= thresh : COSH(x) := -------------------
+ *	                                               2
+ *	    thresh <= x <= lnovft : COSH(x) := EXP(x)/2
+ *	    lnovft <= x <  INF    : COSH(x) := SCALBN(EXP(x-MEP1*ln2),ME)
+ *
+ *
+ * here
+ *	0.3465	a number that is near one half of ln2.
+ *	thresh	a number such that
+ *			EXP(thresh)+EXP(-thresh)=EXP(thresh)
+ *	lnovft	logarithm of the overflow threshold
+ *		= MEP1*ln2 chopped to machine precision.
+ *	ME	maximum exponent
+ *	MEP1	maximum exponent plus 1
+ *
+ * Special cases:
+ *	COSH(x) is |x| if x is +INF, -INF, or NaN.
+ *	only COSH(0)=1 is exact for finite x.
+ */
+
+static const long double C[] = {
+	0.5L,
+	1.0L,
+	0.3465L,
+	45.0L,
+	1.135652340629414394879149e+04L,
+	7.004447686242549087858985e-16L,
+	2.710505431213761085018632e-20L,	/* 2^-65 */
+};
+
+#define half C[0]
+#define one C[1]
+#define thr1 C[2]
+#define thr2 C[3]
+#define lnovft C[4]
+#define lnovlo C[5]
+#define tinyl C[6]
+
+/*
+ * Ranges are tested from the largest |x| downwards.  Once INF and NaN
+ * are out of the way this picks the same formula for every finite x as
+ * the table in the comment above.
+ */
+long double
+coshl(long double x) {
+	long double ax, em1, ep1, ex;
+
+	ax = fabsl(x);
+	if (!finitel(ax))
+		return (ax + ax);	/* x is INF or NaN */
+
+	/* past the overflow threshold of expl: rescale the argument */
+	if (ax > lnovft)
+		return (scalbnl(expl((ax - lnovft) - lnovlo), 16383));
+
+	if (ax >= thr2)
+		return (half * expl(ax));
+
+	if (ax >= thr1) {
+		ex = expl(ax);
+		return (half * (ex + one / ex));
+	}
+
+	if (ax < tinyl)
+		return (one + ax);	/* inexact+directed rounding */
+
+	em1 = expm1l(ax);
+	ep1 = one + em1;
+	return (one + (em1 * em1) / (ep1 + ep1));
+}
diff --git a/usr/src/libm/src/LD/cosl.c b/usr/src/libm/src/LD/cosl.c
new file mode 100644
index 0000000..69d4239
--- /dev/null
+++ b/usr/src/libm/src/LD/cosl.c
@@ -0,0 +1,105 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)cosl.c 1.9 06/01/31 SMI"
+
+#pragma weak cosl = __cosl
+
+/* INDENT OFF */
+/* cosl(x)
+ * Table look-up algorithm by K.C. Ng, November, 1989.
+ *
+ * kernel function:
+ *	__k_sinl	... sin function on [-pi/4,pi/4]
+ *	__k_cosl	... cos function on [-pi/4,pi/4]
+ *	__rem_pio2l	... argument reduction routine
+ *
+ * Method.
+ *	Let S and C denote the sin and cos respectively on [-PI/4, +PI/4].
+ *	1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in
+ *	   [-pi/2 , +pi/2], and let n = k mod 4.
+ *	2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have
+ *
+ *	    n     sin(x)    cos(x)    tan(x)
+ *	   ----------------------------------------------------------
+ *	    0       S         C        S/C
+ *	    1       C        -S       -C/S
+ *	    2      -S        -C        S/C
+ *	    3      -C         S       -C/S
+ *	   ----------------------------------------------------------
+ *
+ * Special cases:
+ *	Let trig be any of sin, cos, or tan.
+ *	trig(+-INF) is NaN, with signals;
+ *	trig(NaN) is that NaN;
+ *
+ * Accuracy:
+ *	computer TRIG(x) returns trig(x) nearly rounded.
+ */
+/* INDENT ON */
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include "longdouble.h"
+
+/*
+ * cosl(x): x - x for INF or NaN; __k_cosl(x, 0) when the high word of
+ * |x| is at most 0x3ffe9220 (|x| ~< pi/4); otherwise reduce with
+ * __rem_pio2l and pick the kernel from the cos(x) column of the table
+ * above.
+ */
+long double
+cosl(long double x) {
+	long double y[2], z = 0.0L;
+	int n, ix;
+	int *px = (int *) &x;
+
+	/* trig(Inf or NaN) is NaN */
+	if (!finitel(x))
+		return (x - x);
+
+	/*
+	 * High word of x.  Same guard as __k_sinl/__k_sincosl/__k_tanl:
+	 * both x86 targets go through XTOI, not only __i386.
+	 */
+#if !defined(__i386) && !defined(__amd64)
+	ix = px[0];
+#else
+	XTOI(px, ix);
+#endif
+
+	/* |x| ~< pi/4 */
+	ix &= 0x7fffffff;
+	if (ix <= 0x3ffe9220)
+		return (__k_cosl(x, z));
+
+	/* argument reduction needed */
+	n = __rem_pio2l(x, y);
+	switch (n & 3) {
+	case 0:
+		return (__k_cosl(y[0], y[1]));
+	case 1:
+		return (-__k_sinl(y[0], y[1]));
+	case 2:
+		return (-__k_cosl(y[0], y[1]));
+	default:	/* n & 3 == 3; every path now returns a value */
+		return (__k_sinl(y[0], y[1]));
+	}
+}
diff --git a/usr/src/libm/src/LD/erfl.c b/usr/src/libm/src/LD/erfl.c
new file mode 100644
index 0000000..6305691
--- /dev/null
+++ b/usr/src/libm/src/LD/erfl.c
@@ -0,0 +1,347 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)erfl.c 1.8 06/01/31 SMI"
+
+/* long double function erf,erfc (long double x)
+ * K.C. Ng, September, 1989.
+ *                           x
+ *                    2      |\
+ *     erf(x)  =  ---------  | exp(-t*t)dt
+ *                 sqrt(pi) \|
+ *                           0
+ *
+ *     erfc(x) =  1-erf(x)
+ *
+ * method:
+ *	Since erf(-x) = -erf(x), we assume x>=0.
+ *	For x near 0, we have the expansion
+ *
+ *	    erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....).
+ *
+ *	Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688,
+ *	we use x + x*P(x^2) to approximate erf(x). This formula will
+ *	guarantee an error of less than one ulp where x is not too far
+ *	away from 0. We note that erf(x)=x at x = 0.6174...... After
+ *	some experiment, we choose the following approximation on
+ *	interval [0,0.84375].
+ *
+ *	For x in [0,0.84375]
+ *	   P = P(x^2) = (p0 + p1 * x^2 + p2 * x^4 + ... + p20 * x^40)
+ *
+ *	   erf(x) = x + x*P
+ *	   erfc(x) = 1 - erf(x) if x<=0.25
+ *		   = 0.5 + ((0.5-x)-x*P) if x in [0.25,0.84375]
+ *	precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50
+ *
+ *	For x in [0.84375,1.25], let s = x - 1, and
+ *	   c = 0.84506291151 rounded to single (24 bits)
+ *	   erf(x) = c + P1(s)/Q1(s)
+ *	   erfc(x) = (1-c) - P1(s)/Q1(s)
+ *	precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41
+ *
+ *
+ *	For x in [1.25,1.75], let s = x - 1.5, and
+ *	   c = 0.95478588343 rounded to single (24 bits)
+ *	   erf(x) = c + P2(s)/Q2(s)
+ *	   erfc(x) = (1-c) - P2(s)/Q2(s)
+ *	precision: |P2/Q2 - (erf(x)-c)| <= 2**-123.83
+ *
+ *
+ *	For x in [1.75,16/3]
+ *	   erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x)
+ *	   erf(x) = 1 - erfc(x)
+ *	precision: absolute error of R1/S1 is bounded by 2**-124.03
+ *
+ *	For x in [16/3,107]
+ *	   erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x)
+ *	   erf(x) = 1 - erfc(x) (if x>=9 simply return erf(x)=1 with inexact)
+ *	precision: absolute error of R2/S2 is bounded by 2**-120.07
+ *
+ *	Else if inf > x >= 107
+ *	   erf(x) = 1 with inexact
+ *	   erfc(x) = 0 with underflow
+ *
+ *	Special case:
+ *	   erf(inf) = 1
+ *	   erfc(inf) = 0
+ */
+
+#pragma weak erfl = __erfl
+#pragma weak erfcl = __erfcl
+
+#include "libm.h"
+#include "longdouble.h"
+
+static long double
+tiny = 1e-40L,
+nearunfl = 1e-4000L,
+half = 0.5L,
+one = 1.0L,
+onehalf = 1.5L,
+L16_3 = 16.0L/3.0L;
+/*
+ * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375]
+ */
+static long double P[] = { /* 21 coeffs */
+	1.283791670955125738961589031215451715556e-0001L,
-3.761263890318375246320529677071815594603e-0001L, + 1.128379167095512573896158903121205899135e-0001L, + -2.686617064513125175943235483344625046092e-0002L, + 5.223977625442187842111846652980454568389e-0003L, + -8.548327023450852832546626271083862724358e-0004L, + 1.205533298178966425102164715902231976672e-0004L, + -1.492565035840625097674944905027897838996e-0005L, + 1.646211436588924733604648849172936692024e-0006L, + -1.636584469123491976815834704799733514987e-0007L, + 1.480719281587897445302529007144770739305e-0008L, + -1.229055530170782843046467986464722047175e-0009L, + 9.422759064320307357553954945760654341633e-0011L, + -6.711366846653439036162105104991433380926e-0012L, + 4.463224090341893165100275380693843116240e-0013L, + -2.783513452582658245422635662559779162312e-0014L, + 1.634227412586960195251346878863754661546e-0015L, + -9.060782672889577722765711455623117802795e-0017L, + 4.741341801266246873412159213893613602354e-0018L, + -2.272417596497826188374846636534317381203e-0019L, + 8.069088733716068462496835658928566920933e-0021L, +}; + +/* + * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25] + */ +static long double C1 = (long double)((float)0.84506291151); +static long double P1[] = { /* 12 top coeffs */ + -2.362118560752659955654364917390741930316e-0003L, + 4.129623379624420034078926610650759979146e-0001L, + -3.973857505403547283109417923182669976904e-0002L, + 4.357503184084022439763567513078036755183e-0002L, + 8.015593623388421371247676683754171456950e-0002L, + -1.034459310403352486685467221776778474602e-0002L, + 5.671850295381046679675355719017720821383e-0003L, + 1.219262563232763998351452194968781174318e-0003L, + 5.390833481581033423020320734201065475098e-0004L, + -1.978853912815115495053119023517805528300e-0004L, + 6.184234513953600118335017885706420552487e-0005L, + -5.331802711697810861017518515816271808286e-0006L, +}; +static long double Q1[] = { /* 12 bottom coeffs with leading 1.0 hidden */ + 
9.081506296064882195280178373107623196655e-0001L, + 6.821049531968204097604392183650687642520e-0001L, + 4.067869178233539502315055970743271822838e-0001L, + 1.702332233546316765818144723063881095577e-0001L, + 7.498098377690553934266423088708614219356e-0002L, + 2.050154396918178697056927234366372760310e-0002L, + 7.012988534031999899054782333851905939379e-0003L, + 1.149904787014400354649843451234570731076e-0003L, + 3.185620255011299476196039491205159718620e-0004L, + 1.273405072153008775426376193374105840517e-0005L, + 4.753866999959432971956781228148402971454e-0006L, + -1.002287602111660026053981728549540200683e-0006L, +}; +/* + * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5) + * on [1.25,1.75] + */ +static long double C2 = (long double)((float)0.95478588343); +static long double P2[] = { /* 12 top coeffs */ + 1.131926304864446730135126164594785863512e-0002L, + 1.273617996967754151544330055186210322832e-0001L, + -8.169980734667512519897816907190281143423e-0002L, + 9.512267486090321197833634271787944271746e-0002L, + -2.394251569804872160005274999735914368170e-0002L, + 1.108768660227528667525252333184520222905e-0002L, + 3.527435492933902414662043314373277494221e-0004L, + 4.946116273341953463584319006669474625971e-0004L, + -4.289851942513144714600285769022420962418e-0005L, + 8.304719841341952705874781636002085119978e-0005L, + -1.040460226177309338781902252282849903189e-0005L, + 2.122913331584921470381327583672044434087e-0006L, +}; +static long double Q2[] = { /* 13 bottom coeffs with leading 1.0 hidden */ + 7.448815737306992749168727691042003832150e-0001L, + 7.161813850236008294484744312430122188043e-0001L, + 3.603134756584225766144922727405641236121e-0001L, + 1.955811609133766478080550795194535852653e-0001L, + 7.253059963716225972479693813787810711233e-0002L, + 2.752391253757421424212770221541238324978e-0002L, + 7.677654852085240257439050673446546828005e-0003L, + 2.141102244555509687346497060326630061069e-0003L, + 
4.342123013830957093949563339130674364271e-0004L, + 8.664587895570043348530991997272212150316e-0005L, + 1.109201582511752087060167429397033701988e-0005L, + 1.357834375781831062713347000030984364311e-0006L, + 4.957746280594384997273090385060680016451e-0008L, +}; +/* + * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3] + */ +static long double R1[] = { /* 14 top coeffs */ + 4.630195122654315016370705767621550602948e+0006L, + 1.257949521746494830700654204488675713628e+0007L, + 1.704153822720260272814743497376181625707e+0007L, + 1.502600568706061872381577539537315739943e+0007L, + 9.543710793431995284827024445387333922861e+0006L, + 4.589344808584091011652238164935949522427e+0006L, + 1.714660662941745791190907071920671844289e+0006L, + 5.034802147768798894307672256192466283867e+0005L, + 1.162286400443554670553152110447126850725e+0005L, + 2.086643834548901681362757308058660399137e+0004L, + 2.839793161868140305907004392890348777338e+0003L, + 2.786687241658423601778258694498655680778e+0002L, + 1.779177837102695602425897452623985786464e+0001L, + 5.641895835477470769043614623819144434731e-0001L, +}; +static long double S1[] = { /* 15 bottom coeffs with leading 1.0 hidden */ + 4.630195122654331529595606896287596843110e+0006L, + 1.780411093345512024324781084220509055058e+0007L, + 3.250113097051800703707108623715776848283e+0007L, + 3.737857099176755050912193712123489115755e+0007L, + 3.029787497516578821459174055870781168593e+0007L, + 1.833850619965384765005769632103205777227e+0007L, + 8.562719999736915722210391222639186586498e+0006L, + 3.139684562074658971315545539760008136973e+0006L, + 9.106421313731384880027703627454366930945e+0005L, + 2.085108342384266508613267136003194920001e+0005L, + 3.723126272693120340730491416449539290600e+0004L, + 5.049169878567344046145695360784436929802e+0003L, + 4.944274532748010767670150730035392093899e+0002L, + 3.153510608818213929982940249162268971412e+0001L, + 1.0e00L, +}; + +/* + * erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107] + 
*/ +static long double R2[] = { /* 15 top coeffs in reverse order!!*/ + 2.447288012254302966796326587537136931669e+0005L, + 8.768592567189861896653369912716538739016e+0005L, + 1.552293152581780065761497908005779524953e+0006L, + 1.792075924835942935864231657504259926729e+0006L, + 1.504001463155897344947500222052694835875e+0006L, + 9.699485556326891411801230186016013019935e+0005L, + 4.961449933661807969863435013364796037700e+0005L, + 2.048726544693474028061176764716228273791e+0005L, + 6.891532964330949722479061090551896886635e+0004L, + 1.888014709010307507771964047905823237985e+0004L, + 4.189692064988957745054734809642495644502e+0003L, + 7.362346487427048068212968889642741734621e+0002L, + 9.980359714211411423007641056580813116207e+0001L, + 9.426910895135379181107191962193485174159e+0000L, + 5.641895835477562869480794515623601280429e-0001L, +}; +static long double S2[] = { /* 16 coefficients */ + 2.447282203601902971246004716790604686880e+0005L, + 1.153009852759385309367759460934808489833e+0006L, + 2.608580649612639131548966265078663384849e+0006L, + 3.766673917346623308850202792390569025740e+0006L, + 3.890566255138383910789924920541335370691e+0006L, + 3.052882073900746207613166259994150527732e+0006L, + 1.885574519970380988460241047248519418407e+0006L, + 9.369722034759943185851450846811445012922e+0005L, + 3.792278350536686111444869752624492443659e+0005L, + 1.257750606950115799965366001773094058720e+0005L, + 3.410830600242369370645608634643620355058e+0004L, + 7.513984469742343134851326863175067271240e+0003L, + 1.313296320593190002554779998138695507840e+0003L, + 1.773972700887629157006326333696896516769e+0002L, + 1.670876451822586800422009013880457094162e+0001L, + 1.000L, +}; + +long double erfl(x) +long double x; +{ + long double erfcl(long double),s,y,t; + + if(!finitel(x)) { + if(x!=x) return x+x; /* NaN */ + return copysignl(one,x); /* return +-1.0 is x=Inf */ + } + + y = fabsl(x); + if(y <= 0.84375L) { + if(y<=tiny) return x+P[0]*x; + s = y*y; + t = 
__poly_libmq(s,21,P); + return x+x*t; + } + if(y<=1.25L) { + s = y-one; + t = C1+__poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + return (signbitl(x))? -t: t; + } else if(y<=1.75L) { + s = y-onehalf; + t = C2+__poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + return (signbitl(x))? -t: t; + } + if(y<=9.0L) t = erfcl(y); else t = tiny; + return (signbitl(x))? t-one: one-t; +} + +long double erfcl(x) +long double x; +{ + long double erfl(long double),s,y,t; + + if(!finitel(x)) { + if(x!=x) return x+x; /* NaN */ + /* return 2.0 if x= -inf + 0.0 if x= +inf */ + if(x<0.0L) return 2.0L; else return 0.0L; + } + + if(x <= 0.84375L) { + if(x<=0.25) return one-erfl(x); + s = x*x; + t = half-x; + t = t - x*__poly_libmq(s,21,P); + return half+t; + } + if(x<=1.25L) { + s = x-one; + t = one-C1; + return t - __poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + } else if(x<=1.75L) { + s = x-onehalf; + t = one-C2; + return t - __poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + } + if(x>=107.0L) return nearunfl*nearunfl; /* underflow */ + else if(x >= L16_3) { + y = __poly_libmq(x,15,R2); + t = y/__poly_libmq(x,16,S2); + } else { + y = __poly_libmq(x,14,R1); + t = y/__poly_libmq(x,15,S1); + } + /* see comment in ../Q/erfl.c */ + y = x; + *(int*)&y = 0; + t *= expl(-y*y)*expl(-(x-y)*(x+y)); + return t; +} diff --git a/usr/src/libm/src/LD/finitel.c b/usr/src/libm/src/LD/finitel.c new file mode 100644 index 0000000..2b355d3 --- /dev/null +++ b/usr/src/libm/src/LD/finitel.c @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)finitel.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak finitel = __finitel +#endif + +#include "libm.h" + +#if defined(__sparc) +int +finitel(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) < 0x7fff0000); +} +#elif defined(__i386) +int +finitel(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return (t != 0x7fff && ((px[1] & 0x80000000) != 0 || t == 0)); +#else + return (t != 0x7fff); +#endif +} +#endif /* defined(__sparc) || defined(__i386) */ diff --git a/usr/src/libm/src/LD/gammal.c b/usr/src/libm/src/LD/gammal.c new file mode 100644 index 0000000..5293e40 --- /dev/null +++ b/usr/src/libm/src/LD/gammal.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)gammal.c 1.6 06/01/31 SMI" +#pragma weak gammal = __gammal + +/* + * long double gammal(long double x); + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +extern int signgam; +extern int signgaml; + +long double +gammal(long double x) { + long double y = __k_lgammal(x, &signgaml); + + signgam = signgaml; /* SUSv3 requires the setting of signgam */ + return y; +} diff --git a/usr/src/libm/src/LD/gammal_r.c b/usr/src/libm/src/LD/gammal_r.c new file mode 100644 index 0000000..f58766c --- /dev/null +++ b/usr/src/libm/src/LD/gammal_r.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)gammal_r.c 1.4 06/01/31 SMI" + +/* + * long double gammal_r(long double x, int *signgamlp); + */ + +#pragma weak gammal_r = __gammal_r + +#include "libm.h" +#include "longdouble.h" + +long double +gammal_r(long double x, int *signgamlp) { + return __k_lgammal(x, signgamlp); +} diff --git a/usr/src/libm/src/LD/hypotl.c b/usr/src/libm/src/LD/hypotl.c new file mode 100644 index 0000000..e2e6a54 --- /dev/null +++ b/usr/src/libm/src/LD/hypotl.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)hypotl.c 1.9 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak hypotl = __hypotl +#endif + +/* + * hypotl(x,y) + * Method : + * If z=x*x+y*y has error less than sqrt(2)/2 ulp than sqrt(z) has + * error less than 1 ulp. + * So, compute sqrt(x*x+y*y) with some care as follows: + * Assume x>y>0; + * 1. save and set rounding to round-to-nearest + * 2. if x > 2y use + * x1*x1+(y*y+(x2*(x+x2))) for x*x+y*y + * where x1 = x with lower 32 bits cleared, x2 = x-x1; else + * 3. 
if x <= 2y use
+ *		t1*y1+((x-y)*(x-y)+(t1*y2+t2*y))
+ *	where t1 = 2x with lower 32 bits cleared, t2 = 2x-t1, y1= y with
+ *	lower 32 bits cleared, y2 = y-y1.
+ *
+ *	NOTE: DO NOT remove parentheses!
+ *
+ * Special cases:
+ *	hypot(x,y) is INF if x or y is +INF or -INF; else
+ *	hypot(x,y) is NAN if x or y is NAN.
+ *
+ * Accuracy:
+ *	hypot(x,y) returns sqrt(x^2+y^2) with error less than 1 ulps (units
+ *	in the last place)
+ */
+
+#include "libm.h"
+
+#if defined(__i386)
+extern enum fp_direction_type __swap87RD(enum fp_direction_type);
+
+#define	k	0x7fff	/* exponent field value tested for INF/NaN below */
+
+/*
+ * i386 implementation.  x and y are manipulated through their 32-bit
+ * words: word 2 holds the sign and biased exponent, words 1 and 0 the
+ * significand.  After forcing both arguments positive and ordering them
+ * so that nx >= ny, one of three paths is taken:
+ *   - exponents at least 66 apart: return x + y;
+ *   - both exponents in the "medium" window: evaluate the split-product
+ *     formulas from the header comment under round-to-nearest;
+ *   - otherwise: handle INF/NaN, or rescale by a power of two and recurse.
+ */
+long double
+hypotl(long double x, long double y) {
+	long double t1, t2, y1, y2, w;
+	int *px = (int *) &x, *py = (int *) &y;
+	int *pt1 = (int *) &t1, *py1 = (int *) &y1;
+	enum fp_direction_type rd;
+	int j, nx, ny, nz;
+
+	px[2] &= 0x7fff;	/* clear sign bit and padding bits of x and y */
+	py[2] &= 0x7fff;
+	nx = px[2];		/* biased exponent of x and y */
+	ny = py[2];
+	if (ny > nx) {
+		w = x;
+		x = y;
+		y = w;
+		nz = ny;
+		ny = nx;
+		nx = nz;
+	}			/* force nx >= ny */
+	if (nx - ny >= 66)
+		return (x + y);	/* x / y >= 2**65 */
+	/*
+	 * NOTE(review): the exponent window below is presumably chosen so
+	 * that the squares and products can neither overflow nor underflow
+	 * -- confirm against upstream notes.
+	 */
+	if (nx < 0x5ff3 && ny > 0x205b) {	/* medium x,y */
+		/* save and set RD to Rounding to nearest */
+		rd = __swap87RD(fp_nearest);
+		w = x - y;
+		if (w > y) {
+			/* t1 = x with the low 32 significand bits cleared */
+			pt1[2] = px[2];
+			pt1[1] = px[1];
+			pt1[0] = 0;
+			t2 = x - t1;
+			/* t1*t1 + (y*y + t2*(x+t1)), written with negations */
+			x = sqrtl(t1 * t1 - (y * (-y) - t2 * (x + t1)));
+		} else {
+			x += x;	/* x now holds 2x; w still holds old x - y */
+			/* y1 = y with the low 32 significand bits cleared */
+			py1[2] = py[2];
+			py1[1] = py[1];
+			py1[0] = 0;
+			y2 = y - y1;
+			/* t1 = 2x with the low 32 significand bits cleared */
+			pt1[2] = px[2];
+			pt1[1] = px[1];
+			pt1[0] = 0;
+			t2 = x - t1;
+			/* t1*y1 + (w*w + (t2*y1 + y2*2x)) = 2xy + (x-y)^2 */
+			x = sqrtl(t1 * y1 - (w * (-w) - (t2 * y1 + y2 * x)));
+		}
+		if (rd != fp_nearest)
+			__swap87RD(rd);	/* restore rounding mode */
+		return (x);
+	} else {
+		if (nx == k || ny == k) {	/* x or y is INF or NaN */
+			/* since nx >= ny; nx is always k within this block */
+			/* significand 0x80000000:00000000 => x is INF */
+			if (px[1] == 0x80000000 && px[0] == 0)
+				return (x);
+			else if (ny == k && py[1] == 0x80000000 && py[0] == 0)
+				return (y);
+			else
+				return (x + y);
+		}
+		if (ny == 0) {
+			if (y == 0.L || x == 0.L)
+				return (x + y);
+			/*
+			 * Smaller exponent field is zero: build t1 with
+			 * exponent 0x3fff+16381 and y1 with 0x3fff-16381,
+			 * scale both arguments up by t1, recurse, and scale
+			 * the result back with y1.
+			 */
+			pt1[2] = 0x3fff + 16381;
+			pt1[1] = 0x80000000;
+			pt1[0] = 0;
+			py1[2] = 0x3fff - 16381;
+			py1[1] = 0x80000000;
+			py1[0] = 0;
+			x *= t1;
+			y *= t1;
+			return (y1 * hypotl(x, y));
+		}
+		/*
+		 * Very large or very small operands: shift both exponents
+		 * by j so that x's exponent becomes 0x3fff, recurse, then
+		 * multiply by t1, whose exponent field is the original nx.
+		 */
+		j = nx - 0x3fff;
+		px[2] -= j;
+		py[2] -= j;
+		pt1[2] = nx;
+		pt1[1] = 0x80000000;
+		pt1[0] = 0;
+		return (t1 * hypotl(x, y));
+	}
+}
+#endif
diff --git a/usr/src/libm/src/LD/isnanl.c b/usr/src/libm/src/LD/isnanl.c
new file mode 100644
index 0000000..98035d8
--- /dev/null
+++ b/usr/src/libm/src/LD/isnanl.c
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "@(#)isnanl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak isnanl = __isnanl +#endif + +#include "libm.h" + +#if defined(__sparc) +int +isnanl(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) >= 0x7fff0000 && + ((px[0] & ~0xffff0000) | px[1] | px[2] | px[3]) != 0); +} +#elif defined(__i386) +int +isnanl(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0 || + t != 0 && (px[1] & 0x80000000) == 0); +#else + return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0); +#endif +} +#endif /* defined(__sparc) || defined(__i386) */ diff --git a/usr/src/libm/src/LD/j0l.c b/usr/src/libm/src/LD/j0l.c new file mode 100644 index 0000000..5628f03 --- /dev/null +++ b/usr/src/libm/src/LD/j0l.c @@ -0,0 +1,731 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)j0l.c 1.8 06/01/31 SMI"
+
+/*
+ * Floating point Bessel's function of the first and second kinds
+ * of order zero: j0(x),y0(x);
+ *
+ * Special cases:
+ *	y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal;
+ *	y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
+ */
+
+#pragma weak j0l = __j0l
+#pragma weak y0l = __y0l
+
+#include "libm.h"
+#include "libm_synonyms.h"
+/*
+ * NOTE(review): the header names on the next two #include lines are
+ * missing from this copy of the patch -- restore them from upstream.
+ */
+#include
+#include
+
+#define GENERIC long double
+static GENERIC
+zero = 0.0L,
+small = 1.0e-9L,
+tiny = 1.0e-38L,
+one = 1.0L,
+five = 5.0L,
+eight = 8.0L,
+invsqrtpi= 5.641895835477562869480794515607725858441e-0001L,
+tpi = 0.636619772367581343075535053490057448L;
+
+static GENERIC pzero(), qzero();
+static GENERIC r0[7] = {
+	-2.499999999999999999999999999999998934492e-0001L,
+	1.272657927360049786327618451133763714880e-0002L,
+	-2.694499763712963276900636693400659600898e-0004L,
+	2.724877475058977576903234070919616447883e-0006L,
+	-1.432617103214330236967477495393076320281e-0008L,
+	3.823248804080079168706683540513792224471e-0011L,
+	-4.183174277567983647337568504286313665065e-0014L,
+};
+static GENERIC s0[7] = {
+	1.0e0L,
+	1.159368290559800854689526195462884666395e-0002L,
+	6.629397597394973383009743876169946772559e-0005L,
+	2.426779981394054406305431142501735094340e-0007L,
+	6.097663491248511069094400469635449749883e-0010L,
+	1.017019133340929220238747413216052224036e-0012L,
+	9.012593179306197579518374581969371278481e-0016L,
+};
+
+/*
+ * j0l(x): Bessel function of the first kind, order zero.
+ *
+ * NaN is returned as x+x; the sign of x is discarded.  For |x| > 1.28
+ * the asymptotic form built from pzero()/qzero() is used (zero for
+ * infinite x); for |x| <= small a one- or two-term series; otherwise
+ * 1 + z*r0(z)/s0(z) with z = x*x.
+ */
+GENERIC
+j0l(GENERIC x) {
+	GENERIC z, s, c, ss, cc, r, u, v;
+	int i;
+
+	if(isnanl(x)) return x+x;
+	x = fabsl(x);
+	if(x > 1.28L){
+	    if(!finitel(x)) return zero;
+	    s = sinl(x);
+	    c = cosl(x);
+	/* j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
+	 * where x0 = x-pi/4
+	 * Better formula:
+	 *	cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+	 *		= 1/sqrt(2) * (cos(x) + sin(x))
+	 *	sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
+	 *		= 1/sqrt(2) * (sin(x) - cos(x))
+	 * To avoid cancellation, use
+	 *	sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+	 * to compute the worse one.
+	 */
+	    if(x>1.0e2450L) {	/* x+x may overflow */
+		ss = s-c;
+		cc = s+c;
+	    } else if(signbitl(s)!=signbitl(c)) {
+		ss = s - c;
+		cc = -cosl(x+x)/ss;
+	    } else {
+		cc = s + c;
+		ss = -cosl(x+x)/cc;
+	    }
+	/*
+	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+	 */
+	    if(x>1.0e120L) return (invsqrtpi*cc)/sqrtl(x);
+	    u = pzero(x); v = qzero(x);
+	    return invsqrtpi*(u*cc-v*ss)/sqrtl(x);
+	}
+	if(x<=small) {
+	    if(x<=tiny) return one-x;
+	    else return one-x*x*0.25L;
+	}
+	z = x*x;
+	r = r0[6]; s = s0[6];
+	for(i=5;i>=0;i--) {
+	    r = r*z + r0[i];
+	    s = s*z + s0[i];
+	}
+	return(one+z*(r/s));
+}
+
+static GENERIC u0[8] = {
+	-7.380429510868722527434392794848301631220e-0002L,
+	1.766855559625940791857536949301981816513e-0001L,
+	-1.386470722701047923235553251240162839408e-0002L,
+	3.520149242724811578636970811631224862615e-0004L,
+	-3.978599663243790049853642275624951870025e-0006L,
+	2.228801153263957224547222556806915479763e-0008L,
+	-6.121246764298785018658597179498837316177e-0011L,
+	6.677103629722678833475965810525587396596e-0014L,
+};
+static GENERIC v0[8] = {
+	1.0e0L,
+	1.247164416539111311571676766127767127970e-0002L,
+	7.829144749639791500052900281489367443576e-0005L,
+	3.247126540422245330511218321013360336606e-0007L,
+	9.750516724789499678567062572549568447869e-0010L,
+	2.156713223173591212250543390258458098776e-0012L,
+	3.322169561597890004231482431236452752624e-0015L,
+	2.821213295314000924252226486305726805093e-0018L,
+};
+
+/*
+ * y0l(x): Bessel function of the second kind, order zero.
+ *
+ * NaN is returned as x+x.  x == 0 returns -inf (division by zero) and
+ * x < 0 returns NaN (invalid operation); both are returned explicitly.
+ * For x > 1.28 the asymptotic form built from pzero()/qzero() is used
+ * (zero for infinite x); for x <= tiny, u0[0] + tpi*log(x); otherwise
+ * u0(z)/v0(z) + tpi*j0l(x)*log(x) with z = x*x.
+ */
+GENERIC
+y0l(GENERIC x) {
+	GENERIC z, s, c, ss, cc, u, v;
+	int i;
+
+	if(isnanl(x)) return x+x;
+	if(x <= zero){
+	    if(x==zero)
+		return -one/zero;	/* -inf, division by zero */
+	    else
+		return zero/(x-x);	/* NaN, invalid operation */
+	}
+	if(x > 1.28L){
+	    if(!finitel(x)) return zero;
+	    s = sinl(x);
+	    c = cosl(x);
+	/* j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0))
+	 * where x0 = x-pi/4
+	 * Better formula:
+	 *	cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+	 *		= 1/sqrt(2) * (cos(x) + sin(x))
+	 *	sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
+	 *		= 1/sqrt(2) * (sin(x) - cos(x))
+	 * To avoid cancellation, use
+	 *	sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+	 * to compute the worse one.
+	 */
+	    if(x>1.0e2450L) {	/* x+x may overflow */
+		ss = s-c;
+		cc = s+c;
+	    } else if(signbitl(s)!=signbitl(c)) {
+		ss = s - c;
+		cc = -cosl(x+x)/ss;
+	    } else {
+		cc = s + c;
+		ss = -cosl(x+x)/cc;
+	    }
+	/*
+	 * j0(x) = 1/sqrt(pi*x) * (P(0,x)*cc - Q(0,x)*ss)
+	 * y0(x) = 1/sqrt(pi*x) * (P(0,x)*ss + Q(0,x)*cc)
+	 */
+	    if(x>1.0e120L) return (invsqrtpi*ss)/sqrtl(x);
+	    return invsqrtpi*(pzero(x)*ss+qzero(x)*cc)/sqrtl(x);
+
+	}
+	if(x<=tiny) {
+	    return(u0[0] + tpi*logl(x));
+	}
+	z = x*x;
+	u = u0[7]; v = v0[7];
+	for(i=6;i>=0;i--){
+	    u = u*z + u0[i];
+	    v = v*z + v0[i];
+	}
+	return(u/v + tpi*(j0l(x)*logl(x)));
+}
+
+static GENERIC pr0[12] = {	/* [16 -- inf] */
+	9.999999999999999999999999999999999997515e-0001L,
+	1.065981615377273376425365823967550598358e+0003L,
+	4.390991200927588978306374718984240719130e+0005L,
+	9.072086218607986711847069407339321363103e+0007L,
+	1.022552886177375367408408501046461671528e+0010L,
+	6.420766912243658241570635854089597269031e+0011L,
+	2.206451725126933913591080211081242266908e+0013L,
+	3.928369596816895077363705478743346298368e+0014L,
+	3.258159928874124597286701119721482876596e+0015L,
+	1.025715808134188978860679130140685101348e+0016L,
+	7.537170874795721255796001687024031280685e+0015L,
+	-1.579413901450157332307745586004207687796e+0014L,
+};
+static GENERIC ps0[11] = {
+	1.0e0L,
+	1.066051927877273376425365823967550512687e+0003L,
+	4.391739647168381592399173804329266353038e+0005L,
+	9.075162261801343671805658294123888867884e+0007L,
+	1.023186118519904751819581912075985995058e+0010L,
+	6.427861860414223746340515376512730275061e+0011L,
+	2.210861503237823589735481303627993406235e+0013L,
+	3.943247335784292905915956840901818177989e+0014L,
+	3.283720976777545142150200110647270004481e+0015L,
+	1.045346918812754048903645641538728986759e+0016L,
+
8.043455468065618900750599584291193680463e+0015L, +}; +static GENERIC pr1[12] = { /* [8 -- 16] */ + 9.999999999999999999999784422701108683618e-0001L, + 6.796098532948334207755488692777907062894e+0002L, + 1.840036112605722168824530758797169836042e+0005L, + 2.598490483191916637264894340635847598122e+0007L, + 2.105774863242707025525730249472054578523e+0009L, + 1.015822044230542426666314997796944979959e+0011L, + 2.931557457008110436764077699944189071875e+0012L, + 4.962885121125457633655259224179322808824e+0013L, + 4.705424055148223269155430598563351566279e+0014L, + 2.294439854910747229152056080910427001110e+0015L, + 4.905531843137486691500950019322475458629e+0015L, + 3.187543169710339218793442542845735994565e+0015L, +}; +static GENERIC ps1[14] = { + 1.0e0L, + 6.796801657948334207754571576066758180288e+0002L, + 1.840512891201300567325421059826676366447e+0005L, + 2.599777028312918975306252167127695075221e+0007L, + 2.107582572771047636846811284634244892537e+0009L, + 1.017275794694156108975782763889979940348e+0011L, + 2.938487645192463845428059755454762316011e+0012L, + 4.982512164735557054521042916182317924466e+0013L, + 4.737639900153703274792677468264564361437e+0014L, + 2.323398719123742743524249528275097100646e+0015L, + 5.033419107069210577868909797896984419391e+0015L, + 3.409036105931068609601317076759804716059e+0015L, + 7.505655364352679737585745147753521662166e+0013L, + -9.976837153983688250780198248297109118313e+0012L, +}; +static GENERIC pr2[12] = { /* [5 -- 8 ] */ + 9.999999999999999937857236789277366320220e-0001L, + 3.692848765268649571651602420376358849214e+0002L, + 5.373022067535476576926715900057760985410e+0004L, + 4.038738891191314969971504035057219430725e+0006L, + 1.728285706306940523397385566659762646999e+0008L, + 4.375400819645889911158688737206054788534e+0009L, + 6.598950418204912408375591217782088567076e+0010L, + 5.827182039183238492480275401520072793783e+0011L, + 2.884222642913492390887572414999490975844e+0012L, + 
7.373278873797767721932837830628688632775e+0012L, + 8.338295457568973761205077964397969230489e+0012L, + 2.911383183467288345772308817209806922143e+0012L, +}; +static GENERIC ps2[14] = { + 1.0e0L, + 3.693551890268649477288896267171993213102e+0002L, + 5.375607880998361502474715133828068514297e+0004L, + 4.042477764024108249744998862572786367328e+0006L, + 1.731069838737016956685839588670132939513e+0008L, + 4.387147674049898778738226585935491417728e+0009L, + 6.628058659620653765349556940567715258165e+0010L, + 5.869659904164177740471685856367322160664e+0011L, + 2.919839445622817017058977559638969436383e+0012L, + 7.535314897696671402628203718612309253907e+0012L, + 8.696355561452933775773309859748610658935e+0012L, + 3.216155103141537221173601557697083216257e+0012L, + 4.756857081068942248246880159213789086363e+0010L, + -3.496356619666608032231074866481472824067e+0009L, +}; +static GENERIC pr3[13] = { /* [3.5 -- 5 ] */ + 9.999999999999916693107285612398196588247e-0001L, + 2.263975921282917721194425320484974336945e+0002L, + 1.994358386744245848889492762781484199966e+0004L, + 8.980067458430542243559962493831661323168e+0005L, + 2.282213787521372663705567756420087553508e+0007L, + 3.409784374889063618250288699908375135923e+0008L, + 3.024380857401448589254343517589811711108e+0009L, + 1.571110368046740246895071721443082286379e+0010L, + 4.603187020243604632153685300463160593768e+0010L, + 7.087196453409712719449549280664058793403e+0010L, + 5.046196021776346356803687409644239065041e+0010L, + 1.287758439080165765709154276618854799932e+0010L, + 5.900679773415023433787846658096813590784e+0008L, +}; +static GENERIC ps3[13] = { + 1.0e0L, + 2.264679046282855061328604619231774747116e+0002L, + 1.995939523988944553755653255389812103448e+0004L, + 8.993853144706348727038389967490183236820e+0005L, + 2.288326099634588843906989983704795468773e+0007L, + 3.424967100255240885169240956804790118282e+0008L, + 3.046311797972463991368023759640028910016e+0009L, + 
1.589614961932826812790222479700797224003e+0010L, + 4.692406624527744816497089139325073939927e+0010L, + 7.320486495902008912866462849073108323948e+0010L, + 5.345945972828978289935309597742981360994e+0010L, + 1.444033091910423754121309915092247171008e+0010L, + 7.987714685115314668378957273824383610525e+0008L, +}; +static GENERIC pr4[13] = { /* [2.5 , 3.5] */ + 9.999999999986736677961118722747757712260e-0001L, + 1.453824980703800559037873123568378845663e+0002L, + 8.097327216430682288267610447006508661032e+0003L, + 2.273847252038264370231169686380192662135e+0005L, + 3.561056728046211111354759998976985449622e+0006L, + 3.244933588800096378434627029369680378599e+0007L, + 1.740112392860717950376210038908476792588e+0008L, + 5.426170187455893285197878563881579269524e+0008L, + 9.490107486454362321004377336020526281371e+0008L, + 8.688872439428470049801714121070005313806e+0008L, + 3.673315853166437222811910656900123215515e+0008L, + 5.577770470359303305164877446339693270239e+0007L, + 1.540438642031689641308197880181291865714e+0006L, +}; +static GENERIC ps4[13] = { /* [2.5 , 3.5] */ + 1.0e0L, + 1.454528105698159439773035951959131799816e+0002L, + 8.107442215200392397172179900434987859618e+0003L, + 2.279390393778242887574177096606328994140e+0005L, + 3.576251625592252008424781111770934135844e+0006L, + 3.267909499056932631405942058670933813863e+0007L, + 1.760021515330805537499778238099704648805e+0008L, + 5.525553787667353981242060222587465726729e+0008L, + 9.769870295912820457889384082671269328511e+0008L, + 9.110582071004774279226905629624018008454e+0008L, + 3.981857678621955599371967680343918454345e+0008L, + 6.482404686230769399073192961667697036706e+0007L, + 2.210046943095878402443535460329391782298e+0006L, +}; +static GENERIC pr5[13] = { /* [1.777..., 2.5] */ + 9.999999999114986107951817871144655880699e-0001L, + 9.252583736048588342568344570315435947614e+0001L, + 3.218726757856078715214631502407386264637e+0003L, + 5.554009964621111656479588505862577040831e+0004L, + 
5.269993115643664338253196944523510290175e+0005L, + 2.874613773778430691192912190618220544575e+0006L, + 9.133538151103658353874146919613442436035e+0006L, + 1.673067041410338922825193013077354249193e+0007L, + 1.706913873848398011744790289200151840498e+0007L, + 9.067766583853288534551600235576747618679e+0006L, + 2.216746733457884568532695355036338655872e+0006L, + 1.945753880802872541235703812722344514405e+0005L, + 3.132374412921948071539195638885330951749e+0003L, +}; +static GENERIC ps5[13] = { /* [1.777..., 2.5] */ + 1.0e0L, + 9.259614983862181118883831670990340052982e+0001L, + 3.225125275462903384842124075132609290304e+0003L, + 5.575705362829101545292760055941855246492e+0004L, + 5.306049863037087855496170121958448492522e+0005L, + 2.907060758873509564309729903109018597215e+0006L, + 9.298059206584995898298257827131208539289e+0006L, + 1.720391071006963176836108026556547062980e+0007L, + 1.782614812922865190479394509487941920612e+0007L, + 9.708016389605273153536452032839879950155e+0006L, + 2.476495084688170096480215640962175140027e+0006L, + 2.363200660365585759668077790194604917187e+0005L, + 4.803239569848196077121203575704356936731e+0003L, +}; +static GENERIC pr6[13] = { /* [1.28, 1.777...] */ + 9.999999969777095495998606925524322559556e-0001L, + 5.825486719466194430503283824096872219216e+0001L, + 1.248155491637757281915184824965379905380e+0003L, + 1.302093199842358609321338417071710477615e+0004L, + 7.353835804186292782840961999810543016039e+0004L, + 2.356471661113686180549195092555751341757e+0005L, + 4.350553267429009581632987060942780847101e+0005L, + 4.588762661876600638719159826652389418235e+0005L, + 2.675796398548523436544221045225290128611e+0005L, + 8.077649557108971388298292919988449940464e+0004L, + 1.117640459221306873519068741664054573776e+0004L, + 5.544400072396814695175787511557757885585e+0002L, + 5.072550541191480498431289089905822910718e+0000L, +}; +static GENERIC ps6[13] = { /* [1.28, 1.777...] 
*/
+	1.0e0L,
+	5.832517925357165050639075848183613063291e+0001L,
+	1.252144364743592128171256104364976466898e+0003L,
+	1.310300234342216813579118022415585740772e+0004L,
+	7.434667697093812197817292154032863632923e+0004L,
+	2.398706595587719165726469002404004614711e+0005L,
+	4.472737517625103157004869372427480602511e+0005L,
+	4.786313523337761975294171429067037723611e+0005L,
+	2.851161872872731228472536061865365370192e+0005L,
+	8.891648269899148412331918021801385815586e+0004L,
+	1.297097489535351517572978123584751042287e+0004L,
+	7.096761640545975756202184143400469812618e+0002L,
+	8.378049338590233325977702401733340820351e+0000L,
+};
+static GENERIC sixteen = 16.0L;
+static GENERIC huge = 1.0e30L;
+
+/*
+ * pzero(x) -- ratio of two polynomials in w = 1/x^2, with the coefficient
+ * tables chosen by the interval that holds x.
+ *
+ * NOTE(review): presumably the P(0,x) factor of the large-argument form
+ * used by the order-zero routines; those callers are outside this part
+ * of the file, so confirm there.
+ *
+ *	x > huge			returns one
+ *	x > sixteen			pr0/ps0
+ *	x > eight			pr1/ps1
+ *	x > five			pr2/ps2
+ *	x > 3.5				pr3/ps3
+ *	x > 2.5				pr4/ps4
+ *	x > 1/0.5625 (1.777...)		pr5/ps5
+ *	anything else			pr6/ps6 (assumes x > 1.28)
+ *
+ * Both polynomials are evaluated by Horner's rule from the highest
+ * coefficient down and the quotient numerator/denominator is returned.
+ */
+static GENERIC pzero(x)
+GENERIC x;
+{
+	const GENERIC *num_tab, *den_tab;	/* tables for the chosen interval */
+	GENERIC num, den, recip, w;
+	int form;	/* which numerator/denominator lengths the tables have */
+	int k;
+
+	if (x > huge)
+		return one;
+
+	recip = one/x;
+	w = recip*recip;
+
+	/* Same comparison order as the table intervals, largest first. */
+	if (x > sixteen) {
+		num_tab = pr0; den_tab = ps0; form = 0;
+	} else if (x > eight) {
+		num_tab = pr1; den_tab = ps1; form = 1;
+	} else if (x > five) {
+		num_tab = pr2; den_tab = ps2; form = 1;
+	} else if (x > 3.5L) {
+		num_tab = pr3; den_tab = ps3; form = 2;
+	} else if (x > 2.5L) {
+		num_tab = pr4; den_tab = ps4; form = 2;
+	} else if (x > (1.0L/0.5625L)) {
+		num_tab = pr5; den_tab = ps5; form = 2;
+	} else {
+		num_tab = pr6; den_tab = ps6; form = 2;
+	}
+
+	/* Fold in the coefficients that the shared loop below does not reach. */
+	switch (form) {
+	case 0:		/* 12 numerator terms, 11 denominator terms */
+		num = w*num_tab[11] + num_tab[10];
+		den = den_tab[10];
+		k = 9;
+		break;
+	case 1:		/* 12 numerator terms, 14 denominator terms */
+		num = num_tab[11];
+		den = den_tab[11] + w*(den_tab[12] + w*den_tab[13]);
+		k = 10;
+		break;
+	default:	/* 13 terms each */
+		num = num_tab[12];
+		den = den_tab[12];
+		k = 11;
+		break;
+	}
+
+	for (; k >= 0; k--) {
+		num = w*num + num_tab[k];
+		den = w*den + den_tab[k];
+	}
+	return num/den;
+}
+
+
+static GENERIC qr0[12] = { /* [16, inf] */
+	-1.249999999999999999999999999999999972972e-0001L,
+	-1.425179595545670577414395762503991596897e+0002L,
+	-6.312499645625970845534460257936222407219e+0004L, +
-1.411374326457208384315121243698814446848e+0007L, + -1.735034212758873581410984757860787252842e+0009L, + -1.199777647512789489421826342485055280680e+0011L, + -4.596025334081655714499860409699100373644e+0012L, + -9.262525628201284107792924477031653399187e+0013L, + -8.858394728685039245344398842180662867639e+0014L, + -3.267527953687534887623740622709505972113e+0015L, + -2.664222971186311967587129347029450062019e+0015L, + 3.442464060723987869585180095344504100204e+0014L, +}; +static GENERIC qs0[11] = { + 1.0e0L, + 1.140729613936536461931516610003185687881e+0003L, + 5.056665510442299351009198186490085803580e+0005L, + 1.132041763825642787943941650522718199115e+0008L, + 1.394570111872581606392620678214246479767e+0010L, + 9.677945218152264789534431079563744378421e+0011L, + 3.731140327851536828225143058896348502096e+0013L, + 7.612785951064869291722846681020881676410e+0014L, + 7.476077016406764891730191004811863975940e+0015L, + 2.951246482613592035421503427100393831709e+0016L, + 3.108361803691811711136854587074302034901e+0016L, +}; +static GENERIC qr1[12] = { /* [8, 16 ] */ + -1.249999999999999999997949010383433818157e-0001L, + -9.051215166393822640636752244895124126934e+0001L, + -2.620782703428148837671179031904208303947e+0004L, + -3.975571261553504457766177974508785790884e+0006L, + -3.479029330759311306270072218074074994090e+0008L, + -1.823955008124268573036216746186239829089e+0010L, + -5.765932697111801375765156029221568664435e+0011L, + -1.079843680798742592954002192417934779114e+0013L, + -1.146893630504592739082205764611581332897e+0014L, + -6.367016059683898464936104447282880704182e+0014L, + -1.583109041961213490464459111903484209098e+0015L, + -1.230149555764242473103128650135795639412e+0015L, +}; +static GENERIC qs1[14] = { + 1.0e0L, + 7.246831508115058112438579847778014458432e+0002L, + 2.100854184439168518399383786306927037611e+0005L, + 3.192636418837951507430188285940994235122e+0007L, + 2.801558443383354674538443461124434216152e+0009L, + 
1.475026997664373739293483927250653467487e+0011L, + 4.694486824913954608552363821799927145318e+0012L, + 8.890350100919200250838438709601547334021e+0013L, + 9.626844429082905144874701068760469752067e+0014L, + 5.541110744600460773528263862687521642140e+0015L, + 1.486500494789452556727470329232123096563e+0016L, + 1.415840104845959400365430773732093899210e+0016L, + 1.780866095241517418081312567239682336483e+0015L, + -2.359230917384889357887631544079990129494e+0014L, +}; +static GENERIC qr2[12] = { /* [5, 8] */ + -1.249999999999999531937744362527772181614e-0001L, + -4.944373897356969774839375977239241573966e+0001L, + -7.728449175433465285314261650078450473909e+0003L, + -6.262574329612752346336901434651220705903e+0005L, + -2.900948220220943306027235217424380672732e+0007L, + -7.988719647634192770463917157562874119535e+0008L, + -1.318228171927181389547760026626357012375e+0010L, + -1.282439773983029245309263271945424928196e+0011L, + -7.050925570827818040186149940257918845138e+0011L, + -2.021751882573871990004205616874202684429e+0012L, + -2.592939962400668552384333900573812635658e+0012L, + -1.038267109518891262840601514932972850326e+0012L, +}; +static GENERIC qs2[14] = { + 1.0e0L, + 3.961358492885570003202784022894248952116e+0002L, + 6.205788738864701882828752634586510926968e+0004L, + 5.045715603932670286550673813011764406749e+0006L, + 2.349248611362658323353343389430968751429e+0008L, + 6.520244524415828635917683553721880063911e+0009L, + 1.089111211223507719337067159886281887722e+0011L, + 1.080406000905359867958779409414903018610e+0012L, + 6.135645280895514703514154680623769562148e+0012L, + 1.862433040246625874245867151368643668215e+0013L, + 2.667780805786648888840777888702193708994e+0013L, + 1.394401107289087774765300711809313112824e+0013L, + 1.093247500616320375562898297156722445484e+0012L, + -7.228875530378928722826604216491493780775e+0010L, +}; +static GENERIC qr3[13] = { /* [3.5 5] */ + -1.249999999999473067748420379578481661075e-0001L, + 
-3.044549048635289351913574324803250977998e+0001L, + -2.890081140649769078496693003524681440869e+0003L, + -1.404922456817202235879343275330529107684e+0005L, + -3.862746614385573443518177403617349281869e+0006L, + -6.257517309110249049201133708911155047689e+0007L, + -6.031451330920839916987079782727323477520e+0008L, + -3.411542405173830611454025765755854382346e+0009L, + -1.089392478149726672133014498723021526099e+0010L, + -1.824934078420210941290140903415956782726e+0010L, + -1.400780278304358710423481070486939531139e+0010L, + -3.716484136064917363926635716743771092093e+0009L, + -1.397591075296425529970434890954904331580e+0008L, +}; +static GENERIC qs3[13] = { + 1.0e0L, + 2.441498613904962049391000187014945858042e+0002L, + 2.326188882072370711500164222341514337043e+0004L, + 1.137138213121231338494977104659239578165e+0006L, + 3.152918070735662728722998452605364253517e+0007L, + 5.172877993426507259314270488444013595108e+0008L, + 5.083086439731669807455961078856470774115e+0009L, + 2.961842732066434123119325521139476909941e+0010L, + 9.912185866862440735829781856081353151390e+0010L, + 1.793560561251622234430564181567297983598e+0011L, + 1.577090119341228122525265108497940403073e+0011L, + 5.509910306780166194333889999985463681636e+0010L, + 4.761691134078874491202320181517936758141e+0009L, +}; +static GENERIC qr4[13] = { /* [2.5 3.5] */ + -1.249999999928567734339745043490705340835e-0001L, + -1.967201748731419063051601624435565528481e+0001L, + -1.186329146714562236407099740615528170707e+0003L, + -3.607736959222941810356301491152457934060e+0004L, + -6.119200717978104904932828468575194267125e+0005L, + -6.037847781158358226670305078652205586384e+0006L, + -3.503558153336140359700536720393565984740e+0007L, + -1.180196478268225718757218523746787309773e+0008L, + -2.221860232085134915841426363505169680528e+0008L, + -2.173372505452747585296176761701746236760e+0008L, + -9.649364865061237558517730539506568013963e+0007L, + -1.465429227847933034546039640094862650385e+0007L, + 
-3.083003197920262085170581866246663380607e+0005L, +}; +static GENERIC qs4[13] = { /* [2.5 3.5] */ + 1.0e0L, + 1.579620773732259142752614142139986854055e+0002L, + 9.581372220329138733203879503753685054968e+0003L, + 2.939598672379108095776114131010825885308e+0005L, + 5.052183049314542218630341818692588448168e+0006L, + 5.083497695595206639433839326338971980149e+0007L, + 3.036385361800553388049719014005099206516e+0008L, + 1.067826481452753409910563785161661492137e+0009L, + 2.145644125557118044720741775125319669272e+0009L, + 2.324115615959719949363946673491552216799e+0009L, + 1.223262962112070757966959855619847011146e+0009L, + 2.569765553318495423738478585947110270709e+0008L, + 1.354744744299227127897905787732636565504e+0007L, +}; +static GENERIC qr5[13] = { /* [1.777.., 2.5] */ + -1.249999995936639697637680428174576069971e-0001L, + -1.260846055371311453485891923426489068315e+0001L, + -4.772398467544467480801174330290141578895e+0002L, + -8.939852599990298486613760833996490599724e+0003L, + -9.184070787149542050979542226446134243197e+0004L, + -5.406038945018274458362637897739280435171e+0005L, + -1.845896544705190261018653728678171084418e+0006L, + -3.613616990680809501878667570653308071547e+0006L, + -3.908782978135693252252557720414348623779e+0006L, + -2.173711022517323927109138170588442768176e+0006L, + -5.431253130679918485836408549007856244495e+0005L, + -4.591098546452684510082591587275940765959e+0004L, + -5.244711364168207806835520057792229646578e+0002L, +}; +static GENERIC qs5[13] = { /* [1.777.., 2.5] */ + 1.0e0L, + 1.014536210851290878350892750972474861447e+0002L, + 3.875547510687135314064434160096139681076e+0003L, + 7.361913122670079814955259281995617732580e+0004L, + 7.720288944218771126581086539585529314636e+0005L, + 4.681529554446752496404431433608306558038e+0006L, + 1.667882621940503925455031252308367745820e+0007L, + 3.469403153761399881888272620855305156241e+0007L, + 4.096992047964210711867089384719947863019e+0007L, + 
2.596804755829217449311530735959560630554e+0007L, + 7.983933774697889238154465064019410763845e+0006L, + 9.818133816979900819087242425280757938152e+0005L, + 3.061083930868694396013541535670745443560e+0004L, +}; + +static GENERIC qr6[13] = { /* [1.28, 1.777..] */ + -1.249999881577289001807137282824929082771e-0001L, + -7.998273510053110759610810594119533619282e+0000L, + -1.872481955335172543369089617771565632719e+0002L, + -2.122116786726300805079874003303799646812e+0003L, + -1.293850285839529282503178263484773478457e+0004L, + -4.445024742266316181033354192262529356093e+0004L, + -8.730161378334357767668344467356505347070e+0004L, + -9.706222895172078442801444972505315054736e+0004L, + -5.896325518259858270165531513618195321041e+0004L, + -1.823172034368108822276420827074668832233e+0004L, + -2.509304178635055926638833040337472387175e+0003L, + -1.156608965715779237316769828941729964099e+0002L, + -7.028005789650731396887346826397785210442e-0001L, +}; +static GENERIC qs6[13] = { /* [1.28, 1.777..] */ + 1.0e0L, + 6.457211085058064845601261321277721075900e+0001L, + 1.534005216588011210342824555136008682950e+0003L, + 1.777217999176441782593357660462379097171e+0004L, + 1.118372652642469468091084810263231199696e+0005L, + 4.015242433858461813142365748386473605294e+0005L, + 8.377081045517098645448616514388280497673e+0005L, + 1.011495020008010352575398009604164287337e+0006L, + 6.886722075290430568652227875200208955970e+0005L, + 2.504735189948021472047157148613171956537e+0005L, + 4.408138920171044846941001844352009817062e+0004L, + 3.105572178072115145673058722853640854884e+0003L, + 5.588294821118916113437396504573817033678e+0001L, +}; +static GENERIC qzero(x) +GENERIC x; +{ + GENERIC s,r,t,z; + int i; + if(x>huge) return -0.125L/x; + t = one/x; z = t*t; + if(x>sixteen) { + r = z*qr0[11]+qr0[10]; s = qs0[10]; + for(i=9;i>=0;i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + } else if(x>eight) { + r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]); + for(i=10;i>=0;i--) { + r = z*r + 
qr1[i]; + s = z*s + qs1[i]; + } + } else if(x>five){ /* assume x > 5.0 */ + r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]); + for(i=10;i>=0;i--) { + r = z*r + qr2[i]; + s = z*s + qs2[i]; + } + } else if(x>3.5L) { + r = qr3[12]; s = qs3[12]; + for(i=11;i>=0;i--) { + r = z*r + qr3[i]; + s = z*s + qs3[i]; + } + } else if(x>2.5L) { + r = qr4[12]; s = qs4[12]; + for(i=11;i>=0;i--) { + r = z*r + qr4[i]; + s = z*s + qs4[i]; + } + } else if(x> (1.0L/0.5625L)) { + r = qr5[12]; s = qs5[12]; + for(i=11;i>=0;i--) { + r = z*r + qr5[i]; + s = z*s + qs5[i]; + } + } else { /* assume x > 1.28 */ + r = qr6[12]; s = qs6[12]; + for(i=11;i>=0;i--) { + r = z*r + qr6[i]; + s = z*s + qs6[i]; + } + } + return t*(r/s); +} diff --git a/usr/src/libm/src/LD/j1l.c b/usr/src/libm/src/LD/j1l.c new file mode 100644 index 0000000..7941a29 --- /dev/null +++ b/usr/src/libm/src/LD/j1l.c @@ -0,0 +1,731 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)j1l.c 1.9 06/01/31 SMI" + +/* + * floating point Bessel's function of the first and second kinds + * of order zero: j1(x),y1(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + */ + +#pragma weak j1l = __j1l +#pragma weak y1l = __y1l + +#include "libm.h" +#include "libm_synonyms.h" +#include +#include + +#define GENERIC long double +static GENERIC +zero = 0.0L, +small = 1.0e-9L, +tiny = 1.0e-38L, +one = 1.0L, +five = 5.0L, +invsqrtpi= 5.641895835477562869480794515607725858441e-0001L, +tpi = 0.636619772367581343075535053490057448L; + +static GENERIC pone(), qone(); +static GENERIC r0[7] = { + -6.249999999999999999999999999999999627320e-0002L, + 1.940606727194041716205384618494641565464e-0003L, + -3.005630423155733701856481469986459043883e-0005L, + 2.345586219403918667468341047369572169358e-0007L, + -9.976809285885253587529010109133336669724e-0010L, + 2.218743258363623946078958783775107473381e-0012L, + -2.071079656218700604767650924103578046280e-0015L, +}; +static GENERIC s0[7] = { + 1.0e0L, + 1.061695903156199920738051277075003059555e-0002L, + 5.521860513111180371566951179398862692060e-0005L, + 1.824214367413754193524107877084979441407e-0007L, + 4.098957778439576834818838198039029353925e-0010L, + 6.047735079699666389853240090925264056197e-0013L, + 4.679044728878836197247923279512047035041e-0016L, +}; + +GENERIC +j1l(x) GENERIC x;{ + GENERIC z, d, s, c, ss, cc, r; + int i, sgn; + + if(!finitel(x)) return one/x; + sgn = signbitl(x); + x = fabsl(x); + if(x > 1.28L){ + s = sinl(x); + c = cosl(x); + /* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to 
compute the worse one. + */ + if(x>1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if(signbitl(s)!=signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if(x>1.0e120L) return (invsqrtpi*cc)/sqrtl(x); + d = invsqrtpi*(pone(x)*cc-qone(x)*ss)/sqrtl(x); + if(sgn==0) return d; else return -d; + } + if(x<=small) { + if(x<=tiny) d = 0.5L*x; + else d = x*(0.5L-x*x*0.125L); + if(sgn==0) return d; else return -d; + } + z = x*x; + r = r0[6]; + s = s0[6]; + for(i=5;i>=0;i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + d = x*0.5L+x*(z*(r/s)); + if(sgn==0) return d; else return -d; +} + +static GENERIC u0[7] = { + -1.960570906462389484060557273467558703503e-0001L, + 5.166389353148318460304315890665450006495e-0002L, + -2.229699464105910913337190798743451115604e-0003L, + 3.625437034548863342715657067759078267158e-0005L, + -2.689902826993117212255524537353883987171e-0007L, + 9.304570592456930912969387719010256018466e-0010L, + -1.234878126794286643318321347997500346131e-0012L, +}; +static GENERIC v0[8] = { + 1.0e0L, + 1.369394302535807332517110204820556695644e-0002L, + 9.508438148097659501433367062605935379588e-0005L, + 4.399007309420092056052714797296467565655e-0007L, + 1.488083087443756398305819693177715000787e-0009L, + 3.751609832625793536245746965768587624922e-0012L, + 6.680926434086257291872903276124244131448e-0015L, + 6.676602383908906988160099057991121446058e-0018L, +}; + +GENERIC +y1l(x) GENERIC x;{ + GENERIC z, s, c, ss, cc, u, v; + int i; + + if(isnanl(x)) return x+x; + if(x <= zero){ + if(x==zero) + return -one/zero; + else + return zero/zero; + } + if(x > 1.28L){ + if(!finitel(x)) return zero; + s = sinl(x); + c = cosl(x); + /* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * 
(sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if(x>1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if(signbitl(s)!=signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if(x>1.0e91L) return (invsqrtpi*ss)/sqrtl(x); + return invsqrtpi*(pone(x)*ss+qone(x)*cc)/sqrtl(x); + } + if(x<=tiny) { + return(-tpi/x); + } + z = x*x; + u = u0[6]; v = v0[6]+z*v0[7]; + for(i=5;i>=0;i--){ + u = u*z + u0[i]; + v = v*z + v0[i]; + } + return(x*(u/v) + tpi*(j1l(x)*logl(x)-one/x)); +} + +static GENERIC pr0[12] = { + 1.000000000000000000000000000000000000267e+0000L, + 1.060717875045891455602180843276758003035e+0003L, + 4.344347542892127024446687712181105852335e+0005L, + 8.915680220724007016377924252717410457094e+0007L, + 9.969502259938406062809873257569171272819e+0009L, + 6.200290193138613035646510338707386316595e+0011L, + 2.105978548788015119851815854422247330118e+0013L, + 3.696635772784601239371730810311998368948e+0014L, + 3.015913097920694682057958412534134515156e+0015L, + 9.370298471339353098123277427328592725921e+0015L, + 7.190349005196335967340799265074029443057e+0015L, + 2.736097786240689996880391074927552517982e+0014L, +}; +static GENERIC ps0[11] = { + 1.0e0L, + 1.060600687545891455602180843276758095107e+0003L, + 4.343106093416975589147153906505338900961e+0005L, + 8.910605869002176566582072242244353399059e+0007L, + 9.959122058635087888690713917622056540190e+0009L, + 6.188744967234948231792482949171041843894e+0011L, + 2.098863976953783506401759873801990304907e+0013L, + 3.672870357018063196746729751479938908450e+0014L, + 2.975538419246824921049011529574385888420e+0015L, + 9.063657659995043205018686029284479837091e+0015L, + 
6.401953344314747916729366441508892711691e+0015L, +}; +static GENERIC pr1[12] = { + 1.000000000000000000000023667524130660984e+0000L, + 6.746154419979618754354803488126452971204e+0002L, + 1.811210781083390154857018330296145970502e+0005L, + 2.533098390379924268038005329095287842244e+0007L, + 2.029683619805342145252338570875424600729e+0009L, + 9.660859662192711465301069401598929980319e+0010L, + 2.743396238644831519934098967716621316316e+0012L, + 4.553097354140854377931023170263455246288e+0013L, + 4.210245069852219757476169864974870720374e+0014L, + 1.987334056229596485076645967176169801727e+0015L, + 4.067120052787096893838970455751338930462e+0015L, + 2.486539606380406398310845264910691398133e+0015L, +}; +static GENERIC ps1[14] = { + 1.0e0L, + 6.744982544979618754355808680196859521782e+0002L, + 1.810421795396966762032155290441364740350e+0005L, + 2.530986460644310651529583759699988435573e+0007L, + 2.026743276048023121360249288818290224145e+0009L, + 9.637461924407405935245269407052641341836e+0010L, + 2.732378628423766417402292797028314160831e+0012L, + 4.522345274960527124354844364012184278488e+0013L, + 4.160650668341743132685335758415469856545e+0014L, + 1.943730242988858208243492424892435901211e+0015L, + 3.880228532692127989901131618598067450001e+0015L, + 2.178020816161154615841000173683302999728e+0015L, + -8.994062666842225551554346698171600634173e+0013L, + 1.368520368508851253495764806934619574990e+0013L, +}; +static GENERIC pr2[12] = { + 1.000000000000000006938651621840396237282e+0000L, + 3.658416291850404981407101077037948144698e+0002L, + 5.267073772170356547709794670602812447537e+0004L, + 3.912012101226837463014925210735894620442e+0006L, + 1.651295648974103957193874928714180765625e+0008L, + 4.114901144480797609972484998142146783499e+0009L, + 6.092524309766036681542980572526335147672e+0010L, + 5.263913178071282616719249969074134570577e+0011L, + 2.538408581124324223367341020538081330994e+0012L, + 6.288607929360291027895126983015365677648e+0012L, + 
6.848330048211148419047055075386525945280e+0012L, + 2.290309646838867941423178163991423244690e+0012L, +}; +static GENERIC ps2[14] = { + 1.0e0L, + 3.657244416850405086459410165762319861856e+0002L, + 5.262802358425023243992387075861237306312e+0004L, + 3.905896813959919648136295861661483848364e+0006L, + 1.646791907791461220742694842108202772763e+0008L, + 4.096132803064256022224954120208201437344e+0009L, + 6.046665195915950447544429445730680236759e+0010L, + 5.198061739781991313414052212328653295168e+0011L, + 2.484233851814333966401527626421254279796e+0012L, + 6.047868806925315879339651539434315255940e+0012L, + 6.333103831254091652501642567294101813354e+0012L, + 1.875143098754284994467609936924685024968e+0012L, + -5.238330920563392692965412762508813601534e+0010L, + 4.656888609439364725427789198383779259957e+0009L, +}; +static GENERIC pr3[13] = { + 1.000000000000009336887318068056137842897e+0000L, + 2.242719942728459588488051572002835729183e+0002L, + 1.955450611382026550266257737331095691092e+0004L, + 8.707143293993619899395400562409175590739e+0005L, + 2.186267894487004565948324289010954505316e+0007L, + 3.224328510541957792360691585667502864688e+0008L, + 2.821057355151380597331792896882741364897e+0009L, + 1.445371387295422404365584793796028979840e+0010L, + 4.181743160669891357783011002656658107864e+0010L, + 6.387371088767993119325536137794535513922e+0010L, + 4.575619999412716078064070587767416436396e+0010L, + 1.228415651211639160620284441690503550842e+0010L, + 7.242170349875563053436050532153112882072e+0008L, +}; +static GENERIC ps3[13] = { + 1.0e0L, + 2.241548067728529551049804610486061401070e+0002L, + 1.952838216795552145132137932931237181307e+0004L, + 8.684574926493185744628127341069974575526e+0005L, + 2.176357771067037962940853412819852189164e+0007L, + 3.199958682356132977319258783167122100567e+0008L, + 2.786218931525334687844675219914201872570e+0009L, + 1.416283776951741549631417572317916039767e+0010L, + 4.042962659271567948735676834609348842922e+0010L, + 
6.028168462646694510083847222968444402161e+0010L, + 4.118410226794641413833887606580085281111e+0010L, + 9.918735736297038430744161253338202230263e+0009L, + 4.092967198238098023219124487437130332038e+0008L, +}; +static GENERIC pr4[13] = { + 1.000000000001509220978157399042059553390e+0000L, + 1.437551868378147851133499996323782607787e+0002L, + 7.911335537418177296041518061404505428004e+0003L, + 2.193710939115317214716518908935756104804e+0005L, + 3.390662495136730962513489796538274984382e+0006L, + 3.048655347929348891006070609293884274789e+0007L, + 1.613781633489496606354045161527450975195e+0008L, + 4.975089835037230277110156150038482159988e+0008L, + 8.636047087015115403880904418339566323264e+0008L, + 7.918202912328366140110671223076949101509e+0008L, + 3.423294665798984733439650311722794853294e+0008L, + 5.621904953441963961040503934782662613621e+0007L, + 2.086303543310240260758670404509484499793e+0006L, +}; +static GENERIC ps4[13] = { + 1.0e0L, + 1.436379993384532371670493319591847362304e+0002L, + 7.894647154785430678061053848847436659499e+0003L, + 2.184659753392097529008981741550878586174e+0005L, + 3.366109083305465176803513738147049499361e+0006L, + 3.011911545968996817697665866587226343186e+0007L, + 1.582262913779689851316760148459414895301e+0008L, + 4.819268809494937919217938589530138201770e+0008L, + 8.201355762990450679702837123432527154830e+0008L, + 7.268232093982510937417446421282341425212e+0008L, + 2.950911909015572933262131323934036480462e+0008L, + 4.242839924305934423010858966540621219396e+0007L, + 1.064387620445090779182117666330405186866e+0006L, +}; +static GENERIC pr5[13] = { + 1.000000000102434805241171427253847353861e+0000L, + 9.129332257083629259060502249025963234821e+0001L, + 3.132238483586953037576119377504557191413e+0003L, + 5.329782528269307971278943122454171107861e+0004L, + 4.988460157184117790692873002103052944145e+0005L, + 2.686602071615786816147010334256047469378e+0006L, + 8.445418526028961197703799808701268301831e+0006L, + 
1.536575358646141157475725889907900827390e+0007L, + 1.568405818236523821796862770586544811945e+0007L, + 8.450876239888770102387618667362302173547e+0006L, + 2.154414900139567328424026827163203446077e+0006L, + 2.105656926565043898888460254808062352205e+0005L, + 4.739165011023396507022134303736862812975e+0003L, +}; +static GENERIC ps5[13] = { + 1.0e0L, + 9.117613509595327476509152673394703847793e+0001L, + 3.121697972484015639301279229281770795147e+0003L, + 5.294447222735893568040911873834576440255e+0004L, + 4.930368882192772335798256684110887882807e+0005L, + 2.634854685641165298302167435798357437768e+0006L, + 8.185462775400326393555896157031818280918e+0006L, + 1.462417423080215192609668642663030667086e+0007L, + 1.450624993985851675982860844153954896015e+0007L, + 7.460467647561995283219086567162006113864e+0006L, + 1.754210981405612478869227142579056338965e+0006L, + 1.463286721155271971526264914524746699596e+0005L, + 2.155894725796702015341211116579827039459e+0003L, +}; +static GENERIC pr6[13] = { + 1.000000003564855546741735920315743157129e+0000L, + 5.734003934862540458119423509909510288366e+0001L, + 1.209572491935850486086559692291796887976e+0003L, + 1.243398391422281247933674779163660286838e+0004L, + 6.930996755181437937258220998601708278787e+0004L, + 2.198067659532757598646722249966767620099e+0005L, + 4.033659432712058633933179115820576858455e+0005L, + 4.257759657219008027016047206574574358678e+0005L, + 2.511917395876004349480721277445763916389e+0005L, + 7.813756153070623654178731651381881953552e+0004L, + 1.152069173381127881385588092905864352891e+0004L, + 6.548580782804088553777816037551523398082e+0002L, + 8.668725370116906132327542766127938496880e+0000L, +}; +static GENERIC ps6[13] = { + 1.0e0L, + 5.722285236357114566499221525736286205184e+0001L, + 1.203010842878317935444582950620339570506e+0003L, + 1.230058335378583550155825502172435371208e+0004L, + 6.800998550607861288865300438648089894412e+0004L, + 2.130767829599304262987769347536850885921e+0005L, + 
3.840483466643916681759936972992155310026e+0005L,
+	3.947432373459225542861819148108081160393e+0005L,
+	2.237816239393081111481588434457838526738e+0005L,
+	6.545820495124419723398946273790921540774e+0004L,
+	8.729563630320892741500726213278834737196e+0003L,
+	4.130762660291894753450174794196998813709e+0002L,
+	3.480368898672684645130335786015075595598e+0000L,
+};
+static GENERIC sixteen = 16.0L;
+static GENERIC eight = 8.0L;
+static GENERIC huge = 1.0e30L;
+
+/*
+ * pone(x) -- the P(1,x) factor that j1l() and y1l() multiply into their
+ * large-argument formulas: a ratio of two polynomials in w = 1/x^2, with
+ * the coefficient tables chosen by the interval that holds x.
+ *
+ *	x > huge (1.0e30)		returns one
+ *	x > 16				pr0/ps0
+ *	x > 8				pr1/ps1
+ *	x > 5				pr2/ps2
+ *	x > 3.5				pr3/ps3
+ *	x > 2.5				pr4/ps4
+ *	x > 1/0.5625 (1.777...)		pr5/ps5
+ *	anything else			pr6/ps6 (assumes x > 1.28)
+ *
+ * Both polynomials are evaluated by Horner's rule from the highest
+ * coefficient down and the quotient numerator/denominator is returned.
+ */
+static GENERIC pone(x)
+GENERIC x;
+{
+	const GENERIC *num_tab, *den_tab;	/* tables for the chosen interval */
+	GENERIC num, den, recip, w;
+	int form;	/* which numerator/denominator lengths the tables have */
+	int k;
+
+	if (x > huge)
+		return one;
+
+	recip = one/x;
+	w = recip*recip;
+
+	/* Same comparison order as the table intervals, largest first. */
+	if (x > sixteen) {
+		num_tab = pr0; den_tab = ps0; form = 0;
+	} else if (x > eight) {
+		num_tab = pr1; den_tab = ps1; form = 1;
+	} else if (x > five) {
+		num_tab = pr2; den_tab = ps2; form = 1;
+	} else if (x > 3.5L) {
+		num_tab = pr3; den_tab = ps3; form = 2;
+	} else if (x > 2.5L) {
+		num_tab = pr4; den_tab = ps4; form = 2;
+	} else if (x > (1.0L/0.5625L)) {
+		num_tab = pr5; den_tab = ps5; form = 2;
+	} else {
+		num_tab = pr6; den_tab = ps6; form = 2;
+	}
+
+	/* Fold in the coefficients that the shared loop below does not reach. */
+	switch (form) {
+	case 0:		/* 12 numerator terms, 11 denominator terms */
+		num = w*num_tab[11] + num_tab[10];
+		den = den_tab[10];
+		k = 9;
+		break;
+	case 1:		/* 12 numerator terms, 14 denominator terms */
+		num = num_tab[11];
+		den = den_tab[11] + w*(den_tab[12] + w*den_tab[13]);
+		k = 10;
+		break;
+	default:	/* 13 terms each */
+		num = num_tab[12];
+		den = den_tab[12];
+		k = 11;
+		break;
+	}
+
+	for (; k >= 0; k--) {
+		num = w*num + num_tab[k];
+		den = w*den + den_tab[k];
+	}
+	return num/den;
+}
+
+
+static GENERIC qr0[12] = {
+	3.749999999999999999999999999999999971033e-0001L,
+	4.256726035237050601607682277433094262226e+0002L,
+	1.875976490812878489192409978945401066066e+0005L,
+	4.170314268048041914273603680317745592790e+0007L,
+	5.092750132543855817293451118974555746551e+0009L,
+	3.494749676278488654103505795794139483404e+0011L,
+	1.327062148257437316997667817096694173709e+0013L,
+	2.648993138273427226907503742066551150490e+0014L,
+	2.511695665909547412222430494473998127684e+0015L, +
9.274694506662289043224310499164702306096e+0015L, + 8.150904170663663829331320302911792892002e+0015L, + -5.001918733707662355772037829620388765122e+0014L, +}; +static GENERIC qs0[11] = { + 1.0e0L, + 1.135400380229880160428715273982155760093e+0003L, + 5.005701183877126164326765545516590744360e+0005L, + 1.113444200113712167984337603933040102987e+0008L, + 1.361074819925223062778717565699039471124e+0010L, + 9.355750985802849484438933905325982809653e+0011L, + 3.563462786008988825003965543857998084828e+0013L, + 7.155145113900094163648726863803802910454e+0014L, + 6.871266835834472758055559013851843654113e+0015L, + 2.622030899226736712644974988157345234092e+0016L, + 2.602912729172876330650077021706139707746e+0016L, +}; +static GENERIC qr1[12] = { + 3.749999999999999999997762458207284405806e-0001L, + 2.697883998881706839929255517498189980485e+0002L, + 7.755195925781028489386938870473834411019e+0004L, + 1.166777762104017777198211072895528968355e+0007L, + 1.011504772984321168320010084520261069362e+0009L, + 5.246007703574156853577754571720205550010e+0010L, + 1.637692549885592683166116551691266537647e+0012L, + 3.022303623698185669912990310925039382495e+0013L, + 3.154769927290655684846107030265909987946e+0014L, + 1.715819913441554770089730934808123360921e+0015L, + 4.165044355759732622273534445131736188510e+0015L, + 3.151381420874174705643100381708086287596e+0015L, +}; +static GENERIC qs1[14] = { + 1.0e0L, + 7.197091705351218239785633172408276982828e+0002L, + 2.070012799599548685544883041297609861055e+0005L, + 3.117014815317656221871840152778458754516e+0007L, + 2.705719678902554974863325877025902971727e+0009L, + 1.406113614727345726925060648750867264098e+0011L, + 4.403777536067131320363005978631674817359e+0012L, + 8.170725690209322283061499386703167242894e+0013L, + 8.609458844975495289227794126964431210566e+0014L, + 4.766766367015473481257280600694952920204e+0015L, + 1.202286587943342194863557940888115641650e+0016L, + 1.012474328306200909525063936061756024120e+0016L, + 
6.183552022678917858273222879615824070703e+0014L, + -9.756731548558226997573737400988225722740e+0013L, +}; +static GENERIC qr2[12] = { + 3.749999999999999481245647262226994293189e-0001L, + 1.471366807289771354491181140167359026735e+0002L, + 2.279432486768448220142080962843526951250e+0004L, + 1.828943048523771225163804043356958285893e+0006L, + 8.379828388647823135832220596417725010837e+0007L, + 2.279814029335044024585393671278378022053e+0009L, + 3.711653952257118120832817785271466441420e+0010L, + 3.557650914518554549916730572553105048068e+0011L, + 1.924583483146095896259774329498934160650e+0012L, + 5.424386256063736390759567088291887140278e+0012L, + 6.839325621241776786206509704671746841737e+0012L, + 2.702169563144001166291686452305436313971e+0012L, +}; +static GENERIC qs2[14] = { + 1.0e0L, + 3.926379194439388135703211933895203191089e+0002L, + 6.089148804106598297488336063007609312276e+0004L, + 4.893546162973278583711376356041614150645e+0006L, + 2.247571119114497845046388801813832219404e+0008L, + 6.137635663350177751290469334200757872645e+0009L, + 1.005115019784102856424493519524998953678e+0011L, + 9.725664462014503832860151384604677240620e+0011L, + 5.345525100485511116148634192844434636072e+0012L, + 1.549944007398946691720862738173956994779e+0013L, + 2.067148441178952625710302124163264760362e+0013L, + 9.401565402641963611295119487242595462301e+0012L, + 3.548217088622398274748837287769709374385e+0011L, + -2.934470341719047120076509938432417352365e+0010L, +}; +static GENERIC qr3[13] = { + 3.749999999999412724084579833297451472091e-0001L, + 9.058478580291706212422978492938435582527e+0001L, + 8.524056033161038750461083666711724381171e+0003L, + 4.105967158629109427753434569223631014730e+0005L, + 1.118326603378531348259783091972623333657e+0007L, + 1.794636683403578918528064904714132329343e+0008L, + 1.714314157463635959556133236004368896724e+0009L, + 9.622092032236084846572067257267661456030e+0009L, + 3.057759524485859159957762858780768355020e+0010L, + 
5.129306780754798531609621454415938890020e+0010L, + 3.999122002794961070680636194346316041352e+0010L, + 1.122298454643493485989721564358100345388e+0010L, + 5.603981987645989709668830968522362582221e+0008L, +}; +static GENERIC qs3[13] = { + 1.0e0L, + 2.418328663076578169836155170053634419922e+0002L, + 2.279620205900121042587523541281272875520e+0004L, + 1.100984222585729521470129014992217092794e+0006L, + 3.010743223679247091004262516286654516282e+0007L, + 4.860925542827367817289619265215599433996e+0008L, + 4.686668111035348691982715864307839581243e+0009L, + 2.668701788405102017427214705946730894074e+0010L, + 8.677395746106802640390580944836650584903e+0010L, + 1.511936455574951790658498795945106643036e+0011L, + 1.260845604432623478002018696873608353093e+0011L, + 4.052692278419853853911440231600864589805e+0010L, + 2.965516519212226064983267822243329694729e+0009L, +}; +static GENERIC qr4[13] = { + 3.749999999919234164154669754440123072618e-0001L, + 5.844218580776819864791168253485055101858e+0001L, + 3.489273514092912982675669411371435670220e+0003L, + 1.050523637774575684509663430018995479594e+0005L, + 1.764549172059701565500717319792780115289e+0006L, + 1.725532438844133795028063102681497371154e+0007L, + 9.938114847359778539965140247590176334874e+0007L, + 3.331710808184595545396883770200772842314e+0008L, + 6.271970557641881511609560444872797282698e+0008L, + 6.188529798677357075020774923903737913285e+0008L, + 2.821905302742849974509982167877885011629e+0008L, + 4.615467358646911976773290256984329814896e+0007L, + 1.348140608731546467396685802693380693275e+0006L, +}; +static GENERIC qs4[13] = { + 1.0e0L, + 1.561192663112345185261418296389902133372e+0002L, + 9.346678031144098270547225423124213083072e+0003L, + 2.825851246482293547838023847601704751590e+0005L, + 4.776572711622156091710902891124911556293e+0006L, + 4.715106953717135402977938048006267859302e+0007L, + 2.753962350894311316439652227611209035193e+0008L, + 9.428501434615463207768964787500411575223e+0008L, + 
1.832650858775206787088236896454141572617e+0009L, + 1.901697378939743226948920874296595242257e+0009L, + 9.433322226854293780627188599226380812725e+0008L, + 1.808520540608671608680284520798858587370e+0008L, + 7.983342331736662753157217446919462398008e+0006L, +}; +static GENERIC qr5[13] = { + 3.749999995331364437028988850515190446719e-0001L, + 3.739356381766559882677514593041627547911e+0001L, + 1.399562500629413529355265462912819802551e+0003L, + 2.594154053098947925345332218062210111753e+0004L, + 2.640149879297408640394163979394594318371e+0005L, + 1.542471854873199142031889093591449397995e+0006L, + 5.242272868972053374067572098992335425895e+0006L, + 1.025834487769410221329633071426044839935e+0007L, + 1.116553924239448940142230579060124209622e+0007L, + 6.318076065595910176374916303525884653514e+0006L, + 1.641218086168640408527639735915512881785e+0006L, + 1.522369793529178644168813882912134706444e+0005L, + 2.526530541062297200914180060208669584055e+0003L, +}; +static GENERIC qs5[13] = { + 1.0e0L, + 9.998960735935075380397545659016287506660e+0001L, + 3.758767417842043742686475060540416737562e+0003L, + 7.013652806952306520121959742519780781653e+0004L, + 7.208949808818615099246529616211730446850e+0005L, + 4.272753927109614455417836186072202009252e+0006L, + 1.482524411356470699336129814111025434703e+0007L, + 2.988750366665678233425279237627700803473e+0007L, + 3.396957890261080492694709150553619185065e+0007L, + 2.050652487738593004111578091156304540386e+0007L, + 5.900504120811732547616511555946279451316e+0006L, + 6.563391409260160897024498082273183468347e+0005L, + 1.692629845012790205348966731477187041419e+0004L, +}; +static GENERIC qr6[13] = { + 3.749999861516664133157566870858975421296e-0001L, + 2.367863756747764863120797431599473468918e+0001L, + 5.476715802114976248882067325630793143777e+0002L, + 6.143190357869842894025012945444096170251e+0003L, + 3.716250534677997850513733595140463851730e+0004L, + 1.270883463823876752138326905022875657430e+0005L, + 
2.495301449636814481646371665429083801388e+0005L,
+	2.789578988212952248340486296254398601942e+0005L,
+	1.718247946911109055931819087137397324634e+0005L,
+	5.458973214011665714330326732204106364229e+0004L,
+	7.912102686687948786048943339759596652813e+0003L,
+	4.077961006160866935722030715149087938091e+0002L,
+	3.765206972770245085551057237882528510428e+0000L,
+};
+static GENERIC qs6[13] = {
+	1.0e0L,
+	6.341646532940517305641893852673926809601e+0001L,
+	1.477058277414040790932597537920671025359e+0003L,
+	1.674406564031044491436044253393536487604e+0004L,
+	1.028516501369755949895050806908994650768e+0005L,
+	3.593620042532885295087463507733285434207e+0005L,
+	7.267924991381020915185873399453724799625e+0005L,
+	8.462277510768818399961191426205006083088e+0005L,
+	5.514399892230892163373611895645500250514e+0005L,
+	1.898084241009259353540620272932188102299e+0005L,
+	3.102941242117739015721984123081026253068e+0004L,
+	1.958971184431466907681440650181421086143e+0003L,
+	2.878853357310495087181721613889455121867e+0001L,
+};
+
+/*
+ * qone(x): (1/x) * r(z)/s(z), where r and s are polynomials in
+ * z = 1/x^2 evaluated by Horner's rule.  The coefficient tables are
+ * selected by the magnitude of x:
+ *
+ *	x > sixteen (16)	qr0 / qs0
+ *	x > eight (8)		qr1 / qs1
+ *	x > five		qr2 / qs2
+ *	x > 3.5			qr3 / qs3
+ *	x > 2.5			qr4 / qs4
+ *	x > 1/0.5625		qr5 / qs5
+ *	otherwise		qr6 / qs6  (x is assumed to exceed 1.28)
+ *
+ * For x > huge (1e30) the result is 0.375/x.
+ */
+static GENERIC
+qone(GENERIC x)
+{
+	GENERIC s, r, t, z;
+	int i;
+
+	if (x > huge)
+		return 0.375L / x;
+	t = one / x;
+	z = t * t;
+	if (x > sixteen) {
+		/* numerator starts at index 11, denominator at index 10 */
+		r = z * qr0[11] + qr0[10];
+		s = qs0[10];
+		for (i = 9; i >= 0; i--) {
+			r = z * r + qr0[i];
+			s = z * s + qs0[i];
+		}
+	} else if (x > eight) {
+		/* denominator carries terms 12 and 13 beyond the numerator */
+		r = qr1[11];
+		s = qs1[11] + z * (qs1[12] + z * qs1[13]);
+		for (i = 10; i >= 0; i--) {
+			r = z * r + qr1[i];
+			s = z * s + qs1[i];
+		}
+	} else if (x > five) {	/* x > 5.0 */
+		r = qr2[11];
+		s = qs2[11] + z * (qs2[12] + z * qs2[13]);
+		for (i = 10; i >= 0; i--) {
+			r = z * r + qr2[i];
+			s = z * s + qs2[i];
+		}
+	} else if (x > 3.5L) {
+		r = qr3[12];
+		s = qs3[12];
+		for (i = 11; i >= 0; i--) {
+			r = z * r + qr3[i];
+			s = z * s + qs3[i];
+		}
+	} else if (x > 2.5L) {
+		r = qr4[12];
+		s = qs4[12];
+		for (i = 11; i >= 0; i--) {
+			r = z * r + qr4[i];
+			s = z * s + qs4[i];
+		}
+	} else if (x > (1.0L / 0.5625L)) {
+		r = qr5[12];
+		s = qs5[12];
+		for (i = 11; i >= 0; i--) {
+			r = z * r + qr5[i];
+			s = z * s + qs5[i];
+		}
+	} else {	/* assume x > 1.28 */
+		r = qr6[12];
+		s = qs6[12];
+		for (i = 11; i >= 0; i--) {
+			r = z * r + qr6[i];
+			s = z * s + qs6[i];
+		}
+	}
+	return t * (r / s);
+}
diff --git a/usr/src/libm/src/LD/jnl.c b/usr/src/libm/src/LD/jnl.c
new file mode 100644
index 0000000..04fb096
--- /dev/null
+++ b/usr/src/libm/src/LD/jnl.c
@@ -0,0 +1,266 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)jnl.c 1.10 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak jnl = __jnl
+#pragma weak ynl = __ynl
+#endif
+
+/*
+ * floating point Bessel's function of the 1st and 2nd kind
+ * of order n: jn(n,x),yn(n,x);
+ *
+ * Special cases:
+ *	y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal;
+ *	y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
+ * Note 2. About jn(n,x), yn(n,x)
+ *	For n=0, j0(x) is called,
+ *	for n=1, j1(x) is called,
+ *	for n<x, forward recursion is used starting
+ *	from values of j0(x) and j1(x).
+ *	for n>x, a continued fraction approximation to
+ *	j(n,x)/j(n-1,x) is evaluated and then backward
+ *	recursion is used starting from a supposed value
+ *	for j(n,x). The resulting value of j(0,x) is
+ *	compared with the actual value to correct the
+ *	supposed value of j(n,x).
+ *
+ *	yn(n,x) is similar in all respects, except
+ *	that forward recursion is used for all
+ *	values of n>1.
+ *
+ */
+
+#include "libm.h"
+#include <float.h>	/* LDBL_MAX */
+
+#define GENERIC long double
+
+static const GENERIC
+invsqrtpi= 5.641895835477562869480794515607725858441e-0001L,
+two = 2.0L,
+zero = 0.0L,
+one = 1.0L;
+
+/*
+ * jnl(n, x): Bessel function of the first kind of integer order n.
+ *
+ * Orders 0 and 1 are delegated to j0l/j1l.  NaN propagates as x+x.
+ * x == 0 and x == +/-Inf yield zero.  Otherwise:
+ *	n <= |x|, |x| > 1e91:	one-term asymptotic formula;
+ *	n <= |x|:		forward recurrence from j0l, j1l;
+ *	n >  |x|:		continued fraction followed by backward
+ *				recurrence, normalised with j0l(x).
+ * The sign is restored at the end for odd n and negative x.
+ */
+GENERIC
+jnl(int n, GENERIC x)
+{
+	int i, sgn;
+	GENERIC a, b, temp, z, w;
+
+	/*
+	 * J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
+	 * Thus, J(-n,x) = J(n,-x)
+	 */
+	if (n < 0) {
+		n = -n;
+		x = -x;
+	}
+	if (n == 0)
+		return (j0l(x));
+	if (n == 1)
+		return (j1l(x));
+	if (x != x)
+		return x + x;
+	if ((n & 1) == 0)
+		sgn = 0;		/* even n */
+	else
+		sgn = signbitl(x);	/* odd n */
+	x = fabsl(x);
+	if (x == zero || !finitel(x)) {
+		b = zero;
+	} else if ((GENERIC)n <= x) {
+		/* Safe to use J(n+1,x) = 2n/x * J(n,x) - J(n-1,x) */
+		if (x > 1.0e91L) {
+			/*
+			 * x >> n**2
+			 *	Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(pi*x))
+			 *	Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(pi*x))
+			 * Let s=sin(x), c=cos(x),
+			 * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2), then
+			 *
+			 *	n	sin(xn)*sqt2	cos(xn)*sqt2
+			 *	----------------------------------
+			 *	0	 s-c		 c+s
+			 *	1	-s-c		-c+s
+			 *	2	-s+c		-c-s
+			 *	3	 s+c		 c-s
+			 */
+			switch (n & 3) {
+			case 0: temp =  cosl(x)+sinl(x); break;
+			case 1: temp = -cosl(x)+sinl(x); break;
+			case 2: temp = -cosl(x)-sinl(x); break;
+			case 3: temp =  cosl(x)-sinl(x); break;
+			}
+			b = invsqrtpi*temp/sqrtl(x);
+		} else {
+			a = j0l(x);
+			b = j1l(x);
+			for (i = 1; i < n; i++) {
+				temp = b;
+				/* avoid underflow */
+				b = b*((GENERIC)(i+i)/x) - a;
+				a = temp;
+			}
+		}
+	} else {
+		/*
+		 * NOTE(review): the tiny-x test below (threshold and
+		 * leading Taylor term) was re-derived from the
+		 * algorithm after the original text was lost; confirm
+		 * the threshold against the upstream jnl.c.
+		 */
+		if (x < 1e-17L) {	/* use J(n,x) = 1/n!*(x/2)^n */
+			b = powl(0.5L*x, (GENERIC)n);
+			if (b != zero) {
+				for (a = one, i = 1; i <= n; i++)
+					a *= (GENERIC)i;
+				b = b/a;
+			}
+		} else {
+			GENERIC t, v;
+			double q0, q1, h, tmp;
+			int k, m;
+
+			/*
+			 * use backward recurrence
+			 *
+			 *			 x	x^2	 x^2
+			 *  J(n,x)/J(n-1,x) =	---- - ------ - ------ ...
+			 *			 2n    2(n+1)	2(n+2)
+			 *
+			 *			 1	 1	  1
+			 *  (for large x)   =	---- - ------ - ------ ...
+			 *			 2n    2(n+1)	2(n+2)
+			 *			 --    ------	------
+			 *			 x	 x	  x
+			 *
+			 * Let w = 2n/x and h = 2/x, then the above quotient
+			 * is equal to the continued fraction:
+			 *		    1
+			 *	= -----------------------
+			 *			1
+			 *	   w - -----------------
+			 *			   1
+			 *		w+h - ---------
+			 *			w+2h - ...
+			 *
+			 * To determine how many terms needed, let
+			 * Q(0) = w, Q(1) = w(w+h) - 1,
+			 * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+			 * When Q(k) > 1e4	good for single
+			 * When Q(k) > 1e9	good for double
+			 * When Q(k) > 1e17	good for quadruple
+			 */
+			/* determine k */
+			w = (n+n)/(double)x; h = 2.0/(double)x;
+			q0 = w; z = w+h; q1 = w*z - 1.0; k = 1;
+			while (q1 < 1.0e17) {
+				k += 1; z += h;
+				tmp = z*q1 - q0;
+				q0 = q1;
+				q1 = tmp;
+			}
+			m = n+n;
+			for (t = zero, i = 2*(n+k); i >= m; i -= 2)
+				t = one/(i/x-t);
+			a = t;
+			b = one;
+			/*
+			 * estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+			 * hence, if n*(log(2n/x)) > ...
+			 *  single 8.8722839355e+01
+			 *  double 7.09782712893383973096e+02
+			 *  long double 1.1356523406294143949491931077970765006170e+04
+			 * then recurrent value may overflow and the result is
+			 * likely underflow to zero
+			 */
+			tmp = n;
+			v = two/x;
+			tmp = tmp*logl(fabsl(v*tmp));
+			if (tmp < 1.1356523406294143949491931077970765e+04L) {
+				for (i = n-1; i > 0; i--) {
+					temp = b;
+					b = ((i+i)/x)*b - a;
+					a = temp;
+				}
+			} else {
+				for (i = n-1; i > 0; i--) {
+					temp = b;
+					b = ((i+i)/x)*b - a;
+					a = temp;
+					/* rescale before b can overflow */
+					if (b > 1e1000L) {
+						a /= b;
+						t /= b;
+						b = 1.0;
+					}
+				}
+			}
+			b = (t*j0l(x)/b);
+		}
+	}
+	if (sgn == 1) return -b; else return b;
+}
+
+/*
+ * ynl(n, x): Bessel function of the second kind of integer order n.
+ *
+ * NaN propagates as x+x; x == 0 returns -one/zero (-Inf) and x < 0
+ * returns zero/zero (NaN).  A negative order is folded to |n| with
+ * the result negated when |n| is odd.  Orders 0 and 1 are delegated
+ * to y0l/y1l; x == +Inf yields zero.  Otherwise the one-term
+ * asymptotic formula is used for x > 1e91 and forward recurrence
+ * from y0l, y1l for everything else.
+ */
+GENERIC
+ynl(int n, GENERIC x)
+{
+	int i;
+	int sign;
+	GENERIC a, b, temp;
+
+	if (x != x)
+		return x + x;
+	if (x <= zero) {
+		if (x == zero)
+			return -one/zero;
+		else
+			return zero/zero;
+	}
+	sign = 1;
+	if (n < 0) {
+		n = -n;
+		if ((n & 1) == 1) sign = -1;
+	}
+	if (n == 0)
+		return (y0l(x));
+	if (n == 1)
+		return (sign*y1l(x));
+	if (!finitel(x))
+		return zero;
+
+	if (x > 1.0e91L) {
+		/*
+		 * x >> n**2
+		 *	Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(pi*x))
+		 *	Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(pi*x))
+		 * Let s=sin(x), c=cos(x),
+		 * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2), then
+		 *
+		 *	n	sin(xn)*sqt2	cos(xn)*sqt2
+		 *	----------------------------------
+		 *	0	 s-c		 c+s
+		 *	1	-s-c		-c+s
+		 *	2	-s+c		-c-s
+		 *	3	 s+c		 c-s
+		 */
+		switch (n & 3) {
+		case 0: temp =  sinl(x)-cosl(x); break;
+		case 1: temp = -sinl(x)-cosl(x); break;
+		case 2: temp = -sinl(x)+cosl(x); break;
+		case 3: temp =  sinl(x)+cosl(x); break;
+		}
+		b = invsqrtpi*temp/sqrtl(x);
+	} else {
+		a = y0l(x);
+		b = y1l(x);
+		/*
+		 * fix 1262058 and take care of non-default rounding
+		 */
+		for (i = 1; i < n; i++) {
+			temp = b;
+			b *= (GENERIC) (i + i) / x;
+			/* stop once the product has saturated */
+			if (b <= -LDBL_MAX)
+				break;
+			b -= a;
+			a = temp;
+		}
+	}
+	if (sign > 0) return b; else return -b;
+}
diff --git a/usr/src/libm/src/LD/lgammal.c b/usr/src/libm/src/LD/lgammal.c
new file mode 100644
index 0000000..159ea47
--- /dev/null
+++ b/usr/src/libm/src/LD/lgammal.c
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are
subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)lgammal.c 1.11 06/01/31 SMI"
+
+#pragma weak lgammal = __lgammal
+
+/*
+ * long double lgammal(long double x);
+ */
+
+#include "libm.h"
+#include "libm_synonyms.h"
+#include "longdouble.h"
+
+extern int signgam;
+extern int signgaml;
+
+/*
+ * Non-reentrant entry point: the kernel routine __k_lgammal() is handed
+ * the address of the global signgaml, and that value is then mirrored
+ * into signgam before the kernel's result is returned.
+ */
+long double
+lgammal(long double x) {
+	long double result;
+
+	result = __k_lgammal(x, &signgaml);
+	/* SUSv3 requires the setting of signgam */
+	signgam = signgaml;
+	return (result);
+}
diff --git a/usr/src/libm/src/LD/lgammal_r.c b/usr/src/libm/src/LD/lgammal_r.c
new file mode 100644
index 0000000..373ccaf
--- /dev/null
+++ b/usr/src/libm/src/LD/lgammal_r.c
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)lgammal_r.c 1.4 06/01/31 SMI"
+
+/*
+ * long double lgammal_r(long double x, int *signgamlp);
+ *
+ * Thin wrapper: forwards x and the caller-supplied signgamlp pointer
+ * straight to __k_lgammal() and returns its result.  This wrapper itself
+ * references no global variable.
+ * NOTE(review): presumably __k_lgammal stores the sign of gamma(x)
+ * through signgamlp -- confirm against its definition.
+ */
+
+#pragma weak lgammal_r = __lgammal_r
+
+#include "libm.h"
+#include "longdouble.h"
+
+long double
+lgammal_r(long double x, int *signgamlp) {
+	return __k_lgammal(x, signgamlp);
+}
diff --git a/usr/src/libm/src/LD/log1pl.c b/usr/src/libm/src/LD/log1pl.c
new file mode 100644
index 0000000..6391623
--- /dev/null
+++ b/usr/src/libm/src/LD/log1pl.c
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)log1pl.c 1.7 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak log1pl = __log1pl
+#endif
+
+/*
+ * log1pl(x)
+ * Kahan's trick based on log(1+x)/x being a slow varying function.
+ */
+
+#include "libm.h"
+
+#if defined(__i386)
+#define __swapRD __swap87RD
+#endif
+extern enum fp_direction_type __swapRD(enum fp_direction_type);
+
+/*
+ * Returns log(1 + x).
+ *
+ *	NaN		-> x + x
+ *	x < -1		-> whatever logl(x) returns for a negative argument
+ *	1 + x == 1	-> x itself (x is too small to change 1)
+ *	1 + x == x	-> logl(x) (the 1 is absorbed by a huge x)
+ *	otherwise	-> x * logl(1 + x) / ((1 + x) - 1)
+ *
+ * The sum 1 + x and everything after it run in round-to-nearest; the
+ * caller's rounding direction is put back before returning.
+ */
+long double
+log1pl(long double x) {
+	enum fp_direction_type saved_rd;
+	long double u;
+
+	if (x != x)
+		return (x + x);
+	if (x < -1.L)
+		return (logl(x));
+
+	saved_rd = __swapRD(fp_nearest);
+	u = 1.L + x;
+	if (u != 1.L) {
+		if (u == x) {
+			x = logl(x);
+		} else {
+			/* the ratio compensates the rounding error in u */
+			x = x * (logl(u) / (u - 1.L));
+		}
+	}
+	if (saved_rd != fp_nearest)
+		(void) __swapRD(saved_rd);
+	return (x);
+}
diff --git a/usr/src/libm/src/LD/logbl.c b/usr/src/libm/src/LD/logbl.c
new file mode 100644
index 0000000..52b4dc6
--- /dev/null
+++ b/usr/src/libm/src/LD/logbl.c
@@ -0,0 +1,82 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)logbl.c 1.10 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak logbl = __logbl
+#endif
+
+#include "libm.h"
+#include "xpg6.h" /* __xpg6 */
+#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb
+
+#if defined(__sparc)
+#define ISNORMALL(k, x) (k != 0x7fff) /* assuming k != 0 */
+#define X86PDNRM(k, x)
+#define XSCALE_OFFSET 0x406f /* 0x3fff + 112 */
+static const long double xscale = 5192296858534827628530496329220096.0L;
+	/* 2^112 */
+#elif defined(__i386)
+/*
+ * if pseudo-denormal, replace by the equivalent normal
+ */
+#define X86PDNRM(k, x) if (k == 0 && (((int *) &x)[1] & 0x80000000) != 0) \
+	((int *) &x)[2] |= k = 1
+#if defined(HANDLE_UNSUPPORTED) /* assuming k != 0 */
+#define ISNORMALL(k, x) (k != 0x7fff && (((int *) &x)[1] & 0x80000000) != 0)
+#else
+#define ISNORMALL(k, x) (k != 0x7fff)
+#endif
+#define XSCALE_OFFSET 0x403e /* 0x3fff + 63 */
+static const long double xscale = 9223372036854775808.0L; /* 2^63 */
+#endif
+
+/*
+ * Divide v by a zero held in a static object so that the division is
+ * carried out at run time (FENV_ACCESS is switched on for the body).
+ */
+static long double
+raise_division(long double v) {
+#pragma STDC FENV_ACCESS ON
+	static const long double zero = 0.0L;
+
+	return (v / zero);
+}
+
+/*
+ * logbl(x): unbiased exponent of x, as a long double.
+ *
+ *	biased exponent != 0, normal	-> k - 0x3fff
+ *	biased exponent != 0, otherwise	-> x * x   (Inf or NaN operand)
+ *	x == 0				-> -1/0 via raise_division()
+ *	subnormal, __xpg6 flag set	-> exponent after scaling by xscale
+ *	subnormal, flag clear		-> -16382
+ */
+long double
+logbl(long double x) {
+	int k = XBIASED_EXP(x);
+
+	X86PDNRM(k, x);
+	if (k != 0) {
+		if (ISNORMALL(k, x))
+			return ((long double) (k - 0x3fff));
+		return (x * x);
+	}
+
+	/* biased exponent is zero: x is either zero or subnormal */
+	if (ISZEROL(x))
+		return (raise_division(-1.0L));
+	if ((__xpg6 & _C99SUSv3_logb) == 0)
+		return ((long double) (-16382));
+
+	x *= xscale;	/* scale up by 2^112 or 2^63 */
+	return (long double) (XBIASED_EXP(x) - XSCALE_OFFSET);
+}
diff --git a/usr/src/libm/src/LD/longdouble.h b/usr/src/libm/src/LD/longdouble.h
new file mode 100644
index 0000000..0a0c878
--- /dev/null
+++ b/usr/src/libm/src/LD/longdouble.h
@@ -0,0 +1,155 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)longdouble.h 1.8 06/01/31 SMI" + +#include + +extern long double __k_cosl(long double, long double); +extern long double __k_lgammal(long double, int *); +extern long double __k_sincosl(long double, long double, long double *); +extern long double __k_sinl(long double, long double); +extern long double __k_tanl(long double, long double, int); +extern long double __poly_libmq(long double, int, long double *); +extern int __rem_pio2l(long double, long double *); + +extern long double acosdl(long double); +extern long double acoshl(long double); +extern long double acosl(long double); +extern long double acospil(long double); +extern long double acospl(long double); +extern long double aintl(long double); +extern long double anintl(long double); +extern long double annuityl(long double, long double); +extern long double asindl(long double); +extern long double asinhl(long double); +extern long double asinl(long double); +extern long double asinpil(long double); +extern long double asinpl(long double); +extern long double atan2dl(long double, long double); +extern long double atan2l(long double, long double); +extern long double atan2pil(long double, long double); +extern long double atandl(long double); +extern long double atanhl(long 
double); +extern long double atanl(long double); +extern long double atanpil(long double); +extern long double atanpl(long double); +extern long double cbrtl(long double); +extern long double ceill(long double); +extern long double compoundl(long double, long double); +extern long double copysignl(long double, long double); +extern long double cosdl(long double); +extern long double coshl(long double); +extern long double cosl(long double); +extern long double cospil(long double); +extern long double cospl(long double); +extern long double erfcl(long double); +extern long double erfl(long double); +extern long double exp10l(long double); +extern long double exp2l(long double); +extern long double expl(long double); +extern long double expm1l(long double); +extern long double fabsl(long double); +extern int finitel(long double); +extern long double floorl(long double); +extern long double fmodl(long double, long double); +extern enum fp_class_type fp_classl(long double); +extern long double gammal(long double); +extern long double hypotl(long double, long double); +extern int ilogbl(long double); +extern long double infinityl(void); +extern int irintl(long double); +extern int isinfl(long double); +extern int isnanl(long double); +extern int isnormall(long double); +extern int issubnormall(long double); +extern int iszerol(long double); +extern long double j0l(long double); +extern long double j1l(long double); +extern long double jnl(int, long double); +extern long double lgammal(long double); +extern long double log10l(long double); +extern long double log1pl(long double); +extern long double log2l(long double); +extern long double logbl(long double); +extern long double logl(long double); +extern long double max_normall(void); +extern long double max_subnormall(void); +extern long double min_normall(void); +extern long double min_subnormall(void); +extern long double nextafterl(long double, long double); +extern int nintl(long double); +extern long double 
pow_li(long double *, int *); +extern long double powl(long double, long double); +extern long double quiet_nanl(long); +extern long double remainderl(long double, long double); +extern long double rintl(long double); +extern long double scalbl(long double, long double); +extern long double scalbnl(long double, int); +extern long double signaling_nanl(long); +extern int signbitl(long double); +extern long double significandl(long double); +extern void sincosdl(long double, long double *, long double *); +extern void sincosl(long double, long double *, long double *); +extern void sincospil(long double, long double *, long double *); +extern void sincospl(long double, long double *, long double *); +extern long double sindl(long double); +extern long double sinhl(long double); +extern long double sinl(long double); +extern long double sinpil(long double); +extern long double sinpl(long double); +extern long double sqrtl(long double); +extern long double tandl(long double); +extern long double tanhl(long double); +extern long double tanl(long double); +extern long double tanpil(long double); +extern long double tanpl(long double); +extern long double y0l(long double); +extern long double y1l(long double); +extern long double ynl(int, long double); + +extern long double q_copysign_(long double *, long double *); +extern long double q_fabs_(long double *); +extern int iq_finite_(long double *); +extern long double q_fmod_(long double *, long double *); +extern enum fp_class_type iq_fp_class_(long double *); +extern int iq_ilogb_(long double *); +extern long double q_infinity_(void); +extern int iq_isinf_(long double *); +extern int iq_isnan_(long double *); +extern int iq_isnormal_(long double *); +extern int iq_issubnormal_(long double *); +extern int iq_iszero_(long double *); +extern long double q_max_normal_(void); +extern long double q_max_subnormal_(void); +extern long double q_min_normal_(void); +extern long double q_min_subnormal_(void); +extern long double 
q_nextafter_(long double *, long double *); +extern long double q_quiet_nan_(long *); +extern long double q_remainder_(long double *, long double *); +extern long double q_scalbn_(long double *, int *); +extern long double q_signaling_nan_(long *); +extern int iq_signbit_(long double *); diff --git a/usr/src/libm/src/LD/nextafterl.c b/usr/src/libm/src/LD/nextafterl.c new file mode 100644 index 0000000..f1d042f --- /dev/null +++ b/usr/src/libm/src/LD/nextafterl.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)nextafterl.c 1.3 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak nextafterl = __nextafterl
+#endif
+
+#include "libm.h"
+#include <float.h>	/* LDBL_MAX, LDBL_MIN */
+
+#if defined(__sparc)
+#define n0 0
+#define n1 1
+#define n2 2
+#define n3 3
+#define X86PDNRM1(x)
+#define INC(px) { \
+	if (++px[n3] == 0) \
+		if (++px[n2] == 0) \
+			if (++px[n1] == 0) \
+				++px[n0]; \
+	}
+#define DEC(px) { \
+	if (--px[n3] == 0xffffffff) \
+		if (--px[n2] == 0xffffffff) \
+			if (--px[n1] == 0xffffffff) \
+				--px[n0]; \
+	}
+#elif defined(__i386)
+#define n0 2
+#define n1 1
+#define n2 0
+#define n3 0
+/*
+ * if pseudo-denormal, replace by the equivalent normal
+ */
+#define X86PDNRM1(x) if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \
+	0x80000000) != 0) \
+	((int *) &x)[2] |= 1
+#define INC(px) { \
+	if (++px[n2] == 0) \
+		if ((++px[n1] & ~0x80000000) == 0) \
+			px[n1] = 0x80000000, ++px[n0]; \
+	}
+#define DEC(px) { \
+	if (--px[n2] == 0xffffffff) \
+		if (--px[n1] == 0x7fffffff) \
+			if ((--px[n0] & 0x7fff) != 0) \
+				px[n1] |= 0x80000000; \
+	}
+#endif
+
+/*
+ * nextafterl(x, y): the representable value adjacent to x in the
+ * direction of y.
+ *
+ *	x == y		-> y
+ *	x or y NaN	-> x * y
+ *	x == 0		-> smallest-magnitude value carrying y's sign
+ *	otherwise	-> the bit pattern of x is stepped by one unit,
+ *			   away from zero when moving away from it and
+ *			   towards zero otherwise.
+ *
+ * The words of x are viewed as unsigned so that the carries and borrows
+ * performed by INC/DEC wrap with defined behaviour; with a signed view,
+ * stepping a word across 0x7fffffff/0x80000000 is signed overflow.
+ */
+long double
+nextafterl(long double x, long double y) {
+	unsigned *px = (unsigned *) &x;
+	unsigned *py = (unsigned *) &y;
+
+	if (x == y)
+		return (y);		/* C99 requirement */
+	if (x != x || y != y)
+		return (x * y);
+
+	if (ISZEROL(x)) {	/* x == 0.0 */
+		px[n0] = py[n0] & XSGNMSK;
+		px[n1] = px[n2] = 0;
+		px[n3] = 1;
+	} else {
+		X86PDNRM1(x);
+		if ((px[n0] & XSGNMSK) == 0) {	/* x > 0.0 */
+			if (x > y)	/* x > y */
+				DEC(px)
+			else
+				INC(px)
+		} else {
+			if (x < y)	/* x < y */
+				DEC(px)
+			else
+				INC(px)
+		}
+	}
+#ifndef lint
+	{
+		/*
+		 * The products below exist only for their effect on the
+		 * floating-point status flags; the value is discarded.
+		 */
+		volatile long double dummy;
+		int k = XBIASED_EXP(x);
+
+		if (k == 0)
+			dummy = LDBL_MIN * copysignl(LDBL_MIN, x);
+		else if (k == 0x7fff)
+			dummy = LDBL_MAX * copysignl(LDBL_MAX, x);
+	}
+#endif
+	return (x);
+}
diff --git a/usr/src/libm/src/LD/scalbl.c b/usr/src/libm/src/LD/scalbl.c
new file mode 100644
index 0000000..b49ec2d
--- /dev/null
+++ b/usr/src/libm/src/LD/scalbl.c
@@ -0,0 +1,69 @@
+/*
+ * CDDL
HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)scalbl.c 1.8 06/01/31 SMI" + +#pragma weak scalbl = __scalbl + +/* + * scalbl(x,n): return x * 2**n by manipulating exponent. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +long double +scalbl(long double x, long double fn) { + int *py = (int *) &fn, n; + long double z; + + if (isnanl(x) || isnanl(fn)) + return x * fn; + + /* fn is +/-Inf */ +#if !defined(__i386) + if ((py[0] & 0x7fff0000) == 0x7fff0000) + if ((py[0] & 0x80000000) != 0) +#else + if ((py[2] & 0x7fff) == 0x7fff) + if ((py[2] & 0x8000) != 0) +#endif + return x / (-fn); + else + return x * fn; + + if (rintl(fn) != fn) + return (fn - fn) / (fn - fn); + if (fn > 65000.0L) + z = scalbnl(x, 65000); + else if (-fn > 65000.0L) + z = scalbnl(x, -65000); + else { + n = (int) fn; + z = scalbnl(x, n); + } + return z; +} diff --git a/usr/src/libm/src/LD/signgaml.c b/usr/src/libm/src/LD/signgaml.c new file mode 100644 index 0000000..a11970c --- /dev/null +++ b/usr/src/libm/src/LD/signgaml.c @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)signgaml.c 1.4 06/01/31 SMI" + +#pragma weak signgaml = __signgaml + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +int signgaml = 0; diff --git a/usr/src/libm/src/LD/significandl.c b/usr/src/libm/src/LD/significandl.c new file mode 100644 index 0000000..8c31902 --- /dev/null +++ b/usr/src/libm/src/LD/significandl.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)significandl.c 1.8 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak significandl = __significandl +#endif + +#include "libm.h" + +long double +significandl(long double x) { + if (ISZEROL(x) || XBIASED_EXP(x) == 0x7fff) /* 0/+-Inf/NaN */ + return (x + x); + else + return (scalbnl(x, -ilogbl(x))); +} diff --git a/usr/src/libm/src/LD/sincosl.c b/usr/src/libm/src/LD/sincosl.c new file mode 100644 index 0000000..1cf0205 --- /dev/null +++ b/usr/src/libm/src/LD/sincosl.c @@ -0,0 +1,112 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sincosl.c 1.9 06/01/31 SMI" + +#pragma weak sincosl = __sincosl + +/* INDENT OFF */ +/* sincosl(x, s, c) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sincosl ... sin and cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). 
Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +void +sincosl(long double x, long double *s, long double *c) { + long double y[2], z = 0.0L; + int n, ix; +#if defined(__i386) + int *px = (int *) &x; +#endif + + /* trig(Inf or NaN) is NaN */ + if (!finitel(x)) { + *s = *c = x - x; + return; + } + + /* High word of x. */ +#if !defined(__i386) + ix = *(int *) &x; +#else + XTOI(px, ix); +#endif + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + *s = __k_sincosl(x, z, c); + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: + *s = __k_sincosl(y[0], y[1], c); + break; + case 1: + *c = -__k_sincosl(y[0], y[1], s); + break; + case 2: + *s = -__k_sincosl(y[0], y[1], c); + *c = -*c; + break; + case 3: + *c = __k_sincosl(y[0], y[1], s); + *s = -*s; + } + } +} diff --git a/usr/src/libm/src/LD/sincospil.c b/usr/src/libm/src/LD/sincospil.c new file mode 100644 index 0000000..d454aaf --- /dev/null +++ b/usr/src/libm/src/LD/sincospil.c @@ -0,0 +1,205 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sincospil.c 1.11 06/01/31 SMI" + +#pragma weak sincospil = __sincospil + +/* + * void sincospil(long double x, long double *s, long double *c) + * *s = sinl(pi*x); *c = cosl(pi*x); + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y==z, then x is a multiple of 1/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. 
This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants a more accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__LITTLE_ENDIAN) || defined(__i386) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; /* value is 2**63, i.e. 2**(PREC-1) */ +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof(long double) / sizeof(int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; /* value is 2**112, i.e. 2**(PREC-1) */ +#endif + +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474L, /* sqrt(0.5); L suffix keeps full long double precision */ +tiny = 1.0e-100L; /* added to sqrth below; presumably only to raise inexact */ + +void +sincospil(long double x, long double *s, long double *c) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) { + *s = *c = x - x; + } + else { + if (k >= PREC) { + *s = zero; + *c = one; + } + else if (k == PRECM1) { + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } + else { + *s = -zero; + *c = -one; + } + } + else { /* k = Prec - 2 */ + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } + else { + *s = one; + *c = zero; + } + if ((lx & 2) != 0) { + *s = -*s; + *c = -*c; + } + } + } + } + else if (k < -2) /* |x| < 0.25 */ + *s = __k_sincosl(pi * fabsl(x), 
zero, c); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + if (k < PRECM2) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; + if (t == y) + k = 1; + else if (t > y) { + n -= 1; + t = quater + (y - t) * quater; + } + else + t = (y - t) * quater; + } + else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of z */ + k = 1; + } + if (k) { /* x = N/4 */ + if((n & 1) != 0) + *s = *c = sqrth + tiny; + else + if ((n & 2) == 0) { + *s = zero; + *c = one; + } + else { + *s = one; + *c = zero; + } + if ((n & 4) != 0) + *s = -*s; + if (((n + 1) & 4) != 0) + *c = -*c; + } + else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + *s = __k_sincosl(pi * t, zero, c); + else + *c = __k_sincosl(pi * t, zero, s); + if ((n & 4) != 0) + *s = -*s; + if (((n + 2) & 4) != 0) + *c = -*c; + } + } + if (hx < 0) + *s = -*s; +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/libm/src/LD/sinhl.c b/usr/src/libm/src/LD/sinhl.c new file mode 100644 index 0000000..cde48f3 --- /dev/null +++ b/usr/src/libm/src/LD/sinhl.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinhl.c 1.7 06/01/31 SMI" + +#pragma weak sinhl = __sinhl + +#include "libm.h" + +/* SINH(X) + * RETURN THE HYPERBOLIC SINE OF X + * + * Method : + * 1. reduce x to non-negative by SINH(-x) = - SINH(x). + * 2. + * + * EXPM1(x) + EXPM1(x)/(EXPM1(x)+1) + * 0 <= x <= lnovft : SINH(x) := -------------------------------- + * 2 + * + * lnovft <= x < INF : SINH(x) := EXP(x-MEP1*ln2)*2**ME + * + * here + * lnovft logarithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * SINH(x) is x if x is +INF, -INF, or NaN. + * only SINH(0)=0 is exact for finite argument. + * + */ + +static const long double C[] = { + 0.5L, + 1.0L, + 1.135652340629414394879149e+04L, + 7.004447686242549087858985e-16L +}; + +#define half C[0] +#define one C[1] +#define lnovft C[2] +#define lnovlo C[3] + +long double +sinhl(long double x) +{ + long double r, t; + + if (!finitel(x)) + return (x + x); /* x is INF or NaN */ + r = fabsl(x); + if (r < lnovft) { + t = expm1l(r); + r = copysignl((t + t / (one + t)) * half, x); + } else { + r = copysignl(expl((r - lnovft) - lnovlo), x); + r = scalbnl(r, 16383); + } + return (r); +} diff --git a/usr/src/libm/src/LD/sinl.c b/usr/src/libm/src/LD/sinl.c new file mode 100644 index 0000000..ebeb5da --- /dev/null +++ b/usr/src/libm/src/LD/sinl.c @@ -0,0 +1,107 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinl.c 1.9 06/01/31 SMI" + +#pragma weak sinl = __sinl + +/* INDENT OFF */ +/* sinl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
+ */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +long double +sinl(long double x) { + long double y[2], z = 0.0L; + int n, ix; +#if defined(__i386) + int *px = (int *) &x; +#endif + + /* sin(Inf or NaN) is NaN */ + if (!finitel(x)) + return x - x; + + /* High word of x. */ +#if !defined(__i386) + ix = *(int *) &x; +#else + XTOI(px, ix); +#endif + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + return __k_sinl(x, z); + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + switch (n & 3) { + case 0: + return __k_sinl(y[0], y[1]); + case 1: + return __k_cosl(y[0], y[1]); + case 2: + return -__k_sinl(y[0], y[1]); + case 3: + return -__k_cosl(y[0], y[1]); + /* NOTREACHED */ + } + } +} diff --git a/usr/src/libm/src/LD/sinpil.c b/usr/src/libm/src/LD/sinpil.c new file mode 100644 index 0000000..9114037 --- /dev/null +++ b/usr/src/libm/src/LD/sinpil.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)sinpil.c 1.11 06/01/31 SMI" + +#pragma weak sinpil = __sinpil + +/* long double sinpil(long double x), + * return long double precision sinl(pi*x). + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y==z, then x is a multiple of 1/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants a more accurate result, one may break up pi*t in + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. 
+ */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__LITTLE_ENDIAN) || defined(__i386) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; /* value is 2**63, i.e. 2**(PREC-1) */ +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof(long double) / sizeof(int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; /* value is 2**112, i.e. 2**(PREC-1) */ +#endif + +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474L, /* sqrt(0.5); L suffix keeps full long double precision */ +tiny = 1.0e-100L; /* added to sqrth below; presumably only to raise inexact */ + +long double +sinpil(long double x) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) + y = x - x; + else { + if (k >= PREC) + y = zero; + else if (k == PRECM1) + y = (lx & 1) == 0 ? zero: -zero; + else { /* k = Prec - 2 */ + y = (lx & 1) == 0 ? 
zero : one; + if ((lx & 2) != 0) + y = -y; + } + } + } + else if (k < -2) /* |x| < 0.25 */ + y = __k_sinl(pi * fabsl(x), zero); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + if (k < PRECM2) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; + if (t == y) + k = 1; + else if (t > y) { + n -= 1; + t = quater + (y - t) * quater; + } + else + t = (y - t) * quater; + } + else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of z */ + k = 1; + } + if (k) { /* x = N/4 */ + if((n & 1) != 0) + y = sqrth + tiny; + else + y = (n & 2) == 0 ? zero : one; + if ((n & 4) != 0) + y = -y; + } + else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + y = __k_sinl(pi * t, zero); + else + y = __k_cosl(pi * t, zero); + if ((n & 4) != 0) + y = -y; + } + } + return hx >= 0 ? y : -y; +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/libm/src/LD/tanhl.c b/usr/src/libm/src/LD/tanhl.c new file mode 100644 index 0000000..a9ffad7 --- /dev/null +++ b/usr/src/libm/src/LD/tanhl.c @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tanhl.c 1.7 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak tanhl = __tanhl +#endif + +/* + * tanhl(x) returns the hyperbolic tangent of x + * + * Method : + * 1. reduce x to non-negative: tanhl(-x) = - tanhl(x). + * 2. + * 0 < x <= small : tanhl(x) := x + * -expm1l(-2x) + * small < x <= 1 : tanhl(x) := -------------- + * expm1l(-2x) + 2 + * 2 + * 1 <= x <= threshold : tanhl(x) := 1 - --------------- + * expm1l(2x) + 2 + * threshold < x <= INF : tanhl(x) := 1. + * + * where + * single : small = 1.e-5 threshold = 11.0 + * double : small = 1.e-10 threshold = 22.0 + * quad : small = 1.e-20 threshold = 45.0 + * + * Note: threshold was chosen so that + * fl(1.0+2/(expm1(2*threshold)+2)) == 1. + * + * Special cases: + * tanhl(NaN) is NaN; + * only tanhl(0.0)=0.0 is exact for finite argument. + */ + +#include "libm.h" + +static const long double small = 1.0e-20L, one = 1.0, two = 2.0, +#ifndef lint + big = 1.0e+20L, +#endif + threshold = 45.0L; + +long double +tanhl(long double x) { + long double t, y, z; + int signx; + + if (isnanl(x)) + return (x + x); /* x is NaN */ + signx = signbitl(x); + t = fabsl(x); + z = one; + if (t <= threshold) { + if (t > one) + z = one - two / (expm1l(t + t) + two); + else if (t > small) { + y = expm1l(-t - t); + z = -y / (y + two); + } else { +#ifndef lint + volatile long double dummy = t + big; + /* inexact if t != 0 */ +#endif + return (x); + } + } else if (!finitel(t)) + return (copysignl(one, x)); + else + return (signx ? -z + small * small : z - small * small); + return (signx ? 
-z : z); +} diff --git a/usr/src/libm/src/LD/tanl.c b/usr/src/libm/src/LD/tanl.c new file mode 100644 index 0000000..8231612 --- /dev/null +++ b/usr/src/libm/src/LD/tanl.c @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tanl.c 1.9 06/01/31 SMI" + +#pragma weak tanl = __tanl + +/* INDENT OFF */ +/* tanl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_tanl ... tangent function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. 
+ * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "longdouble.h" + +long double +tanl(long double x) { + long double y[2], z = 0.0L; + int n, ix; +#if defined(__i386) + int *px = (int *) &x; +#endif + + /* trig(Inf or NaN) is NaN */ + if (!finitel(x)) + return x - x; + + /* High word of x. */ +#if !defined(__i386) + ix = *(int *) &x; +#else + XTOI(px, ix); +#endif + + /* |x| ~< pi/4 */ + ix &= 0x7fffffff; + if (ix <= 0x3ffe9220) + return __k_tanl(x, z, 0); + + /* argument reduction needed */ + else { + n = __rem_pio2l(x, y); + return __k_tanl(y[0], y[1], n & 1); + } +} diff --git a/usr/src/libm/src/Q/_TBL_atanl.c b/usr/src/libm/src/Q/_TBL_atanl.c new file mode 100644 index 0000000..033c857 --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_atanl.c @@ -0,0 +1,234 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_atanl.c 1.8 06/01/31 SMI" + +/* + * Table of constants for atanl. + * By K.C. 
Ng, March 9, 1989 + */ + +#include "libm.h" + +const long double _TBL_atanl_hi[] = { + +1.243549945467614350313548491638710241657e-0001L, + +1.320397616146387492746844065265695322625e-0001L, + +1.397088742891636451833677767390950568161e-0001L, + +1.473614810886516356098027603968455182107e-0001L, + +1.549967419239409823037143749334921913337e-0001L, + +1.626138285979485753736415637615578006202e-0001L, + +1.702119252854744044904966070997617136954e-0001L, + +1.777902289926760707966247992158246889946e-0001L, + +1.853479499956947648860259612285446466726e-0001L, + +1.928843122579746641970587106902273034988e-0001L, + +2.003985538258785146539457850343783844615e-0001L, + +2.078899272022629936053349831029943247563e-0001L, + +2.153576996977380480244596271664896416574e-0001L, + +2.228011537593945157710321221404325552502e-0001L, + +2.302195872768437302401709596798029906555e-0001L, + +2.376123138654712524738836343256377791989e-0001L, + +2.449786631268641541720824812112758064196e-0001L, + +2.596296294082575310299464431839719056011e-0001L, + +2.741674511196587975993718983421757859244e-0001L, + +2.885873618940773956236114199582183450433e-0001L, + +3.028848683749714055605560945055582181228e-0001L, + +3.170557532091470098090155766744673297585e-0001L, + +3.310960767041320949443387877569445542126e-0001L, + +3.450021772071051088676812869000516840829e-0001L, + +3.587706702705722203959200639264605221536e-0001L, + +3.723984466767542219236550382837018264141e-0001L, + +3.858826693980737758976954846072313963819e-0001L, + +3.992207695752525656147166961588647649110e-0001L, + +4.124104415973873068997912896671269426092e-0001L, + +4.254496373700422895422636051807923301382e-0001L, + +4.383365598579578054456160492147713089588e-0001L, + +4.510696559885234763756392572821934407380e-0001L, + +4.636476090008061162142562314612143971334e-0001L, + +4.883339510564055238671649607470648445964e-0001L, + +5.123894603107377066666010205842592380556e-0001L, + +5.358112379604637002690850687076914469847e-0001L, + 
+5.585993153435624359715082164016612287587e-0001L, + +5.807563535676703992032744750015008237512e-0001L, + +6.022873461349641816821226942042329192246e-0001L, + +6.231993299340659309924753490603745936779e-0001L, + +6.435011087932843868028092287173226044727e-0001L, + +6.632029927060932553632543102382758341723e-0001L, + +6.823165548747480782564299817111529878473e-0001L, + +7.008544078844501724579512817867512731862e-0001L, + +7.188299996216245054170141515259046989104e-0001L, + +7.362574289814281317428352710891466247927e-0001L, + +7.531512809621943895247393702690288860057e-0001L, + +7.695264804056582604068200359856540172660e-0001L, + +7.853981633974483096156608458198756993698e-0001L, + +8.156919233162234110214608387456458267228e-0001L, + +8.441539861131710025178441482716474673863e-0001L, + +8.709034570756529531401731125978140729165e-0001L, + +8.960553845713439561748007180299377954660e-0001L, + +9.197196053504168172286034548210894096931e-0001L, + +9.420000403794636647379371705345936211589e-0001L, + +9.629943306809362018151958359970998967730e-0001L, + +9.827937232473290679857106110146660376257e-0001L, + +1.001483135694234732918329595301437489634e+0000L, + +1.019141344266349734638342917023063621235e+0000L, + +1.035841253008800176584694470325444073548e+0000L, + +1.051650212548373667459867312086299902692e+0000L, + +1.066630365315743563079176347420279908601e+0000L, + +1.080839000541168310887156729217199785900e+0000L, + +1.094328907321189919892788314610235276303e+0000L, + +1.107148717794090503017065460178537049754e+0000L, + +1.130953743979160446470933515536327756003e+0000L, + +1.152571997215667518040149862612751467283e+0000L, + +1.172273881128476386600594944133704600686e+0000L, + +1.190289949682531732927733774829318280338e+0000L, + +1.206817370285252530395511580056557662568e+0000L, + +1.222025323210989637041741743922570412029e+0000L, + +1.236059489478081941909451971109078614621e+0000L, + +1.249045772398254425829917077281090048355e+0000L, + 
+1.261093382252440419313940881247335764012e+0000L, + +1.272297395208717341296193749822480574646e+0000L, + +1.282740879744270747362885251136495516407e+0000L, + +1.292496667789785267903091421407081672353e+0000L, + +1.301628834009196143804785850366685502445e+0000L, + +1.310193935047555634256437689171905343754e+0000L, + +1.318242051016837049859330202327136304043e+0000L, + +1.325817663668032465059239210428475688616e+0000L, + +1.339705659598999539328303752589555785024e+0000L, + +1.352127380920954657189147941389812759877e+0000L, + +1.363300100359693954289298527825099156027e+0000L, + +1.373400766945015860861271926444961060484e+0000L, + +1.382574821490125858059967417768568516395e+0000L, + +1.390942827002418348642768694383643239549e+0000L, + +1.398605512271957595012670081611428272786e+0000L, + +1.405647649380269780952193401995808066441e+0000L, + +1.412141064608495215367613671858489085282e+0000L, + +1.418146998399631459403860303970098863261e+0000L, + +1.423717971406494118901819046610729710890e+0000L, + +1.428899272190732696418470074537198400139e+0000L, + +1.433730152484708986640471909669887388026e+0000L, + +1.438244794498222597961404247935481603967e+0000L, + +1.442473099109101820025292059937729181035e+0000L, + +1.446441332248135184199966842475880386611e+0000L, +}; + +const long double _TBL_atanl_lo[] = { + +1.407486919762806380231720282041430859065e-0036L, + -4.959696159473992555573043943799966949987e-0036L, + +8.952774562519464887393121344636183788152e-0036L, + +1.188043742320789571818076584354496443030e-0035L, + -2.781027811204514537842537512823435451463e-0037L, + +1.479722037702380032729553623431514726239e-0036L, + -4.216956140054819873287038480184963406819e-0036L, + +7.243122966691348464993032365631602349468e-0036L, + -2.157343008983917029989567935379065159119e-0036L, + -9.951574540512672355445236729812860518631e-0036L, + -3.906555899232483818161756973039787656743e-0036L, + +5.526029227179372681321198066466113031444e-0036L, + 
+8.841572221591432180768225431803645204369e-0036L, + -8.176772879158617925419332362828558820944e-0036L, + -1.334412303465614224379711382302833876421e-0036L, + -4.492733120781338290893073392468132589219e-0036L, + +4.494551147181249039320182433676250148336e-0036L, + -1.668808150427922355577672445964844056727e-0035L, + +1.562975758610795576946108656893732968411e-0035L, + -2.238983556330807855250797038533151084811e-0035L, + -4.831232174554731155187045067118216295832e-0036L, + -1.433617235290583287695892661098069884431e-0035L, + -8.744018199889993280298917417096058172481e-0036L, + +5.928463600852983744578036078546455593865e-0036L, + -2.237665124843624127606105529504351499363e-0035L, + +6.074583759933610541428031075667744213648e-0036L, + +1.537218711045194967779234476202996702309e-0035L, + +2.097606805675115624165712158247879024716e-0035L, + -5.562395640549543806072686220262281911497e-0036L, + +1.969736670783247184185841193489735190152e-0035L, + +2.107031196447948850903473363942488754370e-0035L, + -2.302735636298200160225651851085422984456e-0035L, + +4.895096422573334926686184352202977056848e-0036L, + -7.238014347779445821387272305082026475766e-0036L, + +1.636564886570361403163744339604956885811e-0035L, + -3.988581195823453079372912991980323419740e-0035L, + +4.158772212091261351041778392322742597344e-0035L, + +3.834742145455647215368468737733713502739e-0035L, + -9.225117893363872172351589646548899090659e-0036L, + +1.409461969045598952617573674185465039654e-0036L, + +3.356885780547223527061285142581080367945e-0035L, + +3.909099105552255239501810680323211880340e-0035L, + +5.295641697965420814052186270729703965359e-0036L, + -5.096084681994551436784706392366250713672e-0036L, + -4.495901442527761585832968039391831520500e-0035L, + +3.803922654455163426656685761596261429034e-0035L, + -4.405652287289551210830864219661168965762e-0036L, + +1.602502419248216107622380775342561907695e-0036L, + +2.167952532530945256199261006510838063526e-0035L, + 
+1.984403801351542212571536292573675410407e-0035L, + +3.913961947179974683450522735356843245724e-0035L, + +2.111344380797545350551845343679956185473e-0035L, + +3.155855727744469275503981694439277018543e-0035L, + +1.629504452035546140826558561950023833561e-0035L, + -3.508724520927030585615123035617120894580e-0035L, + +2.904104186428285567959105527094611730009e-0035L, + -2.312884345381835659093199520980662723328e-0035L, + -7.712492318147157843996797382071597987481e-0035L, + +2.753902782988692242909206359044995381933e-0035L, + -9.450089945318130895108454599083752773445e-0035L, + -7.306175530203209233759494600164318159101e-0035L, + -4.173614481395375219395277015740431906643e-0035L, + +3.436994835625640704534485526286425749647e-0035L, + -6.379024349229809090730208492427563489748e-0035L, + -9.684294381635326129100412786609400488464e-0036L, + +4.874675753913887090927595832669806057728e-0035L, + -8.753388647708419088451160136858547852751e-0035L, + +1.428474399232791889269255113808220484160e-0035L, + +5.726277621107338954256562569347449057228e-0035L, + -3.225488314878041124559482227075035491317e-0035L, + +7.885354819060987732596552525237673513561e-0035L, + +8.408173673903719409751503836536882928318e-0035L, + +7.472287035756368381507824298193454239425e-0035L, + +7.997720282579343528943481360087007043974e-0036L, + -8.057784077336213905484849234629395332153e-0035L, + +1.421774675367058306549004020905308580426e-0035L, + +1.223248691422120500410974356032312699327e-0035L, + +8.969605507083003644736195721794664042146e-0035L, + -3.148039443508188441068606673984936704609e-0035L, + -5.092714604071534501324064251761157116236e-0035L, + -5.743199771592413656813385943270585886166e-0035L, + -4.392045140508377027909976608047950844300e-0035L, + +9.110675398490771556301866677631321964372e-0035L, + -3.703256901427284100951240077306435653503e-0035L, + +8.816741942974671427690982540513176913907e-0035L, + -3.838934169602835250375231286170331051923e-0036L, + 
-3.346295934196089154634089550801425121335e-0035L, + -3.921262677678607438391618849895555508099e-0035L, + -7.834039739637786725586449456859141775022e-0035L, + +7.468101863245698652060064034062436100558e-0035L, + +8.911091861895691845113559487616548179839e-0035L, + +3.941816063227189053043179714566870857491e-0035L, + -4.104811408858010482019343563832718161219e-0035L, + -2.316541945158215332638394475622094450115e-0035L, + -1.842831258152531940939933020370545982007e-0035L, + +7.147731654670948234541171201790940212220e-0035L, + +2.991450157843587466215363770701953452571e-0035L, +}; diff --git a/usr/src/libm/src/Q/_TBL_cosl.c b/usr/src/libm/src/Q/_TBL_cosl.c new file mode 100644 index 0000000..8bdd3be --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_cosl.c @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_cosl.c 1.9 06/01/31 SMI" + +/* + * table of cosl(x) where x is 0.15625 + i*0.03125, i=0,1,...,74. + * {0x3ffc4000,0,0,0} --> (inc 0x800) --> {0x3ffe9000,0,0,0} + * 0.15625 0.03125 0.78125 (pi/4 = 0.785395663...) 
+ */ + +#include "libm.h" + +const long double _TBL_cosl_hi[] = { + +9.878177838164719441005030343632113165093e-0001L, + +9.872023778548304903960885335116224443952e-0001L, + +9.865719083994975887573374074953084086015e-0001L, + +9.859263850706614357470592528694354441508e-0001L, + +9.852658177182138162042947097595789939359e-0001L, + +9.845902164215998060143951077820687364441e-0001L, + +9.838995914896639721783093514164872453367e-0001L, + +9.831939534604930725278757612989684275134e-0001L, + +9.824733131012552574873276832436224950147e-0001L, + +9.817376814080357763345961479047090031971e-0001L, + +9.809870696056691904693298964353096645569e-0001L, + +9.802214891475680962478518674217420182872e-0001L, + +9.794409517155483599985309545029874933258e-0001L, + +9.786454692196508678842676797432752842534e-0001L, + +9.778350537979597933319715729444545493330e-0001L, + +9.770097178164173848020456902145767884624e-0001L, + +9.761694738686352767239890354351355336967e-0001L, + +9.753143347757023264772798556222610938601e-0001L, + +9.744443135859889803497110560454343440501e-0001L, + +9.735594235749481714583125145098981012012e-0001L, + +9.726596782449127526709130582675652597851e-0001L, + +9.717450913248894676192664941325029643211e-0001L, + +9.708156767703494629474905457850460270255e-0001L, + +9.698714487630153449923440459169307761267e-0001L, + +9.689124217106447841445954494941892053405e-0001L, + +9.669500292306778220083416236105315034050e-0001L, + +9.649286191047710095810746653157483714001e-0001L, + +9.628483147093796998997010934802143646862e-0001L, + +9.607092430155619030666593505813134717046e-0001L, + +9.585115345812286273019694081549198217856e-0001L, + +9.562553235431752969755999422630283611690e-0001L, + +9.539407476088947339813247959876116228319e-0001L, + +9.515679480481722021454882173642709621657e-0001L, + +9.491370696844630276658474217621056230077e-0001L, + +9.466482608860533218460995072955329761108e-0001L, + +9.441016735570043456300176912531248599600e-0001L, + 
+9.414974631278810686445112360536708146537e-0001L, + +9.388357885462654886325783059847125541586e-0001L, + +9.361168122670552902942374110195085880318e-0001L, + +9.333407002425484356552992294699955265909e-0001L, + +9.305076219123142911494767922295554806411e-0001L, + +9.276177501928519096280307987999613501918e-0001L, + +9.246712614670360985021130145601387709996e-0001L, + +9.216683355733519181754113682027127142383e-0001L, + +9.186091557949182678378249777185498625801e-0001L, + +9.154939088483012285639177321802218816645e-0001L, + +9.123227848721178464920295420473417337577e-0001L, + +9.090959774154310516503817356844764174905e-0001L, + +9.058136834259364207445166606527002577088e-0001L, + +9.024761032379415049251832726758959994948e-0001L, + +8.990834405601384562165449292093793065380e-0001L, + +8.956359024631706989005700004462563503448e-0001L, + +8.921336993669944047239002537237885750767e-0001L, + +8.885770450280355433176090231160209800973e-0001L, + +8.849661565261432916972965369666479264236e-0001L, + +8.813012542513405991401619082981001728813e-0001L, + +8.775825618903727161162815826038296809401e-0001L, + +8.699847180584173888289155999014662429887e-0001L, + +8.621744799348805043671625102533242741250e-0001L, + +8.541537542773853851434517851051031764412e-0001L, + +8.459244992310679544597230785974932624246e-0001L, + +8.374887238505236853153533489172406171513e-0001L, + +8.288484876093257348101717901191166381510e-0001L, + +8.200058998972340082555506338765560425268e-0001L, + +8.109631195052179021895348039410807243520e-0001L, + +8.017223540984184506074926056529642078277e-0001L, + +7.922858596771785431415013237817093985302e-0001L, + +7.826559400262727969307874474281390259485e-0001L, + +7.728349461524715448108518459134251775639e-0001L, + +7.628252757105762505070987536254297918621e-0001L, + +7.526293724180664760545413248471431159893e-0001L, + +7.422497254585013069913472534496105367206e-0001L, + +7.316888688738208863118387530000845290150e-0001L, + 
+7.209493809456964180438127841484476879092e-0001L, + +7.100338835660796749741216439594902194333e-0001L, +}; + +const long double _TBL_cosl_lo[] = { + +4.742713078367058978924681076205264183648e-0035L, + -3.400922580038153352909034207677181560093e-0035L, + -2.473279499369853624762524012127207246323e-0035L, + -3.902320877004518000716232064546238578734e-0035L, + +2.265680295058180661415174977785279521173e-0035L, + -2.254772246444203259170588302104662991085e-0036L, + +2.734143189480662078104863307237612648780e-0035L, + -3.701912560693446438656202168446355677822e-0035L, + -1.649243588915575846254638680142303422320e-0035L, + +2.725042655698714891044457001868653187367e-0035L, + -1.908992594100964198869963315362783449712e-0036L, + -1.465547554627127716918860559012698704471e-0035L, + +4.428780565915607570668447972900679899952e-0035L, + +1.439313657623768907227720140857454695843e-0035L, + -3.792074229051804169372108537791927020038e-0035L, + -2.610779485320152706286660129045188117210e-0036L, + -2.877279742494815830479448606269854599891e-0035L, + +3.991065835589256680020290949615723238476e-0035L, + +3.099479059550534193045145385925483327991e-0035L, + +1.146611686911982702287167679510021879695e-0035L, + -3.917592318193149049660769585602527582231e-0035L, + -1.951971321999985008371800682574139933978e-0035L, + +2.974588209723938591252776820212028367960e-0035L, + -2.038390756570426530537115267786908745116e-0036L, + -5.536347061134619893988732877493263844943e-0036L, + -4.389722144327924120620880599904805370946e-0035L, + -3.666858326708207750024755456027611364938e-0035L, + +4.889869663833434507994220130518213362272e-0036L, + -5.870115582315839607120133516012219562069e-0036L, + +2.507707793716364811457350893931543805685e-0035L, + +3.216165721908659970511036451358372071749e-0035L, + +2.880756890524786020083959729246571876109e-0035L, + +6.368426285981156583087492887998846060579e-0036L, + +6.844339659916371522503091904688601360028e-0037L, + 
-4.329063396630008909415294204988246215817e-0035L, + +1.038125352401202296098224611721455839121e-0035L, + +3.207093666031656020715902410548849578474e-0036L, + -3.987580687739740313485850727522454807713e-0035L, + +3.404815912367106584354098624390321615909e-0035L, + -4.752557072516798311248008988313821999362e-0035L, + +2.745410885517329825733352856854160918801e-0035L, + +7.585203719163457562812011671268547121453e-0036L, + -4.141871248600318251086493472511758380472e-0035L, + -1.835879954339576229487102635414793218992e-0035L, + +2.976082827782744334600577457984098492775e-0035L, + -3.507755179553069548150909011683056358498e-0035L, + +7.869038865563736742679481321788455681309e-0036L, + +1.208860140284441557337760250856779527931e-0035L, + -3.609503076059411697756765630044671398302e-0035L, + +2.262828995013444190183062956802106020046e-0035L, + -2.067726154909043706666702751547519756391e-0035L, + +3.735937416598668830886204955423117851511e-0035L, + -1.107719376025673147326930792646924920884e-0035L, + +4.123542789546647314438136551770221119198e-0036L, + +4.533705702883256304420378263134621416396e-0035L, + -1.434191923121166877839456190096294453634e-0035L, + -2.894849601813639248551925385406988512004e-0035L, + -4.681686383005756267827413197921838600437e-0035L, + -3.715568183175335822345624718357717998947e-0035L, + -1.687075340130951528732220617225731715663e-0035L, + +1.980549471419898781791643429252740528544e-0035L, + +2.727619978720845330457777186773261559081e-0035L, + +1.430825081004965817190481755062397701422e-0035L, + -1.720088119552308234167243322979912469421e-0035L, + +1.104812928567944364260514024188043464704e-0035L, + +6.094878513052330893256279394589637408556e-0036L, + +2.475195582284731678792488916738076213891e-0035L, + +1.693320456792379194278077712885062541662e-0035L, + +3.949752293412116642372415347411469162440e-0035L, + +4.220674118886015050047489393823250795070e-0035L, + +3.713069586576631896654508643111045710544e-0035L, + 
-3.789252700498009135399234738712875263543e-0035L, + +1.482556375489316971849917102931986196306e-0035L, + +4.786912857336733794995363260508118324272e-0035L, + -4.096232247636924432208967529079024417475e-0035L, +}; diff --git a/usr/src/libm/src/Q/_TBL_expl.c b/usr/src/libm/src/Q/_TBL_expl.c new file mode 100644 index 0000000..b586c77 --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_expl.c @@ -0,0 +1,104 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_expl.c 1.8 06/01/31 SMI" + +/* + * Table of constants for expl. + * By K.C. 
Ng, March 9, 1989 + */ + +#include "libm.h" + +const long double _TBL_expl_hi[] = { + +1.000000000000000000000000000000000000000e+0000L, + +1.021897148654116678234480134783299439782e+0000L, + +1.044273782427413840321966478739929008785e+0000L, + +1.067140400676823618169521120992809162607e+0000L, + +1.090507732665257659207010655760707978993e+0000L, + +1.114386742595892536308812956919603067800e+0000L, + +1.138788634756691653703830283841511254720e+0000L, + +1.163724858777577513813573599092185312343e+0000L, + +1.189207115002721066717499970560475915293e+0000L, + +1.215247359980468878116520251338798457624e+0000L, + +1.241857812073484048593677468726595605511e+0000L, + +1.269050957191733222554419081032338004715e+0000L, + +1.296839554651009665933754117792451159835e+0000L, + +1.325236643159741294629537095498721674113e+0000L, + +1.354255546936892728298014740140702804343e+0000L, + +1.383909881963831954872659527265192818002e+0000L, + +1.414213562373095048801688724209698078570e+0000L, + +1.445180806977046620037006241471670905678e+0000L, + +1.476826145939499311386907480374049923924e+0000L, + +1.509164427593422739766019551033193531420e+0000L, + +1.542210825407940823612291862090734841307e+0000L, + +1.575980845107886486455270160181905008906e+0000L, + +1.610490331949254308179520667357400583459e+0000L, + +1.645755478153964844518756724725822445667e+0000L, + +1.681792830507429086062250952466429790080e+0000L, + +1.718619298122477915629344376456312504516e+0000L, + +1.756252160373299483112160619375313221294e+0000L, + +1.794709075003107186427703242127781814354e+0000L, + +1.834008086409342463487083189588288856077e+0000L, + +1.874167634110299901329998949954446534439e+0000L, + +1.915206561397147293872611270295830887850e+0000L, + +1.957144124175400269018322251626871491190e+0000L, +}; + +const long double _TBL_expl_lo[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.805067874203309547455733330545737864651e-0035L, + -9.374520292280427421957567419730832143843e-0035L, + 
-1.596968447292758770712909630231499971233e-0035L, + +9.112493410125022978511686101672486662119e-0035L, + -6.504228206978548287230374775259388710985e-0035L, + -8.148468844525851137325691767488155323605e-0035L, + -5.066214576721800313372330745142903350963e-0035L, + -1.359830974688816973749875638245919118924e-0035L, + +9.497427635563196470307710566433246597109e-0035L, + -3.283170523176998601615065965333915261932e-0036L, + -5.017235709387190410290186530458428950862e-0035L, + -2.391474797689109171622834301602640139258e-0035L, + -8.350571357633908815298890737944083853080e-0036L, + +7.036756889073265042421737190671876440729e-0035L, + -5.182484853064646457536893018566956189817e-0035L, + +9.422242548621832065692116736394064879758e-0035L, + -3.967500825398862309167306130216418281103e-0035L, + +7.143528991563300614523273615092767243521e-0035L, + +1.159871252867985124246517834100444327747e-0035L, + +4.696933478358115495309739213201874466685e-0035L, + -3.386513175995004710799241984999819165197e-0035L, + -8.587318774298247068868655935103874453522e-0035L, + -9.605951548749350503185499362246069088835e-0035L, + +9.609733932128012784507558697141785813655e-0035L, + +6.378397921440028439244761449780848545957e-0035L, + +7.792430785695864249456461125169277701177e-0035L, + +7.361337767588456524131930836633932195088e-0035L, + -6.472995147913347230035214575612170525266e-0035L, + +8.587474417953698694278798062295229624207e-0035L, + +2.371815422825174835691651228302690977951e-0035L, + -3.026891682096118773004597373421900314256e-0037L, +}; diff --git a/usr/src/libm/src/Q/_TBL_expm1l.c b/usr/src/libm/src/Q/_TBL_expm1l.c new file mode 100644 index 0000000..67b5148 --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_expm1l.c @@ -0,0 +1,367 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_expm1l.c 1.4 06/01/31 SMI" + +/* + * Table of constants for expm1l. + * By K.C. Ng, June 30, 1995 + */ + +#include "libm.h" + +const long double _TBL_expm1lx[] = { + +7.8124999999999999999999999999995004619e-03L, + +2.3437499999999999999999999999998790275e-02L, + +3.9062499999999999999999999999981095794e-02L, + +5.4687500000000000000000000000007637516e-02L, + +7.0312500000000000000000000000001673152e-02L, + +8.5937500000000000000000000000004285194e-02L, + +1.0156249999999999999999999999997969348e-01L, + +1.1718749999999999999999999999998630182e-01L, + +1.3281249999999999999999999999999246480e-01L, + +1.4843750000000000000000000000000096296e-01L, + +1.6406249999999999999999999999999947037e-01L, + +1.7968750000000000000000000000007190941e-01L, + +1.9531249999999999999999999999999874815e-01L, + +2.1093749999999999999999999999999877222e-01L, + +2.2656250000000000000000000000000019259e-01L, + +2.4218749999999999999999999999999800185e-01L, + +2.5781249999999999999999999999996335918e-01L, + +2.7343749999999999999999999999999903704e-01L, + +2.8906249999999999999999999999998362960e-01L, + +3.0468750000000000000000000000000240741e-01L, + +3.2031249999999999999999999999999542592e-01L, + +3.3593749999999999999999999999999619629e-01L, + 
+3.5156250000000000000000000000001040002e-01L, + +3.6718749999999999999999999999999316295e-01L, + +3.8281250000000000000000000000000871483e-01L, + +3.9843750000000000000000000000000385186e-01L, + +4.1406249999999999999999999999999975926e-01L, + +4.2968750000000000000000000000000385186e-01L, + +4.4531250000000000000000000000032119697e-01L, + +4.6093749999999999999999999999999956667e-01L, + +4.7656250000000000000000000000000134815e-01L, + +4.9218749999999999999999999999999388517e-01L, + +5.0781249999999999999999999999999778518e-01L, + +5.2343749999999999999999999999999325925e-01L, + +5.3906249999999999999999999999990649610e-01L, + +5.5468750000000000000000000000000452594e-01L, + +5.7031250000000000000000000000000645187e-01L, + +5.8593749999999999999999999999999807407e-01L, + +6.0156250000000000000000000000000182963e-01L, + +6.1718750000000000000000000000000048148e-01L, + +6.3281250000000000000000000000000943706e-01L, + +6.4843749999999999999999999999999470369e-01L, + +6.6406250000000000000000000000000192593e-01L, + +6.7968750000000000000000000000000693335e-01L, + +6.9531250000000000000000000000001482966e-01L, + +7.1093750000000000000000000000000154074e-01L, + +7.2656250000000000000000000000000385186e-01L, + +7.4218750000000000000000000000000115556e-01L, + +7.5781250000000000000000000000000048148e-01L, + +7.7343749999999999999999999999999942222e-01L, + +7.8906249999999999999999999999999720740e-01L, + +8.0468749999999999999999999999999634073e-01L, + +8.2031249999999999999999999999999797777e-01L, + +8.3593750000000000000000000000000828150e-01L, + +8.5156249999999999999999999999999865185e-01L, + +8.6718749999999999999999999999999682222e-01L, + +8.8281249999999999999999999999999749629e-01L, + +8.9843749999999999999999999999999836296e-01L, + +9.1406249999999999999999999999999229628e-01L, + +9.2968750000000000000000000000000462223e-01L, + +9.4531249999999999999999999999999499258e-01L, + +9.6093749999999999999999999999999894074e-01L, + 
+9.7656249999999999999999999999999855555e-01L, + +9.9218750000000000000000000000000028889e-01L, + +1.0078124999999999999999999999999870963e+00L, + +1.0234375000000000000000000000000003852e+00L, + +1.0390624999999999999999999999999998074e+00L, + +1.0546874999999999999999999999999801629e+00L, + +1.0703125000000000000000000000000182963e+00L, + +1.0859375000000000000000000000000021185e+00L, + +1.1015624999999999999999999999999978815e+00L, + +1.1171874999999999999999999999999986518e+00L, + +1.1328124999999999999999999999999984593e+00L, + +1.1484374999999999999999999999999980741e+00L, + +1.1640625000000000000000000000000000000e+00L, + +1.1796874999999999999999999999999998074e+00L, + +1.1953125000000000000000000000000196445e+00L, + +1.2109374999999999999999999999999976889e+00L, + +1.2265625000000000000000000000000017333e+00L, + +1.2421874999999999999999999999999976889e+00L, + +1.2578124999999999999999999999999980741e+00L, + +1.2734374999999999999999999999999951852e+00L, + -7.8125000000000000000000000000074012886e-03L, + -2.3437499999999999999999999999997418050e-02L, + -3.9062499999999999999999999999998134255e-02L, + -5.4687500000000000000000000000006361587e-02L, + -7.0312500000000000000000000000006271309e-02L, + -8.5937500000000000000000000000001721300e-02L, + -1.0156250000000000000000000000000020463e-01L, + -1.1718750000000000000000000000000290093e-01L, + -1.3281249999999999999999999999999987963e-01L, + -1.4843749999999999999999999999999942222e-01L, + -1.6406250000000000000000000000000026482e-01L, + -1.7968750000000000000000000000000052963e-01L, + -1.9531249999999999999999999999999867592e-01L, + -2.1093750000000000000000000000000081852e-01L, + -2.2656250000000000000000000000000250371e-01L, + -2.4218749999999999999999999999999718333e-01L, + -2.5781250000000000000000000000001059261e-01L, + -2.7343749999999999999999999999999557036e-01L, + -2.8906250000000000000000000000000520001e-01L, + -3.0468749999999999999999999999999792963e-01L, + 
-3.2031250000000000000000000000000231112e-01L, + -3.3593749999999999999999999999999383702e-01L, + -3.5156249999999999999999999999999903704e-01L, + -3.6718749999999999999999999999999634073e-01L, + -3.8281249999999999999999999999999956667e-01L, + -3.9843750000000000000000000000000004815e-01L, + -4.1406249999999999999999999999999870000e-01L, + -4.2968750000000000000000000000000216667e-01L, + -4.4531250000000000000000000000000072222e-01L, + -4.6093749999999999999999999999999513703e-01L, + -4.7656250000000000000000000000000539260e-01L, + -4.9218749999999999999999999999999759259e-01L, + -5.0781250000000000000000000000000067408e-01L, + -5.2343750000000000000000000000001386670e-01L, + -5.3906249999999999999999999999996427400e-01L, + -5.5468750000000000000000000000000404445e-01L, + -5.7031249999999999999999999999998536293e-01L, + -5.8593749999999999999999999999999634073e-01L, + -6.0156250000000000000000000000000028889e-01L, + -6.1718749999999999999999999999998468886e-01L, + -6.3281249999999999999999999999999335554e-01L, + -6.4843750000000000000000000000000356297e-01L, + -6.6406250000000000000000000000000019259e-01L, + -6.7968750000000000000000000000000067408e-01L, + -6.9531249999999999999999999999998439997e-01L, + -7.1093750000000000000000000000000009630e-01L, + -7.2656250000000000000000000000000414075e-01L, + -7.4218749999999999999999999999998738516e-01L, + -7.5781250000000000000000000000000019259e-01L, + -7.7343750000000000000000000000000808891e-01L, + -7.8906250000000000000000000000000028889e-01L, + -8.0468750000000000000000000000000096296e-01L, + -8.2031249999999999999999999999999634073e-01L, + -8.3593750000000000000000000000000086667e-01L, + -8.5156250000000000000000000000000115556e-01L, + -8.6718750000000000000000000000000028889e-01L, + -8.8281250000000000000000000000000577779e-01L, + -8.9843750000000000000000000000000587409e-01L, + -9.1406250000000000000000000000000654816e-01L, + -9.2968749999999999999999999999999210369e-01L, + 
-9.4531250000000000000000000000000115556e-01L, + -9.6093749999999999999999999999999682222e-01L, + -9.7656250000000000000000000000000298519e-01L, + -9.9218749999999999999999999999999711111e-01L, + -1.0078124999999999999999999999999953778e+00L, + -1.0234375000000000000000000000000279260e+00L, + -1.0390625000000000000000000000000000000e+00L, + -1.0546875000000000000000000000000042370e+00L, + -1.0703124999999999999999999999999969185e+00L, + -1.0859374999999999999999999999999965333e+00L, + -1.1015625000000000000000000000000144445e+00L, + -1.1171875000000000000000000000000007704e+00L, + -1.1328125000000000000000000000000052000e+00L, + -1.1484375000000000000000000000000021185e+00L, + -1.1640625000000000000000000000000188741e+00L, + -1.1796874999999999999999999999999915259e+00L, + -1.1953125000000000000000000000000196445e+00L, + -1.2109374999999999999999999999999965333e+00L, + -1.2265624999999999999999999999999940296e+00L, + -1.2421874999999999999999999999999955704e+00L, + -1.2578125000000000000000000000000036593e+00L, + -1.2734375000000000000000000000000005778e+00L, +}; +const long double _TBL_expm1l[] = { + +7.8430972064479776934535597601230757455e-03L, + +2.3714316602357916968850532165767719684e-02L, + +3.9835471336230000576622009875172135295e-02L, + +5.6210497316931971181336703768137839969e-02L, + +7.2843392434877444411300095010815562014e-02L, + +8.9738217538093231018282025418516874073e-02L, + +1.0689909742365748278760239374175648642e-01L, + +1.2433022184475071745173290544494115241e-01L, + +1.4203584653356558967616143134879189360e-01L, + +1.6002029424032514702921521389409692713e-01L, + +1.7828795578866324266314330819585528511e-01L, + +1.9684329114762477118219685277015441058e-01L, + +2.1569083052054745183001825454039531082e-01L, + +2.3483517545109100468401611087600122614e-01L, + +2.5428099994668375200482125309068587301e-01L, + +2.7403305161966092927649653871633746915e-01L, + +2.9409615284637330982611062681002354822e-01L, + +3.1447520194454913428413492197987169290e-01L, 
+ +3.3517517436919679261104091066359163835e-01L, + +3.5620112392734023305848825938576026618e-01L, + +3.7755818401188367036076223788924855083e-01L, + +3.9925156885490683578620594549518425777e-01L, + +4.2128657480069675555953993770759156716e-01L, + +4.4366858159882686275236684344276542291e-01L, + +4.6640305371759914220725806222619001343e-01L, + +4.8949554167816997960557644740137111180e-01L, + +5.1295168340968543562438375411433268970e-01L, + +5.3677720562575679548630956319776281677e-01L, + +5.6097792522261245434042965895343734385e-01L, + +5.8555975069926749109404773234379454282e-01L, + +6.1052868360005765883685767151390117210e-01L, + +6.3589081997988998017269448860547030419e-01L, + +6.6165235189256768193303746403901387346e-01L, + +6.8781956890255283724467962874956344875e-01L, + +7.1439885962053580513691701731111144172e-01L, + +7.4139671326318637019829227657113109938e-01L, + +7.6881972123746738864840365608134114398e-01L, + +7.9667457874989774401796242919906293119e-01L, + +8.2496808644115750689424843988747369733e-01L, + +8.5370715204643438037652998773947217833e-01L, + +8.8289879208191679750745518776052322714e-01L, + +9.1255013355784542053624989476899678486e-01L, + +9.4266841571854127598629758313676072782e-01L, + +9.7326099180983534572639774176307759762e-01L, + +1.0043353308743311241896996041635484486e+00L, + +1.0358990195749384471803078942778539846e+00L, + +1.0679597640471238000138982954032239149e+00L, + +1.1005253917803293237405302109843095228e+00L, + +1.1336038535290198596069082796475886372e+00L, + +1.1672032252538246054702826653026856058e+00L, + +1.2013317100932473053430319197695098859e+00L, + +1.2359976403664263564195928556324051853e+00L, + +1.2712094796074337986378900692396989892e+00L, + +1.3069758246316098519065681271390426992e+00L, + +1.3433054076344374874670823571053750423e+00L, + +1.3802070983234694643656048039370676504e+00L, + +1.4176899060838283316233663173665695732e+00L, + +1.4557629821778080933856445408266366139e+00L, + 
+1.4944356219791145601003068865880283870e+00L, + +1.5337172672422898656593276319684411587e+00L, + +1.5736175084078752204957296405891018055e+00L, + +1.6141460869438746959611789503655886752e+00L, + +1.6553128977240916980462361132966809382e+00L, + +1.6971279914439187908098398388318058200e+00L, + +1.7396015770741706739548391625061335312e+00L, + +1.7827440243535594070547844776995992896e+00L, + +1.8265658663204204072713762882878497127e+00L, + +1.8710778018843073303053045700866748054e+00L, + +1.9162906984380836781353147778063807290e+00L, + +1.9622155945111488641976761209920359164e+00L, + +2.0088637024644465094546949911966296733e+00L, + +2.0562464112279129437484158431835361592e+00L, + +2.1043752890810342484246997824723919209e+00L, + +2.1532620864771907009732038831535016010e+00L, + +2.2029187389124781729299756967091584890e+00L, + +2.2533573698397068911414662577557965622e+00L, + +2.3045902936282890023428920188543322512e+00L, + +2.3566300185707375845529318204056188573e+00L, + +2.4094892499365111287660263409108200844e+00L, + +2.4631808930739490736114262571853290077e+00L, + +2.5177180565610557168667363422785819682e+00L, + +2.5731140554059017538295927080989585090e+00L, + -7.7820617397564878940627738863895136168e-03L, + -2.3164975049937966141654020345517900132e-02L, + -3.8309398394574704340244721980137502162e-02L, + -5.3219029217871103345945692892173875140e-02L, + -6.7897507640472422098597150880870814431e-02L, + -8.2348417348184187852664478898998721220e-02L, + -9.6575286466913289047103004903893595054e-02L, + -1.1058158842404436382535801893754083366e-01L, + -1.2437074279646178545389116639858446817e-01L, + -1.3794611614542428546897208319214646321e-01L, + -1.5131102283849604551092782942638950286e-01L, + -1.6446872585873492869892658849405193342e-01L, + -1.7742243760133541028616024893906020644e-01L, + -1.9017532065792070445541830028432254804e-01L, + -2.0273048858867556872072433107848955384e-01L, + -2.1509100668250829875574108587424583890e-01L, + 
-2.2725989270542750384925893490015094814e-01L, + -2.3924011763731637587872084997993792064e-01L, + -2.5103460639728433199192216502316397159e-01L, + -2.6264623855777312240316411149867979990e-01L, + -2.7407784904759174916236707216223561007e-01L, + -2.8533222884405183877364930326430409924e-01L, + -2.9641212565437245046372839566093886675e-01L, + -3.0732024458652068208051596680746383072e-01L, + -3.1805924880965185639883349668276001738e-01L, + -3.2863176020431053139357768964876694788e-01L, + -3.3904036000255107819096872094974754682e-01L, + -3.4928758941813410931539692705947339063e-01L, + -3.5937595026695261691197788694876862518e-01L, + -3.6930790557783929525168292907013160482e-01L, + -3.7908588019390417343445352368401927192e-01L, + -3.8871226136454937222268435896035706784e-01L, + -3.9818939932830552279841068785563696096e-01L, + -4.0751960788663214438675145523141552015e-01L, + -4.1670516496882207157854692957232201487e-01L, + -4.2574831318814785027807613334156070311e-01L, + -4.3465126038938588296698375694570623290e-01L, + -4.4341618018785199889104786302733041801e-01L, + -4.5204521250008005232823143909224611574e-01L, + -4.6054046406627311177860364731167719334e-01L, + -4.6890400896465479423105583940253757841e-01L, + -4.7713788911784632111786130180348641941e-01L, + -4.8524411479139292568322765140558720714e-01L, + -4.9322466508456132479074344820321183415e-01L, + -5.0108148841352808120907176750144850100e-01L, + -5.0881650298707682468469397665961648404e-01L, + -5.1643159727492047118140006480748942245e-01L, + -5.2392863046876277909203259884139061144e-01L, + -5.3130943293621180856911747373956708600e-01L, + -5.3857580666765610494466923948125161529e-01L, + -5.4572952571621270908956480845806990458e-01L, + -5.5277233663085440607996537742519552762e-01L, + -5.5970595888282195827936433920573211501e-01L, + -5.6653208528542542950338319090135722295e-01L, + -5.7325238240733709291016556235278458118e-01L, + -5.7986849097947682625807457952262425338e-01L, + 
-5.8638202629558933380643321697780798620e-01L, + -5.9279457860661099402232802681583606392e-01L, + -5.9910771350892261602101192647121362362e-01L, + -6.0532297232658289493900937468812013977e-01L, + -6.1144187248763588685320419660027392238e-01L, + -6.1746590789458437705718363441235796711e-01L, + -6.2339654928911959113469156706514092208e-01L, + -6.2923524461119629598145525663487507890e-01L, + -6.3498341935254095737853086332102468069e-01L, + -6.4064247690467926157560858675744165996e-01L, + -6.4621379890156797026959945044832806294e-01L, + -6.5169874555691476103560779451668832748e-01L, + -6.5709865599626840836245660190447925603e-01L, + -6.6241484858396038364664323096343822726e-01L, + -6.6764862124497769549523628863318702845e-01L, + -6.7280125178184555417288842186006604551e-01L, + -6.7787399818659722569870081614203949644e-01L, + -6.8286809894790724165768340684449330930e-01L, + -6.8778477335346294994617200009875163606e-01L, + -6.9262522178764822913249147051072428209e-01L, + -6.9739062602461204459956440664519817754e-01L, + -7.0208214951679339786562882012396622127e-01L, + -7.0670093767897311117765076057926938575e-01L, + -7.1124811816792179736856679611852620191e-01L, + -7.1572480115771228979753508374143873445e-01L, + -7.2013207961076374868941367244540733250e-01L, +}; diff --git a/usr/src/libm/src/Q/_TBL_ipio2l.c b/usr/src/libm/src/Q/_TBL_ipio2l.c new file mode 100644 index 0000000..5e9c171 --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_ipio2l.c @@ -0,0 +1,503 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_ipio2l.c 1.12 06/01/31 SMI" + +/* + * Table of constants for 2/pi, used in __rem_pio2l (trigl) function. + * By K.C. Ng, April 25, 1989 + */ + +#include "libm.h" + +const int _TBL_ipio2l_inf[] = { /* by DHBailey MP package */ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, + 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, + 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, + 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, + 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, + 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, + 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, + 0xFF319F, 
0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, + 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, + 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, + 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, + 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, + 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, + 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, + 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, + 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, + 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, + 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, + 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, + 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, + 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, + 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, + 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, + 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, + 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, + 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, + 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, + 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, + 0x5BC3D8, 0xC492F5, 
0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, + 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, + 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, + 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, + 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, + 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, + 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, + 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, + 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, + 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, + 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, + 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, + 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, + 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, + 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, + 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, + 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, + 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, + 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, + 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, + 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, + 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 
0xE26A6E, 0xC6FDB0, 0xC10893, + 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, + 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, + 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, + 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, + 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, + 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, + 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, + 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, + 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, + 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, + 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, + 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, + 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, + 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, + 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, + 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, + 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, + 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, + 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, + 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, + 0xE13F89, 0xB295F3, 0x64A8F1, 0xAEA74B, 0x38FC4C, 0xEAB2BB, + 0x47270B, 0xABC3A7, 0x34BA60, 0x52DD34, 
0xF8563A, 0xEB7E8A, + 0x31BB36, 0x5895B7, 0x47F7A9, 0x94C3AA, 0xD39225, 0x1E7F3E, + 0xD8974E, 0xBBA94F, 0xD8AE01, 0xE661B4, 0x393D8E, 0xA523AA, + 0x33068E, 0x1633B5, 0x3BB188, 0x1D3A9D, 0x4013D0, 0xCC1BE5, + 0xF862E7, 0x3BF28F, 0x39B5BF, 0x0BC235, 0x22747E, 0xA247C0, + 0xD52D1F, 0x19ADD3, 0x9094DF, 0x9311D0, 0xB42B25, 0x496DB2, + 0xE264B2, 0x5EF135, 0x3BC6A4, 0x1A4AD0, 0xAAC92E, 0x64E886, + 0x573091, 0x982CFB, 0x311B1A, 0x08728B, 0xBDCEE1, 0x60E142, + 0xEB641D, 0xD0BBA3, 0xE559D4, 0x597B8C, 0x2A4483, 0xF332BA, + 0xF84867, 0x2C8D1B, 0x2FA9B0, 0x50F3DD, 0xF9F573, 0xDB61B4, + 0xFE233E, 0x6C41A6, 0xEEA318, 0x775A26, 0xBC5E5C, 0xCEA708, + 0x94DC57, 0xE20196, 0xF1E839, 0xBE4851, 0x5D2D2F, 0x4E9555, + 0xD96EC2, 0xE7D755, 0x6304E0, 0xC02E0E, 0xFC40A0, 0xBBF9B3, + 0x7125A7, 0x222DFB, 0xF619D8, 0x838C1C, 0x6619E6, 0xB20D55, + 0xBB5137, 0x79E809, 0xAF9149, 0x0D73DE, 0x0B0DA5, 0xCE7F58, + 0xAC1934, 0x724667, 0x7A1A13, 0x9E26BC, 0x4555E7, 0x585CB5, + 0x711D14, 0x486991, 0x480D60, 0x56ADAB, 0xD62F64, 0x96EE0C, + 0x212FF3, 0x5D6D88, 0xA67684, 0x95651E, 0xAB9E0A, 0x4DDEFE, + 0x571010, 0x836A39, 0xF8EA31, 0x9E381D, 0xEAC8B1, 0xCAC96B, + 0x37F21E, 0xD505E9, 0x984743, 0x9FC56C, 0x0331B7, 0x3B8BF8, + 0x86E56A, 0x8DC343, 0x6230E7, 0x93CFD5, 0x6A8F2D, 0x733005, + 0x1AF021, 0xA09FCB, 0x7415A1, 0xD56B23, 0x6FF725, 0x2F4BC7, + 0xB8A591, 0x7FAC59, 0x5C55DE, 0x212C38, 0xB13296, 0x5CFF50, + 0x366262, 0xFA7B16, 0xF4D9A6, 0x2ACFE7, 0xF07403, 0xD4D604, + 0x6FD916, 0x31B1BF, 0xCBB450, 0x5BD7C8, 0x0CE194, 0x6BD643, + 0x4FD91C, 0xDF4543, 0x5F3453, 0xE2B5AA, 0xC9AEC8, 0x131485, + 0xF9D2BF, 0xBADB9E, 0x76F5B9, 0xAF15CF, 0xCA3182, 0x14B56D, + 0xE9FE4D, 0x50FC35, 0xF5AED5, 0xA2D0C1, 0xC96057, 0x192EB6, + 0xE91D92, 0x07D144, 0xAEA3C6, 0x343566, 0x26D5B4, 0x3161E2, + 0x37F1A2, 0x209EFF, 0x958E23, 0x493798, 0x35F4A6, 0x4BDC02, + 0xC2BE13, 0xBE80A0, 0x0B72A3, 0x115C5F, 0x1E1BD1, 0x0DB4D3, + 0x869E85, 0x96976B, 0x2AC91F, 0x8A26C2, 0x3070F0, 0x041412, + 0xFC9FA5, 0xF72A38, 0x9C6878, 0xE2AA76, 0x50CFE1, 
0x559274, + 0x934E38, 0x0A92F7, 0x5533F0, 0xA63DB4, 0x399971, 0xE2B755, + 0xA98A7C, 0x008F19, 0xAC54D2, 0x2EA0B4, 0xF5F3E0, 0x60C849, + 0xFFD269, 0xAE52CE, 0x7A5FDD, 0xE9CE06, 0xFB0AE8, 0xA50CCE, + 0xEA9D3E, 0x3766DD, 0xB834F5, 0x0DA090, +}; + +#if 0 +const int _TBL_ipio2l_66[] = { + 0xA2F983, 0x6E4E44, 0x152A00, 0x062BC4, 0x0DA276, 0xBED4C1, + 0xFDF905, 0x5CD5BA, 0x767CEC, 0x1F80D6, 0xC26053, 0x3A0070, + 0x107C2A, 0xF68EE9, 0x687B7A, 0xB990AA, 0x38DE4B, 0x96CFF3, + 0x92735E, 0x8B34F6, 0x195BFC, 0x27F88E, 0xA93EC5, 0x3958A5, + 0x3E5D13, 0x1C55A8, 0x5B4A8B, 0xA42E04, 0x12D105, 0x35580D, + 0xF62347, 0x450900, 0xB98BCA, 0xF7E8A4, 0xA2E5D5, 0x69BC52, + 0xF0381D, 0x1A0A88, 0xFE8714, 0x7F6735, 0xBB7D4D, 0xC6F642, + 0xB27E80, 0x6191BF, 0xB6B750, 0x52776E, 0xD60FD0, 0x607DCC, + 0x68BFAF, 0xED69FC, 0x6EB305, 0xD2557D, 0x25BDFB, 0x3E4AA1, + 0x84472D, 0x8B0376, 0xF77740, 0xD290DF, 0x15EC8C, 0x45A5C3, + 0x6181EF, 0xC5E7E8, 0xD8909C, 0xF62144, 0x298428, 0x6E5D9D, + 0xF9A9B4, 0xCDBD2F, 0xC083E7, 0x0D3957, 0xECA3B2, 0x96223C, + 0xC1080D, 0x087D47, 0x7D7576, 0xA614B1, 0x42A4B6, 0xAA173C, + 0xE217E5, 0xFDCD34, 0x279D5F, 0x39AACA, 0x1CA8DF, 0x8B6633, + 0x5C49E4, 0xB56803, 0x1E7938, 0x741FDC, 0x4CB19B, 0xCECC3B, + 0x921EB7, 0x7C0FC3, 0x361F23, 0xF9EE22, 0xBA4235, 0xA5FCA3, + 0xBD4680, 0xFCDF65, 0xFC96AD, 0x31C90C, 0x919EEB, 0xFE0FB7, + 0x75B4B0, 0x693961, 0x75BCAA, 0xEB6F39, 0xA343C0, 0xD16FF2, + 0x33DAD0, 0xC1E095, 0x053182, 0x11E4A1, 0x40F943, 0x32D314, + 0xAF1B98, 0xE1B05A, 0xE5F3AD, 0x6E633F, 0x363D14, 0xA3777C, + 0xC8C6EE, 0x001E18, 0x0D180C, 0xAA1369, 0xEDFBA2, 0x998A9D, + 0x16E799, 0x693B75, 0x90EF50, 0x938DD4, 0xFB7ACD, 0x67CEEB, + 0x249DE3, 0x9B9B52, 0xD8CDAC, 0xC31A54, 0x855FBF, 0x848591, + 0x0954B0, 0x946B8C, 0xA4C7B4, 0x9A9E51, 0xF20425, 0xAA2637, + 0xFC6657, 0x7D8625, 0x620B74, 0x8B578D, 0xEC9A05, 0xDEF24F, + 0x7F19B0, 0xFC2544, 0x1DA0F1, 0x23790C, 0xC4294D, 0x6D3C32, + 0x66FE56, 0xD45562, 0x66264F, 0xA24162, 0x13E930, 0xB0E7C0, + 0xFA0E97, 0xBFC62C, 0x0E663F, 0x90F33B, 
0x55E73C, 0xD791F7, + 0xD3F00D, 0xAB01C7, 0x40CF8F, 0xA593BA, 0xE627D5, 0x4A8308, + 0x32DC06, 0x80C876, 0x1C3DB5, 0xB5489F, 0x632CDF, 0xB02517, + 0xD17EFA, 0x92570F, 0xFAED44, 0x8F8536, 0x27069B, 0xC014DC, + 0x997D48, 0x961D61, 0x7A960B, 0x31B622, 0xD3C425, 0xA69520, + 0x98D29E, 0xF1C973, 0x5483D7, 0x99611E, 0xEAFF5F, 0x7DEFF1, + 0x98475C, 0x91C787, 0x537E17, 0x068C65, 0xF05E52, 0x942F04, + 0x37CF92, 0xEF4223, 0xC4C52F, 0x521DAA, 0xBAAF97, 0x972236, + 0xA2B3D3, 0x62C921, 0x8D3A8B, 0x2B3302, 0x6061B9, 0x0CBE94, + 0x75F451, 0xBD06DE, 0x86042D, 0xFB61ED, 0x4C8869, 0x590232, + 0x479963, 0x23518D, 0xAF5D28, 0x60C9DE, 0x473DB0, 0x9DE009, + 0xD8FC4C, 0xE96991, 0x9CA455, 0x800BC8, 0x977CE0, 0xDCBFA6, + 0x19D249, 0xA0F76D, 0x5F9B2F, 0x452BB3, 0x77E091, 0xB6383A, + 0x7BE9C2, 0x4BF7C1, 0x8A5EBF, 0xEB0D55, 0x9AF4DC, 0x275CA0, + 0xED09D0, 0xE50A7F, 0xBEF42C, 0x4803AF, 0x56139F, 0xD58848, + 0x797D96, 0xB8352E, 0x49D90D, 0x7607E0, 0xC99256, 0x75F530, + 0xB72237, 0x1AF080, 0xC2E813, 0x06CFA9, 0xB9DF8E, 0x919C38, + 0x89D97E, 0x0464D5, 0xB12EEF, 0xD14165, 0x365A72, 0x550D35, + 0x3772D8, 0xF41B58, 0x0378A7, 0x2D5D7D, 0xD6E433, 0xDD2018, + 0x139FD7, 0x1B5621, 0x94E046, 0x97A323, 0x693176, 0x28DF59, + 0xD24273, 0x0E4E26, 0xA9A8F6, 0xF15B41, 0x450EE3, 0x57EA61, + 0x7DADA6, 0xF21086, 0x394BEE, 0x8F4813, 0x3FDEE9, 0xF3A53D, + 0xAB2F40, 0x8B1E2B, 0xA07FD4, 0x992CC4, 0x63532D, 0x9F35A2, + 0x6FA290, 0x0094DE, 0xD2A24D, 0x755B81, 0x79F9E1, 0xFE1D35, + 0xFEE8CC, 0x9224C5, 0x54E2CE, 0x41F31C, 0xF45138, 0xED6D10, + 0x6B439D, 0xD2BE46, 0xC327D4, 0x68BFB0, 0x46D5A5, 0x79B285, + 0x776D7C, 0xE18647, 0x00E32F, 0xEBB7F2, 0x5DE307, 0x5A8EA0, + 0x06CEFE, 0x20923C, 0x354CE1, 0xAD09C5, 0x56996D, 0xCFB124, + 0xEF7BC1, 0x76BF72, 0xF20753, 0x5BBAFA, 0xB8A2B2, 0x5914F2, + 0x5D834F, 0xE64A08, 0x14C3AB, 0x07796B, 0xF2212D, 0xC74049, + 0xB61C6A, 0x282CFC, 0x25070C, 0x315BF1, 0x6FEAD3, 0x2CD2E5, + 0xD10F9C, 0x1972BB, 0x908073, 0x0F368C, 0x69BE97, 0xA242B0, + 0x722DFE, 0xAFE6A2, 0x143D8B, 0x5C5699, 0x48232B, 
0xFF49AC, + 0xB5FA62, 0x6AD778, 0x7A844D, 0x258AA0, 0x8EDE3D, 0x9A9496, + 0x49924E, 0xA33E97, 0x4F43FA, 0xC40741, 0x2F764A, 0x8EB2B1, + 0x8E67D3, 0x9FF324, 0x51B11B, 0x5D6E09, 0xE9AD3E, 0xFFA902, + 0xF48653, 0x0845D3, 0xDED33E, 0x32D30E, 0x6247CA, 0x7C586D, + 0x2EAF9E, 0x323A35, 0xAD11FB, 0x0F420C, 0x0E0685, 0x401B60, + 0xBB3D43, 0xF4D489, 0xBCDC4C, 0x40FFBA, 0x18AB08, 0x7AC72D, + 0x5E76DB, 0xE8344E, 0x3975A2, 0xF9611B, 0x1121F3, 0x3A429C, + 0x9B18EC, 0xF298B1, 0x8AEC78, 0x1C248B, 0x69108F, 0xDB2D37, + 0xA1A613, 0x910359, 0x521451, 0xD4441F, 0x0BB3B6, 0x50D9DB, + 0xBD589F, 0x62A62E, 0xA9B903, 0x935F63, 0x058BEC, 0x78BCB5, + 0x2CB460, 0x3A9037, 0x0291C4, 0x1FABC1, 0xBE7D05, 0xF948E7, + 0x6BA5CD, 0xF62A0A, 0x9AEA19, 0x2257AB, 0x2E0D7D, 0x9EB93F, + 0x5E3F77, 0xD4A13F, 0x08E3DB, 0xDFD689, 0x2B9B4E, 0xB58427, + 0x25424B, 0x1197FD, 0xCF298A, 0x314008, 0xD5687F, 0x0F0EAC, + 0x13C485, 0xF684B2, 0xED7EC7, 0x6E636D, 0x28C933, 0xE19058, + 0x688B6A, 0xC88905, 0xFB2F31, 0x61304C, 0xC19765, 0x60D81A, + 0x57F276, 0xC6EFC4, 0x048954, 0x303470, 0xDA6F6F, 0x93901A, + 0x911439, 0x363D12, 0x59E72B, 0x6F9F1E, 0x57C584, 0xDF0D23, + 0xBB743F, 0xADE99C, 0x546097, 0xFCC820, 0xCBB968, 0xDA9B5F, + 0x0DC271, 0x563337, 0x9ED662, 0xE7C44F, 0x3129F8, 0xF5EAF9, + 0xDAF7F2, 0xCD09FF, 0xA92535, 0x441C29, 0x7DF436, 0xE2B00A, + 0x36746F, 0xF1DC61, 0x9D3C9C, 0x63AB71, 0xB8F3BB, 0x1C80F6, + 0x62FF65, 0x5FFE5F, 0x3B2814, 0xBADE27, 0x1B384B, 0x268AA9, + 0xBD91EF, 0xCA436B, 0xABE107, 0x88DCA6, 0xC3AFC0, 0x85D155, + 0x464A48, 0xBFDAEB, 0xC6F389, 0x907C11, 0x0D3E41, 0xCD2197, + 0x549008, 0x817E4E, 0x8C7154, 0x1DC37F, 0x5E897E, 0xA9A2FE, + 0xEC6060, 0xCC0728, 0x430D3B, 0x62471C, 0xD3A4D3, 0x2BA57B, + 0xE5D15A, 0xD632F3, 0xF2B76F, 0xEC8498, 0xAE41C2, 0xAAF413, + 0xEAF5C0, 0xDD1B07, 0xB9A2A0, 0x59F230, 0xA3F61B, 0x8F8643, + 0x05DE6B, 0x1B5B8E, 0x63ECC5, 0xBFF01D, 0x8F1440, 0x3F8ADF, + 0x2E6539, 0xF3DB7A, 0x293FE5, 0x7EE714, 0x88E6D8, 0x2B2A6A, + 0xDF6E34, 0x8D4604, 0x4F6594, 0x639063, 0x6B51CC, 0x0D05CD, + 
0x009607, 0xE7BF70, 0xC9A0EA, 0x0D80DD, 0xA1A065, 0x0DCB8F, + 0xA48430, 0x715934, 0x6FC8E4, 0x6FFC52, 0xEF8B05, 0xDE506A, + 0xE62BBC, 0x31480F, 0xEA64EA, 0x51E6FB, 0x9AE773, 0x21C54D, + 0xBFA080, 0x273D1E, 0x9FFD4E, 0x0C2CA8, 0x0690A5, 0xF8773B, + 0x4B2680, 0x6E3F56, 0xC8B89F, 0x0B7BD0, 0x71C8BF, 0x5AABD3, + 0x2BA93E, 0x9D2EE1, 0xCDF2FA, 0xEE57BE, 0x84A116, 0xDA756D, + 0x8FD6C0, 0x927153, 0xFF5EF3, 0x9F8331, 0x713411, 0xF945F3, + 0x0382B2, 0x8BAE30, 0xBC45A4, 0x630101, 0x5C9C3A, 0x643CFD, + 0x48115C, 0x17F03E, 0xB5F55E, 0x288DAF, 0x725660, 0xFB58E0, + 0xFC189E, 0x1ECA69, 0xFB19A6, 0xFA7A92, 0x7CC48E, 0x869372, + 0x58089A, 0x16DB5C, 0xADC0CD, 0x09D3D4, 0xD1108E, 0xDC64ED, + 0x3A999C, 0xAA8716, 0x5A3D8E, 0x7037FB, 0x1976AD, 0xE477D7, + 0x23782B, 0xC51F39, 0x4A5E9A, 0xDAD9DA, 0xE5B559, 0x08EF06, + 0x76E24F, 0x7361AD, 0x5F42A3, 0x9B70E5, 0xCE96C4, 0x552E99, + 0x6D7A6F, 0x804474, 0x4FA45B, 0x1D115B, 0x6D109E, 0x0A1A63, + 0x1084A6, 0xE18E5D, 0x2D8589, 0x203345, 0x4851AF, 0xA71EDC, + 0x03B6B1, 0x267970, 0xDEC908, 0x795BED, 0x7099B9, 0x209321, + 0x7FC2E7, 0x0F3E5E, 0xC7A4F4, 0x088129, 0x59AE63, 0x4E3251, + 0x344268, 0x79285D, 0x2B9494, 0xF1E2A2, 0xF7DA20, 0xDF6756, + 0xCA3BA3, 0x422489, 0xA2239C, 0x38724D, 0x2AC767, 0x601E9D, + 0xB47C6C, 0xA22481, 0xBBB655, 0x1EC0C4, 0xD84A97, 0xD449EE, + 0x162C9D, 0x782F29, 0xCEB4FA, 0xE317BC, 0x2FFDBD, 0xB342D2, + 0xB2CB19, 0x323AB9, 0x1AFF93, 0x13A8DF, 0x86B5A5, 0x5741D6, + 0xC54342, 0x3CAC29, 0xF7517C, 0x129A7A, 0xB2B8B4, 0x9B709F, + 0x3923C5, 0xEAFA6E, 0xDB9077, 0x29EEA0, 0x702D8C, 0x4DC14F, + 0xE46933, 0xA764E4, 0x754266, 0xFA4F98, 0x643DA5, 0xCA775C, + 0x7F1632, 0xE671A3, 0x4BF4C6, 0xA82378, 0xEFD317, 0xE62D38, + 0xD461C9, 0x8EEC80, 0xC89882, 0x4CC73C, 0x830F3F, 0xE4B200, + 0x582615, 0x6CD558, 0xA66727, 0xEF7975, 0xFEA5CE, 0x147A40, + 0x4796E4, 0xC07761, 0xF5D5B3, 0x6B65FB, 0xE4F14D, 0xA837CA, + 0x9A152A, 0x554E94, 0x83EC5F, 0xA62174, 0x85E2ED, 0xCCE71C, + 0x3540FF, 0x088A84, 0xBA2816, 0x293610, 0x4C3EE7, 0x8E55A9, + 0x49E5E5, 
0x782178, 0x45D2AA, 0x9BB449, 0x00D282, 0xF61E67, + 0xE2F7DE, 0xCC6AA1, 0xCD1979, 0x52FEDB, 0x9A8776, 0x70A018, + 0x500271, 0x1273BA, 0xDE648E, 0x7AC7F7, 0x767725, 0xD0A457, + 0xF17250, 0xBC578C, 0x2DFD3A, 0x97F988, 0xA576C8, 0x8129BB, + 0x22D9C3, 0x0436ED, 0x650791, 0xA314EC, 0x42A0B3, 0x37A521, + 0x4BFB2B, 0x8C1B7F, 0x115E17, 0xF7C27F, 0xC1D5EB, 0x060487, + 0x8A28D6, 0x41330F, 0xBFAE67, 0x7774E8, 0x4CCC3C, 0x6B2F80, + 0x628BF2, 0x1E41A6, 0x8D0B22, 0xBC85BA, 0xCCF461, 0xBEC69C, + 0xDF8A10, 0x3C5E71, 0x2F8D5F, 0x63D3DA, 0x5934D1, 0x2CA35D, + 0xC687A2, 0x24E9B4, 0x1843D3, 0x5C9B97, 0x9B580C, 0x780B2C, + 0x59943D, 0x0744D0, 0x8DA6E3, 0x07AAF6, 0x2214D0, 0x72E8D7, + 0x54151B, 0x514DE9, 0x8DCC3B, 0x0CEB00, 0x2C4DE3, 0x5012AE, + 0xD7B72E, 0xB7DE9A, 0x641B2F, 0xF9CF17, 0x8BD282, 0x9F31A3, + 0xDED846, 0x467E05, 0x26CCEA, 0xF8E404, 0x65572E, 0x82C594, + 0xE572A9, 0x895653, 0xA1AA94, 0x8DD876, 0x5E9A61, 0x69EB1C, + 0x0385A9, 0x5BC844, 0x95B2DF, 0x6678F6, 0xFA7033, 0xE4F434, + 0x5584A9, 0x32C099, 0x9AD846, 0xB3FFD1, 0xA81C56, 0x4E54EF, + 0x54D173, 0xF191B4, 0x49B2A2, 0xB309D9, 0x546D8D, 0xC0A51E, + 0xCAFFC0, 0x785400, 0x05F69D, 0x894056, 0xC33098, 0xDFF6C2, + 0x908D97, 0x05CC96, 0x46484B, 0xBD7B9D, 0xB152F5, 0x5A7461, + 0x59CA20, 0x8F8EF5, 0xC9FF05, 0xF6F398, 0x856C97, 0x81E07C, + 0xAE5EDA, 0x51BDC9, 0xF26437, 0xBBC8CE, 0x091B52, 0x68B6A5, + 0x90750E, 0x925EF9, 0x3D9CB3, 0x46EA96, 0x97D648, 0x78BCC7, + 0xF4B488, 0x05275E, 0x6619DF, 0x56D4A0, 0x8C5C41, 0xDB345A, + 0x0B79DA, 0x496369, 0x96109B, 0x667664, 0xC40CF9, 0x91D7CA, + 0x119F1A, 0xA99272, 0xCBB529, 0xBB033E, 0x8F91C0, 0x570045, + 0xB845C2, 0x2B8E52, 0x687AFB, 0x0D0AA3, 0x200863, 0x043B83, + 0xF129DE, 0x49C2D6, 0x9641D2, 0xC4747C, 0x220804, 0x503F05, + 0x7E274F, 0xCA83D9, 0x9D6495, 0x0E5039, +}; +const int _TBL_ipio2l_53[] = { + 0xA2F983, 0x6E4E44, 0x16F3C4, 0xEA69B5, 0xD3E131, 0x60E1D2, + 0xD7982A, 0xC031F5, 0xD67BCC, 0xFD1375, 0x60919B, 0x3FA0BB, + 0x612ABB, 0x714F9B, 0x03DA8A, 0xC05948, 0xD023F4, 0x5AFA37, + 0x51631D, 
0xCD7A90, 0xC0474A, 0xF6A6F3, 0x1A52E1, 0x5C3927, + 0x3ADA45, 0x4E2DB5, 0x64E8C4, 0x274A5B, 0xB74ADC, 0x1E6591, + 0x2822BE, 0x4771F5, 0x12A63F, 0x83BD35, 0x2488CA, 0x1FE1BE, + 0x42C21A, 0x682569, 0x2AFB91, 0x68ADE1, 0x4A42E5, 0x9BE357, + 0xB79675, 0xCE998A, 0x83AF8B, 0xE645E6, 0xDF0789, 0x9E9747, + 0xAA15FF, 0x358C3F, 0xAF3141, 0x72A3F7, 0x2BF1D4, 0xF3AD96, + 0x7D759F, 0x257FCE, 0x29FB69, 0xB1B42C, 0xC32DE1, 0x8C0BBD, + 0x31EC2F, 0x942026, 0x85DCE7, 0x653FF3, 0x136FA7, 0x0D7A5F, + 0x93FC61, 0x035287, 0xC77FCA, 0x73530A, 0xC6BC15, 0x0E4B0F, + 0x568FCE, 0x2D3456, 0x4D7FE1, 0xA12CD1, 0xB2CEA2, 0x531C62, + 0x70B4D2, 0x1BCE9A, 0x87704D, 0x6B83D7, 0xAA8121, 0x2530EA, + 0x2074BF, 0x28A071, 0x9D69C3, 0x406DD8, 0xF58783, 0x115D89, + 0x5E85F3, 0xAACDCC, 0x8C0B57, 0xD7DFFE, 0x550D96, 0xC43EB4, + 0x89ABA7, 0x94F595, 0x56F260, 0x06A4CD, 0x7FD2E2, 0x6FDFA8, + 0x3E9C98, 0xBFD682, 0xAD3A12, 0x23A8A6, 0x173A89, 0x5DE9BD, + 0x95A978, 0x28E484, 0x5964F3, 0x496AF0, 0x4B1DA9, 0x989061, + 0xBD2BF2, 0xE01A90, 0x0905B7, 0xAC39AC, 0x52D5B7, 0x109F25, + 0x3AE1DC, 0xF90A7C, 0x33F4E5, 0xF5DFDF, 0x1522D0, 0x562CE6, + 0x392CFF, 0xEB9032, 0x10A08E, 0x0B1D7F, 0x42B80A, 0x366DD2, + 0xC24F89, 0x02222E, 0x21494C, 0x985287, 0x87FD07, 0x2EE361, + 0xAD8D68, 0xE72273, 0x9E8D59, 0xD09999, 0x10F4A1, 0x1079A3, + 0xE9BEAF, 0x9C0887, 0x09C622, 0xEBCF06, 0x974532, 0x086A8F, + 0x6CEA05, 0x388C00, 0x74969E, 0xC85B16, 0x385A38, 0x9A2F35, + 0x670531, 0xABA6D0, 0xEFD3C1, 0x27AD92, 0xF4203E, 0x3D619F, + 0x4D05F4, 0x9AE7CC, 0x03B592, 0x41FF55, 0xCAFCA5, 0x1A0987, + 0x88AB79, 0x3627D4, 0x25B12A, 0x52594A, 0xA2BEB0, 0x25C3F2, + 0x4489DA, 0x7959A7, 0xEAEC89, 0xB34714, 0x960196, 0x1FC33A, + 0x7F0275, 0x32EF92, 0x0111CE, 0x8E4685, 0x6F5B34, 0xF6123A, + 0x5543B2, 0xE9A02A, 0x74E03F, 0x54D5A8, 0x086A2C, 0x4A9CD3, + 0x921191, 0x229764, 0x0A1A84, 0x9B45AE, 0xC653A5, 0xB15F33, + 0x100FD1, 0x7DD740, 0xB20CD3, 0x0A0786, 0xF506C3, 0x25EBF4, + 0x3AB39E, 0xE3BB24, 0x27646F, 0xEECE57, 0x706BFE, 0xC7A869, + 0x57ED51, 0x118C82, 
0x2B0FF5, 0xC8E545, 0xC43D80, 0x2A3183, + 0x4C1BB9, 0xBC108A, 0x099779, 0xF9ECC8, 0x2A1063, 0x5D2F6A, + 0x8F2675, 0x12FF6D, 0x32EED9, 0xE4A245, 0x7392CF, 0x5C240B, + 0xC476FF, 0x97AFC7, 0xB76131, 0x665E05, 0x67BD57, 0x19E998, + 0x3A5863, 0x23B8AA, 0x5B5608, 0x8A66C6, 0x5F2AD3, 0x78BAFA, + 0x3516CE, 0xCBEA16, 0x6E40D4, 0xB463D4, 0xA6C12F, 0xABD3D7, + 0x32650A, 0x579D10, 0x3CB9E2, 0x1A02A7, 0xDF2FFA, 0x28C991, + 0xB2264C, 0x027870, 0x47BDD4, 0xF243B1, 0x39AE2C, 0x282EA4, + 0xAF1D98, 0x2AFD16, 0xABE7AF, 0x17CB67, 0x8FF93E, 0x793167, + 0x435F6B, 0x48058B, 0x417DA0, 0xE01217, 0x085A69, 0xB50E36, + 0x79A4CD, 0xD74907, 0x26C4B5, 0xB90054, 0x06C3AD, 0x5AB38F, + 0x585E91, 0xD04E4F, 0x2938CE, 0xD4EAA7, 0xA06DE5, 0x40BFE5, + 0xDE6849, 0xEF65F0, 0xF1D4BB, 0x94C21E, 0x66E978, 0x1B9B94, + 0x961043, 0x5961B8, 0xBAAA74, 0xD662EE, 0x9DABF6, 0x0AFE28, + 0x9587A4, 0xA632BC, 0x09149F, 0xDEA996, 0x2CAFD7, 0xBDE29B, + 0x7159E6, 0x1F7C49, 0xF2E2ED, 0xBFA992, 0x7C77EF, 0xC245D0, + 0xB2D129, 0x993E75, 0xAB4C0C, 0x5C84B6, 0x17F542, 0x45314E, + 0x1DEF1B, 0xE3BDCC, 0xB3AE86, 0x24522F, 0x918FC6, 0x2138D5, + 0x883646, 0x6858B6, 0x032762, 0x5170F8, 0x4974EA, 0x76BF77, + 0xECDA8A, 0x9EADDD, 0x2404EF, 0xC52A5D, 0xF2E858, 0xC42D60, + 0xD18C08, 0xDE59B2, 0x4CC3A6, 0x94D888, 0x4C4AF0, 0xCF1F8C, + 0xBF2F6F, 0x7B4535, 0x98B0DB, 0x2BE0CF, 0x4616A7, 0xA8D9FB, + 0x88CA7A, 0x5087E1, 0x18DD8A, 0x1A9F4F, 0x1DCECE, 0xF8609E, + 0xE2F0C8, 0x9AD7D4, 0xE3CDFE, 0xC6FDD5, 0x8FF3CD, 0x7D45AA, + 0xD34957, 0x7C1963, 0x6CE098, 0xB70215, 0x326BBF, 0x47B3A6, + 0xF9235D, 0x6F66F5, 0xC6E40C, 0xE7F50B, 0xFF2FDD, 0x5A1251, + 0xE95EF1, 0xDE8E67, 0xECEE9B, 0xC9F98E, 0x722224, 0x6DF750, + 0x81D08F, 0x2BFCF0, 0xDDC10D, 0x775314, 0xDB1D87, 0x41626B, + 0x9EDF31, 0x7738D9, 0x8D9EB4, 0x4F1C2A, 0xF3E795, 0xB69699, + 0xD9A56D, 0x31BB1B, 0x542975, 0xAB917B, 0x63927C, 0x9BB764, + 0x84A598, 0x0A0C51, 0x5E48C4, 0x7780E3, 0x87E156, 0x155972, + 0xE406F8, 0x48AB9E, 0x3CCDDA, 0x010F87, 0x683B70, 0x400CAD, + 0x5DE5C5, 0x7262FA, 0xFA248D, 
0x013AF2, 0xE2E8B5, 0x995F7D, + 0x7F8C4B, 0x0E8B59, 0x1006F1, 0x40B6E9, 0x760654, 0xCBCC8C, + 0x086F40, 0xDC7F6F, 0xFCD0D4, 0xA47ADE, 0x5204FA, 0xF38A9D, + 0xE76C7C, 0x575207, 0x499BF1, 0x0DB01C, 0x09098E, 0x957A71, + 0xD53E0E, 0x61DF1D, 0xE6EF34, 0x5821EC, 0x96BCC0, 0xDC96CE, + 0xA9C0AE, 0x130B2C, 0xCCC589, 0x829BB9, 0x2A75BA, 0x97611C, + 0x0CEAB8, 0x165D9D, 0x35AD41, 0x82A805, 0x975628, 0x5601A6, + 0x074F08, 0x80A27D, 0xEFA64E, 0xD7BB4B, 0x5E6397, 0xC92FFC, + 0x4F3F7A, 0xBEA764, 0x0C9B7D, 0xC5DC74, 0xEAD216, 0x6DBBC0, + 0x913E3E, 0xABF50B, 0x95B24A, 0x3FC9C5, 0xE7BA15, 0x8C7F70, + 0xF81358, 0x774606, 0xCE8C0D, 0xB6B268, 0xB85BA6, 0xAC9B2E, + 0x1AAB05, 0x0C6C82, 0x6EC2AE, 0x606874, 0x8F60BF, 0x1FBC7B, + 0x58C97A, 0x448794, 0xBA48A0, 0x72E882, 0x6D3568, 0xE131FD, + 0x4745D0, 0x0BFA1E, 0x07B01D, 0x474D43, 0x59387E, 0x5B0AD5, + 0xC37A8C, 0x0474E8, 0x13D99D, 0x68A13C, 0xB69118, 0x89228C, + 0x6F7D83, 0x86D665, 0x5C7744, 0xDD183E, 0x1C2E17, 0x712F5E, + 0x4AACCB, 0xB69B68, 0xA1201F, 0x743C2B, 0xF6AD70, 0x92E024, + 0xF34FD8, 0x33712E, 0xFE1D73, 0x4471F0, 0x7D0526, 0x58AF47, + 0x7B11FE, 0x1FCE4F, 0x1356C9, 0x9CE3CA, 0xA843C0, 0x8EEA3C, + 0xABEEE4, 0xA5D495, 0xA407A4, 0x31BB4B, 0x0AA1E3, 0x518E7C, + 0xAA4A66, 0xD82CD8, 0x6EF8D2, 0x6F32E6, 0x1DC26B, 0x17AE59, + 0x4B683B, 0x8D48F7, 0xF4FBD8, 0xD4FE0A, 0xE961DE, 0x87BD37, + 0xE6CCD6, 0xCBD76D, 0x3E99DE, 0xB72E21, 0x54EB90, 0x6AB45D, + 0x600AFB, 0xA17B2F, 0xDA0421, 0xE6CA95, 0x35AAA2, 0x7D8FB1, + 0x3207BB, 0xBF82EE, 0x71F55F, 0xC661CB, 0xBD72A1, 0xBF5A64, + 0x6E39E8, 0x6C6DE2, 0x2BD178, 0xAF62A5, 0xA7D86E, 0xE7D0FE, + 0x84DB03, 0x67FDA2, 0x2D6809, 0x0F8B8F, 0x1B50E3, 0x234EF5, + 0x7325ED, 0x8F8F4C, 0xC1E426, 0x3066AD, 0x0759A4, 0xE03390, + 0x70CC9A, 0x524F77, 0xCDD489, 0x97DD24, 0xA81858, 0xF24513, + 0xA9C18E, 0x2A2F82, 0xC2C014, 0xB8E7F0, 0x934036, 0xD36E51, + 0xD9A089, 0xDBC587, 0xB30418, 0x969192, 0x0A5213, 0xE21841, + 0x2881EC, 0x9A293F, 0x0DF705, 0x85B497, 0xE430B9, 0xE90ECF, + 0xC15FDC, 0x9E8A7E, 0xC5472D, 0xB54FBD, 
0x456AF2, 0xCA80B6, + 0xAE25FE, 0xA03B46, 0x6C6CFD, 0x78382A, 0x0E7877, 0x7F2D31, + 0x03C827, 0x61CF52, 0x339A2F, 0x2286A9, 0xE41DF0, 0x640F5C, + 0xBEF364, 0x010506, 0x6D2C21, 0x841EFF, 0x7F3B5D, 0xD98DC8, + 0x0F9421, 0xA25B0C, 0x4C2C44, 0x922392, 0xB98A8A, 0x6179B9, + 0xF7B419, 0x289AAF, 0xE92F47, 0x5E47A2, 0x82927F, 0xC7290E, + 0x6C925C, 0xBA5A3C, 0x8FB7F6, 0x9C4BEE, 0x02C529, 0x0CFCD7, + 0x5EBD8C, 0x7196E0, 0x4B917E, 0x6B9780, 0x6A1731, 0xA617FF, + 0x27A20D, 0x5A56A3, 0x43C4DB, 0xC62EA4, 0x637A84, 0x1C46F9, + 0x33C780, 0x61A278, 0x4915C9, 0xD6C776, 0x6A7C66, 0xD8DD0C, + 0xF87EB1, 0x124C43, 0x5B87E7, 0x097456, 0x3C2FA7, 0x307C4A, + 0x54267A, 0x30E34E, 0xC0CF98, 0xD75B19, 0xFADEDB, 0x12CBE8, + 0x29F24C, 0x579C7E, 0xBF3682, 0xDCB460, 0xAE08B3, 0xA524BC, + 0xC181C2, 0x5DAB90, 0x466602, 0x55345B, 0xA13941, 0x47D820, + 0x278066, 0x81B089, 0x165EFB, 0x4D27FD, 0x2BF9F4, 0x2E2FFB, + 0x6106B5, 0xE76806, 0x445A84, 0x0BDA0D, 0x49D7A4, 0x72650D, + 0xCDC55B, 0x3E16BC, 0x132F6F, 0x29E8FD, 0xE58428, 0x621E41, + 0x7D2AC4, 0xAB5697, 0xAC61EB, 0xE5DAF0, 0x654ED6, 0x8E77E3, + 0x0B2FBC, 0x2E63A3, 0xC8296A, 0x8B631F, 0x4ECCA6, 0x91859C, + 0x9E3E45, 0x0E3CC7, 0xC12454, 0xCCBCB6, 0x17979E, 0xD0D374, + 0xA489A2, 0xC6258F, 0xE8EF9E, 0x12EE26, 0xC614C2, 0x62E23E, + 0xCA8C5C, 0x409AC9, 0x511D05, 0xA88CE0, 0x195500, 0xF7144F, + 0x913BB7, 0x17D064, 0xF6C9CE, 0xAC5D11, 0xD0C313, 0xBCCCB6, + 0xAAD4FC, 0xE47B2C, 0xFE4362, 0xF2E712, 0x2D5EFF, 0x833822, + 0x58A1D7, 0x68377C, 0xE49B25, 0x22B179, 0x048796, 0x069400, + 0xE670D3, 0xD2CB85, 0x55FBE6, 0x67F281, 0xFE2DE0, 0x8CFAF2, + 0x9865BC, 0x210CD3, 0x86DD70, 0x43D00F, 0x55E279, 0x679252, + 0x8D4F58, 0xE17AC5, 0x6A6127, 0x1B0876, 0x5D8ED0, 0x701330, + 0xD5BD25, 0xC9A126, 0x57C571, 0xDC5C3F, 0xB6D34E, 0xB72383, + 0x001A9E, 0x7D36C0, 0x8151F6, 0x65D7C1, 0xE1F513, 0xCD372A, + 0xE69B0C, 0xD02685, 0x23C3EB, 0x3544CB, 0xF0BE31, 0x83F399, + 0xCB93F8, 0xFFC693, 0x908EC6, 0x8E5DE1, 0x315B7E, 0x67CE7B, + 0x40AAF7, 0x7FD285, 0x069B36, 0x03C00A, 0x13C7D5, 
0x0DA14C, + 0x1EAAD4, 0x2B777F, 0x8E05C1, 0x5AD1AE, 0x60C398, 0xA4EA59, + 0x10BEED, 0x88F2FA, 0x69B941, 0xA54E70, 0xA817C3, 0xB96246, + 0xE8EEDC, 0x56D570, 0xBBEBB5, 0xD8F235, 0x201AB9, 0x9CC747, + 0x5BC2FB, 0xC877F3, 0x428CF6, 0x4EEF84, 0xBF85FD, 0xEE6D34, + 0x84C2DE, 0xC42F4C, 0x1A513B, 0x9AC41F, 0x87FFFA, 0x1CA431, + 0x714252, 0xC73FB9, 0x662D89, 0x3D83BA, 0xBDF046, 0x2E4F62, + 0x76B7C0, 0x81336C, 0xBE80A9, 0x4C9D72, 0x739A15, 0x47972C, + 0xA36A1B, 0xD31731, 0x54BA46, 0x2E8C72, 0xFEA5A5, 0x9A7E5F, + 0xC359ED, 0x8F0FFB, 0x1270DA, 0x5E9B08, 0xB0BFCB, 0x36974C, + 0x6CD8F9, 0xD02E1F, 0x1C3F2F, 0xFCF8F0, 0x4C2C6D, 0x0B2169, + 0x48B9CE, 0x42737D, 0xA8E974, 0x64062D, 0xA86C59, 0xEEC419, + 0x047C83, 0x996A23, 0xF2A4C8, 0x4BE1B8, 0x348286, 0xE84240, + 0x8337CB, 0xE55A2F, 0xC17750, 0xA4DA06, 0x64347F, 0x59A5A1, + 0xDFF53D, 0x62A571, 0xEECF3A, 0x886700, 0xC06DAF, 0x4E161F, + 0x12670E, 0xBDFE1A, 0xA72B38, 0x5BA22C, 0xFED227, 0x3FC814, + 0x150E5A, 0xE99B3A, 0x8EE9FC, 0xBC1845, 0x32373A, 0xBDA476, + 0xCEB88F, 0x7FAED3, 0xDB9116, 0x31CF72, 0x1A5136, 0xC4F362, + 0xDE4799, 0x768043, 0x386207, 0x8E5497, 0xB0EF6D, 0x6C57FB, + 0xF56664, 0xD24F05, 0xE0F702, 0x8A41EF, 0xA2EC53, 0x09731C, + 0x6157FE, 0xC5731C, 0xEF1A2E, 0x60EC10, 0xA67EFE, 0x486A73, + 0x8004F6, 0xC3F482, 0x63BA28, 0x107282, +}; +#endif diff --git a/usr/src/libm/src/Q/_TBL_logl.c b/usr/src/libm/src/Q/_TBL_logl.c new file mode 100644 index 0000000..4e9b054 --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_logl.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_logl.c 1.8 06/01/31 SMI" + +/* + * Table of constants for logl. + * By K.C. Ng, March 9, 1989 + */ + +#include "libm.h" + +const long double _TBL_logl_hi[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.550418653596525415085404409320395875274e-0002L, + +3.077165866675368837102820454313423549427e-0002L, + +4.580953603129420316667926449525231301634e-0002L, + +6.062462181643484258060612972483742997442e-0002L, + +7.522342123758752569860532039086827578824e-0002L, + +8.961215868968713261995143730528787027578e-0002L, + +1.037967936816435648260617694803438348244e-0001L, + +1.177830356563834545387940581504548168563e-0001L, + +1.315763577887192725887160624312135596698e-0001L, + +1.451820098444978972819350572286183123887e-0001L, + +1.586050301766385840933711683530052981349e-0001L, + +1.718502569266592223400988812516892523599e-0001L, + +1.849223384940119926639035059723077314615e-0001L, + +1.978257433299198803625720374533782925763e-0001L, + +2.105647691073496376695527531626088702497e-0001L, + +2.231435513142097557662949937027997104032e-0001L, + +2.355660713127669090775881163009096337126e-0001L, + +2.478361639045812567806026867952720081699e-0001L, + +2.599575244369260669720794933085231739065e-0001L, + +2.719337154836417588316694242031136685288e-0001L, + +2.837681731306445983469010458794547135312e-0001L, + +2.954642128938358763866817700916107535002e-0001L, + +3.070250352949118620751243666552419834311e-0001L, + 
+3.184537311185346158102471140686078561531e-0001L, + +3.297532863724679818144228014362478784488e-0001L, + +3.409265869705932103050890001544662426952e-0001L, + +3.519764231571781846554474552048254288130e-0001L, + +3.629054936893684531378242945398272521523e-0001L, + +3.737164097935840808210167331226757525499e-0001L, + +3.844116989103320397347900487369508110320e-0001L, + +3.949938082408689781063939783520655318915e-0001L, + +4.054651081081643819780130322994137932204e-0001L, + +4.158278951437109656133288259511744826605e-0001L, + +4.260843953109000631245447385569842356371e-0001L, + +4.362367667749180703490412239178509575736e-0001L, + +4.462871026284195115325899874055994208063e-0001L, + +4.562374334815875943808053840818705719756e-0001L, + +4.660897299245992245586191878736453654769e-0001L, + +4.758459048699639142652093893677655824436e-0001L, + +4.855078157817008078017910633011255535046e-0001L, + +4.950772667978515145979645213034899480604e-0001L, + +5.045560107523952870583081828817948816463e-0001L, + +5.139457511022343168010058668287669524912e-0001L, + +5.232481437645478365168069353535037563840e-0001L, + +5.324647988694718438739234379583263151144e-0001L, + +5.415972824327443715765422111689841356847e-0001L, + +5.506471179526622792599479861304555364807e-0001L, + +5.596157879354226862708883466532843603287e-0001L, + +5.685047353526687120787385804082945993734e-0001L, + +5.773153650348236043181117067559499073234e-0001L, + +5.860490450035782089041193916402035316840e-0001L, + +5.947071077466927895143434959005658134879e-0001L, + +6.032908514380842623405849663552155166682e-0001L, + +6.118015411059929035298897608882125523626e-0001L, + +6.202404097518575288514942954323627943283e-0001L, + +6.286086594223741377443081293997900727520e-0001L, + +6.369074622370692316204942281372157123062e-0001L, + +6.451379613735847016652282983340864160916e-0001L, + +6.533012720127456387586157190946858013903e-0001L, + +6.613984822453650082602354487776933060928e-0001L, + 
+6.694306539426292672988850845059757003379e-0001L, + +6.773988235918061408096824565025274617492e-0001L, + +6.853040030989194165440476699956951850629e-0001L, +}; + +const long double _TBL_logl_lo[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.949242877125126389030374148355277037360e-0027L, + +3.053637928597425156289800058830629052349e-0027L, + +3.119411029097592549724599219796579698355e-0027L, + +2.315582833311779694729302029874044004747e-0027L, + +1.959279413884862919696230642481001644914e-0026L, + +3.207319665850940689112590931321584585232e-0026L, + +3.428363085348215886901240200560690191423e-0026L, + +5.132006688821218644279793035639158591104e-0026L, + +6.625826960278191623061313902987136675670e-0026L, + +6.511946011133829904478213998927380817716e-0027L, + +6.272836277110805877048126233548710095828e-0027L, + +6.480345801257546326311423010001184801374e-0026L, + +8.668694188954430256018491855337005516255e-0026L, + +3.374358317649896753533582921130800923337e-0026L, + +5.956974264347082186429247944518667757530e-0026L, + +9.660703479297144864941461785565180191497e-0026L, + +1.026401337764243728855958607127831718221e-0025L, + +7.895125273982903351541822547625351974082e-0026L, + +1.233787870669833985274611329531652753861e-0027L, + +7.032988549345377674736424478296516377526e-0026L, + +1.764708000531295728633384847670848625081e-0025L, + +1.359633534416813878749988462341486606257e-0025L, + +1.738801359182578816100029030519562527565e-0025L, + +9.952199173944211463901058384143333287734e-0026L, + +1.048454193250289008158931842356333139861e-0026L, + +1.996258899657478647716755914664160562170e-0025L, + +1.054613497176328160439100383508915283893e-0027L, + +5.143766259398803158035428613944687700657e-0026L, + +9.959314775409457843445608446369116918733e-0026L, + +1.374434005748650164937032849496159512584e-0026L, + +5.801291623641845255360276144691829323298e-0026L, + +8.316493534335158882618991007102844149005e-0026L, + 
+6.700372782269538472749252935215499311080e-0026L, + +1.410384923832595967313936024639114199873e-0025L, + +9.914327034309007140234547094345224044051e-0026L, + +1.932140695859428972988292357113036038299e-0025L, + +1.540820591764623257530922632028001952748e-0025L, + +5.963112403438125368118769047371614538741e-0026L, + +1.969366158297316138140115855981754471320e-0025L, + +1.388966334707414023926476567157219393213e-0026L, + +6.353934371729676603785277612987160899450e-0026L, + +3.488563800483361999633395030516586696799e-0025L, + +2.214454506406188993139159148705861598088e-0025L, + +2.895813670852564643073769701905380524878e-0025L, + +2.855018159274929532107406110765900047355e-0025L, + +9.273144996328510392949911518833977809658e-0026L, + +1.930744579236138780895942105787011752697e-0025L, + +1.538735422331574088102192677519746877453e-0025L, + +1.844586676642028985383989272409206538468e-0025L, + +3.547635464941839708071563131885310128521e-0025L, + +4.464712081783102087084281748635332222581e-0026L, + +5.062863951970459495500575300347508324877e-0026L, + +2.203060950889790157204518257910818074191e-0025L, + +5.540602231323196163388428517126435254723e-0027L, + +3.371348840624439923830692211721531149909e-0025L, + +7.637439356719457811667844141793488670929e-0026L, + +1.990439834788842292780211676828666657547e-0025L, + +1.978006454898465493718923085569873769719e-0025L, + +1.621161880831806223416081355472819612309e-0025L, + +3.899319576320551292151632804501913965920e-0025L, + +1.864235278097858865893177670582100390924e-0025L, + +1.534948208368053655735541548539936152221e-0025L, + +4.089715378013580174759550633443176148182e-0025L, +}; diff --git a/usr/src/libm/src/Q/_TBL_sinl.c b/usr/src/libm/src/Q/_TBL_sinl.c new file mode 100644 index 0000000..09e4a6b --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_sinl.c @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_sinl.c 1.10 06/01/31 SMI" + +/* + * table of sinl(x) where x is 0.15625 + i*0.03125, i=0,1,...,74. + * {0x3ffc4000,0,0,0} --> (inc 0x800) --> {0x3ffe9000,0,0,0} + * 0.15625 0.03125 0.78125 (pi/4 = 0.785395663...) 
+ */ + +#include "libm.h" + +const long double _TBL_sinl_hi[] = { + +1.556149927735560412099206432035162581492e-0001L, + +1.594724589318434199425963881130908091043e-0001L, + +1.633274917366128508468661724543543700180e-0001L, + +1.671800323648067343709660282007512722777e-0001L, + +1.710300220313950192813479692398343312832e-0001L, + +1.748774019902721898956853691085201901772e-0001L, + +1.787221135351536593753562418641807235164e-0001L, + +1.825640980004715553995456513594130154574e-0001L, + +1.864032967622698845523799831032052611919e-0001L, + +1.902396512390990617639858876307573287214e-0001L, + +1.940731028929097911560552002141454036336e-0001L, + +1.979035932299462846523939109918127853182e-0001L, + +2.017310638016388047250381511640009707423e-0001L, + +2.055554562054955176568330206054936963632e-0001L, + +2.093767120859936437118907527248816522107e-0001L, + +2.131947731354698906160730331184784624987e-0001L, + +2.170095810950101567605780958260553963420e-0001L, + +2.208210777553384905528563479277490523429e-0001L, + +2.246292049577052923504285497964248198189e-0001L, + +2.284339045947747454247378313461956799859e-0001L, + +2.322351186115114624139308777462358722636e-0001L, + +2.360327890060663337354342917945180835158e-0001L, + +2.398268578306615644413692518108865737937e-0001L, + +2.436172671924748860122309477052146367777e-0001L, + +2.474039592545229295968487048493892032583e-0001L, + +2.549659604158784674875565748648726276685e-0001L, + +2.625123997691532814509496263956929310415e-0001L, + +2.700428167185850315527550636188270542366e-0001L, + +2.775567516463363259220234468281285678680e-0001L, + +2.850537459405474245877630333232525606110e-0001L, + +2.925333420233275436247023264939134225079e-0001L, + +2.999950833786830511632482820116999437532e-0001L, + +3.074385145803808506705029582019820907725e-0001L, + +3.148631813197452508650363151269390156066e-0001L, + +3.222686304333866256877459198931880313050e-0001L, + +3.296544099308601719143177251264631756945e-0001L, + 
+3.370200690222530762612817541738100244419e-0001L, + +3.443651581456984082071720464722237468910e-0001L, + +3.516892289948140592225848969555470155541e-0001L, + +3.589918345460650536777102991528689411936e-0001L, + +3.662725290860475613729093517162641768533e-0001L, + +3.735308682386929464168397526608481120900e-0001L, + +3.807664089923901920572007033888966750813e-0001L, + +3.879787097270250460510796908137419597834e-0001L, + +3.951673302409342362448326404196536570776e-0001L, + +4.023318317777731112171055988809823868862e-0001L, + +4.094717770532950661226940270114522362676e-0001L, + +4.165867302820411192591124488310696565000e-0001L, + +4.236762572039380103616839880311024798208e-0001L, + +4.307399251108031972163215178508491897943e-0001L, + +4.377773028727551328616189747027966801523e-0001L, + +4.447879609645272114330560125295252111499e-0001L, + +4.517714714916837765816887501340628695303e-0001L, + +4.587274082167365923772950289728747732442e-0001L, + +4.656553465851601826811995125075467791328e-0001L, + +4.725548637513044511465513178085169418350e-0001L, + +4.794255386042030002732879352155714019245e-0001L, + +4.930786857539230572651365527534871205832e-0001L, + +5.066114548142573676422960008938671919466e-0001L, + +5.200205419537270047602136998746747297451e-0001L, + +5.333026735360201733291311033081615288994e-0001L, + +5.464546069192035644033495537494110008818e-0001L, + +5.594731312473668773848440060031166884132e-0001L, + +5.723550682345072403849537068245036075406e-0001L, + +5.850972729404621548053993141500804585059e-0001L, + +5.976966345387015312386476189673343370299e-0001L, + +6.101500770757913712737423935661832200218e-0001L, + +6.224545602223436830419267050904433302049e-0001L, + +6.346070800152692968503099142036714364826e-0001L, + +6.466046695911523705240421598828007629792e-0001L, + +6.584443999105675415895839548840419894459e-0001L, + +6.701233804731628946545315835006484946172e-0001L, + +6.816387600233341667332419527798939078545e-0001L, + 
+6.929877272463179102818154908230482095679e-0001L, + +7.041675114545336727800595099739428438828e-0001L, +}; + +const long double _TBL_sinl_lo[] = { + -7.839895634192879801217180506294972695887e-0036L, + -7.579278167533093253112813720340914585189e-0036L, + +1.813803443011554857703679023007542917336e-0036L, + -5.685040200337201343842157163322014327778e-0036L, + +7.013958751874876088754160302032414326691e-0036L, + +9.101164084055805006113433827277389417722e-0036L, + -1.529069265172651032025475612605940319203e-0036L, + -5.873100812266872079952884219254900231461e-0036L, + +1.764603048068267800105867159753318395454e-0036L, + +1.747799267790272859521729635868399475234e-0036L, + -9.673047410519982672089452429449289994858e-0036L, + -7.666827750837122707923169727244402427704e-0036L, + -4.275134347549669784351512906173841196088e-0036L, + -1.826904072780322152815985026139121969706e-0036L, + -1.594702873443294499653146384825158092559e-0036L, + -7.180615084240582786256765419723871383233e-0036L, + +1.073564887942168318128295491982011935257e-0035L, + +6.166267602604185314123111207543917974633e-0036L, + +2.420615108492974698446957518700585915995e-0036L, + +1.864291640707538541155008952901532832506e-0036L, + -4.969304833641910200750246243329289676583e-0036L, + +7.191910920600591837788283739445222790835e-0036L, + +2.398670365698962872409384444450714480056e-0036L, + +2.625717623049256499265563616201152710192e-0036L, + -7.364870011085995329435971152758116180239e-0036L, + +2.202803779185347210050716883280741537850e-0035L, + +3.249236770720310646731771785718217268891e-0036L, + +2.438735936561976529428558055804286674772e-0035L, + -1.358485954689981282143446687700830546868e-0036L, + +2.042693258859029188027001236804037487674e-0035L, + +1.935394086687044503080036879506851279569e-0035L, + +1.351742655356978501392833614755710504356e-0035L, + +1.065151724232046458392410994534171402266e-0035L, + +1.924312402124329269930577050628341603064e-0035L, + 
-1.495058978047592634838539083355002279867e-0035L, + -1.226069967847432149730821922942328537678e-0035L, + -2.214357561488394736777775450498906642993e-0035L, + -3.197918850054809249377584675940519273161e-0036L, + +1.752934334182702105675254128020832940341e-0035L, + -2.067723892627233681394169702571120887364e-0035L, + -1.967684335349365926758978182531089889151e-0035L, + -1.480234947789865560488791134115171284680e-0035L, + -2.020095411752086363369245333724961071903e-0035L, + +8.019047838709350758444432786175864173856e-0036L, + +7.575600313883125509400401940426278198665e-0036L, + -1.956787228828481747235699165048715626458e-0035L, + +2.239452414684575979216557857298213538383e-0035L, + -2.004881068319988136754382697969636119420e-0035L, + +1.404844563886544703294730965793125947043e-0035L, + +1.540967800016293988508912183967615475673e-0035L, + +9.627943645034426124771174260339225827341e-0036L, + -1.671879365114936780075083716139548989818e-0035L, + -1.193872230164722958937943872752845047434e-0035L, + -4.709469941941829089292517195754317215227e-0036L, + -1.562825989789718724786197721553059612264e-0035L, + +9.313247749577680188502242676253713195205e-0036L, + -1.384269776167183189501758486393819264119e-0035L, + +7.064986931125350563523011010886249504328e-0036L, + -3.109636998242741557027060430659670849804e-0035L, + -3.324150213308849248337118428668967104680e-0035L, + -3.427152913195516159969937952267551337396e-0035L, + -2.118702307301603154209365237718648576399e-0035L, + -1.289226205241639223068869521009917813361e-0037L, + +2.125722734799331239445801994645145285587e-0035L, + -1.781645762780561951365253354033804640300e-0035L, + -2.495276089408737145274279413504615537138e-0035L, + +1.338422379299389637809694183691505317685e-0035L, + +1.919747869211470727176212361922698586017e-0035L, + +4.330259169399683693260601564559270596091e-0036L, + -3.417429818162194124156743659460798263758e-0035L, + -4.544129440843003305237213918657872189520e-0035L, + 
-1.331658529527437298976340693936847286647e-0036L, + +2.748387759350275490242241143386673942983e-0035L, + +4.500898710776635571808492195291899181897e-0035L, + -2.872593727403933486766336102755986165044e-0035L, +}; diff --git a/usr/src/libm/src/Q/_TBL_tanl.c b/usr/src/libm/src/Q/_TBL_tanl.c new file mode 100644 index 0000000..2b93621 --- /dev/null +++ b/usr/src/libm/src/Q/_TBL_tanl.c @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_tanl.c 1.9 06/01/31 SMI" + +/* + * table of tanl(x) where x is 0.15625 + i*0.03125, i=0,1,...,74. + * {0x3ffc4000,0,0,0} --> (inc 0x800) --> {0x3ffe9000,0,0,0} + * 0.15625 0.03125 0.78125 (pi/4 = 0.785395663...) 
+ */ + +#include "libm.h" + +const long double _TBL_tanl_hi[] = { + +1.575341073252716106852257741724104864870e-0001L, + +1.615397840495214763092752400110463977418e-0001L, + +1.655505192739339762139309125850523900470e-0001L, + +1.695664452197665101509706065437500194264e-0001L, + +1.735876947679815208446734114353616329985e-0001L, + +1.776144014774467276317429269586882243819e-0001L, + +1.816466996033214276582758961743535864882e-0001L, + +1.856847241156344116266612278649865067149e-0001L, + +1.897286107180591328910833700730338069829e-0001L, + +1.937784958668918635160223977682694780440e-0001L, + +1.978345167902386688084063751239797409303e-0001L, + +2.018968115074171328840689933657666757769e-0001L, + +2.059655188485788721087393288030358608878e-0001L, + +2.100407784745589808415175232245911862545e-0001L, + +2.141227308969586648860666814158624683863e-0001L, + +2.182115174984674325058820481495796084382e-0001L, + +2.223072805534313308722888175879995829692e-0001L, + +2.264101632486738374776714045035595099974e-0001L, + +2.305203097045761414554475155379181753938e-0001L, + +2.346378649964236789993677105610770268268e-0001L, + +2.387629751760259202681510637409399276566e-0001L, + +2.428957872936165424010859430156609881174e-0001L, + +2.470364494200412646634947325158035272170e-0001L, + +2.511851106692407673991038906774344215246e-0001L, + +2.553419212210362665044822364904736907938e-0001L, + +2.636805964199967998548259948794679989658e-0001L, + +2.720536986587708834265643667712727220498e-0001L, + +2.804624701452514031696042891852650256007e-0001L, + +2.889081724405147260015884454642448163630e-0001L, + +2.973920872690245894671940160246554900716e-0001L, + +3.059155173530592641072389231969929579942e-0001L, + +3.144797872725715161734382202256272257022e-0001L, + +3.230862443517455201183006557179619867007e-0001L, + +3.317362595735727673394297030105334375685e-0001L, + +3.404312285238303874282274418902587687499e-0001L, + +3.491725723659103522547129636843912210518e-0001L, + 
+3.579617388480169983883959631794471179752e-0001L, + +3.668002033443234227206048185537661359712e-0001L, + +3.756894699317548404092457756875977254806e-0001L, + +3.846310725041492230408562796582816506283e-0001L, + +3.936265759256327582294137871012180779893e-0001L, + +4.026775772251402117785937359900067250949e-0001L, + +4.117857068341084757888498763848712415895e-0001L, + +4.209526298694758220747941414506739471850e-0001L, + +4.301800474642300490203296054472752443302e-0001L, + +4.394696981478662404836631484799309510327e-0001L, + +4.488233592792397088405555239245740331672e-0001L, + +4.582428485344323669591891965241567184790e-0001L, + +4.677300254523917999213427069619926229670e-0001L, + +4.772867930412522617224042590104237355391e-0001L, + +4.869150994484063244987175035683195875449e-0001L, + +4.966169396975656257105605790725693200164e-0001L, + +5.063943574962298120708227547071771601970e-0001L, + +5.162494471171751444917753379369286911420e-0001L, + +5.261843553577791441706134379510093677744e-0001L, + +5.362012835812160313475789292393083126826e-0001L, + +5.463024898437905132551794657802853544147e-0001L, + +5.667670655805864456801779441354759990792e-0001L, + +5.875973675914432213941588631578976895206e-0001L, + +6.088137403243807214124939743963768234939e-0001L, + +6.304376738358847668526114292997751740101e-0001L, + +6.524918979288079927238977365516267472227e-0001L, + +6.750004851442429076631779494777228720541e-0001L, + +6.979889636235992551497657233900136516119e-0001L, + +7.214844409909044199895178832795946639042e-0001L, + +7.455157405593919951361301646778137804617e-0001L, + +7.701135513442087050059836600527731975210e-0001L, + +7.953105935686741856456016917474183548089e-0001L, + +8.211418015898941219114239653747117425236e-0001L, + +8.476445264465526540907883088076187235513e-0001L, + +8.748587605544823495396719079321555572147e-0001L, + +9.028273874526735021961743652539763208464e-0001L, + +9.315964599440724611652027565739364074620e-0001L, + 
+9.612155104943704161853006259468735267385e-0001L, + +9.917378983632686802568573899299023560595e-0001L, +}; + +const long double _TBL_tanl_lo[] = { + +4.179214385976688849250979202972663542033e-0036L, + +1.201528446191025246839024650298397902579e-0035L, + +1.129323489449537738080901788756231977300e-0035L, + +2.140135278964936125815581758267649033136e-0037L, + +4.432205749300185001040819456988862684951e-0036L, + +6.136100978120132271332684207100740679906e-0036L, + -1.032553059579180849987395832156976613765e-0035L, + -3.160024259922437001215851404196652376871e-0037L, + +9.288062528988428190963791818336024913881e-0036L, + -7.446971695790644707546943119354167721612e-0036L, + -3.194115406765633171232961214385101074252e-0036L, + +8.636824101000271295925487212833770093090e-0036L, + +3.102272236726159152985822088441358430350e-0036L, + -5.851906473589368694487202441718008909753e-0036L, + +4.010022070137306925338504597897336002613e-0036L, + +1.037727706884673933875970874373462194321e-0035L, + -7.373234860421060505099033319601658081963e-0037L, + +1.012564187615243178899324943342662908733e-0035L, + -1.409372712725876553601555574139438939044e-0036L, + +8.378827024922767151362882309834645448153e-0036L, + +2.973824478467770877677465646013477493211e-0037L, + +5.400099398783906370270919848839276575083e-0036L, + -6.462512242458415498262723324973388658384e-0036L, + -2.322762023061318925750503642571013465985e-0035L, + -1.258955887171193954556521579215259847692e-0035L, + -2.320447955805179154521333495999564905899e-0035L, + -1.149012552345329193834437558081484346041e-0035L, + +1.452751817871169833623945031311944393871e-0035L, + +1.233520419884672519188849688498814953115e-0035L, + -2.801716058919562991500189219464456618491e-0036L, + -8.652310551710608096633992612270187537921e-0036L, + +1.247172716748407772564831128401880847054e-0035L, + -1.239704249638930213583929247314024560861e-0035L, + +5.184462988068616168233816296529150644737e-0036L, + 
-6.856476723415391305857531095744442523549e-0039L, + -9.739553531295433673398454344315039002245e-0036L, + +2.266233016492660661638292126777401538348e-0035L, + +2.301502770052376628347923621704562121797e-0035L, + +1.948845747336057051538318007442114995744e-0035L, + -1.940750389335608259363326370556914475278e-0035L, + +2.019644660873458215118483163076314703163e-0035L, + +1.602015812156905914821208807083062984550e-0035L, + -3.292416392515743374743236507806546284438e-0036L, + +8.663813942351672490328381271391704283086e-0036L, + +2.366609581506599084093910217277994736871e-0035L, + -1.408950063101056644039900854057776596620e-0035L, + -1.514769920962849077013113923603803573445e-0035L, + -2.261973795598615105449462443044330073903e-0035L, + -2.553211882172402068866429390071980923144e-0036L, + +1.416627029437126089675998257335033382140e-0035L, + +2.342724931714249133589230079809850869266e-0035L, + +1.710557978782419482731492281155256146438e-0036L, + -2.148837714938406737587489024152464642738e-0035L, + -4.273007541330408144086077695573950943351e-0035L, + -1.187512317830147119742251549090183099823e-0035L, + +4.828271743385077560204615670566277021463e-0036L, + +2.888285131340709127656514948635349311805e-0035L, + -4.505233085580329558684272075904471228864e-0035L, + +2.931630895327838681946984510160883959332e-0036L, + +2.647698485118630114484469975939947791390e-0035L, + +3.589320320845381187254017736531618320153e-0035L, + +3.109409548262590459351847474032415851843e-0035L, + +4.083234910839125531016836269706248922707e-0035L, + +2.617081426185972174278972738311427223003e-0035L, + +1.685638883876736468625598871602567025329e-0035L, + +3.340709007044122362174996515517070074049e-0035L, + +4.272448967674769643335827331513513914893e-0035L, + -1.016337077502454982949287784426587554312e-0035L, + -4.164820472415940877265629374001265181061e-0035L, + -1.266702907529482683855413412028523879313e-0035L, + -2.498295523749676738976415773050309926889e-0035L, + 
-2.240244888035701528565322302010524216607e-0035L, + +2.072673676633052237512344957839713494538e-0035L, + -5.635620575073849011607547314084511148918e-0036L, + +1.289773398786324444403985925780591709915e-0035L, +}; diff --git a/usr/src/libm/src/Q/__cosl.c b/usr/src/libm/src/Q/__cosl.c new file mode 100644 index 0000000..1bd86de --- /dev/null +++ b/usr/src/libm/src/Q/__cosl.c @@ -0,0 +1,137 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__cosl.c 1.8 06/01/31 SMI" + +/* + * __k_cosl(long double x, long double y) + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Table look up algorithm + * 1. by cos(-x) = cos(x), we may replace x by |x| + * 2. if x < 25/128 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000 0), return 1.0 with inexact if x!= 0 + * z = x*x; + * if x <= 1/128 = 2**-7 = 0.0078125 + * cos(x)=1.0+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + * else + * cos(x)=1.0+z*(q1+ ... z*q8) + * 3. 
else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7 + * By + * cos(t+x') + * = cos(t)cos(x')-sin(t)sin(x') + * = cos(t)(1+z*(qq1+z*qq2))-[sin(t)]*x*(1+z*(pp1+z*pp2)) + * = cos(t) + [cos(t)]*(z*(qq1+z*qq2))- + * [sin(t)]*x*(1+z*(pp1+z*pp2)) + * + * Thus, + * let a= _TBL_cos_hi[i], b = _TBL_cos_lo[i], c= _TBL_sin_hi[i], + * x = (x-t)+y + * z = x*x; + * cos(t+x) = a+(b+ (-c*x*(1+z*(pp1+z*pp2))+a*(z*(qq1+z*qq2))) + */ + +#include "libm.h" + +extern const long double _TBL_cosl_hi[], _TBL_cosl_lo[], _TBL_sinl_hi[]; +static const long double + one = 1.0L, +/* + * 3 11 -122.32 + * |sin(x) - (x+pp1*x +...+ pp5*x )| <= 2 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * 2 16 -117.11 + * |cos(x) - (1+q1*x + ... 
+ q8*x )| <= 2 for |x|<= 0.15625 + */ + q1 = -4.999999999999999999999999999999756416975e-0001L, + q2 = +4.166666666666666666666666664006066577258e-0002L, + q3 = -1.388888888888888888888877700363937169637e-0003L, + q4 = +2.480158730158730158494468463031814083559e-0005L, + q5 = -2.755731922398586276322819250356005542871e-0007L, + q6 = +2.087675698767424261441959760729854017855e-0009L, + q7 = -1.147074481239662089072452129010790774761e-0011L, + q8 = +4.777761647399651599730663422263531034782e-0014L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ + qq1 = -4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L; + +#define i0 0 + +long double +__k_cosl(long double x, long double y) { + long double a, t, z, w; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; + hx = px[i0]; + ix = hx & 0x7fffffff; + if (ix < 0x3ffc4000) { + if (ix < 0x3fc60000) + if ((i = (int) x) == 0) + return (one); /* generate inexact */ + z = x * x; + + if (ix < 0x3ff80000) /* 0.0078125 */ + return one + z * (qq1 + z * (qq2 + z * (qq3 + + z * (qq4 + z * qq5)))); + else + return one + z * (q1 + z * (q2 + z * (q3 + + z * (q4 + z * (q5 + z * (q6 + z * (q7 + + z * q8))))))); + } + j = (ix + 0x400) & 0x7ffff800; + i = (j - 0x3ffc4000) >> 11; + pt[i0] = j; + if (hx > 0) + x = y - (t - x); + else + x = (-y) - (t + x); + a = _TBL_cosl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); + t = _TBL_cosl_lo[i] - (_TBL_sinl_hi[i] * w - a * t); + return (a + t); +} diff --git a/usr/src/libm/src/Q/__lgammal.c b/usr/src/libm/src/Q/__lgammal.c new file mode 100644 index 0000000..884c81b --- /dev/null +++ 
b/usr/src/libm/src/Q/__lgammal.c @@ -0,0 +1,394 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__lgammal.c 1.5 06/01/31 SMI" + +/* + * long double __k_lgammal(long double x, int *signgamlp); + * K.C. Ng, August, 1989. + * + * We choose [1.5,2.5] to be the primary interval. Our algorithms + * are mainly derived from + * + * + * zeta(2)-1 2 zeta(3)-1 3 + * lgamma(2+s) = s*(1-euler) + --------- * s - --------- * s + ... + * 2 3 + * + * + * Note 1. Since gamma(1+s)=s*gamma(s), hence + * lgamma(1+s) = log(s) + lgamma(s), or + * lgamma(s) = lgamma(1+s) - log(s). 
+ * When s is really tiny (like roundoff), lgamma(1+s) ~ s(1-euler) + * Hence lgamma(s) ~ -log(s) for tiny s + * + */ + +#include "libm.h" + +static long double neg(long double, int *); +static long double poly(long double, const long double *, int); +static long double polytail(long double); +static long double primary(long double); + +static const long double +c0 = 0.0L, +ch = 0.5L, +c1 = 1.0L, +c2 = 2.0L, +c3 = 3.0L, +c4 = 4.0L, +c5 = 5.0L, +c6 = 6.0L, +pi = 3.1415926535897932384626433832795028841971L, +tiny = 1.0e-40L; + +/* __k_lgammal(x, signgamlp): log-gamma kernel. A non-finite x returns x*x. Otherwise *signgamlp is set to 1 and an x with the sign bit set is handed to neg(). For x < 8 the argument is shifted to within 0.5 of 2 and primary() is used; for 8 <= x < 1e40 the result is x*(log(x)-1) - (0.5*log(x) - polytail(1/x)); above that only x*(log(x)-1) is returned. */ long double +__k_lgammal(long double x, int *signgamlp) { + long double t,y; + int i; + + /* purge off +-inf, NaN and negative arguments */ + if(!finitel(x)) return x*x; + *signgamlp = 1; + if(signbitl(x)) return(neg(x,signgamlp)); + + /* for x < 8.0 */ + if(x<8.0L) { + y = anintl(x); /* presumably x rounded to the nearest integer, so x-y stays within 0.5 of zero - TODO confirm */ + i = (int) y; + /* each case undoes the shift with lgamma(1+s) = log(s) + lgamma(s); primary(s) is used as lgamma(2+s) */ switch(i) { + case 0: + if(x<1.0e-40L) return -logl(x); else /* same threshold value as the constant tiny */ + return (primary(x)-log1pl(x))-logl(x); + case 1: + return primary(x-y)-logl(x); + case 2: + return primary(x-y); + case 3: + return primary(x-y)+logl(x-c1); + case 4: + return primary(x-y)+logl((x-c1)*(x-c2)); + case 5: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)); + case 6: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)*(x-c4)); + case 7: + return primary(x-y)+logl((x-c1)*(x-c2)*(x-c3)*(x-c4)*(x-c5)); + case 8: + return primary(x-y)+ + logl((x-c1)*(x-c2)*(x-c3)*(x-c4)*(x-c5)*(x-c6)); + } + } + + /* 8.0 <= x < 1.0e40 */ + if (x < 1.0e40L) { + t = logl(x); + return x*(t-c1)-(ch*t-polytail(c1/x)); + } + + /* 1.0e40 <= x <= inf */ + return x*(logl(x)-c1); +} + +static const long double an1[] = { /* 20 terms */ + -0.0772156649015328606065120900824024309741L, + 3.224670334241132182362075833230130289059e-0001L, + -6.735230105319809513324605383668929964120e-0002L, + 2.058080842778454787900092432928910226297e-0002L, + -7.385551028673985266273054086081102125704e-0003L, + 2.890510330741523285758867304409628648727e-0003L, + -1.192753911703260976581414338096267498555e-0003L, +
5.096695247430424562831956662855697824035e-0004L, + -2.231547584535777978926798502084300123638e-0004L, + 9.945751278186384670278268034322157947635e-0005L, + -4.492623673665547726647838474125147631082e-0005L, + 2.050721280617796810096993154281561168706e-0005L, + -9.439487785617396552092393234044767313568e-0006L, + 4.374872903516051510689234173139793159340e-0006L, + -2.039156676413643091040459825776029327487e-0006L, + 9.555777181318621470466563543806211523634e-0007L, + -4.468344919709630637558538313482398989638e-0007L, + 2.216738086090045781773004477831059444178e-0007L, + -7.472783403418388455860445842543843485916e-0008L, + 8.777317930927149922056782132706238921648e-0008L, +}; + +static const long double an2[] = { /* 20 terms */ + -.0772156649015328606062692723698127607018L, + 3.224670334241132182635552349060279118047e-0001L, + -6.735230105319809367555642883133994818325e-0002L, + 2.058080842778459676880822202762143671813e-0002L, + -7.385551028672828216011343150077846918930e-0003L, + 2.890510330762060607399561536905727853178e-0003L, + -1.192753911419623262328187532759756368041e-0003L, + 5.096695278636456678258091134532258618614e-0004L, + -2.231547306817535743052975194022893369135e-0004L, + 9.945771461633313282744264853986643877087e-0005L, + -4.492503279458972037926876061257489481619e-0005L, + 2.051311416812082875492678651369394595613e-0005L, + -9.415778282365955203915850761537462941165e-0006L, + 4.452428829045147098722932981088650055919e-0006L, + -1.835024727987632579886951760650722695781e-0006L, + 1.379783080658545009579060714946381462565e-0006L, + 2.282637532109775156769736768748402175238e-0007L, + 1.002577375515900191362119718128149880168e-0006L, + 5.177028794262638311939991106423220002463e-0007L, + 3.127947245174847104122426445937830555755e-0007L, +}; + +static const long double an3[] = { /* 20 terms */ + -.0772156649015328227870646417729220690875L, + 3.224670334241156699881788955959915250365e-0001L, + -6.735230105312273571375431059744975563170e-0002L, + 
2.058080842924464587662846071337083809005e-0002L, + -7.385551008677271654723604653956131791619e-0003L, + 2.890510536479782086197110272583833176602e-0003L, + -1.192752262076857692740571567808259138697e-0003L, + 5.096800771149805289371135155128380707889e-0004L, + -2.231000836682831335505058492409860123647e-0004L, + 9.968912171073936803871803966360595275047e-0005L, + -4.412020779327746243544387946167256187258e-0005L, + 2.281374113541454151067016632998630209049e-0005L, + -4.028361291428629491824694655287954266830e-0006L, + 1.470694920619518924598956849226530750139e-0005L, + 1.381686137617987197975289545582377713772e-0005L, + 2.012493539265777728944759982054970441601e-0005L, + 1.723917864208965490251560644681933675799e-0005L, + 1.202954035243788300138608765425123713395e-0005L, + 5.079851887558623092776296577030850938146e-0006L, + 1.220657945824153751555138592006604026282e-0006L, +}; + +static const long double an4[] = { /* 21 terms */ + -.0772156649015732285350261816697540392371L, + 3.224670334221752060691751340365212226097e-0001L, + -6.735230109744009693977755991488196368279e-0002L, + 2.058080778913037626909954141611580783216e-0002L, + -7.385557567931505621170483708950557506819e-0003L, + 2.890459838416254326340844289785254883436e-0003L, + -1.193059036207136762877351596966718455737e-0003L, + 5.081914708100372836613371356529568937869e-0004L, + -2.289855016133600313131553005982542045338e-0004L, + 8.053454537980585879620331053833498511491e-0005L, + -9.574620532104845821243493405855672438998e-0005L, + -9.269085628207107155601445001196317715686e-0005L, + -2.183276779859490461716196344776208220180e-0004L, + -3.134834305597571096452454999737269668868e-0004L, + -3.973878894951937437018305986901392888619e-0004L, + -3.953352414899222799161275564386488057119e-0004L, + -3.136740932204038779362660900621212816511e-0004L, + -1.884502253819634073946130825196078627664e-0004L, + -8.192655799958926853585332542123631379301e-0005L, + -2.292183750010571062891605074281744854436e-0005L, + 
-3.223980628729716864927724265781406614294e-0006L, +}; + +static const long double ap1[] = { /* 19 terms */ + -0.0772156649015328606065120900824024296961L, + 3.224670334241132182362075833230047956465e-0001L, + -6.735230105319809513324605382963943777301e-0002L, + 2.058080842778454787900092126606252375465e-0002L, + -7.385551028673985266272518231365020063941e-0003L, + 2.890510330741523285681704570797770736423e-0003L, + -1.192753911703260971285304221165990244515e-0003L, + 5.096695247430420878696018188830886972245e-0004L, + -2.231547584535654004647639737841526025095e-0004L, + 9.945751278137201960636098805852315982919e-0005L, + -4.492623672777606053587919463929044226280e-0005L, + 2.050721258703289487603702670753053765201e-0005L, + -9.439485626565616989352750672499008021041e-0006L, + 4.374838162403994645138200419356844574219e-0006L, + -2.038979492862555348577006944451002161496e-0006L, + 9.536763152382263548086981191378885102802e-0007L, + -4.426111214332434049863595231916564014913e-0007L, + 1.911148847512947464234633846270287546882e-0007L, + -5.788673944861923038157839080272303519671e-0008L, +}; + +static const long double ap2[] = { /* 19 terms */ + -0.077215664901532860606428624449354836087L, + 3.224670334241132182271948744265855440139e-0001L, + -6.735230105319809467356126599005051676203e-0002L, + 2.058080842778453315716389815213496002588e-0002L, + -7.385551028673653323064118422580096222959e-0003L, + 2.890510330735923572088003424849289006039e-0003L, + -1.192753911629952368606185543945790688144e-0003L, + 5.096695239806718875364547587043220998766e-0004L, + -2.231547520600616108991867127392089144886e-0004L, + 9.945746913898151120612322833059416008973e-0005L, + -4.492599307461977003570224943054585729684e-0005L, + 2.050609891889165453592046505651759999090e-0005L, + -9.435329866734193796540515247917165988579e-0006L, + 4.362267138522223236241016136585565144581e-0006L, + -2.008556356653246579300491601497510230557e-0006L, + 8.961498103387207161105347118042844354395e-0007L, + 
-3.614187228330216282235692806488341157741e-0007L, + 1.136978988247816860500420915014777753153e-0007L, + -2.000532786387196664019286514899782691776e-0008L, +}; + +static const long double ap3[] = { /* 19 terms */ + -0.077215664901532859888521470795348856446L, + 3.224670334241131733364048614484228443077e-0001L, + -6.735230105319676541660495145259038151576e-0002L, + 2.058080842775975461837768839015444273830e-0002L, + -7.385551028347615729728618066663566606906e-0003L, + 2.890510327517954083379032008643080256676e-0003L, + -1.192753886919470728001821137439430882603e-0003L, + 5.096693728898932234814903769146577482912e-0004L, + -2.231540055048827662528594010961874258037e-0004L, + 9.945446210018649311491619999438833843723e-0005L, + -4.491608206598064519190236245753867697750e-0005L, + 2.047939071322271016498065052853746466669e-0005L, + -9.376824046522786006677541036631536790762e-0006L, + 4.259329829498149111582277209189150127347e-0006L, + -1.866064770421594266702176289764212873428e-0006L, + 7.462066721137579592928128104534957135669e-0007L, + -2.483546217529077735074007138457678727371e-0007L, + 5.915166576378161473299324673649144297574e-0008L, + -7.334139641706988966966252333759604701905e-0009L, +}; + +static const long double ap4[] = { /* 19 terms */ + -0.0772156649015326785569313252637238673675L, + 3.224670334241051435008842685722468344822e-0001L, + -6.735230105302832007479431772160948499254e-0002L, + 2.058080842553481183648529360967441889912e-0002L, + -7.385551007602909242024706804659879199244e-0003L, + 2.890510182473907253939821312248303471206e-0003L, + -1.192753098427856770847894497586825614450e-0003L, + 5.096659636418811568063339214203693550804e-0004L, + -2.231421144004355691166194259675004483639e-0004L, + 9.942073842343832132754332881883387625136e-0005L, + -4.483809261973204531263252655050701205397e-0005L, + 2.033260142610284888319116654931994447173e-0005L, + -9.153539544026646699870528191410440585796e-0006L, + 3.988460469925482725894144688699584997971e-0006L, + 
-1.609692980087029172567957221850825977621e-0006L, + 5.634916377249975825399706694496688803488e-0007L, + -1.560065465929518563549083208482591437696e-0007L, + 2.961350193868935325526962209019387821584e-0008L, + -2.834602215195368130104649234505033159842e-0009L, +}; + +/* primary(s): evaluates 0.5*s + s*poly(s, table, n), choosing the coefficient table from i = (int)(8*(s + 0.5)), i.e. one table per sub-interval of width 1/8: an4, an3, an2, an1 for i = 0..3 and ap1..ap4 for i = 4..7. NOTE(review): s == 0.5 gives i == 8, which no case handles, so control would fall off the end without a return value; callers must keep s below 0.5. */ static long double +primary(long double s) { /* assume |s|<=0.5 */ + int i; + + i = (int) (8.0L * (s + 0.5L)); + switch(i) { + case 0: return ch*s+s*poly(s,an4,21); + case 1: return ch*s+s*poly(s,an3,20); + case 2: return ch*s+s*poly(s,an2,20); + case 3: return ch*s+s*poly(s,an1,20); + case 4: return ch*s+s*poly(s,ap1,19); + case 5: return ch*s+s*poly(s,ap2,19); + case 6: return ch*s+s*poly(s,ap3,19); + case 7: return ch*s+s*poly(s,ap4,19); + } + /* NOTREACHED */ +} + +/* poly(s, p, n): Horner evaluation of p[0] + p[1]*s + ... + p[n-1]*s^(n-1); reads p[n-1] first, so n must be at least 1. */ static long double +poly(long double s, const long double *p, int n) { + long double y; + int i; + y = p[n-1]; + for (i=n-2;i>=0;i--) y = p[i]+s*y; + return y; +} + +/* pt[]: 19 coefficients used by polytail(): pt[0] is the constant term and pt[1..18] multiply odd powers of the argument. */ static const long double pt[] = { + 9.189385332046727417803297364056176804663e-0001L, + 8.333333333333333333333333333331286969123e-0002L, + -2.777777777777777777777777553194796036402e-0003L, + 7.936507936507936507927283071433584248176e-0004L, + -5.952380952380952362351042163192634108297e-0004L, + 8.417508417508395661774286645578379460131e-0004L, + -1.917526917525263651186066417934685675649e-0003L, + 6.410256409395203164659292973142293199083e-0003L, + -2.955065327248303301763594514012418438188e-0002L, + 1.796442830099067542945998615411893822886e-0001L, + -1.392413465829723742489974310411118662919e+0000L, + 1.339984238037267658352656597960492029261e+0001L, + -1.564707657605373662425785904278645727813e+0002L, + 2.156323807499211356127813962223067079300e+0003L, + -3.330486427626223184647299834137041307569e+0004L, + 5.235535072011889213611369254140123518699e+0005L, + -7.258160984602220710491988573430212593080e+0006L, + 7.316526934569686459641438882340322673357e+0007L, + -3.806450279064900548836571789284896711473e+0008L, +}; + +/* polytail(s): returns pt[0] + s*Q(s*s), where Q is the Horner sum of pt[1..18] in powers of s*s. */ static long double +polytail(long double s) { + long
double t,z; + int i; + z = s*s; + t = pt[18]; + for (i=17;i>=1;i--) t = pt[i]+z*t; + return pt[0]+s*t; +} + +static long double +neg(long double z, int *signgamlp) { + long double t,p; + + /* + * written by K.C. Ng, Feb 2, 1989. + * + * Since + * -z*G(-z)*G(z) = pi/sin(pi*z), + * we have + * G(-z) = -pi/(sin(pi*z)*G(z)*z) + * = pi/(sin(pi*(-z))*G(z)*z) + * Algorithm + * z = |z| + * t = sinpi(z); ...note that when z>2**112, z is an int + * and hence t=0. + * + * if(t==0.0) return 1.0/0.0; + * if(t< 0.0) *signgamlp = -1; else t= -t; + * if(z<1.0e-40) ...tiny z + * return -log(z); + * else + * return log(pi/(t*z))-lgamma(z); + * + */ + + t = sinpil(z); /* t := sin(pi*z) */ + if (t==c0) /* return 1.0/0.0 = +INF */ + return c1/c0; + + z = -z; + if(z<=tiny) + p = -logl(z); + else + p = logl(pi/(fabsl(t)*z))-__k_lgammal(z,signgamlp); + if(t= 0; i--) + t = p[i] + x * t; + return (t); +} diff --git a/usr/src/libm/src/Q/__rem_pio2l.c b/usr/src/libm/src/Q/__rem_pio2l.c new file mode 100644 index 0000000..cdc787f --- /dev/null +++ b/usr/src/libm/src/Q/__rem_pio2l.c @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "@(#)__rem_pio2l.c 1.12 06/01/31 SMI" + +/* + * __rem_pio2l(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] by calling __rem_pio2m + */ + +#ifndef FDLIBM_BASED +#include "libm.h" +extern int __rem_pio2m(double *, double *, int, int, int, const int *); +#else /* FDLIBM_BASED */ +#include "fdlibm.h" +#define __rem_pio2m __kernel_rem_pio2 +#endif /* FDLIBM_BASED */ + +extern const int _TBL_ipio2l_inf[]; + +static const long double + two24l = 16777216.0L, + pio4 = 0.7853981633974483096156608458198757210495L; + +/* __rem_pio2l(x, y): stores the reduced argument as the pair y[0] (head) and y[1] (tail) and returns the integer reported by __rem_pio2m, negated when x is negative. If |x| <= pi/4 no reduction is done: y[0] = x, y[1] = 0 and 0 is returned. */ int +__rem_pio2l(long double x, long double *y) { + long double z, w; + double t[5], v[5]; + int e0, i, nx, n, sign; + const int *ipio2; + + sign = signbitl(x); + z = fabsl(x); + if (z <= pio4) { + y[0] = x; + y[1] = 0; + return (0); + } + e0 = ilogbl(z) - 23; /* exponent that leaves 24 bits in the integer part after scaling */ + z = scalbnl(z, -e0); + /* split z into five integer-valued doubles, 24 bits at a time */ for (i = 0; i < 5; i++) { + t[i] = (double) ((int) (z)); + z = (z - (long double) t[i]) * two24l; + } + nx = 5; + while (t[nx - 1] == 0.0) + nx--; /* skip zero term */ + ipio2 = _TBL_ipio2l_inf; + n = __rem_pio2m(t, v, e0, nx, 3, (const int *) ipio2); + z = (long double) v[2] + (long double) v[1]; /* combine the three output doubles, smallest terms first */ + w = (long double) v[0]; + y[0] = z + w; + y[1] = z - (y[0] - w); /* what was lost when rounding z + w into y[0] */ + if (sign == 1) { /* NOTE(review): relies on signbitl returning exactly 1 for negative x - confirm */ + y[0] = -y[0]; + y[1] = -y[1]; + return (-n); + } + return (n); +} diff --git a/usr/src/libm/src/Q/__sincosl.c b/usr/src/libm/src/Q/__sincosl.c new file mode 100644 index 0000000..cc05320 --- /dev/null +++ b/usr/src/libm/src/Q/__sincosl.c @@ -0,0 +1,144 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__sincosl.c 1.8 06/01/31 SMI" + +/* + * long double __k_sincosl(long double x, long double y, long double *c); + * kernel sincosl function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * return sinl(x) with *c = cosl(x) + * + * Table look up algorithm + * see __k_sinl and __k_cosl + */ + +#include "libm.h" + +extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], + _TBL_cosl_hi[], _TBL_cosl_lo[]; +static const long double + one = 1.0L, +/* + * Approximation error bound: + * |sin(x) - (x+pp1*x^3 +...+ pp5*x^11)| <= 2^-122.32 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * |(sin(x) - (x+p1*x^3+...+p8*x^17)| + * |------------------------------- | <= 2^-116.17 for |x|<0.1953125 + * | x | + */ + p1 = -1.666666666666666666666666666666211262297e-0001L, + p2 = +8.333333333333333333333333301497876908541e-0003L, + p3 = -1.984126984126984126984041302881180621922e-0004L, + p4 = +2.755731922398589064100587351307269621093e-0006L, + p5 = -2.505210838544163129378906953765595393873e-0008L, + p6 = +1.605904383643244375050998243778534074273e-0010L, + p7 = -7.647162722800685516901456114270824622699e-0013L, + p8 =
+2.810046428661902961725428841068844462603e-0015L, +/* + * Approximation error bound: + * |cos(x) - (1+qq1*x^2 +...+ qq5*x^10)| <= 2^-123.84 for |x|<=1/128 + */ + qq1 = -4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L, +/* + * Approximation error bound: + * |cos(x) - (1+q1*x^2 + ... + q8*x^16)| <= 2^-117.11 for |x|<= 0.15625 + */ + q1 = -4.999999999999999999999999999999756416975e-0001L, + q2 = +4.166666666666666666666666664006066577258e-0002L, + q3 = -1.388888888888888888888877700363937169637e-0003L, + q4 = +2.480158730158730158494468463031814083559e-0005L, + q5 = -2.755731922398586276322819250356005542871e-0007L, + q6 = +2.087675698767424261441959760729854017855e-0009L, + q7 = -1.147074481239662089072452129010790774761e-0011L, + q8 = +4.777761647399651599730663422263531034782e-0014L; + +#define i0 0 + +/* __k_sincosl(x, y, c): returns the sine of x + y (y is the tail of x) and stores the cosine through c. When the high word of |x| is below 0x3ffc4000 both results come from polynomials in x*x (shorter ones below 0x3ff80000); otherwise |x| is rounded to a break point and both results are assembled from the sin/cos table entries for it, with the sign of x applied to the sine only. */ long double +__k_sincosl(long double x, long double y, long double *c) { + long double a1, a2, t, t1, t2, z, w; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; /* the word at index i0 is overwritten with the break point further down */ + hx = px[i0]; /* NOTE(review): word i0 is treated as the sign/exponent word of x - confirm the word order of the target */ + ix = hx & 0x7fffffff; /* drop the sign bit */ + if (ix < 0x3ffc4000) { + if (ix < 0x3fc60000) + if (((int) x) == 0) { + *c = one; + return (x); + } /* generate inexact */ + z = x * x; + + if (ix < 0x3ff80000) { + *c = one + z * (qq1 + z * (qq2 + z * (qq3 + + z * (qq4 + z * qq5)))); + t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + + z * (p5 + z * p6))))); + } else { + *c = one + z * (q1 + z * (q2 + z * (q3 + z * (q4 + + z * (q5 + z * (q6 + z * (q7 + z * q8))))))); + t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 + + z * (p6 + z * (p7 + z * p8))))))); + } + + t = y + x * t; /* tail plus the polynomial correction to x */ + return (x + t); + } + j = (ix + 0x400) & 0x7ffff800; /* round the high word to the nearest multiple of 0x800 */ + i = (j - 0x3ffc4000) >> 11; /* table index of that break point */ + pt[i0] = j; /* t now carries j in word i0 */ + if (hx > 0) + x = y - (t - x); /* residual (x - t) + y */ + else + x = (-y) - (t + x); /* negative x: residual (-x - t) - y */ + a1 = _TBL_sinl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 +
z * (qq4 + z * qq5)))); /* cosine polynomial of the residual, minus one */ + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); /* sine polynomial of the residual */ + a2 = _TBL_cosl_hi[i]; + t2 = _TBL_cosl_lo[i] - (a1 * w - a2 * t); /* low-order correction for the cosine */ + *c = a2 + t2; + t1 = a2 * w + a1 * t; /* low-order correction for the sine */ + t1 += _TBL_sinl_lo[i]; + if (hx < 0) + return (-a1 - t1); + else + return (a1 + t1); +} diff --git a/usr/src/libm/src/Q/__sinl.c b/usr/src/libm/src/Q/__sinl.c new file mode 100644 index 0000000..bbd502f --- /dev/null +++ b/usr/src/libm/src/Q/__sinl.c @@ -0,0 +1,142 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__sinl.c 1.11 06/01/31 SMI" + +/* + * long double __k_sinl(long double x, long double y); + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * + * Table look up algorithm + * 1. by sin(-x) = -sin(x), need only to consider positive x + * 2.
if x < 25/128 = [0x3ffc9000,0,0,0] = 0.1953125 , then + * if x < 2^-57 (hx < 0x3fc60000,0,0,0), return x (inexact if x!= 0) + * z = x*x; + * if x <= 1/64 = 2**-6 + * sin(x) = x + (y+(x*z)*(p1 + z*p2)) + * else + * sin(x) = x + (y+(x*z)*(p1 + z*(p2 + z*(p3 + z*p4)))) + * 3. else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7 + * By + * sin(t+x') + * = sin(t)cos(x')+cos(t)sin(x') + * = sin(t)(1+z*(qq1+z*qq2))+[cos(t)]*x*(1+z*(pp1+z*pp2)) + * = sin(t) + [sin(t)]*(z*(qq1+z*qq2))+ + * [cos(t)]*x*(1+z*(pp1+z*pp2)) + * + * Thus, + * let a= _TBL_sin_hi[i], b = _TBL_sin_lo[i], c= _TBL_cos_hi[i], + * x = (x-t)+y + * z = x*x; + * sin(t+x) = a+(b+ ((c*x)*(1+z*(pp1+z*pp2))+a*(z*(qq1+z*qq2))) + */ + +#include "libm.h" + +extern const long double _TBL_sinl_hi[], _TBL_sinl_lo[], _TBL_cosl_hi[]; +static const long double +one = 1.0L, +/* + * 3 11 -122.32 + * |sin(x) - (x+pp1*x +...+ pp5*x )| <= 2 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * |(sin(x) - (x+p1*x^3+...+p8*x^17)| + * |------------------------------- | <= 2^-116.17 for |x|<0.1953125 + * | x | + */ + p1 = -1.666666666666666666666666666666211262297e-0001L, + p2 = +8.333333333333333333333333301497876908541e-0003L, + p3 = -1.984126984126984126984041302881180621922e-0004L, + p4 = +2.755731922398589064100587351307269621093e-0006L, + p5 = -2.505210838544163129378906953765595393873e-0008L, + p6 = +1.605904383643244375050998243778534074273e-0010L, + p7 = -7.647162722800685516901456114270824622699e-0013L, + p8 = +2.810046428661902961725428841068844462603e-0015L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ + qq1 = 
-4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L; + +#define i0 0 + +/* __k_sinl(x, y): kernel sine of x + y, with y the tail of x. When the high word of |x| is below 0x3ffc9000 the result is x + (y + x*P(x*x)) with P built from p1..p8 (inputs whose high word is below 0x3fc60000 are returned unchanged); otherwise |x| is rounded to a break point and the result is assembled from the _TBL_sinl_hi, _TBL_sinl_lo and _TBL_cosl_hi entries for it, with the sign of x restored at the end. */ long double +__k_sinl(long double x, long double y) { + long double a, t, z, w; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; + hx = px[i0]; /* NOTE(review): word i0 is treated as the sign/exponent word of x - confirm the word order of the target */ + ix = hx & 0x7fffffff; /* drop the sign bit */ + if (ix < 0x3ffc9000) { + *(3 - i0 + (int *) &t) = -1; /* one-ulp: these four stores together turn t from 1.0 into the value just below 1.0, assuming four 32-bit words with word i0 the most significant */ + *(2 + (int *) &t) = -1; /* one-ulp */ + *(1 + (int *) &t) = -1; /* one-ulp */ + *(i0 + (int *) &t) -= 1; /* one-ulp */ + if (ix < 0x3fc60000) + if (((int) (x * t)) < 1) + return (x); /* inexact and underflow */ + z = x * x; + t = z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * (p5 + + z * (p6 + z * (p7 + z * p8))))))); + t = y + x * t; /* tail plus the polynomial correction to x */ + return (x + t); + } + j = (ix + 0x400) & 0x7ffff800; /* round the high word to the nearest multiple of 0x800 */ + i = (j - 0x3ffc4000) >> 11; /* table index of that break point */ + pt[i0] = j; /* t now carries j in word i0 */ + if (hx > 0) + x = y - (t - x); /* residual (x - t) + y */ + else + x = (-y) - (t + x); /* negative x: residual (-x - t) - y */ + a = _TBL_sinl_hi[i]; + z = x * x; + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); /* cosine polynomial of the residual, minus one */ + w = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); /* sine polynomial of the residual */ + t = _TBL_cosl_hi[i] * w + a * t; /* low-order correction added to a */ + t += _TBL_sinl_lo[i]; + if (hx < 0) + return (-a - t); + else + return (a + t); +} diff --git a/usr/src/libm/src/Q/__tanl.c b/usr/src/libm/src/Q/__tanl.c new file mode 100644 index 0000000..63294c6 --- /dev/null +++ b/usr/src/libm/src/Q/__tanl.c @@ -0,0 +1,162 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__tanl.c 1.11 06/01/31 SMI" + +/* + * long double __k_tanl(long double x, long double y, int k); + * kernel tan/cotan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is assumed to be bounded by ~pi/4 in magnitude. + * Input y is the tail of x. + * Input k selects the result -- tan if k=0; else -1/tan + * + * Table look up algorithm + * 1. by tan(-x) = -tan(x), need only to consider positive x + * 2. if x < 5/32 = [0x3ffc4000, 0] = 0.15625 , then + * if x < 2^-57 (hx < 0x3fc60000 0), set w=x with inexact if x!= 0 + * else + * z = x*x; + * w = x + (y+(x*z)*(t1+z*(t2+z*(t3+z*(t4+z*(t5+z*t6)))))) + * return (k==0)? w: -1/w; + * 3.
else + * ht = (hx + 0x400)&0x7ffff800 (round x to a break point t) + * lt = 0 + * i = (hy-0x3ffc4000)>>11; (i<=64) + * x' = (x - t)+y (|x'| ~<= 2^-7) + * By + * tan(t+x') + * = (tan(t)+tan(x'))/(1-tan(x')tan(t)) + * We have + * sin(x')+tan(t)*(tan(t)*sin(x')) + * = tan(t) + ------------------------------- for k=0 + * cos(x') - tan(t)*sin(x') + * + * cos(x') - tan(t)*sin(x') + * = - -------------------------------------- for k=1 + * tan(t) + tan(t)*(cos(x')-1) + sin(x') + * + * + * where tan(t) is from the table, + * sin(x') = x + pp1*x^3 + ...+ pp5*x^11 + * cos(x') = 1 + qq1*x^2 + ...+ qq5*x^10 + */ + +#include "libm.h" + +extern const long double _TBL_tanl_hi[], _TBL_tanl_lo[]; +static const long double + one = 1.0L, +/* + * 3 11 -122.32 + * |sin(x) - (x+pp1*x +...+ pp5*x )| <= 2 for |x|<1/64 + */ + pp1 = -1.666666666666666666666666666586782940810e-0001L, + pp2 = +8.333333333333333333333003723660929317540e-0003L, + pp3 = -1.984126984126984076045903483778337804470e-0004L, + pp4 = +2.755731922361906641319723106210900949413e-0006L, + pp5 = -2.505198398570947019093998469135012057673e-0008L, +/* + * 2 10 -123.84 + * |cos(x) - (1+qq1*x +...+ qq5*x )| <= 2 for |x|<=1/128 + */ + qq1 = -4.999999999999999999999999999999378373641e-0001L, + qq2 = +4.166666666666666666666665478399327703130e-0002L, + qq3 = -1.388888888888888888058211230618051613494e-0003L, + qq4 = +2.480158730156105377771585658905303111866e-0005L, + qq5 = -2.755728099762526325736488376695157008736e-0007L, +/* + * |tan(x) - (x+t1*x^3+...+t6*x^13)| + * |------------------------------ | <= 2^-59.73 for |x|<0.15625 + * | x | + */ + t1 = +3.333333333333333333333333333333423342490e-0001L, + t2 = +1.333333333333333333333333333093838744537e-0001L, + t3 = +5.396825396825396825396827906318682662250e-0002L, + t4 = +2.186948853615520282185576976994418486911e-0002L, + t5 = +8.863235529902196573354554519991152936246e-0003L, + t6 = +3.592128036572480064652191427543994878790e-0003L, + t7 = 
+1.455834387051455257856833807581901305474e-0003L, + t8 = +5.900274409318599857829983256201725587477e-0004L, + t9 = +2.391291152117265181501116961901122362937e-0004L, + t10 = +9.691533169382729742394024173194981882375e-0005L, + t11 = +3.927994733186415603228178184225780859951e-0005L, + t12 = +1.588300018848323824227640064883334101288e-0005L, + t13 = +6.916271223396808311166202285131722231723e-0006L; + +#define i0 0 + +/* __k_tanl(x, y, k): kernel for the argument x + y, with y the tail of x; returns the tangent when k == 0 and -1/tangent otherwise. When the high word of |x| is below 0x3ffc4000 the tangent is x + (y + x*T(x*x)), with T using t1..t4 below 0x3ff30000 and t1..t13 otherwise; larger arguments are rounded to a break point and combined with _TBL_tanl_hi, _TBL_tanl_lo and short sin/cos polynomials of the residual. */ long double +__k_tanl(long double x, long double y, int k) { + long double a, t, z, w, s, c; + int *pt = (int *) &t, *px = (int *) &x; + int i, j, hx, ix; + + t = 1.0L; + hx = px[i0]; /* NOTE(review): word i0 is treated as the sign/exponent word of x - confirm the word order of the target */ + ix = hx & 0x7fffffff; /* drop the sign bit */ + if (ix < 0x3ffc4000) { + *(3 - i0 + (int *) &t) = 1; /* make t = one+ulp */ + if (ix < 0x3fc60000) { + /* NOTE(review): w is assigned only when this test holds; for inputs this small the product is expected to truncate to 0, so the test should always pass */ if (((int) (x * t)) < 1) /* generate inexact */ + w = x; /* generate underflow if subnormal */ + } else { + z = x * x; + if (ix < 0x3ff30000) /* 2**-12 */ + t = z * (t1 + z * (t2 + z * (t3 + z * t4))); + else + t = z * (t1 + z * (t2 + z * (t3 + z * (t4 + + z * (t5 + z * (t6 + z * (t7 + z * (t8 + + z * (t9 + z * (t10 + z * (t11 + + z * (t12 + z * t13)))))))))))); + t = y + x * t; /* tail plus the polynomial correction to x */ + w = x + t; + } + return (k == 0 ? w : -one / w); + } + j = (ix + 0x400) & 0x7ffff800; /* round the high word to the nearest multiple of 0x800 */ + i = (j - 0x3ffc4000) >> 11; /* table index of that break point */ + pt[i0] = j; /* t now carries j in word i0 */ + if (hx > 0) + x = y - (t - x); /* residual (x - t) + y */ + else + x = (-y) - (t + x); /* negative x: residual (-x - t) - y */ + a = _TBL_tanl_hi[i]; + z = x * x; + /* cos(x)-1 */ + t = z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5)))); + /* sin(x) */ + s = x * (one + z * (pp1 + z * (pp2 + z * (pp3 + z * (pp4 + z * pp5))))); + if (k == 0) { + w = a * s; + t = _TBL_tanl_lo[i] + (s + a * w) / (one - (w - t)); /* lo + (s + a*a*s) / (cos - a*s), the correction added to a */ + return (hx < 0 ? -a - t : a + t); + } else { + w = s + a * t; + c = w + _TBL_tanl_lo[i]; /* denominator without its leading a: a*(cos-1) + s + lo */ + z = one - (a * s - t); /* numerator: cos - a*s */ + return (hx >= 0 ?
z / (-a - c) : z / (a + c)); + } +} diff --git a/usr/src/libm/src/Q/acoshl.c b/usr/src/libm/src/Q/acoshl.c new file mode 100644 index 0000000..f76f4e8 --- /dev/null +++ b/usr/src/libm/src/Q/acoshl.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
static const long double
	zero = 0.0L,
	ln2 = 6.931471805599453094172321214581765680755e-0001L,
	one = 1.0L,
	big = 1.e+20L;

/*
 * acoshl(x) -- inverse hyperbolic cosine, long double.
 *
 *	x is NaN	: x + x
 *	x > big		: logl(x) + ln2			(= log(2x))
 *	1 < x <= big	: log1pl(t * (t + sqrtl(x + 1))), t = sqrtl(x - 1)
 *			  (t*t + t*sqrt(x+1) = (x-1) + sqrt(x*x-1))
 *	x == 1		: 0
 *	otherwise	: (x - x) / (x - x), which evaluates to NaN
 */
long double
acoshl(long double x) {
	long double root;

	if (isnanl(x))
		return (x + x);

	if (x > big)
		return (logl(x) + ln2);

	if (x > one) {
		root = sqrtl(x - one);
		return (log1pl(root * (root + sqrtl(x + one))));
	}

	if (x == one)
		return (zero);

	/* below the domain: produce NaN arithmetically */
	return ((x - x) / (x - x));
}
static const long double zero = 0.0L, one = 1.0L;

/*
 * acosl(x) -- arc cosine, long double.
 *
 * Uses acos(x) = 2 * atan(sqrt((1 - x) / (1 + x))): a half angle is
 * computed and doubled on return.
 *
 *	x is NaN	: x + x
 *	|x| < 1		: half angle = atanl(sqrtl((1 - x) / (1 + x)))
 *	x == 1		: half angle = 0
 *	x == -1		: half angle = atan2l(1, 0), so the result is twice that
 *	|x| > 1		: zero / zero, i.e. NaN
 */
long double
acosl(long double x) {
	long double half_angle;

	if (isnanl(x))
		return (x + x);

	if (fabsl(x) < one)
		half_angle = atanl(sqrtl((one - x) / (one + x)));
	else if (x == one)
		half_angle = zero;
	else if (x == -one)
		half_angle = atan2l(one, zero);
	else
		return (zero / zero);	/* |x| > 1 */

	return (half_angle + half_angle);
}
static const long double
	ln2 = 6.931471805599453094172321214581765680755e-0001L,
	one = 1.0L,
	big = 1.0e+20L,
	tiny = 1.0e-20L;

/*
 * asinhl(x) -- inverse hyperbolic sine, long double.
 *
 * The magnitude is computed from w = |x| and the sign of x is put back
 * with copysignl:
 *
 *	x is NaN	: x + x
 *	w < tiny	: x itself (x + big is evaluated only for its side effect)
 *	tiny <= w < big	: log1pl(w + w / (t + sqrtl(1 + t*t))), t = 1/w
 *	w >= big	: logl(w) + ln2
 */
long double
asinhl(long double x) {
	long double mag, inv, result;

	if (isnanl(x))
		return (x + x);

	mag = fabsl(x);
	if (mag < tiny) {
#ifndef lint
		volatile long double dummy = x + big;	/* inexact if x != 0 */
#endif
		return (x);
	}

	if (mag < big) {
		inv = one / mag;
		result = log1pl(mag + mag / (inv + sqrtl(one + inv * inv)));
	} else {
		result = logl(mag) + ln2;
	}
	return (copysignl(result, x));
}
static const long double zero = 0.0L, small = 1.0e-20L, half = 0.5L, one = 1.0L;
#ifndef lint
static const long double big = 1.0e+20L;
#endif

/*
 * asinl(x) -- arc sine, long double, via atanl(x / sqrt(1 - x*x)).
 *
 * With w = |x|:
 *
 *	x is NaN	: x + x
 *	w < small	: x itself (w + big is evaluated only for its side effect)
 *	w <= 1/2	: 1 - x*x is formed directly
 *	1/2 < w < 1	: 1 - x*x is formed as 2*(1-w) - (1-w)*(1-w)
 *	w == 1		: atan2l(x, 0)
 *	w > 1		: zero / zero, i.e. NaN
 */
long double
asinl(long double x) {
	long double mag, gap;

	if (isnanl(x))
		return (x + x);

	mag = fabsl(x);
	if (mag < small) {
#ifndef lint
		volatile long double dummy = mag + big;
						/* inexact if mag != 0 */
#endif
		return (x);
	}

	if (mag <= half)
		return (atanl(x / sqrtl(one - x * x)));

	if (mag < one) {
		gap = one - mag;
		return (atanl(x / sqrtl((gap + gap) - gap * gap)));
	}

	if (mag == one)
		return (atan2l(x, zero));	/* asin(+-1) = +- PI/2 */

	return (zero / zero);			/* |x| > 1: invalid */
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)atan2l.c 1.7 06/01/31 SMI" + +/* + * atan2l(y,x) + * + * Method : + * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). + * 2. Reduce x to positive by (if x and y are unexceptional): + * ARG (x+iy) = arctan(y/x) ... if x > 0, + * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, + * + * Special cases: + * + * ATAN2((anything), NaN ) is NaN; + * ATAN2(NAN , (anything) ) is NaN; + * ATAN2(+-0, +(anything but NaN)) is +-0 ; + * ATAN2(+-0, -(anything but NaN)) is +-PI ; + * ATAN2(+-(anything but 0 and NaN), 0) is +-PI/2; + * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; + * ATAN2(+-(anything but INF and NaN), -INF) is +-PI; + * ATAN2(+-INF,+INF ) is +-PI/4 ; + * ATAN2(+-INF,-INF ) is +-3PI/4; + * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-PI/2; + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. 
static const long double
	zero = 0.0L,
	tiny = 1.0e-40L,
	one = 1.0L,
	half = 0.5L,
	PI3o4 = 2.356194490192344928846982537459627163148L,
	PIo4 = 0.785398163397448309615660845819875721049L,
	PIo2 = 1.570796326794896619231321691639751442099L,
	PI = 3.141592653589793238462643383279502884197L,
	PI_lo = 8.671810130123781024797044026043351968762e-35L;

/*
 * atan2l(y, x) -- angle of the point (x, y), long double.
 *
 * Special cases handled before the general computation:
 *	either argument NaN		-> x + y
 *	x == 1				-> atanl(y)
 *	y == +-0, x non-negative sign	-> y (keeps the sign of the zero)
 *	y == +-0, x negative sign	-> +-PI
 *	x == 0 (y != 0)			-> +-PI/2
 *	x and y both infinite		-> +-PI/4 or +-3PI/4
 *	x infinite, y finite		-> +-0 or +-PI
 *	y infinite, x finite		-> +-PI/2
 *
 * "+ tiny" / "- tiny" on the constant results is kept from the original;
 * it does not change the rounded value and is presumably there to raise
 * the inexact flag.
 *
 * m encodes the quadrant: bit 0 is the sign of y, bit 1 the sign of x.
 * Each switch ends in a default arm (the former "case 3") so that no path
 * can run off the end of the function, and the nested conditionals are
 * fully braced.
 */
long double
atan2l(long double y, long double x) {
	long double t, z;
	int k, m, signy, signx;

	if (x != x || y != y)
		return (x + y);	/* return NaN if x or y is NAN */

	/*
	 * Normalise to 0/1 so m always lies in 0..3, even if signbitl
	 * yields some other non-zero value for negative arguments.
	 */
	signy = (signbitl(y) != 0);
	signx = (signbitl(x) != 0);
	if (x == one)
		return (atanl(y));
	m = signy + signx + signx;

	/* when y = 0 */
	if (y == zero) {
		switch (m) {
		case 0:				/* atan(+0,+anything) */
		case 1:				/* atan(-0,+anything) */
			return (y);
		case 2:
			return (PI + tiny);	/* atan(+0,-anything) */
		default:
			return (-PI - tiny);	/* atan(-0,-anything) */
		}
	}

	/* when x = 0 */
	if (x == zero)
		return (signy ? -PIo2 - tiny : PIo2 + tiny);

	/* when x is INF */
	if (!finitel(x)) {
		if (!finitel(y)) {
			switch (m) {
			case 0:
				return (PIo4 + tiny);	/* atan(+INF,+INF) */
			case 1:
				return (-PIo4 - tiny);	/* atan(-INF,+INF) */
			case 2:
				return (PI3o4 + tiny);	/* atan(+INF,-INF) */
			default:
				return (-PI3o4 - tiny);	/* atan(-INF,-INF) */
			}
		} else {
			switch (m) {
			case 0:
				return (zero);		/* atan(+...,+INF) */
			case 1:
				return (-zero);		/* atan(-...,+INF) */
			case 2:
				return (PI + tiny);	/* atan(+...,-INF) */
			default:
				return (-PI - tiny);	/* atan(-...,-INF) */
			}
		}
	}

	/* when y is INF */
	if (!finitel(y))
		return (signy ? -PIo2 - tiny : PIo2 + tiny);

	/* compute y/x on magnitudes; the quadrant is restored from m below */
	x = fabsl(x);
	y = fabsl(y);
	t = PI_lo;
	k = (ilogbl(y) - ilogbl(x));	/* exponent gap between y and x */

	if (k > 120)
		z = PIo2 + half * t;	/* y dominates: the angle is PI/2 */
	else if (m > 1 && k < -120)
		z = zero;		/* x negative and dominant: +-PI below */
	else
		z = atanl(y / x);

	switch (m) {
	case 0:
		return (z);		/* atan(+,+) */
	case 1:
		return (-z);		/* atan(-,+) */
	case 2:
		return (PI - (z - t));	/* atan(+,-) */
	default:
		return ((z - t) - PI);	/* atan(-,-) */
	}
}
/* 1/pi, the factor that turns an angle in radians into units of pi. */
static const long double invpi = 3.183098861837906715377675267450287240689e-1L;

/*
 * atan2pil(y, x) = atan2l(y, x) / pi
 *
 * Implemented as a multiplication by the stored reciprocal of pi.
 */
long double
atan2pil(long double y, long double x) {
	long double radians;

	radians = atan2l(y, x);
	return (radians * invpi);
}
static const long double zero = 0.0L, half = 0.5L, one = 1.0L;

/*
 * atanhl(x) -- inverse hyperbolic tangent, long double.
 *
 * Computed on |x| so that atanhl(-x) == -atanhl(x):
 *
 *	atanhl(x) = copysign(1/2, x) * log1pl(2 * |x| / (1 - |x|))
 *
 * |x| == 1 is handled up front by returning x / zero.  No other input is
 * special-cased; NaN and |x| > 1 simply flow through the formula.
 */
long double
atanhl(long double x) {
	long double mag, ratio;

	mag = fabsl(x);
	if (mag == one)
		return (x / zero);

	ratio = mag / (one - mag);
	return (copysignl(half, x) * log1pl(ratio + ratio));
}
+ * We use poly1(x) to approximate atan(x) for x in [0,1/8] with + * error (relative) + * |(atan(x)-poly1(x))/x|<= 2^-115.94 long double + * |(atan(x)-poly1(x))/x|<= 2^-58.85 double + * |(atan(x)-poly1(x))/x|<= 2^-25.53 float + * and use poly2(x) to approximate atan(x) for x in [0,1/65] with + * error (absolute) + * |atan(x)-poly2(x)|<= 2^-122.15 long double + * |atan(x)-poly2(x)|<= 2^-64.79 double + * |atan(x)-poly2(x)|<= 2^-35.36 float + * Here poly1 and poly2 are odd polynomial with the following form: + * x + x^3*(a1+x^2*(a2+...)) + * + * (0). Purge off Inf and NaN and 0 + * (1). Reduce x to positive by atan(x) = -atan(-x). + * (2). For x <= 1/8, use + * (2.1) if x < 2^(-prec/2-2), atan(x) = x with inexact + * (2.2) Otherwise + * atan(x) = poly1(x) + * (3). For x >= 8 then + * (3.1) if x >= 2^(prec+2), atan(x) = atan(inf) - pio2lo + * (3.2) if x >= 2^(prec/3+2), atan(x) = atan(inf) - 1/x + * (3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x) + * (3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x) + * + * (4). Now x is in (0.125, 8) + * Find y that match x to 4.5 bit after binary (easy). + * If iy is the high word of y, then + * single : j = (iy - 0x3e000000) >> 19 + * double : j = (iy - 0x3fc00000) >> 16 + * quad : j = (iy - 0x3ffc0000) >> 12 + * + * Let s = (x-y)/(1+x*y). Then + * atan(x) = atan(y) + poly1(s) + * = _TBL_atanl_hi[j] + (_TBL_atanl_lo[j] + poly2(s) ) + * + * Note. |s| <= 1.5384615385e-02 = 1/65. 
/*
 * Closing note of the algorithm description that precedes this block:
 * |s| <= 1.5384615385e-02 = 1/65; the maximum occurs at x = 1.03125.
 */

/* atan at the table break points, split into a leading part and a correction */
extern const long double _TBL_atanl_hi[], _TBL_atanl_lo[];
static const long double
	one = 1.0L,
	/* p1..p13: coefficients of poly1, x + x^3*(p1 + x^2*(p2 + ...)) */
	p1 = -3.333333333333333333333333333331344526118e-0001L,
	p2 = 1.999999999999999999999999989931277668570e-0001L,
	p3 = -1.428571428571428571428553606221309530901e-0001L,
	p4 = 1.111111111111111111095219842737139747418e-0001L,
	p5 = -9.090909090909090825503603835248061123323e-0002L,
	p6 = 7.692307692307664052130743214708925258904e-0002L,
	p7 = -6.666666666660213835187713228363717388266e-0002L,
	p8 = 5.882352940152439399097283359608661949504e-0002L,
	p9 = -5.263157780447533993046614040509529668487e-0002L,
	p10 = 4.761895816878184933175855990886788439447e-0002L,
	p11 = -4.347345005832274022681019724553538135922e-0002L,
	p12 = 3.983031914579635037502589204647752042736e-0002L,
	p13 = -3.348206704469830575196657749413894897554e-0002L,
	/* q1..q7: coefficients of the shorter poly2, same odd form */
	q1 = -3.333333333333333333333333333195273650186e-0001L,
	q2 = 1.999999999999999999999988146114392615808e-0001L,
	q3 = -1.428571428571428571057630319435467111253e-0001L,
	q4 = 1.111111111111105373263048208994541544098e-0001L,
	q5 = -9.090909090421834209167373258681021816441e-0002L,
	q6 = 7.692305377813692706850171767150701644539e-0002L,
	q7 = -6.660896644393861499914731734305717901330e-0002L,
	/* pi/2 as a leading value plus a small correction term */
	pio2hi = 1.570796326794896619231321691639751398740e+0000L,
	pio2lo = 4.335905065061890512398522013021675984381e-0035L;

/*
 * Word indices into a long double viewed as four ints: i0 is the word the
 * code treats as sign + exponent, i1 the word at the other end.
 * NOTE(review): assumes a 128-bit format stored most-significant word
 * first -- confirm for the target.
 */
#define	i0	0
#define	i1	3

/*
 * atanl(x) -- arc tangent, long double, table look-up algorithm.
 *
 * The sign is stripped from the high word and re-applied when returning.
 * Three ranges, selected by comparing the high word of |x|:
 *   |x| < 1/8       odd polynomial in x, shortened for very small x
 *   |x| >= 8        pi/2 minus a polynomial in 1/x, or less work for huge x
 *   1/8 <= |x| < 8  y = |x| rounded to a table break point,
 *                   s = (|x| - y)/(1 + |x|*y), result = table[j] + poly(s)
 * NaN input returns x - x.
 */
long double
atanl(long double x) {
	long double y, z, r, p, s;
	int *px = (int *) &x, *py = (int *) &y;	/* word views of x and y */
	int ix, iy, sign, j;

	ix = px[i0];
	sign = ix & 0x80000000;		/* sign bit only */
	ix ^= sign;			/* high word of |x| */

	/* for |x| < 1/8 */
	if (ix < 0x3ffc0000) {
		if (ix < 0x3feb0000) {	/* when |x| < 2**(-prec/6-2) */
			if (ix < 0x3fc50000) {	/* if |x| < 2**(-prec/2-2) */
				/*
				 * Build s just below one word by word: the
				 * three low words become all ones and the
				 * high word is decremented.
				 */
				s = one;
				*(3 - i0 + (int *) &s) = -1;	/* s = 1-ulp */
				*(1 + (int *) &s) = -1;
				*(2 + (int *) &s) = -1;
				*(i0 + (int *) &s) -= 1;
				if ((int) (s * x) < 1)
					return (x);	/* raise inexact */
			}
			z = x * x;
			if (ix < 0x3fe20000) {	/* if |x| < 2**(-prec/4-1) */
				return (x + (x * z) * p1);
			} else {	/* if |x| < 2**(-prec/6-2) */
				return (x + (x * z) * (p1 + z * p2));
			}
		}
		/* full poly1; x still carries its sign, so the result does too */
		z = x * x;
		return (x + (x * z) * (p1 + z * (p2 + z * (p3 + z * (p4 +
		    z * (p5 + z * (p6 + z * (p7 + z * (p8 + z * (p9 +
		    z * (p10 + z * (p11 + z * (p12 + z * p13)))))))))))));
	}

	/* for |x| >= 8.0 */
	if (ix >= 0x40020000) {
		px[i0] = ix;		/* x = |x| from here on */
		if (ix < 0x40050400) {	/* x <  65 */
			r = one / x;
			z = r * r;
			/*
			 * poly1
			 */
			y = r * (one + z * (p1 + z * (p2 + z * (p3 +
			    z * (p4 + z * (p5 + z * (p6 + z * (p7 +
			    z * (p8 + z * (p9 + z * (p10 + z * (p11 +
			    z * (p12 + z * p13)))))))))))));
			y -= pio2lo;
		} else if (ix < 0x40260000) {	/* x <  2**(prec/3+2) */
			r = one / x;
			z = r * r;
			/*
			 * poly2
			 */
			y = r * (one + z * (q1 + z * (q2 + z * (q3 + z * (q4 +
			    z * (q5 + z * (q6 + z * q7)))))));
			y -= pio2lo;
		} else if (ix < 0x40720000) {	/* x <  2**(prec+2) */
			y = one / x - pio2lo;
		} else if (ix < 0x7fff0000) {	/* x <  inf */
			y = -pio2lo;
		} else {		/* x is inf or NaN */
			/* any bit set beyond the infinity pattern means NaN */
			if (((ix - 0x7fff0000) | px[1] | px[2] | px[i1]) != 0)
				return (x - x);
			y = -pio2lo;
		}

		/* atan(|x|) = pio2hi - y; put the original sign back */
		if (sign == 0)
			return (pio2hi - y);
		else
			return (y - pio2hi);
	}

	/* now x is between 1/8 and 8 */
	px[i0] = ix;			/* x = |x| */
	/* y = |x| rounded at bit 12 of the high word, low words cleared */
	iy = (ix + 0x00000800) & 0x7ffff000;
	py[i0] = iy;
	py[1] = py[2] = py[i1] = 0;
	j = (iy - 0x3ffc0000) >> 12;	/* table index of break point y */

	/* s = (x-y)/(1+x*y), negated up front for negative input */
	if (sign == 0)
		s = (x - y) / (one + x * y);
	else
		s = (y - x) / (one + x * y);
	z = s * s;
	/* high words equal: a four-term polynomial is used instead of seven */
	if (ix == iy)
		p = s * (one + z * (q1 + z * (q2 + z * (q3 + z * q4))));
	else
		p = s * (one + z * (q1 + z * (q2 + z * (q3 + z * (q4 +
		    z * (q5 + z * (q6 + z * q7)))))));
	/* add the small table term before the large one */
	if (sign == 0) {
		r = p + _TBL_atanl_lo[j];
		return (r + _TBL_atanl_hi[j]);
	} else {
		r = p - _TBL_atanl_lo[j];
		return (r - _TBL_atanl_hi[j]);
	}
}
--- /dev/null +++ b/usr/src/libm/src/Q/cbrtl.c @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cbrtl.c 1.7 06/01/31 SMI" + +#pragma weak cbrtl = __cbrtl + +#include "libm.h" + +#define n0 0 + +long double +cbrtl(long double x) { + long double s, t, r, w, y; + double dx, dy; + int *py = (int *) &dy; + int n, m, m3, sx; + + if (!finitel(x)) + return (x + x); + if (iszerol(x)) + return (x); + sx = signbitl(x); + x = fabsl(x); + n = ilogbl(x); + m = n / 3; + m3 = m + m + m; + y = scalbnl(x, -m3); + dx = (double) y; + dy = cbrt(dx); + py[1 - n0] += 2; + if (py[1 - n0] == 0) + py[n0] += 1; + + /* one step newton iteration to 113 bits with error < 0.667ulps */ + t = (long double) dy; + t = scalbnl(t, m); + s = t * t; + r = x / s; + w = t + t; + r = (r - t) / (w + r); + t += t * r; + + return (sx == 0 ? 
t : -t); +} diff --git a/usr/src/libm/src/Q/copysignl.c b/usr/src/libm/src/Q/copysignl.c new file mode 100644 index 0000000..8dd25fc --- /dev/null +++ b/usr/src/libm/src/Q/copysignl.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)copysignl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak copysignl = __copysignl +#endif + +#include "libm.h" + +long double +copysignl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + px[HIXWORD] = (px[HIXWORD] & ~XSGNMSK) | (py[HIXWORD] & XSGNMSK); + return (x); +} diff --git a/usr/src/libm/src/Q/coshl.c b/usr/src/libm/src/Q/coshl.c new file mode 100644 index 0000000..966a9a4 --- /dev/null +++ b/usr/src/libm/src/Q/coshl.c @@ -0,0 +1,104 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)coshl.c 1.8 06/01/31 SMI" + +#pragma weak coshl = __coshl + +#include "libm.h" + +/* + * coshl(X) + * RETURN THE HYPERBOLIC COSINE OF X + * + * Method : + * 1. Replace x by |x| (coshl(x) = coshl(-x)). + * 2. + * [ expl(x) - 1 ]^2 + * 0 <= x <= 0.3465 : coshl(x) := 1 + ------------------- + * 2*expl(x) + * + * expl(x) + 1/expl(x) + * 0.3465 <= x <= thr2 : coshl(x) := ------------------- + * 2 + * thr2 <= x <= lnovft : coshl(x) := expl(x)/2 + * lnovft <= x < INF : coshl(x) := scalbnl(expl(x-MEP1*ln2),ME) + * + * here + * thr1 a number that is near one half of ln2. + * thr2 a number such that + * expl(thr2)+expl(-thr2)=expl(thr2) + * lnovft: logarithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * coshl(x) is |x| if x is +INF, -INF, or NaN. + * only coshl(0)=1 is exact for finite x.
#define	ME	16383
#define	MEP1	16384
#define	LNOVFT	1.135652340629414394949193107797076342845e+4L
		/* last 32 bits of LN2HI is zero */
#define	LN2HI	6.931471805599453094172319547495844850203e-0001L
#define	LN2LO	1.667085920830552208890449330400379754169e-0025L
#define	THR1	0.3465L
#define	THR2	45.L

static const long double
	half = 0.5L,
	tinyl = 7.5e-37L,
	one = 1.0L,
	ln2hi = LN2HI,
	ln2lo = LN2LO,
	lnovftL = LNOVFT,
	thr1 = THR1,
	thr2 = THR2;

/*
 * coshl(x) -- hyperbolic cosine, long double.
 *
 * Works on w = |x|:
 *
 *	w not finite	 : w + w
 *	w < thr1	 : 1 + t*t / (2*(1 + t)), with t = expm1l(w)
 *			   (t = w itself below tinyl; exactly 1 when 1 + t
 *			   rounds to 1)
 *	thr1 <= w < thr2 : (expl(w) + 1/expl(w)) / 2
 *	thr2 <= w <= lnovftL : expl(w) / 2
 *	w > lnovftL	 : scalbnl(expl(w - MEP1*ln2), ME), with ln2 applied
 *			   as ln2hi then ln2lo
 */
long double
coshl(long double x) {
	long double mag, e, sum;

	mag = fabsl(x);
	if (!finitel(mag))
		return (mag + mag);	/* x is INF or NaN */

	if (mag < thr1) {
		if (mag < tinyl)
			e = mag;
		else
			e = expm1l(mag);
		sum = one + e;
		if (sum == one)
			return (sum);
		return (one + (e * e) / (sum + sum));
	}

	if (mag < thr2) {
		e = expl(mag);
		return (half * (e + one / e));
	}

	if (mag <= lnovftL)
		return (half * expl(mag));

	/* reduce by MEP1*ln2 in two pieces, then scale the result by 2**ME */
	return (scalbnl(expl((mag - MEP1 * ln2hi) - MEP1 * ln2lo), ME));
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cosl.c 1.7 06/01/31 SMI" + +/* + * cosl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
/*
 * cosl(x) -- cosine, long double.
 *
 * Dispatch on the high word of x with the sign bit cleared:
 *   - at or below 0x3ffe9220 (|x| ~< pi/4): kernel cosine on x directly,
 *     with a zero tail;
 *   - at or above 0x7fff0000 (Inf or NaN): x - x;
 *   - otherwise __rem_pio2l reduces x into the pair rem[0], rem[1] and its
 *     return value, taken mod 4, selects the kernel and sign:
 *	0: C	1: -S	2: -C	3: S
 *     where C = __k_cosl(rem[0], rem[1]) and S = __k_sinl(rem[0], rem[1]).
 */
long double
cosl(long double x) {
	long double rem[2];
	int quadrant, hi;

	hi = *(int *) &x;	/* High word of x */
	hi &= 0x7fffffff;

	if (hi <= 0x3ffe9220)	/* |x| ~< pi/4 */
		return (__k_cosl(x, 0.0L));

	if (hi >= 0x7fff0000)	/* trig(Inf or NaN) is NaN */
		return (x - x);

	/* argument reduction needed */
	quadrant = __rem_pio2l(x, rem) & 3;
	if (quadrant == 0)
		return (__k_cosl(rem[0], rem[1]));
	if (quadrant == 1)
		return (-__k_sinl(rem[0], rem[1]));
	if (quadrant == 2)
		return (-__k_cosl(rem[0], rem[1]));
	return (__k_sinl(rem[0], rem[1]));	/* quadrant == 3 */
}
+ * x + * 2 |\ + * erf(x) = --------- | exp(-t*t)dt + * sqrt(pi) \| + * 0 + * + * erfc(x) = 1-erf(x) + * + * method: + * Since erf(-x) = -erf(x), we assume x>=0. + * For x near 0, we have the expansion + * + * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....). + * + * Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688, + * we use x + x*P(x^2) to approximate erf(x). This formula will + * guarantee the error less than one ulp where x is not too far + * away from 0. We note that erf(x)=x at x = 0.6174...... After + * some experiment, we choose the following approximation on + * interval [0,0.84375]. + * + * For x in [0,0.84375] + * 2 2 4 40 + * P = P(x ) = (p0 + p1 * x + p2 * x + ... + p20 * x ) + * + * erf(x) = x + x*P + * erfc(x) = 1 - erf(x) if x<=0.25 + * = 0.5 + ((0.5-x)-x*P) if x in [0.25,0.84375] + * precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50 + * + * For x in [0.84375,1.25], let s = x - 1, and + * c = 0.84506291151 rounded to single (24 bits) + * erf(x) = c + P1(s)/Q1(s) + * erfc(x) = (1-c) - P1(s)/Q1(s) + * precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41 + * + * + * For x in [1.25,1.75], let s = x - 1.5, and + * c = 0.95478588343 rounded to single (24 bits) + * erf(x) = c + P2(s)/Q2(s) + * erfc(x) = (1-c) - P2(s)/Q2(s) + * precision: |P2/Q2 - (erf(x)-c)| <= 2**-123.83 + * + * + * For x in [1.75,16/3] + * erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x) + * erf(x) = 1 - erfc(x) + * precision: absolute error of R1/S1 is bounded by 2**-124.03 + * + * For x in [16/3,107] + * erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x) + * erf(x) = 1 - erfc(x) (if x>=9 simply return erf(x)=1 with inexact) + * precision: absolute error of R2/S2 is bounded by 2**-120.07 + * + * Else if inf > x >= 107 + * erf(x) = 1 with inexact + * erfc(x) = 0 with underflow + * + * Special case: + * erf(inf) = 1 + * erfc(inf) = 0 + */ + +#pragma weak erfl = __erfl +#pragma weak erfcl = __erfcl + +#include "libm.h" +#include "longdouble.h" + +static const long double + tiny = 1e-40L, +
nearunfl = 1e-4000L, + half = 0.5L, + one = 1.0L, + onehalf = 1.5L, + L16_3 = 16.0L/3.0L; +/* + * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375] + */ +static const long double P[] = { /* 21 coeffs */ + 1.283791670955125738961589031215451715556e-0001L, + -3.761263890318375246320529677071815594603e-0001L, + 1.128379167095512573896158903121205899135e-0001L, + -2.686617064513125175943235483344625046092e-0002L, + 5.223977625442187842111846652980454568389e-0003L, + -8.548327023450852832546626271083862724358e-0004L, + 1.205533298178966425102164715902231976672e-0004L, + -1.492565035840625097674944905027897838996e-0005L, + 1.646211436588924733604648849172936692024e-0006L, + -1.636584469123491976815834704799733514987e-0007L, + 1.480719281587897445302529007144770739305e-0008L, + -1.229055530170782843046467986464722047175e-0009L, + 9.422759064320307357553954945760654341633e-0011L, + -6.711366846653439036162105104991433380926e-0012L, + 4.463224090341893165100275380693843116240e-0013L, + -2.783513452582658245422635662559779162312e-0014L, + 1.634227412586960195251346878863754661546e-0015L, + -9.060782672889577722765711455623117802795e-0017L, + 4.741341801266246873412159213893613602354e-0018L, + -2.272417596497826188374846636534317381203e-0019L, + 8.069088733716068462496835658928566920933e-0021L, +}; + +/* + * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25] + */ +static const long double C1 = (long double)((float)0.84506291151); +static const long double P1[] = { /* 12 top coeffs */ + -2.362118560752659955654364917390741930316e-0003L, + 4.129623379624420034078926610650759979146e-0001L, + -3.973857505403547283109417923182669976904e-0002L, + 4.357503184084022439763567513078036755183e-0002L, + 8.015593623388421371247676683754171456950e-0002L, + -1.034459310403352486685467221776778474602e-0002L, + 5.671850295381046679675355719017720821383e-0003L, + 1.219262563232763998351452194968781174318e-0003L, + 
5.390833481581033423020320734201065475098e-0004L, + -1.978853912815115495053119023517805528300e-0004L, + 6.184234513953600118335017885706420552487e-0005L, + -5.331802711697810861017518515816271808286e-0006L, +}; +static const long double Q1[] = { /* 12 bottom coeffs with leading 1.0 hidden */ + 9.081506296064882195280178373107623196655e-0001L, + 6.821049531968204097604392183650687642520e-0001L, + 4.067869178233539502315055970743271822838e-0001L, + 1.702332233546316765818144723063881095577e-0001L, + 7.498098377690553934266423088708614219356e-0002L, + 2.050154396918178697056927234366372760310e-0002L, + 7.012988534031999899054782333851905939379e-0003L, + 1.149904787014400354649843451234570731076e-0003L, + 3.185620255011299476196039491205159718620e-0004L, + 1.273405072153008775426376193374105840517e-0005L, + 4.753866999959432971956781228148402971454e-0006L, + -1.002287602111660026053981728549540200683e-0006L, +}; +/* + * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5) + * on [1.25,1.75] + */ +static const long double C2 = (long double)((float)0.95478588343); +static const long double P2[] = { /* 12 top coeffs */ + 1.131926304864446730135126164594785863512e-0002L, + 1.273617996967754151544330055186210322832e-0001L, + -8.169980734667512519897816907190281143423e-0002L, + 9.512267486090321197833634271787944271746e-0002L, + -2.394251569804872160005274999735914368170e-0002L, + 1.108768660227528667525252333184520222905e-0002L, + 3.527435492933902414662043314373277494221e-0004L, + 4.946116273341953463584319006669474625971e-0004L, + -4.289851942513144714600285769022420962418e-0005L, + 8.304719841341952705874781636002085119978e-0005L, + -1.040460226177309338781902252282849903189e-0005L, + 2.122913331584921470381327583672044434087e-0006L, +}; +static const long double Q2[] = { /* 13 bottom coeffs with leading 1.0 hidden */ + 7.448815737306992749168727691042003832150e-0001L, + 7.161813850236008294484744312430122188043e-0001L, + 
3.603134756584225766144922727405641236121e-0001L, + 1.955811609133766478080550795194535852653e-0001L, + 7.253059963716225972479693813787810711233e-0002L, + 2.752391253757421424212770221541238324978e-0002L, + 7.677654852085240257439050673446546828005e-0003L, + 2.141102244555509687346497060326630061069e-0003L, + 4.342123013830957093949563339130674364271e-0004L, + 8.664587895570043348530991997272212150316e-0005L, + 1.109201582511752087060167429397033701988e-0005L, + 1.357834375781831062713347000030984364311e-0006L, + 4.957746280594384997273090385060680016451e-0008L, +}; +/* + * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3] + */ +static const long double R1[] = { /* 14 top coeffs */ + 4.630195122654315016370705767621550602948e+0006L, + 1.257949521746494830700654204488675713628e+0007L, + 1.704153822720260272814743497376181625707e+0007L, + 1.502600568706061872381577539537315739943e+0007L, + 9.543710793431995284827024445387333922861e+0006L, + 4.589344808584091011652238164935949522427e+0006L, + 1.714660662941745791190907071920671844289e+0006L, + 5.034802147768798894307672256192466283867e+0005L, + 1.162286400443554670553152110447126850725e+0005L, + 2.086643834548901681362757308058660399137e+0004L, + 2.839793161868140305907004392890348777338e+0003L, + 2.786687241658423601778258694498655680778e+0002L, + 1.779177837102695602425897452623985786464e+0001L, + 5.641895835477470769043614623819144434731e-0001L, +}; +static const long double S1[] = { /* 15 bottom coeffs with leading 1.0 hidden */ + 4.630195122654331529595606896287596843110e+0006L, + 1.780411093345512024324781084220509055058e+0007L, + 3.250113097051800703707108623715776848283e+0007L, + 3.737857099176755050912193712123489115755e+0007L, + 3.029787497516578821459174055870781168593e+0007L, + 1.833850619965384765005769632103205777227e+0007L, + 8.562719999736915722210391222639186586498e+0006L, + 3.139684562074658971315545539760008136973e+0006L, + 9.106421313731384880027703627454366930945e+0005L, + 
2.085108342384266508613267136003194920001e+0005L, + 3.723126272693120340730491416449539290600e+0004L, + 5.049169878567344046145695360784436929802e+0003L, + 4.944274532748010767670150730035392093899e+0002L, + 3.153510608818213929982940249162268971412e+0001L, + 1.0e00L, +}; + +/* + * erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107] + */ +static const long double R2[] = { /* 15 top coeffs in reverse order!!*/ + 2.447288012254302966796326587537136931669e+0005L, + 8.768592567189861896653369912716538739016e+0005L, + 1.552293152581780065761497908005779524953e+0006L, + 1.792075924835942935864231657504259926729e+0006L, + 1.504001463155897344947500222052694835875e+0006L, + 9.699485556326891411801230186016013019935e+0005L, + 4.961449933661807969863435013364796037700e+0005L, + 2.048726544693474028061176764716228273791e+0005L, + 6.891532964330949722479061090551896886635e+0004L, + 1.888014709010307507771964047905823237985e+0004L, + 4.189692064988957745054734809642495644502e+0003L, + 7.362346487427048068212968889642741734621e+0002L, + 9.980359714211411423007641056580813116207e+0001L, + 9.426910895135379181107191962193485174159e+0000L, + 5.641895835477562869480794515623601280429e-0001L, +}; +static const long double S2[] = { /* 16 coefficients */ + 2.447282203601902971246004716790604686880e+0005L, + 1.153009852759385309367759460934808489833e+0006L, + 2.608580649612639131548966265078663384849e+0006L, + 3.766673917346623308850202792390569025740e+0006L, + 3.890566255138383910789924920541335370691e+0006L, + 3.052882073900746207613166259994150527732e+0006L, + 1.885574519970380988460241047248519418407e+0006L, + 9.369722034759943185851450846811445012922e+0005L, + 3.792278350536686111444869752624492443659e+0005L, + 1.257750606950115799965366001773094058720e+0005L, + 3.410830600242369370645608634643620355058e+0004L, + 7.513984469742343134851326863175067271240e+0003L, + 1.313296320593190002554779998138695507840e+0003L, + 1.773972700887629157006326333696896516769e+0002L, + 
1.670876451822586800422009013880457094162e+0001L, + 1.000L, +}; + +long double erfl(x) +long double x; +{ + long double s,y,t; + + if(!finitel(x)) { + if(x!=x) return x+x; /* NaN */ + return copysignl(one,x); /* return +-1.0 if x=+-Inf */ + } + + y = fabsl(x); + if(y <= 0.84375L) { + if(y<=tiny) return x+P[0]*x; + s = y*y; + t = __poly_libmq(s,21,P); + return x+x*t; + } + if(y<=1.25L) { + s = y-one; + t = C1+__poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + return (signbitl(x))? -t: t; + } else if(y<=1.75L) { + s = y-onehalf; + t = C2+__poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + return (signbitl(x))? -t: t; + } + if(y<=9.0L) t = erfcl(y); else t = tiny; /* y>9: result is +-(1-tiny), i.e. +-1 with inexact */ + return (signbitl(x))? t-one: one-t; +} + +long double erfcl(x) +long double x; +{ + long double s,y,t; + + if(!finitel(x)) { + if(x!=x) return x+x; /* NaN */ + /* return 2.0 if x= -inf; 0.0 if x= +inf */ + if (x < 0.0L) return 2.0L; else return 0.0L; + } + + if(x <= 0.84375L) { + if(x<=0.25) return one-erfl(x); + s = x*x; + t = half-x; + t = t - x*__poly_libmq(s,21,P); + return half+t; + } + if(x<=1.25L) { + s = x-one; + t = one-C1; + return t - __poly_libmq(s,12,P1)/(one+s*__poly_libmq(s,12,Q1)); + } else if(x<=1.75L) { + s = x-onehalf; + t = one-C2; + return t - __poly_libmq(s,12,P2)/(one+s*__poly_libmq(s,13,Q2)); + } + if(x>=107.0L) return nearunfl*nearunfl; /* underflow */ + else if(x >= L16_3) { + y = __poly_libmq(x,15,R2); + t = y/__poly_libmq(x,16,S2); + } else { + y = __poly_libmq(x,14,R1); + t = y/__poly_libmq(x,15,S1); + } + /* + * Note that exp(-x*x+d) = exp(-x*x)*exp(d), so to compute + * exp(-x*x) with a small relative error, we need to compute + * -x*x with a small absolute error. To this end, we set y + * equal to the leading part of x but with enough trailing + * zeros that y*y can be computed exactly and we rewrite x*x + * as y*y + (x-y)*(x+y), distributing the latter expression + * across the exponential.
+ * + * We could construct y in a portable way by setting + * + * int i = (int)(x * ptwo); + * y = (long double)i * 1/ptwo; + * + * where ptwo is some power of two large enough to make x-y + * small but not so large that the conversion to int overflows. + * When long double arithmetic is slow, however, the following + * non-portable code is preferable. + */ + y = x; + *(2+(int*)&y) = *(3+(int*)&y) = 0; + t *= expl(-y*y)*expl(-(x-y)*(x+y)); + return t; +} diff --git a/usr/src/libm/src/Q/exp10l.c b/usr/src/libm/src/Q/exp10l.c new file mode 100644 index 0000000..ac45c58 --- /dev/null +++ b/usr/src/libm/src/Q/exp10l.c @@ -0,0 +1,102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)exp10l.c 1.6 06/01/31 SMI" + +#pragma weak exp10l = __exp10l + +#include "libm.h" + +/* + * exp10l(x) + * n = nint(x*(log10/log2)) ; + * exp10(x) = 10**x = exp(x*ln(10)) = exp(n*ln2+(x*ln10-n*ln2)) + * = 2**n*exp(ln10*(x-n*log2/log10))) + * If x is an integer <= M then use repeat multiplication. 
For + * 10**M is the largest representable integer, where + * M = 10 single precision (24 bits) + * M = 22 double precision (53 bits) + * M = 48 quadruple precision (113 bits) + */ + +#define TINY 1.0e-20L /* single: 1e-5, double: 1e-10, quad: 1e-20 */ +#define LG10OVT 4933.L /* single: 39, double: 309, quad: 4933 */ +#define LG10UFT -4966.L /* single: -45, double: -323, quad: -4966 */ +#define M 48 + /* logt2hi : last 32 bits is zero for quad prec */ +#define LOGT2HI 0.30102999566398119521373889472420986034688L +#define LOGT2LO 2.831664213089468167896664371953e-31L + +static const long double + zero = 0.0L, + tiny = TINY * TINY, + one = 1.0L, + lg10 = 3.321928094887362347870319429489390175865e+0000L, + ln10 = 2.302585092994045684017991454684364207601e+0000L, + logt2hi = LOGT2HI, + logt2lo = LOGT2LO, + lg10ovt = LG10OVT, + lg10uft = LG10UFT; + +long double +exp10l(long double x) { + long double t, tenp; + int k; + + if (!finitel(x)) { + if (isnanl(x) || x > zero) + return (x + x); + else + return (zero); + } + if (fabsl(x) < tiny) + return (one + x); + if (x <= lg10ovt) + if (x >= lg10uft) { + k = (int) x; + tenp = 10.0L; + /* x is a small +integer */ + if (0 <= k && k <= M && (long double) k == x) { + t = one; + if (k & 1) + t *= tenp; + k >>= 1; + while (k) { + tenp *= tenp; + if (k & 1) + t *= tenp; + k >>= 1; + } + return (t); + } + t = anintl(x * lg10); + return (scalbnl(expl(ln10 * ((x - t * logt2hi) - + t * logt2lo)), (int) t)); + } else + return (scalbnl(one, -50000)); /* underflow */ + else + return (scalbnl(one, 50000)); /* overflow */ +} diff --git a/usr/src/libm/src/Q/exp2l.c b/usr/src/libm/src/Q/exp2l.c new file mode 100644 index 0000000..07245d5 --- /dev/null +++ b/usr/src/libm/src/Q/exp2l.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)exp2l.c 1.6 06/01/31 SMI" + +#pragma weak exp2l = __exp2l + +#include "libm.h" + +/* + * exp2l(x) = 2**x = 2**((x-anint(x))+anint(x)) + * = 2**anint(x)*2**(x-anint(x)) + * = 2**anint(x)*exp((x-anint(x))*ln2) + */ + +#define TINY 1.0e-20L /* single: 1e-5, double: 1e-10, quad: 1e-20 */ +#define OVFLEXP 16400 /* single: 130, double 1030, quad: 16400 */ +#define UNFLEXP -16520 /* single:-155, double -1080, quad:-16520 */ + +static const long double + zero = 0.0L, + tiny = TINY * TINY, + half = 0.5L, + ln2 = 6.931471805599453094172321214581765680755e-0001L, + one = 1.0L; + +static const int + ovflexp = OVFLEXP, + unflexp = UNFLEXP; + +long double +exp2l(long double x) { + long double t; + + if (!finitel(x)) { + if (isnanl(x) || x > zero) + return (x + x); + else + return (zero); + } + t = fabsl(x); + if (t < half) { + if (t < tiny) + return (one + x); + else + return (expl(ln2 * x)); + } + t = anintl(x); + if (t < ovflexp) { + if (t >= unflexp) + return (scalbnl(expl(ln2 * (x - t)), (int) t)); + else + return (scalbnl(one, unflexp)); /* underflow */ + } else + return (scalbnl(one, ovflexp)); /* overflow */ +} diff --git a/usr/src/libm/src/Q/expl.c b/usr/src/libm/src/Q/expl.c new file mode 100644 index 0000000..f180c8c --- /dev/null +++ 
b/usr/src/libm/src/Q/expl.c @@ -0,0 +1,125 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)expl.c 1.10 06/01/31 SMI" + +/* + * expl(x) + * Table driven method + * Written by K.C. Ng, November 1988. + * Algorithm : + * 1. Argument Reduction: given the input x, find r and integer k + * and j such that + * x = (32k+j)*ln2 + r, |r| <= (1/64)*ln2 . + * + * 2. expl(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r)) + * Note: + * a. expm1(r) = (2r)/(2-R), R = r - r^2*(t1 + t2*r^2) + * b. 2^(j/32) is represented as + * _TBL_expl_hi[j]+_TBL_expl_lo[j] + * where + * _TBL_expl_hi[j] = 2^(j/32) rounded + * _TBL_expl_lo[j] = 2^(j/32) - _TBL_expl_hi[j]. + * + * Special cases: + * expl(INF) is INF, expl(NaN) is NaN; + * expl(-INF)= 0; + * for finite argument, only expl(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * an ulp (unit in the last place). + * + * Misc. info. 
+ * For 113 bit long double + * if x > 1.135652340629414394949193107797076342845e+4 + * then expl(x) overflow; + * if x < -1.143346274333629787883724384345262150341e+4 + * then expl(x) underflow + * + * Constants: + * Only decimal values are given. We assume that the compiler will convert + * from decimal to binary accurately enough to produce the correct + * hexadecimal values. + */ + +#pragma weak expl = __expl + +#include "libm.h" + +extern const long double _TBL_expl_hi[], _TBL_expl_lo[]; + +static const long double +one = 1.0L, +two = 2.0L, +ln2_64 = 1.083042469624914545964425189778400898568e-2L, +ovflthreshold = 1.135652340629414394949193107797076342845e+4L, +unflthreshold = -1.143346274333629787883724384345262150341e+4L, +invln2_32 = 4.616624130844682903551758979206054839765e+1L, +ln2_32hi = 2.166084939249829091928849858592451515688e-2L, +ln2_32lo = 5.209643502595475652782654157501186731779e-27L; + +/* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ +static const long double +t1 = 1.666666666666666666666666666660876387437e-1L, +t2 = -2.777777777777777777777707812093173478756e-3L, +t3 = 6.613756613756613482074280932874221202424e-5L, +t4 = -1.653439153392139954169609822742235851120e-6L, +t5 = 4.175314851769539751387852116610973796053e-8L; + +long double +expl(long double x) { + int *px = (int *) &x, ix, j, k, m; + long double t, r; + + ix = px[0]; /* high word of x */ + if (ix >= 0x7fff0000) + return (x + x); /* NaN or +inf */ + if (((unsigned) ix) >= 0xffff0000) + return (-one / x); /* NaN or -inf */ + if ((ix & 0x7fffffff) < 0x3fc30000) { + if ((int) x < 1) + return (one + x); /* |x|<2^-60 */ + } + if (ix > 0) { + if (x > ovflthreshold) + return (scalbnl(x, 20000)); /* overflow */ + k = (int) (invln2_32 * (x + ln2_64)); + } else { + if (x < unflthreshold) + return (scalbnl(-x, -40000)); /* underflow */ + k = (int) (invln2_32 * (x - ln2_64)); + } + j = k&0x1f; /* index of 2^(j/32) in _TBL_expl_hi/_TBL_expl_lo */ + m = k>>5; /* power of two applied by the final scalbnl */ + t = (long double) k; + x = (x - t * ln2_32hi) - t * ln2_32lo; + t = x * x; + r = (x - t * (t1 + t * (t2 + t
* (t3 + t * (t4 + t * t5))))) - two; + x = _TBL_expl_hi[j] - ((_TBL_expl_hi[j] * (x + x)) / r - + _TBL_expl_lo[j]); + return (scalbnl(x, m)); +} diff --git a/usr/src/libm/src/Q/expm1l.c b/usr/src/libm/src/Q/expm1l.c new file mode 100644 index 0000000..fa9830e --- /dev/null +++ b/usr/src/libm/src/Q/expm1l.c @@ -0,0 +1,184 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)expm1l.c 1.16 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak expm1l = __expm1l +#endif +#if !defined(__sparc) +#error Unsupported architecture +#endif + +/* + * expm1l(x) + * + * Table driven method + * Written by K.C. Ng, June 1995. + * Algorithm : + * 1. expm1(x) = x if |x|<2**-114 + * 2. if |x| <= 0.0625 = 1/16, use approximation + * expm1(x) = x + x*P/(2-P) + * where + * P = x - z*(P1+z*(P2+z*(P3+z*(P4+z*(P5+z*(P6+z*P7)))))), z = x*x; + * (this formula is derived from + * 2-P+x = R = x*(exp(x)+1)/(exp(x)-1) ~ 2 + x*x/6 - x^4/360 + ...)
+ * + * P1 = 1.66666666666666666666666666666638500528074603030e-0001 + * P2 = -2.77777777777777777777777759668391122822266551158e-0003 + * P3 = 6.61375661375661375657437408890138814721051293054e-0005 + * P4 = -1.65343915343915303310185228411892601606669528828e-0006 + * P5 = 4.17535139755122945763580609663414647067443411178e-0008 + * P6 = -1.05683795988668526689182102605260986731620026832e-0009 + * P7 = 2.67544168821852702827123344217198187229611470514e-0011 + * + * Accuracy: |R-x*(exp(x)+1)/(exp(x)-1)|<=2**-119.13 + * + * 3. For 1/16 <= |x| < 1.25, choose x(+-i) ~ +-(i+4.5)/64, i=0,..,75 + * since + * exp(x) = exp(xi+(x-xi))= exp(xi)*exp((x-xi)) + * we have + * expm1(x) = expm1(xi)+(exp(xi))*(expm1(x-xi)) + * where + * |s=x-xi| <= 1/128 + * and + * expm1(s)=2s/(2-R), R= s-s^2*(T1+s^2*(T2+s^2*(T3+s^2*(T4+s^2*T5)))) + * + * T1 = 1.666666666666666666666666666660876387437e-1L, + * T2 = -2.777777777777777777777707812093173478756e-3L, + * T3 = 6.613756613756613482074280932874221202424e-5L, + * T4 = -1.653439153392139954169609822742235851120e-6L, + * T5 = 4.175314851769539751387852116610973796053e-8L; + * + * 4. For |x| >= 1.25, return exp(x)-1. + * (see algorithm for exp) + * + * Special cases: + * expm1l(INF) is INF, expm1l(NaN) is NaN; + * expm1l(-INF)= -1; + * for finite argument, only expm1l(0)=0 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 2 ulp (unit in the last place). + * + * Misc. info. + * For 113 bit long double + * if x > 1.135652340629414394949193107797076342845e+4 + * then expm1l(x) overflow; + * + * Constants: + * Only decimal values are given. We assume that the compiler will convert + * from decimal to binary accurately enough to produce the correct + * hexadecimal values.
+ */ + +#include "libm.h" + +extern const long double _TBL_expl_hi[], _TBL_expl_lo[]; +extern const long double _TBL_expm1lx[], _TBL_expm1l[]; + +static const long double + zero = +0.0L, + one = +1.0L, + two = +2.0L, + ln2_64 = +1.083042469624914545964425189778400898568e-2L, + ovflthreshold = +1.135652340629414394949193107797076342845e+4L, + invln2_32 = +4.616624130844682903551758979206054839765e+1L, + ln2_32hi = +2.166084939249829091928849858592451515688e-2L, + ln2_32lo = +5.209643502595475652782654157501186731779e-27L, + huge = +1.0e4000L, + tiny = +1.0e-4000L, + P1 = +1.66666666666666666666666666666638500528074603030e-0001L, + P2 = -2.77777777777777777777777759668391122822266551158e-0003L, + P3 = +6.61375661375661375657437408890138814721051293054e-0005L, + P4 = -1.65343915343915303310185228411892601606669528828e-0006L, + P5 = +4.17535139755122945763580609663414647067443411178e-0008L, + P6 = -1.05683795988668526689182102605260986731620026832e-0009L, + P7 = +2.67544168821852702827123344217198187229611470514e-0011L, +/* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ + T1 = +1.666666666666666666666666666660876387437e-1L, + T2 = -2.777777777777777777777707812093173478756e-3L, + T3 = +6.613756613756613482074280932874221202424e-5L, + T4 = -1.653439153392139954169609822742235851120e-6L, + T5 = +4.175314851769539751387852116610973796053e-8L; + +long double +expm1l(long double x) { + int hx, ix, j, k, m; + long double t, r, s, w; + + hx = ((int *) &x)[HIXWORD]; + ix = hx & ~0x80000000; + if (ix >= 0x7fff0000) { + if (x != x) + return (x + x); /* NaN */ + if (x < zero) + return (-one); /* -inf */ + return (x); /* +inf */ + } + if (ix < 0x3fff4000) { /* |x| < 1.25 */ + if (ix < 0x3ffb0000) { /* |x| < 0.0625 */ + if (ix < 0x3f8d0000) { + if ((int) x == 0) + return (x); /* |x|<2^-114 */ + } + t = x * x; + r = (x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * + (P5 + t * (P6 + t * P7))))))); + return (x + (x * r) / (two - r)); + } + /* compute j = [64*|x|] from the exponent and top 16 fraction bits */ + m =
0x4009 - (ix >> 16); + j = ((ix & 0x0000ffff) | 0x10000) >> m; /* j=4,...,79 */ + if (hx < 0) + j += 82; /* negative x: table index is offset by 82 */ + s = x - _TBL_expm1lx[j]; + t = s * s; + r = s - t * (T1 + t * (T2 + t * (T3 + t * (T4 + t * T5)))); + r = (s + s) / (two - r); + w = _TBL_expm1l[j]; + return (w + (w + one) * r); + } + if (hx > 0) { + if (x > ovflthreshold) + return (huge * huge); /* overflow */ + k = (int) (invln2_32 * (x + ln2_64)); + } else { + if (x < -80.0) + return (tiny - x / x); /* -1 with inexact */ + k = (int) (invln2_32 * (x - ln2_64)); + } + j = k & 0x1f; + m = k >> 5; + t = (long double) k; + x = (x - t * ln2_32hi) - t * ln2_32lo; + t = x * x; + r = (x - t * (T1 + t * (T2 + t * (T3 + t * (T4 + t * T5))))) - two; + x = _TBL_expl_hi[j] - ((_TBL_expl_hi[j] * (x + x)) / r - + _TBL_expl_lo[j]); + return (scalbnl(x, m) - one); +} diff --git a/usr/src/libm/src/Q/fabsl.c b/usr/src/libm/src/Q/fabsl.c new file mode 100644 index 0000000..5e8fca1 --- /dev/null +++ b/usr/src/libm/src/Q/fabsl.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)fabsl.c 1.7 06/01/31 SMI" + +#pragma weak fabsl = __fabsl + +#include "libm.h" + +long double +fabsl(long double x) { + int *px = (int *) &x; + + px[0] &= 0x7fffffff; + return (x); +} diff --git a/usr/src/libm/src/Q/finitel.c b/usr/src/libm/src/Q/finitel.c new file mode 100644 index 0000000..2b355d3 --- /dev/null +++ b/usr/src/libm/src/Q/finitel.c @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)finitel.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak finitel = __finitel +#endif + +#include "libm.h" + +#if defined(__sparc) +int +finitel(long double x) { + int *px = (int *) &x; + return ((px[0] & ~0x80000000) < 0x7fff0000); +} +#elif defined(__i386) +int +finitel(long double x) { + int *px = (int *) &x, t = px[2] & 0x7fff; +#if defined(HANDLE_UNSUPPORTED) + return (t != 0x7fff && ((px[1] & 0x80000000) != 0 || t == 0)); +#else + return (t != 0x7fff); +#endif +} +#endif /* defined(__sparc) || defined(__i386) */ diff --git a/usr/src/libm/src/Q/floorl.c b/usr/src/libm/src/Q/floorl.c new file mode 100644 index 0000000..777a91a --- /dev/null +++ b/usr/src/libm/src/Q/floorl.c @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)floorl.c 1.6 06/01/31 SMI" + +/* + * ceill(x) returns the least integral value not less than x + * floorl(x) returns the biggest integral value not greater than x + * + * NOTE: aintl(x), anintl(x), ceill(x), floorl(x), and rintl(x) return result + * with the same sign as x's, including 0.0.
+ */
+
+#pragma weak ceill = __ceill
+#pragma weak floorl = __floorl
+
+#include "libm.h"
+
+static const long double qone = 1.0L;
+
+long double
+ceill(long double x) {	/* least integral value >= x; x + x when finitel(x) is false */
+	long double t;
+
+	if (!finitel(x))
+		return (x + x);	/* non-finite input: result is x + x */
+	t = rintl(x);	/* integral candidate; may land on either side of x */
+	if (t >= x)	/* already ceil(x) */
+		return (t);
+	else	/* t < x case: return t+1 */
+		return (copysignl(t + qone, x));	/* copysignl gives the result x's sign, so a zero result from negative x is -0 */
+}
+
+long double
+floorl(long double x) {	/* biggest integral value <= x; x + x when finitel(x) is false */
+	long double t;
+
+	if (!finitel(x))
+		return (x + x);	/* non-finite input: result is x + x */
+	t = rintl(x);	/* integral candidate; may land on either side of x */
+	if (t <= x)
+		return (t);	/* already floor(x) */
+	else	/* x < t case: return t-1 */
+		return (copysignl(t - qone, x));	/* result is forced to carry x's sign */
+}
diff --git a/usr/src/libm/src/Q/fmodl.c b/usr/src/libm/src/Q/fmodl.c
new file mode 100644
index 0000000..cf0b445
--- /dev/null
+++ b/usr/src/libm/src/Q/fmodl.c
@@ -0,0 +1,274 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)fmodl.c 1.12 06/01/31 SMI"
+
+#pragma weak fmodl = __fmodl
+
+#include "libm.h"
+
+static const int
+	is = -0x7fffffff - 1,	/* only bit 31 set: sign bit / top bit of a 32-bit word */
+	im = 0x0000ffff,	/* low 16 bits of the high word: leading fraction bits */
+	iu = 0x00010000;	/* bit 16: position of the explicit leading 1 once the exponent is stripped */
+
+static const long double
+	zero = 0.0L,
+	one = 1.0L;
+
+#ifdef __LITTLE_ENDIAN
+#define __H0(x) *(3 + (int *) &x)	/* word with sign and exponent */
+#define __H1(x) *(2 + (int *) &x)
+#define __H2(x) *(1 + (int *) &x)
+#define __H3(x) *(0 + (int *) &x)	/* lowest fraction word */
+#else
+#define __H0(x) *(0 + (int *) &x)	/* word with sign and exponent */
+#define __H1(x) *(1 + (int *) &x)
+#define __H2(x) *(2 + (int *) &x)
+#define __H3(x) *(3 + (int *) &x)	/* lowest fraction word */
+#endif
+
+long double
+fmodl(long double x, long double y) {	/* remainder of x / y with the sign of x, by fixed-point shift-and-subtract on four 32-bit words */
+	long double a, b;
+	int n, ix, iy, k, sx;
+	int hx;
+	int x0, y0, z0, carry;
+	unsigned x1, x2, x3, y1, y2, y3, z1, z2, z3;
+
+	hx = __H0(x);
+	x1 = __H1(x);
+	x2 = __H2(x);
+	x3 = __H3(x);
+	y0 = __H0(y);
+	y1 = __H1(y);
+	y2 = __H2(y);
+	y3 = __H3(y);
+
+	sx = hx & 0x80000000;	/* sign bit of x */
+	x0 = hx ^ sx;	/* high word of |x| */
+	y0 &= 0x7fffffff;	/* high word of |y| */
+
+	/* purge off exception values */
+	if (x0 >= 0x7fff0000 ||	/* !finitel(x) */
+	    (y0 > 0x7fff0000) || (y0 == 0x7fff0000 && ((y1 | y2 | y3) != 0)) ||
+	    (y0 | y1 | y2 | y3) == 0)	/* isnanl(y) || y = 0 */
+		return ((x * y) / (x * y));
+	a = fabsl(x);
+	b = fabsl(y);
+	if (a <= b) {
+		if (a < b)
+			return (x);	/* |x| < |y|: x itself is the remainder */
+		else
+			return (zero * x);	/* |x| == |y|: zero that keeps x's sign */
+	}
+	/* determine ix = ilogbl(x) */
+	if (x0 < iu) {	/* subnormal x */
+		ix = -16382;
+		while (x0 == 0) {	/* high word empty: shift left 16 bits at a time */
+			ix -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		}
+		while (x0 < iu) {	/* then one bit at a time until bit 16 is set */
+			ix -= 1;
+			x0 = (x0 << 1) | (x1 >> 31);
+			x1 = (x1 << 1) | (x2 >> 31);
+			x2 = (x2 << 1) | (x3 >> 31);
+			x3 <<= 1;
+		}
+	} else {
+		ix = (x0 >> 16) - 16383;	/* remove the exponent bias */
+		x0 = iu | (x0 & im);	/* keep the fraction bits and make the leading 1 explicit */
+	}
+
+	/* determine iy = ilogbl(y) */
+	if (y0 < iu) {	/* subnormal y */
+		iy = -16382;
+		while (y0 == 0) {	/* same normalisation as for x above */
+			iy -= 16;
+			y0 = y1 >> 16;
+			y1 = (y1 << 16) | (y2 >> 16);
+			y2 = (y2 << 16) | (y3 >> 16);
+			y3 = (y3 << 16);
+		}
+		while (y0 < iu) {
+			iy -= 1;
+			y0 = (y0 << 1) | (y1 >> 31);
+			y1 = (y1 << 1) | (y2 >> 31);
+			y2 = (y2 << 1) | (y3 >> 31);
+			y3 <<= 1;
+		}
+	} else {
+		iy = (y0 >> 16) - 16383;
+		y0 = iu | (y0 & im);
+	}
+
+	/* fix point fmod */
+	n = ix - iy;	/* number of shift-and-subtract steps still owed */
+	while (n--) {
+		while (x0 == 0 && n >= 16) {	/* high word empty: consume 16 steps with one 16-bit shift */
+			n -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		}
+		while (x0 < iu && n >= 1) {	/* consume single steps while bit 16 of x0 is clear */
+			n -= 1;
+			x0 = (x0 << 1) | (x1 >> 31);
+			x1 = (x1 << 1) | (x2 >> 31);
+			x2 = (x2 << 1) | (x3 >> 31);
+			x3 = (x3 << 1);
+		}
+		carry = 0;	/* reassigned two statements below */
+		z3 = x3 - y3;	/* z = x - y, lowest word first */
+		carry = (z3 > x3);	/* unsigned wraparound means a borrow out of this word */
+		if (carry == 0) {
+			z2 = x2 - y2;
+			carry = (z2 > x2);
+		} else {
+			z2 = x2 - y2 - 1;
+			carry = (z2 >= x2);
+		}
+		if (carry == 0) {
+			z1 = x1 - y1;
+			carry = (z1 > x1);
+		} else {
+			z1 = x1 - y1 - 1;
+			carry = (z1 >= x1);
+		}
+		z0 = x0 - y0 - carry;	/* signed high word: negative means x < y */
+		if (z0 < 0) {	/* double x */
+			x0 = x0 + x0 + ((x1 & is) != 0);
+			x1 = x1 + x1 + ((x2 & is) != 0);
+			x2 = x2 + x2 + ((x3 & is) != 0);
+			x3 = x3 + x3;
+		} else {
+			if (z0 == 0) {
+				if ((z1 | z2 | z3) == 0) {	/* 0: done */
+					__H0(a) = hx & is;	/* zero with the sign of x */
+					__H1(a) = __H2(a) = __H3(a) = 0;
+					return (a);
+				}
+			}
+			/* x = z << 1 */
+			z0 = z0 + z0 + ((z1 & is) != 0);
+			z1 = z1 + z1 + ((z2 & is) != 0);
+			z2 = z2 + z2 + ((z3 & is) != 0);
+			z3 = z3 + z3;
+			x0 = z0;
+			x1 = z1;
+			x2 = z2;
+			x3 = z3;
+		}
+	}
+
+	carry = 0;	/* reassigned two statements below */
+	z3 = x3 - y3;	/* final step: one more x - y, this time without the shift */
+	carry = (z3 > x3);
+	if (carry == 0) {
+		z2 = x2 - y2;
+		carry = (z2 > x2);
+	} else {
+		z2 = x2 - y2 - 1;
+		carry = (z2 >= x2);
+	}
+	if (carry == 0) {
+		z1 = x1 - y1;
+		carry = (z1 > x1);
+	} else {
+		z1 = x1 - y1 - 1;
+		carry = (z1 >= x1);
+	}
+	z0 = x0 - y0 - carry;
+	if (z0 >= 0) {	/* keep the difference only when x >= y */
+		x0 = z0;
+		x1 = z1;
+		x2 = z2;
+		x3 = z3;
+	}
+	/* convert back to floating value and restore the sign */
+	if ((x0 | x1 | x2 | x3) == 0) {
+		__H0(a) = hx & is;	/* zero with the sign of x */
+		__H1(a) = __H2(a) = __H3(a) = 0;
+		return (a);
+	}
+	while (x0 < iu) {	/* renormalise: shift left until bit 16 is set, lowering iy to match */
+		if (x0 == 0) {
+			iy -= 16;
+			x0 = x1 >> 16;
+			x1 = (x1 << 16) | (x2 >> 16);
+			x2 = (x2 << 16) | (x3 >> 16);
+			x3 = (x3 << 16);
+		} else {
+			x0 = x0 + x0 + ((x1 & is) != 0);
+			x1 = x1 + x1 + ((x2 & is) != 0);
+			x2 = x2 + x2 + ((x3 & is) != 0);
+			x3 = x3 + x3;
+			iy -= 1;
+		}
+	}
+
+	/* normalize output */
+	if (iy >= -16382) {
+		__H0(a) = sx | (x0 - iu) | ((iy + 16383) << 16);	/* sign | fraction without the explicit 1 | biased exponent */
+		__H1(a) = x1;
+		__H2(a) = x2;
+		__H3(a) = x3;
+	} else {	/* subnormal output */
+		n = -16382 - iy;	/* total right shift needed to reach exponent -16382 */
+		k = n & 31;	/* sub-word part of that shift */
+		if (k != 0) {
+			if (k <= 16) {
+				x3 = (x2 << (32 - k)) | (x3 >> k);
+				x2 = (x1 << (32 - k)) | (x2 >> k);
+				x1 = (x0 << (32 - k)) | (x1 >> k);
+				x0 >>= k;
+			} else {
+				x3 = (x2 << (32 - k)) | (x3 >> k);
+				x2 = (x1 << (32 - k)) | (x2 >> k);
+				x1 = (x0 << (32 - k)) | (x1 >> k);
+				x0 = 0;	/* presumably x0 has at most 17 significant bits here, so x0 >> k would be 0 -- confirm */
+			}
+		}
+		while (n >= 32) {	/* whole-word part of the shift */
+			n -= 32;
+			x3 = x2;
+			x2 = x1;
+			x1 = x0;
+			x0 = 0;
+		}
+		__H0(a) = x0 | sx;	/* exponent field stays 0 for a subnormal */
+		__H1(a) = x1;
+		__H2(a) = x2;
+		__H3(a) = x3;
+		a *= one;	/* NOTE(review): value-preserving multiply; presumably so the FPU sees the subnormal result (flags) -- confirm */
+	}
+	return (a);
+}
diff --git a/usr/src/libm/src/Q/gammal.c b/usr/src/libm/src/Q/gammal.c
new file mode 100644
index 0000000..88d71d2
--- /dev/null
+++ b/usr/src/libm/src/Q/gammal.c
@@ -0,0 +1,47 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)gammal.c 1.5 06/01/31 SMI"
+
+#pragma weak gammal = __gammal
+
+/*
+ * long double gammal(long double x); returns __k_lgammal(x, &signgaml) and copies signgaml into signgam
+ */
+
+#include "libm.h"
+#include "longdouble.h"
+
+extern int signgam;	/* written on every call, see below */
+extern int signgaml;	/* filled in by __k_lgammal */
+
+long double
+gammal(long double x) {
+	long double y = __k_lgammal(x, &signgaml);	/* helper declared outside this file; it stores through the pointer */
+
+	signgam = signgaml;	/* SUSv3 requires the setting of signgam */
+	return (y);
+}
diff --git a/usr/src/libm/src/Q/gammal_r.c b/usr/src/libm/src/Q/gammal_r.c
new file mode 100644
index 0000000..2489243
--- /dev/null
+++ b/usr/src/libm/src/Q/gammal_r.c
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)gammal_r.c 1.5 06/01/31 SMI"
+
+/*
+ * long double gammal_r(long double x, int *signgamlp); forwards to __k_lgammal using the caller's sign storage
+ */
+
+#pragma weak gammal_r = __gammal_r
+
+#include "libm.h"
+#include "longdouble.h"
+
+long double
+gammal_r(long double x, int *signgamlp) {
+	return (__k_lgammal(x, signgamlp));	/* unlike gammal(), touches neither signgam nor signgaml here */
+}
diff --git a/usr/src/libm/src/Q/hypotl.c b/usr/src/libm/src/Q/hypotl.c
new file mode 100644
index 0000000..ad7221f
--- /dev/null
+++ b/usr/src/libm/src/Q/hypotl.c
@@ -0,0 +1,150 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)hypotl.c 1.11 06/01/31 SMI"
+
+#pragma weak hypotl = __hypotl
+
+/*
+ * long double hypotl(long double x, long double y);
+ * Method :
+ * If z=x*x+y*y has error less than sqrt(2)/2 ulp then sqrt(z) has
+ * error less than 1 ulp.
+ * So, compute sqrt(x*x+y*y) with some care as follows:
+ * Assume x>y>0;
+ * 1. save and set rounding to round-to-nearest
+ * 2. if x > 2y use
+ * x1*x1+(y*y+(x2*(x+x2))) for x*x+y*y
+ * where x1 = x with lower 64 bits cleared, x2 = x-x1; else
+ * 3.
if x <= 2y use
+ * t1*y1+((x-y)*(x-y)+(t1*y2+t2*y))
+ * where t1 = 2x with lower 64 bits cleared, t2 = 2x-t1, y1= y with
+ * lower 64 bits chopped, y2 = y-y1.
+ *
+ * NOTE: DO NOT remove parentheses!
+ *
+ * Special cases:
+ * hypot(x,y) is INF if x or y is +INF or -INF; else
+ * hypot(x,y) is NAN if x or y is NAN.
+ *
+ * Accuracy:
+ * hypot(x,y) returns sqrt(x^2+y^2) with error less than 1 ulps (units
+ * in the last place)
+ */
+
+#include "libm.h"
+
+extern enum fp_direction_type __swapRD(enum fp_direction_type);	/* used below to set a rounding direction and get the previous one back */
+
+static const long double zero = 0.0L, one = 1.0L;
+
+long double
+hypotl(long double x, long double y) {
+	int n0, n1, n2, n3;	/* word indices; n0 is the word with sign and exponent */
+	long double t1, t2, y1, y2, w;
+	int *px = (int *) &x, *py = (int *) &y;
+	int *pt1 = (int *) &t1, *py1 = (int *) &y1;
+	enum fp_direction_type rd;
+	int j, k, nx, ny, nz;
+
+	if ((*(int *) &one) != 0) {	/* determine word ordering */
+		n0 = 0;
+		n1 = 1;
+		n2 = 2;
+		n3 = 3;
+	} else {
+		n0 = 3;
+		n1 = 2;
+		n2 = 1;
+		n3 = 0;
+	}
+
+	px[n0] &= 0x7fffffff;	/* clear sign bit of x and y */
+	py[n0] &= 0x7fffffff;
+	k = 0x7fff0000;	/* mask for the 15-bit exponent field */
+	nx = px[n0] & k;	/* exponent of x and y */
+	ny = py[n0] & k;
+	if (ny > nx) {
+		w = x;
+		x = y;
+		y = w;
+		nz = ny;
+		ny = nx;
+		nx = nz;
+	}	/* force x > y */
+	if ((nx - ny) >= 0x00730000)	/* NOTE(review): this tests an exponent-field gap of at least 0x73 (115) -- confirm the bound quoted on the next line */
+		return (x + y);	/* x/y >= 2**116 */
+	if (nx < 0x5ff30000 && ny > 0x205b0000) {	/* medium x,y */
+		/* save and set RD to Rounding to nearest */
+		rd = __swapRD(fp_nearest);
+		w = x - y;
+		if (w > y) {	/* x > 2y: step 2 of the method above */
+			pt1[n0] = px[n0];
+			pt1[n1] = px[n1];
+			pt1[n2] = pt1[n3] = 0;	/* t1 = x with the two low words cleared */
+			t2 = x - t1;
+			x = sqrtl(t1 * t1 - (y * (-y) - t2 * (x + t1)));
+		} else {	/* x <= 2y: step 3 of the method above */
+			x = x + x;	/* from here on x holds 2x */
+			py1[n0] = py[n0];
+			py1[n1] = py[n1];
+			py1[n2] = py1[n3] = 0;	/* y1 = y with the two low words cleared */
+			y2 = y - y1;
+			pt1[n0] = px[n0];
+			pt1[n1] = px[n1];
+			pt1[n2] = pt1[n3] = 0;	/* t1 = 2x with the two low words cleared */
+			t2 = x - t1;
+			x = sqrtl(t1 * y1 - (w * (-w) - (t2 * y1 + y2 * x)));
+		}
+		if (rd != fp_nearest)
+			(void) __swapRD(rd);	/* restore rounding mode */
+		return (x);
+	} else {
+		if (nx == k || ny == k) {	/* x or y is INF or NaN */
+			if (isinfl(x))
+				t2 = x;
+			else if (isinfl(y))
+				t2 = y;
+			else
+				t2 = x + y;	/* invalid if x or y is sNaN */
+			return (t2);
+		}
+		if (ny == 0) {	/* y has a zero exponent field: zero or subnormal */
+			if (y == zero || x == zero)
+				return (x + y);
+			t1 = scalbnl(one, 16381);
+			x *= t1;	/* scale both operands up by 2^16381 */
+			y *= t1;
+			return (scalbnl(one, -16381) * hypotl(x, y));	/* and scale the recursive result back down */
+		}
+		j = nx - 0x3fff0000;	/* offset that moves x's exponent field to 0x3fff */
+		px[n0] -= j;	/* apply the same exponent offset to x and y */
+		py[n0] -= j;
+		pt1[n0] = nx;	/* t1: exponent field nx with a zero fraction, multiplies the offset back in */
+		pt1[n1] = pt1[n2] = pt1[n3] = 0;
+		return (t1 * hypotl(x, y));
+	}
+}
diff --git a/usr/src/libm/src/Q/ieee_funcl.c b/usr/src/libm/src/Q/ieee_funcl.c
new file mode 100644
index 0000000..19dcbef
--- /dev/null
+++ b/usr/src/libm/src/Q/ieee_funcl.c
@@ -0,0 +1,112 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)ieee_funcl.c 1.11 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak isinfl = __isinfl
+#pragma weak isnormall = __isnormall
+#pragma weak issubnormall = __issubnormall
+#pragma weak iszerol = __iszerol
+#pragma weak signbitl = __signbitl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)	/* 4-word layout: px[0] = sign (bit 31), exponent (bits 30..16), top fraction bits */
+int
+isinfl(long double x) {	/* nonzero when the exponent is all ones and every fraction bit is 0 */
+	int *px = (int *) &x;
+	return ((px[0] & ~0x80000000) == 0x7fff0000 && px[1] == 0 &&
+	    px[2] == 0 && px[3] == 0);
+}
+
+int
+isnormall(long double x) {	/* nonzero when the exponent field is in 1..0x7ffe */
+	int *px = (int *) &x;
+	return ((unsigned) ((px[0] & 0x7fff0000) - 0x10000) < 0x7ffe0000);	/* one unsigned compare covers both ends: a zero exponent wraps to a huge value */
+}
+
+int
+issubnormall(long double x) {	/* nonzero when the exponent field is 0 and the value is not zero */
+	int *px = (int *) &x;
+	px[0] &= ~0x80000000;	/* drop the sign in the local copy */
+	return (px[0] < 0x00010000 && (px[0] | px[1] | px[2] | px[3]) != 0);
+}
+
+int
+iszerol(long double x) {	/* nonzero when every bit other than the sign is 0 */
+	int *px = (int *) &x;
+	return (((px[0] & ~0x80000000) | px[1] | px[2] | px[3]) == 0);
+}
+
+int
+signbitl(long double x) {	/* returns bit 31 of px[0] as 0 or 1 */
+	unsigned *px = (unsigned *) &x;
+	return (px[0] >> 31);
+}
+#elif defined(__i386)	/* px[2] low 16 bits = sign (bit 15) and exponent; px[1], px[0] = significand words */
+int
+isinfl(long double x) {	/* nonzero when the exponent is all ones and the significand matches the pattern below */
+	int *px = (int *) &x;
+#if defined(HANDLE_UNSUPPORTED)
+	return ((px[2] & 0x7fff) == 0x7fff &&
+	    ((px[1] ^ 0x80000000) | px[0]) == 0);	/* px[1] must be exactly 0x80000000 and px[0] zero */
+#else
+	return ((px[2] & 0x7fff) == 0x7fff &&
+	    ((px[1] & ~0x80000000) | px[0]) == 0);	/* bit 31 of px[1] is ignored here */
+#endif
+}
+
+int
+isnormall(long double x) {	/* nonzero when the exponent field is in 1..0x7ffe */
+	int *px = (int *) &x;
+#if defined(HANDLE_UNSUPPORTED)
+	return ((unsigned) ((px[2] & 0x7fff) - 1) < 0x7ffe &&
+	    (px[1] & 0x80000000) != 0);	/* and bit 31 of px[1] must be set */
+#else
+	return ((unsigned) ((px[2] & 0x7fff) - 1) < 0x7ffe);
+#endif
+}
+
+int
+issubnormall(long double x) {	/* nonzero when the exponent field is 0 and the significand is not zero */
+	int *px = (int *) &x;
+	return ((px[2] & 0x7fff) == 0 && (px[0] | px[1]) != 0);
+}
+
+int
+iszerol(long double x) {	/* nonzero when exponent and significand are all 0; the sign is ignored */
+	int *px = (int *) &x;
+	return (((px[2] & 0x7fff) | px[0] | px[1]) == 0);
+}
+
+int
+signbitl(long double x) {	/* returns bit 15 of px[2] as 0 or 1 */
+	unsigned *px = (unsigned *) &x;
+	return ((px[2] >> 15) & 1);
+}
+#endif	/* defined(__sparc) || defined(__i386) */
diff --git a/usr/src/libm/src/Q/ilogbl.c b/usr/src/libm/src/Q/ilogbl.c
new file mode 100644
index 0000000..d882417 --- /dev/null +++ b/usr/src/libm/src/Q/ilogbl.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)ilogbl.c 1.5 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak ilogbl = __ilogbl
+#endif
+
+#include "libm.h"
+#include "xpg6.h"	/* __xpg6 */
+
+#if defined(__sparc)
+#define ISNORMALL(k, x) (k != 0x7fff)	/* assuming k != 0 */
+#define X86PDNRM(k, x)
+#define XSCALE_OFFSET 0x406f	/* 0x3fff + 112 */
+static const long double xscale = 5192296858534827628530496329220096.0L;
+			/* 2^112 */
+#elif defined(__i386)
+/*
+ * if pseudo-denormal (k == 0 with bit 31 of word 1 set), replace by the equivalent normal: sets k to 1 and ORs 1 into word 2
+ */
+#define X86PDNRM(k, x) if (k == 0 && (((int *) &x)[1] & 0x80000000) != 0) \
+	((int *) &x)[2] |= k = 1
+#if defined(HANDLE_UNSUPPORTED)	/* assuming k != 0 */
+#define ISNORMALL(k, x) (k != 0x7fff && (((int *) &x)[1] & 0x80000000) != 0)
+#else
+#define ISNORMALL(k, x) (k != 0x7fff)
+#endif
+#define XSCALE_OFFSET 0x403e	/* 0x3fff + 63 */
+static const long double xscale = 9223372036854775808.0L;	/* 2^63 */
+#endif
+
+static int
+raise_invalid(int v) {	/* SUSv3 requires ilogbl(0,+/-Inf,NaN) raise invalid */
+#ifndef lint
+	if ((__xpg6 & _C99SUSv3_ilogb_0InfNaN_raises_invalid) != 0) {	/* only when this __xpg6 flag bit is set */
+		static const double zero = 0.0;
+		volatile double dummy;	/* volatile so the division below is really evaluated */
+
+		dummy = zero / zero;	/* 0/0: the operation that raises invalid */
+	}
+#endif
+	return (v);	/* v is handed back unchanged */
+}
+
+int
+ilogbl(long double x) {	/* unbiased exponent of x as an int; fixed codes for zero and for an all-ones exponent */
+	int k = XBIASED_EXP(x);	/* presumably the biased exponent field of x; macro defined outside this file -- confirm */
+
+	X86PDNRM(k, x);	/* expands to nothing under __sparc */
+	if (k == 0) {
+		if (ISZEROL(x))
+			return (raise_invalid(0x80000001));	/* zero: fixed return code */
+		else {
+			x *= xscale;	/* scale up by 2^112 or 2^63 */
+			return (XBIASED_EXP(x) - XSCALE_OFFSET);	/* the offset removes both the bias and the scaling */
+		}
+	} else if (ISNORMALL(k, x))
+		return (k - 0x3fff);	/* normal: just remove the bias */
+	else
+		return (raise_invalid(0x7fffffff));	/* exponent all ones (or failed ISNORMALL): fixed return code */
+}
diff --git a/usr/src/libm/src/Q/isnanl.c b/usr/src/libm/src/Q/isnanl.c
new file mode 100644
index 0000000..98035d8
--- /dev/null
+++ b/usr/src/libm/src/Q/isnanl.c
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)isnanl.c 1.3 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak isnanl = __isnanl
+#endif
+
+#include "libm.h"
+
+#if defined(__sparc)	/* 4-word layout: px[0] = sign (bit 31), exponent (bits 30..16), top fraction bits */
+int
+isnanl(long double x) {	/* nonzero when the exponent is all ones and some fraction bit is set */
+	int *px = (int *) &x;
+	return ((px[0] & ~0x80000000) >= 0x7fff0000 &&
+	    ((px[0] & ~0xffff0000) | px[1] | px[2] | px[3]) != 0);	/* low 16 bits of px[0] plus the three lower words */
+}
+#elif defined(__i386)	/* px[2] low 15 bits = exponent; px[1], px[0] = significand words */
+int
+isnanl(long double x) {	/* nonzero when the exponent is all ones and the significand, ignoring bit 31 of px[1], is not zero */
+	int *px = (int *) &x, t = px[2] & 0x7fff;	/* t = exponent field */
+#if defined(HANDLE_UNSUPPORTED)
+	return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0 ||
+	    t != 0 && (px[1] & 0x80000000) == 0);	/* also counts a nonzero exponent with bit 31 of px[1] clear */
+#else
+	return (t == 0x7fff && ((px[1] & ~0x80000000) | px[0]) != 0);
+#endif
+}
+#endif	/* defined(__sparc) || defined(__i386) */
diff --git a/usr/src/libm/src/Q/j0l.c b/usr/src/libm/src/Q/j0l.c
new file mode 100644
index 0000000..fd27015
--- /dev/null
+++ b/usr/src/libm/src/Q/j0l.c
@@ -0,0 +1,736 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)j0l.c 1.8 06/01/31 SMI" + +/* + * Floating point Bessel's function of the first and second kinds + * of order zero: j0(x),y0(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + */ + +#pragma weak j0l = __j0l +#pragma weak y0l = __y0l + +#include "libm.h" + +#define GENERIC long double +static const GENERIC +zero = 0.0L, +small = 1.0e-9L, +tiny = 1.0e-38L, +one = 1.0L, +five = 5.0L, +eight = 8.0L, +invsqrtpi= 5.641895835477562869480794515607725858441e-0001L, +tpi = 0.636619772367581343075535053490057448L; + +static GENERIC pzero(GENERIC); +static GENERIC qzero(GENERIC); + +static GENERIC r0[7] = { + -2.499999999999999999999999999999998934492e-0001L, + 1.272657927360049786327618451133763714880e-0002L, + -2.694499763712963276900636693400659600898e-0004L, + 2.724877475058977576903234070919616447883e-0006L, + -1.432617103214330236967477495393076320281e-0008L, + 3.823248804080079168706683540513792224471e-0011L, + -4.183174277567983647337568504286313665065e-0014L, +}; +static GENERIC s0[7] = { + 1.0e0L, + 1.159368290559800854689526195462884666395e-0002L, + 6.629397597394973383009743876169946772559e-0005L, + 2.426779981394054406305431142501735094340e-0007L, + 6.097663491248511069094400469635449749883e-0010L, + 1.017019133340929220238747413216052224036e-0012L, + 
9.012593179306197579518374581969371278481e-0016L, +}; + +GENERIC +j0l(x) GENERIC x;{ + GENERIC z, s,c,ss,cc,r,u,v; + int i; + + if(isnanl(x)) return x+x; + x = fabsl(x); + if(x > 1.28L){ + if(!finitel(x)) return zero; + s = sinl(x); + c = cosl(x); + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if(x>1.0e2450L) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if(signbitl(s)!=signbitl(c)) { + ss = s - c; + cc = -cosl(x+x)/ss; + } else { + cc = s + c; + ss = -cosl(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) + * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) + */ + if(x>1.0e120L) return (invsqrtpi*cc)/sqrtl(x); + u = pzero(x); v = qzero(x); + return invsqrtpi*(u*cc-v*ss)/sqrtl(x); + } + if(x<=small) { + if(x<=tiny) return one-x; + else return one-x*x*0.25L; + } + z = x*x; + r = r0[6]; s = s0[6]; + for(i=5;i>=0;i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + return(one+z*(r/s)); +} + +static const GENERIC u0[8] = { + -7.380429510868722527434392794848301631220e-0002L, + 1.766855559625940791857536949301981816513e-0001L, + -1.386470722701047923235553251240162839408e-0002L, + 3.520149242724811578636970811631224862615e-0004L, + -3.978599663243790049853642275624951870025e-0006L, + 2.228801153263957224547222556806915479763e-0008L, + -6.121246764298785018658597179498837316177e-0011L, + 6.677103629722678833475965810525587396596e-0014L, +}; +static const GENERIC v0[8] = { + 1.0e0L, + 1.247164416539111311571676766127767127970e-0002L, + 7.829144749639791500052900281489367443576e-0005L, + 3.247126540422245330511218321013360336606e-0007L, + 9.750516724789499678567062572549568447869e-0010L, + 
2.156713223173591212250543390258458098776e-0012L, + 3.322169561597890004231482431236452752624e-0015L, + 2.821213295314000924252226486305726805093e-0018L, +}; + +GENERIC +y0l(x) GENERIC x;{ + GENERIC z, s,c,ss,cc,u,v; + int i; + volatile GENERIC d; + + if(isnanl(x)) return x+x; + if(x <= zero){ + if(x==zero) + d= -one/(x-x); + else + d = zero/(x-x); + } +#ifdef lint + d = d; +#endif + if(x > 1.28L){ + if(!finitel(x)) return zero; + s = sinl(x); + c = cosl(x); + /* + * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) + * where x0 = x-pi/4 + * Better formula: + * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) + * = 1/sqrt(2) * (cos(x) + sin(x)) + * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. + */ + if(x>1.0e2450L) { /* x+x may overflow */ + ss = s-c; + cc = s+c; + } else if(signbitl(s)!=signbitl(c)) { + ss = s - c; + cc = -cosl(x+x)/ss; + } else { + cc = s + c; + ss = -cosl(x+x)/cc; + } + /* + * j0(x) = 1/sqrt(pi*x) * (P(0,x)*cc - Q(0,x)*ss) + * y0(x) = 1/sqrt(pi*x) * (P(0,x)*ss + Q(0,x)*cc) + */ + if(x>1.0e120L) return (invsqrtpi*ss)/sqrtl(x); + return invsqrtpi*(pzero(x)*ss+qzero(x)*cc)/sqrtl(x); + + } + if(x<=tiny) { + return(u0[0] + tpi*logl(x)); + } + z = x*x; + u = u0[7]; v = v0[7]; + for(i=6;i>=0;i--){ + u = u*z + u0[i]; + v = v*z + v0[i]; + } + return(u/v + tpi*(j0l(x)*logl(x))); +} + +static const GENERIC pr0[12] = { /* [16 -- inf] */ + 9.999999999999999999999999999999999997515e-0001L, + 1.065981615377273376425365823967550598358e+0003L, + 4.390991200927588978306374718984240719130e+0005L, + 9.072086218607986711847069407339321363103e+0007L, + 1.022552886177375367408408501046461671528e+0010L, + 6.420766912243658241570635854089597269031e+0011L, + 2.206451725126933913591080211081242266908e+0013L, + 3.928369596816895077363705478743346298368e+0014L, + 3.258159928874124597286701119721482876596e+0015L, + 
1.025715808134188978860679130140685101348e+0016L, + 7.537170874795721255796001687024031280685e+0015L, + -1.579413901450157332307745586004207687796e+0014L, +}; +static const GENERIC ps0[11] = { + 1.0e0L, + 1.066051927877273376425365823967550512687e+0003L, + 4.391739647168381592399173804329266353038e+0005L, + 9.075162261801343671805658294123888867884e+0007L, + 1.023186118519904751819581912075985995058e+0010L, + 6.427861860414223746340515376512730275061e+0011L, + 2.210861503237823589735481303627993406235e+0013L, + 3.943247335784292905915956840901818177989e+0014L, + 3.283720976777545142150200110647270004481e+0015L, + 1.045346918812754048903645641538728986759e+0016L, + 8.043455468065618900750599584291193680463e+0015L, +}; +static const GENERIC pr1[12] = { /* [8 -- 16] */ + 9.999999999999999999999784422701108683618e-0001L, + 6.796098532948334207755488692777907062894e+0002L, + 1.840036112605722168824530758797169836042e+0005L, + 2.598490483191916637264894340635847598122e+0007L, + 2.105774863242707025525730249472054578523e+0009L, + 1.015822044230542426666314997796944979959e+0011L, + 2.931557457008110436764077699944189071875e+0012L, + 4.962885121125457633655259224179322808824e+0013L, + 4.705424055148223269155430598563351566279e+0014L, + 2.294439854910747229152056080910427001110e+0015L, + 4.905531843137486691500950019322475458629e+0015L, + 3.187543169710339218793442542845735994565e+0015L, +}; +static const GENERIC ps1[14] = { + 1.0e0L, + 6.796801657948334207754571576066758180288e+0002L, + 1.840512891201300567325421059826676366447e+0005L, + 2.599777028312918975306252167127695075221e+0007L, + 2.107582572771047636846811284634244892537e+0009L, + 1.017275794694156108975782763889979940348e+0011L, + 2.938487645192463845428059755454762316011e+0012L, + 4.982512164735557054521042916182317924466e+0013L, + 4.737639900153703274792677468264564361437e+0014L, + 2.323398719123742743524249528275097100646e+0015L, + 5.033419107069210577868909797896984419391e+0015L, + 
3.409036105931068609601317076759804716059e+0015L, + 7.505655364352679737585745147753521662166e+0013L, + -9.976837153983688250780198248297109118313e+0012L, +}; +static const GENERIC pr2[12] = { /* [5 -- 8 ] */ + 9.999999999999999937857236789277366320220e-0001L, + 3.692848765268649571651602420376358849214e+0002L, + 5.373022067535476576926715900057760985410e+0004L, + 4.038738891191314969971504035057219430725e+0006L, + 1.728285706306940523397385566659762646999e+0008L, + 4.375400819645889911158688737206054788534e+0009L, + 6.598950418204912408375591217782088567076e+0010L, + 5.827182039183238492480275401520072793783e+0011L, + 2.884222642913492390887572414999490975844e+0012L, + 7.373278873797767721932837830628688632775e+0012L, + 8.338295457568973761205077964397969230489e+0012L, + 2.911383183467288345772308817209806922143e+0012L, +}; +static const GENERIC ps2[14] = { + 1.0e0L, + 3.693551890268649477288896267171993213102e+0002L, + 5.375607880998361502474715133828068514297e+0004L, + 4.042477764024108249744998862572786367328e+0006L, + 1.731069838737016956685839588670132939513e+0008L, + 4.387147674049898778738226585935491417728e+0009L, + 6.628058659620653765349556940567715258165e+0010L, + 5.869659904164177740471685856367322160664e+0011L, + 2.919839445622817017058977559638969436383e+0012L, + 7.535314897696671402628203718612309253907e+0012L, + 8.696355561452933775773309859748610658935e+0012L, + 3.216155103141537221173601557697083216257e+0012L, + 4.756857081068942248246880159213789086363e+0010L, + -3.496356619666608032231074866481472824067e+0009L, +}; +static const GENERIC pr3[13] = { /* [3.5 -- 5 ] */ + 9.999999999999916693107285612398196588247e-0001L, + 2.263975921282917721194425320484974336945e+0002L, + 1.994358386744245848889492762781484199966e+0004L, + 8.980067458430542243559962493831661323168e+0005L, + 2.282213787521372663705567756420087553508e+0007L, + 3.409784374889063618250288699908375135923e+0008L, + 3.024380857401448589254343517589811711108e+0009L, + 
1.571110368046740246895071721443082286379e+0010L, + 4.603187020243604632153685300463160593768e+0010L, + 7.087196453409712719449549280664058793403e+0010L, + 5.046196021776346356803687409644239065041e+0010L, + 1.287758439080165765709154276618854799932e+0010L, + 5.900679773415023433787846658096813590784e+0008L, +}; +static const GENERIC ps3[13] = { + 1.0e0L, + 2.264679046282855061328604619231774747116e+0002L, + 1.995939523988944553755653255389812103448e+0004L, + 8.993853144706348727038389967490183236820e+0005L, + 2.288326099634588843906989983704795468773e+0007L, + 3.424967100255240885169240956804790118282e+0008L, + 3.046311797972463991368023759640028910016e+0009L, + 1.589614961932826812790222479700797224003e+0010L, + 4.692406624527744816497089139325073939927e+0010L, + 7.320486495902008912866462849073108323948e+0010L, + 5.345945972828978289935309597742981360994e+0010L, + 1.444033091910423754121309915092247171008e+0010L, + 7.987714685115314668378957273824383610525e+0008L, +}; +static const GENERIC pr4[13] = { /* [2.5 , 3.5] */ + 9.999999999986736677961118722747757712260e-0001L, + 1.453824980703800559037873123568378845663e+0002L, + 8.097327216430682288267610447006508661032e+0003L, + 2.273847252038264370231169686380192662135e+0005L, + 3.561056728046211111354759998976985449622e+0006L, + 3.244933588800096378434627029369680378599e+0007L, + 1.740112392860717950376210038908476792588e+0008L, + 5.426170187455893285197878563881579269524e+0008L, + 9.490107486454362321004377336020526281371e+0008L, + 8.688872439428470049801714121070005313806e+0008L, + 3.673315853166437222811910656900123215515e+0008L, + 5.577770470359303305164877446339693270239e+0007L, + 1.540438642031689641308197880181291865714e+0006L, +}; +static const GENERIC ps4[13] = { /* [2.5 , 3.5] */ + 1.0e0L, + 1.454528105698159439773035951959131799816e+0002L, + 8.107442215200392397172179900434987859618e+0003L, + 2.279390393778242887574177096606328994140e+0005L, + 3.576251625592252008424781111770934135844e+0006L, + 
3.267909499056932631405942058670933813863e+0007L, + 1.760021515330805537499778238099704648805e+0008L, + 5.525553787667353981242060222587465726729e+0008L, + 9.769870295912820457889384082671269328511e+0008L, + 9.110582071004774279226905629624018008454e+0008L, + 3.981857678621955599371967680343918454345e+0008L, + 6.482404686230769399073192961667697036706e+0007L, + 2.210046943095878402443535460329391782298e+0006L, +}; +static const GENERIC pr5[13] = { /* [1.777..., 2.5] */ + 9.999999999114986107951817871144655880699e-0001L, + 9.252583736048588342568344570315435947614e+0001L, + 3.218726757856078715214631502407386264637e+0003L, + 5.554009964621111656479588505862577040831e+0004L, + 5.269993115643664338253196944523510290175e+0005L, + 2.874613773778430691192912190618220544575e+0006L, + 9.133538151103658353874146919613442436035e+0006L, + 1.673067041410338922825193013077354249193e+0007L, + 1.706913873848398011744790289200151840498e+0007L, + 9.067766583853288534551600235576747618679e+0006L, + 2.216746733457884568532695355036338655872e+0006L, + 1.945753880802872541235703812722344514405e+0005L, + 3.132374412921948071539195638885330951749e+0003L, +}; +static const GENERIC ps5[13] = { /* [1.777..., 2.5] */ + 1.0e0L, + 9.259614983862181118883831670990340052982e+0001L, + 3.225125275462903384842124075132609290304e+0003L, + 5.575705362829101545292760055941855246492e+0004L, + 5.306049863037087855496170121958448492522e+0005L, + 2.907060758873509564309729903109018597215e+0006L, + 9.298059206584995898298257827131208539289e+0006L, + 1.720391071006963176836108026556547062980e+0007L, + 1.782614812922865190479394509487941920612e+0007L, + 9.708016389605273153536452032839879950155e+0006L, + 2.476495084688170096480215640962175140027e+0006L, + 2.363200660365585759668077790194604917187e+0005L, + 4.803239569848196077121203575704356936731e+0003L, +}; +static const GENERIC pr6[13] = { /* [1.28, 1.777...] 
*/
+	9.999999969777095495998606925524322559556e-0001L,
+	5.825486719466194430503283824096872219216e+0001L,
+	1.248155491637757281915184824965379905380e+0003L,
+	1.302093199842358609321338417071710477615e+0004L,
+	7.353835804186292782840961999810543016039e+0004L,
+	2.356471661113686180549195092555751341757e+0005L,
+	4.350553267429009581632987060942780847101e+0005L,
+	4.588762661876600638719159826652389418235e+0005L,
+	2.675796398548523436544221045225290128611e+0005L,
+	8.077649557108971388298292919988449940464e+0004L,
+	1.117640459221306873519068741664054573776e+0004L,
+	5.544400072396814695175787511557757885585e+0002L,
+	5.072550541191480498431289089905822910718e+0000L,
+};
+static const GENERIC ps6[13] = { /* [1.28, 1.777...] */
+	1.0e0L,
+	5.832517925357165050639075848183613063291e+0001L,
+	1.252144364743592128171256104364976466898e+0003L,
+	1.310300234342216813579118022415585740772e+0004L,
+	7.434667697093812197817292154032863632923e+0004L,
+	2.398706595587719165726469002404004614711e+0005L,
+	4.472737517625103157004869372427480602511e+0005L,
+	4.786313523337761975294171429067037723611e+0005L,
+	2.851161872872731228472536061865365370192e+0005L,
+	8.891648269899148412331918021801385815586e+0004L,
+	1.297097489535351517572978123584751042287e+0004L,
+	7.096761640545975756202184143400469812618e+0002L,
+	8.378049338590233325977702401733340820351e+0000L,
+};
+static const GENERIC sixteen = 16.0L;
+static const GENERIC huge = 1.0e30L;
+/*
+ * pzero(x) returns r/s, where r and s are polynomials in z = (1/x)^2
+ * evaluated by Horner's rule.  The coefficient pair prN/psN is picked by
+ * the first threshold that x exceeds: sixteen, eight, five, 3.5, 2.5 and
+ * 1/0.5625; every other x falls through to pr6/ps6.  For x > huge the
+ * function returns one without evaluating anything.
+ * NOTE(review): presumably the P(0,x) factor of the large-argument form
+ * of j0/y0 -- the callers are not part of this excerpt, confirm there.
+ */
+static GENERIC pzero(x)
+GENERIC x;
+{
+	GENERIC s,r,t,z;	/* s: denominator, r: numerator, t: 1/x, z: t*t */
+	int i;
+	if(x>huge) return one;	/* no table lookup past huge */
+	t = one/x; z = t*t;
+	if(x>sixteen) {
+		r = z*pr0[11]+pr0[10]; s = ps0[10];	/* numerator has one more term; fold it in first */
+		for(i=9;i>=0;i--) {
+			r = z*r + pr0[i];
+			s = z*s + ps0[i];
+		}
+	} else if (x > eight){
+		r = pr1[11]; s = ps1[11]+z*(ps1[12]+z*ps1[13]);	/* denominator has two more terms; fold them in first */
+		for(i=10;i>=0;i--) {
+			r = z*r + pr1[i];
+			s = z*s + ps1[i];
+		}
+	} else if (x > five){ /* x > 5.0 */
+		r = pr2[11]; s = ps2[11]+z*(ps2[12]+z*ps2[13]);	/* same shape as the pr1/ps1 case */
+		for(i=10;i>=0;i--) {
+			r = z*r + pr2[i];
+			s = z*s + ps2[i];
+		}
+	} else if( x>3.5L) {
+		r = pr3[12]; s = ps3[12];	/* from here on both tables have 13 terms */
+		for(i=11;i>=0;i--) {
+			r = z*r + pr3[i];
+			s = z*s + ps3[i];
+		}
+	} else if( x>2.5L) {
+		r = pr4[12]; s = ps4[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + pr4[i];
+			s = z*s + ps4[i];
+		}
+	} else if( x> (1.0L/0.5625L)){
+		r = pr5[12]; s = ps5[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + pr5[i];
+			s = z*s + ps5[i];
+		}
+	} else { /* assume x > 1.28 */
+		r = pr6[12]; s = ps6[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + pr6[i];
+			s = z*s + ps6[i];
+		}
+	}
+	return r/s;
+}
+
+
+static const GENERIC qr0[12] = { /* [16, inf] */
+	-1.249999999999999999999999999999999972972e-0001L,
+	-1.425179595545670577414395762503991596897e+0002L,
+	-6.312499645625970845534460257936222407219e+0004L,
+	-1.411374326457208384315121243698814446848e+0007L,
+	-1.735034212758873581410984757860787252842e+0009L,
+	-1.199777647512789489421826342485055280680e+0011L,
+	-4.596025334081655714499860409699100373644e+0012L,
+	-9.262525628201284107792924477031653399187e+0013L,
+	-8.858394728685039245344398842180662867639e+0014L,
+	-3.267527953687534887623740622709505972113e+0015L,
+	-2.664222971186311967587129347029450062019e+0015L,
+	3.442464060723987869585180095344504100204e+0014L,
+};
+static const GENERIC qs0[11] = {
+	1.0e0L,
+	1.140729613936536461931516610003185687881e+0003L,
+	5.056665510442299351009198186490085803580e+0005L,
+	1.132041763825642787943941650522718199115e+0008L,
+	1.394570111872581606392620678214246479767e+0010L,
+	9.677945218152264789534431079563744378421e+0011L,
+	3.731140327851536828225143058896348502096e+0013L,
+	7.612785951064869291722846681020881676410e+0014L,
+	7.476077016406764891730191004811863975940e+0015L,
+	2.951246482613592035421503427100393831709e+0016L,
+	3.108361803691811711136854587074302034901e+0016L,
+};
+static const GENERIC qr1[12] = { /* [8, 16 ] */
+	-1.249999999999999999997949010383433818157e-0001L,
+	-9.051215166393822640636752244895124126934e+0001L,
+	-2.620782703428148837671179031904208303947e+0004L, +
-3.975571261553504457766177974508785790884e+0006L, + -3.479029330759311306270072218074074994090e+0008L, + -1.823955008124268573036216746186239829089e+0010L, + -5.765932697111801375765156029221568664435e+0011L, + -1.079843680798742592954002192417934779114e+0013L, + -1.146893630504592739082205764611581332897e+0014L, + -6.367016059683898464936104447282880704182e+0014L, + -1.583109041961213490464459111903484209098e+0015L, + -1.230149555764242473103128650135795639412e+0015L, +}; +static const GENERIC qs1[14] = { + 1.0e0L, + 7.246831508115058112438579847778014458432e+0002L, + 2.100854184439168518399383786306927037611e+0005L, + 3.192636418837951507430188285940994235122e+0007L, + 2.801558443383354674538443461124434216152e+0009L, + 1.475026997664373739293483927250653467487e+0011L, + 4.694486824913954608552363821799927145318e+0012L, + 8.890350100919200250838438709601547334021e+0013L, + 9.626844429082905144874701068760469752067e+0014L, + 5.541110744600460773528263862687521642140e+0015L, + 1.486500494789452556727470329232123096563e+0016L, + 1.415840104845959400365430773732093899210e+0016L, + 1.780866095241517418081312567239682336483e+0015L, + -2.359230917384889357887631544079990129494e+0014L, +}; +static const GENERIC qr2[12] = { /* [5, 8] */ + -1.249999999999999531937744362527772181614e-0001L, + -4.944373897356969774839375977239241573966e+0001L, + -7.728449175433465285314261650078450473909e+0003L, + -6.262574329612752346336901434651220705903e+0005L, + -2.900948220220943306027235217424380672732e+0007L, + -7.988719647634192770463917157562874119535e+0008L, + -1.318228171927181389547760026626357012375e+0010L, + -1.282439773983029245309263271945424928196e+0011L, + -7.050925570827818040186149940257918845138e+0011L, + -2.021751882573871990004205616874202684429e+0012L, + -2.592939962400668552384333900573812635658e+0012L, + -1.038267109518891262840601514932972850326e+0012L, +}; +static const GENERIC qs2[14] = { + 1.0e0L, + 3.961358492885570003202784022894248952116e+0002L, + 
6.205788738864701882828752634586510926968e+0004L, + 5.045715603932670286550673813011764406749e+0006L, + 2.349248611362658323353343389430968751429e+0008L, + 6.520244524415828635917683553721880063911e+0009L, + 1.089111211223507719337067159886281887722e+0011L, + 1.080406000905359867958779409414903018610e+0012L, + 6.135645280895514703514154680623769562148e+0012L, + 1.862433040246625874245867151368643668215e+0013L, + 2.667780805786648888840777888702193708994e+0013L, + 1.394401107289087774765300711809313112824e+0013L, + 1.093247500616320375562898297156722445484e+0012L, + -7.228875530378928722826604216491493780775e+0010L, +}; +static const GENERIC qr3[13] = { /* [3.5 5] */ + -1.249999999999473067748420379578481661075e-0001L, + -3.044549048635289351913574324803250977998e+0001L, + -2.890081140649769078496693003524681440869e+0003L, + -1.404922456817202235879343275330529107684e+0005L, + -3.862746614385573443518177403617349281869e+0006L, + -6.257517309110249049201133708911155047689e+0007L, + -6.031451330920839916987079782727323477520e+0008L, + -3.411542405173830611454025765755854382346e+0009L, + -1.089392478149726672133014498723021526099e+0010L, + -1.824934078420210941290140903415956782726e+0010L, + -1.400780278304358710423481070486939531139e+0010L, + -3.716484136064917363926635716743771092093e+0009L, + -1.397591075296425529970434890954904331580e+0008L, +}; +static const GENERIC qs3[13] = { + 1.0e0L, + 2.441498613904962049391000187014945858042e+0002L, + 2.326188882072370711500164222341514337043e+0004L, + 1.137138213121231338494977104659239578165e+0006L, + 3.152918070735662728722998452605364253517e+0007L, + 5.172877993426507259314270488444013595108e+0008L, + 5.083086439731669807455961078856470774115e+0009L, + 2.961842732066434123119325521139476909941e+0010L, + 9.912185866862440735829781856081353151390e+0010L, + 1.793560561251622234430564181567297983598e+0011L, + 1.577090119341228122525265108497940403073e+0011L, + 5.509910306780166194333889999985463681636e+0010L, + 
4.761691134078874491202320181517936758141e+0009L, +}; +static const GENERIC qr4[13] = { /* [2.5 3.5] */ + -1.249999999928567734339745043490705340835e-0001L, + -1.967201748731419063051601624435565528481e+0001L, + -1.186329146714562236407099740615528170707e+0003L, + -3.607736959222941810356301491152457934060e+0004L, + -6.119200717978104904932828468575194267125e+0005L, + -6.037847781158358226670305078652205586384e+0006L, + -3.503558153336140359700536720393565984740e+0007L, + -1.180196478268225718757218523746787309773e+0008L, + -2.221860232085134915841426363505169680528e+0008L, + -2.173372505452747585296176761701746236760e+0008L, + -9.649364865061237558517730539506568013963e+0007L, + -1.465429227847933034546039640094862650385e+0007L, + -3.083003197920262085170581866246663380607e+0005L, +}; +static const GENERIC qs4[13] = { /* [2.5 3.5] */ + 1.0e0L, + 1.579620773732259142752614142139986854055e+0002L, + 9.581372220329138733203879503753685054968e+0003L, + 2.939598672379108095776114131010825885308e+0005L, + 5.052183049314542218630341818692588448168e+0006L, + 5.083497695595206639433839326338971980149e+0007L, + 3.036385361800553388049719014005099206516e+0008L, + 1.067826481452753409910563785161661492137e+0009L, + 2.145644125557118044720741775125319669272e+0009L, + 2.324115615959719949363946673491552216799e+0009L, + 1.223262962112070757966959855619847011146e+0009L, + 2.569765553318495423738478585947110270709e+0008L, + 1.354744744299227127897905787732636565504e+0007L, +}; +static const GENERIC qr5[13] = { /* [1.777.., 2.5] */ + -1.249999995936639697637680428174576069971e-0001L, + -1.260846055371311453485891923426489068315e+0001L, + -4.772398467544467480801174330290141578895e+0002L, + -8.939852599990298486613760833996490599724e+0003L, + -9.184070787149542050979542226446134243197e+0004L, + -5.406038945018274458362637897739280435171e+0005L, + -1.845896544705190261018653728678171084418e+0006L, + -3.613616990680809501878667570653308071547e+0006L, + 
-3.908782978135693252252557720414348623779e+0006L, + -2.173711022517323927109138170588442768176e+0006L, + -5.431253130679918485836408549007856244495e+0005L, + -4.591098546452684510082591587275940765959e+0004L, + -5.244711364168207806835520057792229646578e+0002L, +}; +static const GENERIC qs5[13] = { /* [1.777.., 2.5] */ + 1.0e0L, + 1.014536210851290878350892750972474861447e+0002L, + 3.875547510687135314064434160096139681076e+0003L, + 7.361913122670079814955259281995617732580e+0004L, + 7.720288944218771126581086539585529314636e+0005L, + 4.681529554446752496404431433608306558038e+0006L, + 1.667882621940503925455031252308367745820e+0007L, + 3.469403153761399881888272620855305156241e+0007L, + 4.096992047964210711867089384719947863019e+0007L, + 2.596804755829217449311530735959560630554e+0007L, + 7.983933774697889238154465064019410763845e+0006L, + 9.818133816979900819087242425280757938152e+0005L, + 3.061083930868694396013541535670745443560e+0004L, +}; + +static const GENERIC qr6[13] = { /* [1.28, 1.777..] */ + -1.249999881577289001807137282824929082771e-0001L, + -7.998273510053110759610810594119533619282e+0000L, + -1.872481955335172543369089617771565632719e+0002L, + -2.122116786726300805079874003303799646812e+0003L, + -1.293850285839529282503178263484773478457e+0004L, + -4.445024742266316181033354192262529356093e+0004L, + -8.730161378334357767668344467356505347070e+0004L, + -9.706222895172078442801444972505315054736e+0004L, + -5.896325518259858270165531513618195321041e+0004L, + -1.823172034368108822276420827074668832233e+0004L, + -2.509304178635055926638833040337472387175e+0003L, + -1.156608965715779237316769828941729964099e+0002L, + -7.028005789650731396887346826397785210442e-0001L, +}; +static const GENERIC qs6[13] = { /* [1.28, 1.777..] 
*/
+	1.0e0L,
+	6.457211085058064845601261321277721075900e+0001L,
+	1.534005216588011210342824555136008682950e+0003L,
+	1.777217999176441782593357660462379097171e+0004L,
+	1.118372652642469468091084810263231199696e+0005L,
+	4.015242433858461813142365748386473605294e+0005L,
+	8.377081045517098645448616514388280497673e+0005L,
+	1.011495020008010352575398009604164287337e+0006L,
+	6.886722075290430568652227875200208955970e+0005L,
+	2.504735189948021472047157148613171956537e+0005L,
+	4.408138920171044846941001844352009817062e+0004L,
+	3.105572178072115145673058722853640854884e+0003L,
+	5.588294821118916113437396504573817033678e+0001L,
+}; /*
+ * qzero(x) returns t*(r/s) with t = 1/x, where r and s are polynomials in
+ * z = t*t evaluated by Horner's rule.  The coefficient pair qrN/qsN is
+ * picked by the first threshold that x exceeds: sixteen, eight, five, 3.5,
+ * 2.5 and 1/0.5625; every other x falls through to qr6/qs6.  For x > huge
+ * the function returns -0.125L/x without evaluating anything.
+ * NOTE(review): presumably the Q(0,x) factor of the large-argument form
+ * of j0/y0 -- the callers are not part of this excerpt, confirm there.
+ */
+static GENERIC qzero(x)
+GENERIC x;
+{
+	GENERIC s,r,t,z;	/* s: denominator, r: numerator, t: 1/x, z: t*t */
+	int i;
+	if(x>huge) return -0.125L/x;	/* z -> 0 limit of t*(r/s): qr0[0]/qs0[0] is within about 3e-36 of -0.125 */
+	t = one/x; z = t*t;
+	if(x>sixteen) {
+		r = z*qr0[11]+qr0[10]; s = qs0[10];	/* numerator has one more term; fold it in first */
+		for(i=9;i>=0;i--) {
+			r = z*r + qr0[i];
+			s = z*s + qs0[i];
+		}
+	} else if(x>eight) {
+		r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]);	/* denominator has two more terms; fold them in first */
+		for(i=10;i>=0;i--) {
+			r = z*r + qr1[i];
+			s = z*s + qs1[i];
+		}
+	} else if(x>five){ /* assume x > 5.0 */
+		r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]);	/* same shape as the qr1/qs1 case */
+		for(i=10;i>=0;i--) {
+			r = z*r + qr2[i];
+			s = z*s + qs2[i];
+		}
+	} else if(x>3.5L) {
+		r = qr3[12]; s = qs3[12];	/* from here on both tables have 13 terms */
+		for(i=11;i>=0;i--) {
+			r = z*r + qr3[i];
+			s = z*s + qs3[i];
+		}
+	} else if(x>2.5L) {
+		r = qr4[12]; s = qs4[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + qr4[i];
+			s = z*s + qs4[i];
+		}
+	} else if(x> (1.0L/0.5625L)) {
+		r = qr5[12]; s = qs5[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + qr5[i];
+			s = z*s + qs5[i];
+		}
+	} else { /* assume x > 1.28 */
+		r = qr6[12]; s = qs6[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + qr6[i];
+			s = z*s + qs6[i];
+		}
+	}
+	return t*(r/s);	/* the extra factor t = 1/x is applied last */
+}
diff --git a/usr/src/libm/src/Q/j1l.c b/usr/src/libm/src/Q/j1l.c
new file mode 100644
index 0000000..06fd942
--- /dev/null
+++ b/usr/src/libm/src/Q/j1l.c
@@ -0,0 +1,732 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)j1l.c 1.9 06/01/31 SMI" + +/* + * floating point Bessel's function of the first and second kinds + * of order one: j1(x),y1(x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal.
+ */ + +#pragma weak j1l = __j1l +#pragma weak y1l = __y1l + +#include "libm.h" + +#define GENERIC long double +static const GENERIC +zero = 0.0L, +small = 1.0e-9L, +tiny = 1.0e-38L, +one = 1.0L, +five = 5.0L, +invsqrtpi= 5.641895835477562869480794515607725858441e-0001L, +tpi = 0.636619772367581343075535053490057448L; + +static GENERIC pone(GENERIC); +static GENERIC qone(GENERIC); + +static const GENERIC r0[7] = { + -6.249999999999999999999999999999999627320e-0002L, + 1.940606727194041716205384618494641565464e-0003L, + -3.005630423155733701856481469986459043883e-0005L, + 2.345586219403918667468341047369572169358e-0007L, + -9.976809285885253587529010109133336669724e-0010L, + 2.218743258363623946078958783775107473381e-0012L, + -2.071079656218700604767650924103578046280e-0015L, +}; +static const GENERIC s0[7] = { + 1.0e0L, + 1.061695903156199920738051277075003059555e-0002L, + 5.521860513111180371566951179398862692060e-0005L, + 1.824214367413754193524107877084979441407e-0007L, + 4.098957778439576834818838198039029353925e-0010L, + 6.047735079699666389853240090925264056197e-0013L, + 4.679044728878836197247923279512047035041e-0016L, +}; + +GENERIC +j1l(x) GENERIC x;{ + GENERIC z, d, s,c,ss,cc,r; + int i, sgn; + + if(!finitel(x)) return one/x; + sgn = signbitl(x); + x = fabsl(x); + if(x > 1.28L){ + s = sinl(x); + c = cosl(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - cos(x)) + * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + * = -1/sqrt(2) * (cos(x) + sin(x)) + * To avoid cancellation, use + * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + * to compute the worse one. 
+ */ + if(x>1.0e2450L) { /* x+x may overflow */ + ss = -s-c; + cc = s-c; + } else if(signbitl(s)!=signbitl(c)) { + cc = s - c; + ss = cosl(x+x)/cc; + } else { + ss = -s-c; + cc = cosl(x+x)/ss; + } + /* + * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss) + * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc) + */ + if(x>1.0e120L) return (invsqrtpi*cc)/sqrtl(x); + d = invsqrtpi*(pone(x)*cc-qone(x)*ss)/sqrtl(x); + if(sgn==0) return d; else return -d; + } + if(x<=small) { + if(x<=tiny) d = 0.5L*x; + else d = x*(0.5L-x*x*0.125L); + if(sgn==0) return d; else return -d; + } + z = x*x; + r = r0[6]; + s = s0[6]; + for(i=5;i>=0;i--) { + r = r*z + r0[i]; + s = s*z + s0[i]; + } + d = x*0.5L+x*(z*(r/s)); + if(sgn==0) return d; else return -d; +} + +static const GENERIC u0[7] = { + -1.960570906462389484060557273467558703503e-0001L, + 5.166389353148318460304315890665450006495e-0002L, + -2.229699464105910913337190798743451115604e-0003L, + 3.625437034548863342715657067759078267158e-0005L, + -2.689902826993117212255524537353883987171e-0007L, + 9.304570592456930912969387719010256018466e-0010L, + -1.234878126794286643318321347997500346131e-0012L, +}; +static const GENERIC v0[8] = { + 1.0e0L, + 1.369394302535807332517110204820556695644e-0002L, + 9.508438148097659501433367062605935379588e-0005L, + 4.399007309420092056052714797296467565655e-0007L, + 1.488083087443756398305819693177715000787e-0009L, + 3.751609832625793536245746965768587624922e-0012L, + 6.680926434086257291872903276124244131448e-0015L, + 6.676602383908906988160099057991121446058e-0018L, +}; + +GENERIC +y1l(x) GENERIC x;{ + GENERIC z, s,c,ss,cc,u,v; + int i; + + if(isnanl(x)) return x+x; + if(x <= zero){ + if(x==zero) + return -one/zero; + else + return zero/zero; + } + if(x > 1.28L){ + if(!finitel(x)) return zero; + s = sinl(x); + c = cosl(x); + /* + * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x0)-q1(x)*sin(x0)) + * where x0 = x-3pi/4 + * Better formula: + * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + * = 1/sqrt(2) * (sin(x) - 
cos(x))
+		 * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+		 * = -1/sqrt(2) * (cos(x) + sin(x))
+		 * To avoid cancellation, use
+		 * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+		 * to compute the worse one.
+		 */
+		if(x>1.0e2450L) { /* x+x may overflow */
+			ss = -s-c;
+			cc = s-c;
+		} else if(signbitl(s)!=signbitl(c)) {	/* opposite signs: s - c adds two magnitudes */
+			cc = s - c;
+			ss = cosl(x+x)/cc;	/* cc*ss equals cos(2x), so divide to get the other factor */
+		} else {	/* same sign: -s-c is the one without cancellation */
+			ss = -s-c;
+			cc = cosl(x+x)/ss;
+		}
+		/*
+		 * j1(x) = 1/sqrt(pi*x) * (P(1,x)*cc - Q(1,x)*ss)
+		 * y1(x) = 1/sqrt(pi*x) * (P(1,x)*ss + Q(1,x)*cc)
+		 */
+		if(x>1.0e91L) return (invsqrtpi*ss)/sqrtl(x);	/* past 1.0e91 only the ss term is kept */
+		return invsqrtpi*(pone(x)*ss+qone(x)*cc)/sqrtl(x);
+	}
+	if(x<=tiny) {	/* only the -tpi/x term is returned */
+		return(-tpi/x);
+	}
+	z = x*x;
+	u = u0[6]; v = v0[6]+z*v0[7];	/* v0 has one more term than u0; fold it in first */
+	for(i=5;i>=0;i--){
+		u = u*z + u0[i];
+		v = v*z + v0[i];
+	}
+	return(x*(u/v) + tpi*(j1l(x)*logl(x)-one/x));
+}
+
+static const GENERIC pr0[12] = {	/* numerator table read by pone() when x > sixteen */
+	1.000000000000000000000000000000000000267e+0000L,
+	1.060717875045891455602180843276758003035e+0003L,
+	4.344347542892127024446687712181105852335e+0005L,
+	8.915680220724007016377924252717410457094e+0007L,
+	9.969502259938406062809873257569171272819e+0009L,
+	6.200290193138613035646510338707386316595e+0011L,
+	2.105978548788015119851815854422247330118e+0013L,
+	3.696635772784601239371730810311998368948e+0014L,
+	3.015913097920694682057958412534134515156e+0015L,
+	9.370298471339353098123277427328592725921e+0015L,
+	7.190349005196335967340799265074029443057e+0015L,
+	2.736097786240689996880391074927552517982e+0014L,
+};
+static const GENERIC ps0[11] = {	/* denominator table read by pone() when x > sixteen */
+	1.0e0L,
+	1.060600687545891455602180843276758095107e+0003L,
+	4.343106093416975589147153906505338900961e+0005L,
+	8.910605869002176566582072242244353399059e+0007L,
+	9.959122058635087888690713917622056540190e+0009L,
+	6.188744967234948231792482949171041843894e+0011L,
+	2.098863976953783506401759873801990304907e+0013L,
+	3.672870357018063196746729751479938908450e+0014L,
+	2.975538419246824921049011529574385888420e+0015L,
+	9.063657659995043205018686029284479837091e+0015L, +
6.401953344314747916729366441508892711691e+0015L, +}; +static const GENERIC pr1[12] = { + 1.000000000000000000000023667524130660984e+0000L, + 6.746154419979618754354803488126452971204e+0002L, + 1.811210781083390154857018330296145970502e+0005L, + 2.533098390379924268038005329095287842244e+0007L, + 2.029683619805342145252338570875424600729e+0009L, + 9.660859662192711465301069401598929980319e+0010L, + 2.743396238644831519934098967716621316316e+0012L, + 4.553097354140854377931023170263455246288e+0013L, + 4.210245069852219757476169864974870720374e+0014L, + 1.987334056229596485076645967176169801727e+0015L, + 4.067120052787096893838970455751338930462e+0015L, + 2.486539606380406398310845264910691398133e+0015L, +}; +static const GENERIC ps1[14] = { + 1.0e0L, + 6.744982544979618754355808680196859521782e+0002L, + 1.810421795396966762032155290441364740350e+0005L, + 2.530986460644310651529583759699988435573e+0007L, + 2.026743276048023121360249288818290224145e+0009L, + 9.637461924407405935245269407052641341836e+0010L, + 2.732378628423766417402292797028314160831e+0012L, + 4.522345274960527124354844364012184278488e+0013L, + 4.160650668341743132685335758415469856545e+0014L, + 1.943730242988858208243492424892435901211e+0015L, + 3.880228532692127989901131618598067450001e+0015L, + 2.178020816161154615841000173683302999728e+0015L, + -8.994062666842225551554346698171600634173e+0013L, + 1.368520368508851253495764806934619574990e+0013L, +}; +static const GENERIC pr2[12] = { + 1.000000000000000006938651621840396237282e+0000L, + 3.658416291850404981407101077037948144698e+0002L, + 5.267073772170356547709794670602812447537e+0004L, + 3.912012101226837463014925210735894620442e+0006L, + 1.651295648974103957193874928714180765625e+0008L, + 4.114901144480797609972484998142146783499e+0009L, + 6.092524309766036681542980572526335147672e+0010L, + 5.263913178071282616719249969074134570577e+0011L, + 2.538408581124324223367341020538081330994e+0012L, + 6.288607929360291027895126983015365677648e+0012L, + 
6.848330048211148419047055075386525945280e+0012L, + 2.290309646838867941423178163991423244690e+0012L, +}; +static const GENERIC ps2[14] = { + 1.0e0L, + 3.657244416850405086459410165762319861856e+0002L, + 5.262802358425023243992387075861237306312e+0004L, + 3.905896813959919648136295861661483848364e+0006L, + 1.646791907791461220742694842108202772763e+0008L, + 4.096132803064256022224954120208201437344e+0009L, + 6.046665195915950447544429445730680236759e+0010L, + 5.198061739781991313414052212328653295168e+0011L, + 2.484233851814333966401527626421254279796e+0012L, + 6.047868806925315879339651539434315255940e+0012L, + 6.333103831254091652501642567294101813354e+0012L, + 1.875143098754284994467609936924685024968e+0012L, + -5.238330920563392692965412762508813601534e+0010L, + 4.656888609439364725427789198383779259957e+0009L, +}; +static const GENERIC pr3[13] = { + 1.000000000000009336887318068056137842897e+0000L, + 2.242719942728459588488051572002835729183e+0002L, + 1.955450611382026550266257737331095691092e+0004L, + 8.707143293993619899395400562409175590739e+0005L, + 2.186267894487004565948324289010954505316e+0007L, + 3.224328510541957792360691585667502864688e+0008L, + 2.821057355151380597331792896882741364897e+0009L, + 1.445371387295422404365584793796028979840e+0010L, + 4.181743160669891357783011002656658107864e+0010L, + 6.387371088767993119325536137794535513922e+0010L, + 4.575619999412716078064070587767416436396e+0010L, + 1.228415651211639160620284441690503550842e+0010L, + 7.242170349875563053436050532153112882072e+0008L, +}; +static const GENERIC ps3[13] = { + 1.0e0L, + 2.241548067728529551049804610486061401070e+0002L, + 1.952838216795552145132137932931237181307e+0004L, + 8.684574926493185744628127341069974575526e+0005L, + 2.176357771067037962940853412819852189164e+0007L, + 3.199958682356132977319258783167122100567e+0008L, + 2.786218931525334687844675219914201872570e+0009L, + 1.416283776951741549631417572317916039767e+0010L, + 
4.042962659271567948735676834609348842922e+0010L, + 6.028168462646694510083847222968444402161e+0010L, + 4.118410226794641413833887606580085281111e+0010L, + 9.918735736297038430744161253338202230263e+0009L, + 4.092967198238098023219124487437130332038e+0008L, +}; +static const GENERIC pr4[13] = { + 1.000000000001509220978157399042059553390e+0000L, + 1.437551868378147851133499996323782607787e+0002L, + 7.911335537418177296041518061404505428004e+0003L, + 2.193710939115317214716518908935756104804e+0005L, + 3.390662495136730962513489796538274984382e+0006L, + 3.048655347929348891006070609293884274789e+0007L, + 1.613781633489496606354045161527450975195e+0008L, + 4.975089835037230277110156150038482159988e+0008L, + 8.636047087015115403880904418339566323264e+0008L, + 7.918202912328366140110671223076949101509e+0008L, + 3.423294665798984733439650311722794853294e+0008L, + 5.621904953441963961040503934782662613621e+0007L, + 2.086303543310240260758670404509484499793e+0006L, +}; +static const GENERIC ps4[13] = { + 1.0e0L, + 1.436379993384532371670493319591847362304e+0002L, + 7.894647154785430678061053848847436659499e+0003L, + 2.184659753392097529008981741550878586174e+0005L, + 3.366109083305465176803513738147049499361e+0006L, + 3.011911545968996817697665866587226343186e+0007L, + 1.582262913779689851316760148459414895301e+0008L, + 4.819268809494937919217938589530138201770e+0008L, + 8.201355762990450679702837123432527154830e+0008L, + 7.268232093982510937417446421282341425212e+0008L, + 2.950911909015572933262131323934036480462e+0008L, + 4.242839924305934423010858966540621219396e+0007L, + 1.064387620445090779182117666330405186866e+0006L, +}; +static const GENERIC pr5[13] = { + 1.000000000102434805241171427253847353861e+0000L, + 9.129332257083629259060502249025963234821e+0001L, + 3.132238483586953037576119377504557191413e+0003L, + 5.329782528269307971278943122454171107861e+0004L, + 4.988460157184117790692873002103052944145e+0005L, + 2.686602071615786816147010334256047469378e+0006L, + 
8.445418526028961197703799808701268301831e+0006L, + 1.536575358646141157475725889907900827390e+0007L, + 1.568405818236523821796862770586544811945e+0007L, + 8.450876239888770102387618667362302173547e+0006L, + 2.154414900139567328424026827163203446077e+0006L, + 2.105656926565043898888460254808062352205e+0005L, + 4.739165011023396507022134303736862812975e+0003L, +}; +static const GENERIC ps5[13] = { + 1.0e0L, + 9.117613509595327476509152673394703847793e+0001L, + 3.121697972484015639301279229281770795147e+0003L, + 5.294447222735893568040911873834576440255e+0004L, + 4.930368882192772335798256684110887882807e+0005L, + 2.634854685641165298302167435798357437768e+0006L, + 8.185462775400326393555896157031818280918e+0006L, + 1.462417423080215192609668642663030667086e+0007L, + 1.450624993985851675982860844153954896015e+0007L, + 7.460467647561995283219086567162006113864e+0006L, + 1.754210981405612478869227142579056338965e+0006L, + 1.463286721155271971526264914524746699596e+0005L, + 2.155894725796702015341211116579827039459e+0003L, +}; +static const GENERIC pr6[13] = { + 1.000000003564855546741735920315743157129e+0000L, + 5.734003934862540458119423509909510288366e+0001L, + 1.209572491935850486086559692291796887976e+0003L, + 1.243398391422281247933674779163660286838e+0004L, + 6.930996755181437937258220998601708278787e+0004L, + 2.198067659532757598646722249966767620099e+0005L, + 4.033659432712058633933179115820576858455e+0005L, + 4.257759657219008027016047206574574358678e+0005L, + 2.511917395876004349480721277445763916389e+0005L, + 7.813756153070623654178731651381881953552e+0004L, + 1.152069173381127881385588092905864352891e+0004L, + 6.548580782804088553777816037551523398082e+0002L, + 8.668725370116906132327542766127938496880e+0000L, +}; +static const GENERIC ps6[13] = { + 1.0e0L, + 5.722285236357114566499221525736286205184e+0001L, + 1.203010842878317935444582950620339570506e+0003L, + 1.230058335378583550155825502172435371208e+0004L, + 
6.800998550607861288865300438648089894412e+0004L,
+	2.130767829599304262987769347536850885921e+0005L,
+	3.840483466643916681759936972992155310026e+0005L,
+	3.947432373459225542861819148108081160393e+0005L,
+	2.237816239393081111481588434457838526738e+0005L,
+	6.545820495124419723398946273790921540774e+0004L,
+	8.729563630320892741500726213278834737196e+0003L,
+	4.130762660291894753450174794196998813709e+0002L,
+	3.480368898672684645130335786015075595598e+0000L,
+};
+static const GENERIC sixteen = 16.0L;
+static const GENERIC eight = 8.0L;
+static const GENERIC huge = 1.0e30L;
+/*
+ * pone(x) returns r/s, where r and s are polynomials in z = (1/x)^2
+ * evaluated by Horner's rule.  j1l() and y1l() use the result as the
+ * P(1,x) factor of their 1/sqrt(pi*x) form.  The coefficient pair prN/psN
+ * is picked by the first threshold that x exceeds: sixteen, eight, five,
+ * 3.5, 2.5 and 1/0.5625; every other x falls through to pr6/ps6.  Both
+ * callers only reach pone() with x > 1.28, which is what that last branch
+ * assumes.  For x > huge the function returns one without evaluating
+ * anything.
+ */
+static GENERIC pone(x)
+GENERIC x;
+{
+	GENERIC s,r,t,z;	/* s: denominator, r: numerator, t: 1/x, z: t*t */
+	int i;
+	if(x>huge) return one;	/* no table lookup past huge */
+	t = one/x; z = t*t;
+	if(x>sixteen) {
+		r = z*pr0[11]+pr0[10]; s = ps0[10];	/* numerator has one more term; fold it in first */
+		for(i=9;i>=0;i--) {
+			r = z*r + pr0[i];
+			s = z*s + ps0[i];
+		}
+	} else if(x>eight) {
+		r = pr1[11]; s = ps1[11]+z*(ps1[12]+z*ps1[13]);	/* denominator has two more terms; fold them in first */
+		for(i=10;i>=0;i--) {
+			r = z*r + pr1[i];
+			s = z*s + ps1[i];
+		}
+	} else if(x>five) {
+		r = pr2[11]; s = ps2[11]+z*(ps2[12]+z*ps2[13]);	/* same shape as the pr1/ps1 case */
+		for(i=10;i>=0;i--) {
+			r = z*r + pr2[i];
+			s = z*s + ps2[i];
+		}
+	} else if( x>3.5L) {
+		r = pr3[12]; s = ps3[12];	/* from here on both tables have 13 terms */
+		for(i=11;i>=0;i--) {
+			r = z*r + pr3[i];
+			s = z*s + ps3[i];
+		}
+	} else if( x>2.5L) {
+		r = pr4[12]; s = ps4[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + pr4[i];
+			s = z*s + ps4[i];
+		}
+	} else if( x> (1.0L/0.5625L)){
+		r = pr5[12]; s = ps5[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + pr5[i];
+			s = z*s + ps5[i];
+		}
+	} else { /* assume x > 1.28 */
+		r = pr6[12]; s = ps6[12];
+		for(i=11;i>=0;i--) {
+			r = z*r + pr6[i];
+			s = z*s + ps6[i];
+		}
+	}
+	return r/s;
+}
+
+
+static const GENERIC qr0[12] = {
+	3.749999999999999999999999999999999971033e-0001L,
+	4.256726035237050601607682277433094262226e+0002L,
+	1.875976490812878489192409978945401066066e+0005L,
+	4.170314268048041914273603680317745592790e+0007L,
+	5.092750132543855817293451118974555746551e+0009L,
+	3.494749676278488654103505795794139483404e+0011L, +
1.327062148257437316997667817096694173709e+0013L, + 2.648993138273427226907503742066551150490e+0014L, + 2.511695665909547412222430494473998127684e+0015L, + 9.274694506662289043224310499164702306096e+0015L, + 8.150904170663663829331320302911792892002e+0015L, + -5.001918733707662355772037829620388765122e+0014L, +}; +static const GENERIC qs0[11] = { + 1.0e0L, + 1.135400380229880160428715273982155760093e+0003L, + 5.005701183877126164326765545516590744360e+0005L, + 1.113444200113712167984337603933040102987e+0008L, + 1.361074819925223062778717565699039471124e+0010L, + 9.355750985802849484438933905325982809653e+0011L, + 3.563462786008988825003965543857998084828e+0013L, + 7.155145113900094163648726863803802910454e+0014L, + 6.871266835834472758055559013851843654113e+0015L, + 2.622030899226736712644974988157345234092e+0016L, + 2.602912729172876330650077021706139707746e+0016L, +}; +static const GENERIC qr1[12] = { + 3.749999999999999999997762458207284405806e-0001L, + 2.697883998881706839929255517498189980485e+0002L, + 7.755195925781028489386938870473834411019e+0004L, + 1.166777762104017777198211072895528968355e+0007L, + 1.011504772984321168320010084520261069362e+0009L, + 5.246007703574156853577754571720205550010e+0010L, + 1.637692549885592683166116551691266537647e+0012L, + 3.022303623698185669912990310925039382495e+0013L, + 3.154769927290655684846107030265909987946e+0014L, + 1.715819913441554770089730934808123360921e+0015L, + 4.165044355759732622273534445131736188510e+0015L, + 3.151381420874174705643100381708086287596e+0015L, +}; +static const GENERIC qs1[14] = { + 1.0e0L, + 7.197091705351218239785633172408276982828e+0002L, + 2.070012799599548685544883041297609861055e+0005L, + 3.117014815317656221871840152778458754516e+0007L, + 2.705719678902554974863325877025902971727e+0009L, + 1.406113614727345726925060648750867264098e+0011L, + 4.403777536067131320363005978631674817359e+0012L, + 8.170725690209322283061499386703167242894e+0013L, + 
8.609458844975495289227794126964431210566e+0014L, + 4.766766367015473481257280600694952920204e+0015L, + 1.202286587943342194863557940888115641650e+0016L, + 1.012474328306200909525063936061756024120e+0016L, + 6.183552022678917858273222879615824070703e+0014L, + -9.756731548558226997573737400988225722740e+0013L, +}; +static const GENERIC qr2[12] = { + 3.749999999999999481245647262226994293189e-0001L, + 1.471366807289771354491181140167359026735e+0002L, + 2.279432486768448220142080962843526951250e+0004L, + 1.828943048523771225163804043356958285893e+0006L, + 8.379828388647823135832220596417725010837e+0007L, + 2.279814029335044024585393671278378022053e+0009L, + 3.711653952257118120832817785271466441420e+0010L, + 3.557650914518554549916730572553105048068e+0011L, + 1.924583483146095896259774329498934160650e+0012L, + 5.424386256063736390759567088291887140278e+0012L, + 6.839325621241776786206509704671746841737e+0012L, + 2.702169563144001166291686452305436313971e+0012L, +}; +static const GENERIC qs2[14] = { + 1.0e0L, + 3.926379194439388135703211933895203191089e+0002L, + 6.089148804106598297488336063007609312276e+0004L, + 4.893546162973278583711376356041614150645e+0006L, + 2.247571119114497845046388801813832219404e+0008L, + 6.137635663350177751290469334200757872645e+0009L, + 1.005115019784102856424493519524998953678e+0011L, + 9.725664462014503832860151384604677240620e+0011L, + 5.345525100485511116148634192844434636072e+0012L, + 1.549944007398946691720862738173956994779e+0013L, + 2.067148441178952625710302124163264760362e+0013L, + 9.401565402641963611295119487242595462301e+0012L, + 3.548217088622398274748837287769709374385e+0011L, + -2.934470341719047120076509938432417352365e+0010L, +}; +static const GENERIC qr3[13] = { + 3.749999999999412724084579833297451472091e-0001L, + 9.058478580291706212422978492938435582527e+0001L, + 8.524056033161038750461083666711724381171e+0003L, + 4.105967158629109427753434569223631014730e+0005L, + 1.118326603378531348259783091972623333657e+0007L, + 
1.794636683403578918528064904714132329343e+0008L, + 1.714314157463635959556133236004368896724e+0009L, + 9.622092032236084846572067257267661456030e+0009L, + 3.057759524485859159957762858780768355020e+0010L, + 5.129306780754798531609621454415938890020e+0010L, + 3.999122002794961070680636194346316041352e+0010L, + 1.122298454643493485989721564358100345388e+0010L, + 5.603981987645989709668830968522362582221e+0008L, +}; +static const GENERIC qs3[13] = { + 1.0e0L, + 2.418328663076578169836155170053634419922e+0002L, + 2.279620205900121042587523541281272875520e+0004L, + 1.100984222585729521470129014992217092794e+0006L, + 3.010743223679247091004262516286654516282e+0007L, + 4.860925542827367817289619265215599433996e+0008L, + 4.686668111035348691982715864307839581243e+0009L, + 2.668701788405102017427214705946730894074e+0010L, + 8.677395746106802640390580944836650584903e+0010L, + 1.511936455574951790658498795945106643036e+0011L, + 1.260845604432623478002018696873608353093e+0011L, + 4.052692278419853853911440231600864589805e+0010L, + 2.965516519212226064983267822243329694729e+0009L, +}; +static const GENERIC qr4[13] = { + 3.749999999919234164154669754440123072618e-0001L, + 5.844218580776819864791168253485055101858e+0001L, + 3.489273514092912982675669411371435670220e+0003L, + 1.050523637774575684509663430018995479594e+0005L, + 1.764549172059701565500717319792780115289e+0006L, + 1.725532438844133795028063102681497371154e+0007L, + 9.938114847359778539965140247590176334874e+0007L, + 3.331710808184595545396883770200772842314e+0008L, + 6.271970557641881511609560444872797282698e+0008L, + 6.188529798677357075020774923903737913285e+0008L, + 2.821905302742849974509982167877885011629e+0008L, + 4.615467358646911976773290256984329814896e+0007L, + 1.348140608731546467396685802693380693275e+0006L, +}; +static const GENERIC qs4[13] = { + 1.0e0L, + 1.561192663112345185261418296389902133372e+0002L, + 9.346678031144098270547225423124213083072e+0003L, + 
2.825851246482293547838023847601704751590e+0005L, + 4.776572711622156091710902891124911556293e+0006L, + 4.715106953717135402977938048006267859302e+0007L, + 2.753962350894311316439652227611209035193e+0008L, + 9.428501434615463207768964787500411575223e+0008L, + 1.832650858775206787088236896454141572617e+0009L, + 1.901697378939743226948920874296595242257e+0009L, + 9.433322226854293780627188599226380812725e+0008L, + 1.808520540608671608680284520798858587370e+0008L, + 7.983342331736662753157217446919462398008e+0006L, +}; +static const GENERIC qr5[13] = { + 3.749999995331364437028988850515190446719e-0001L, + 3.739356381766559882677514593041627547911e+0001L, + 1.399562500629413529355265462912819802551e+0003L, + 2.594154053098947925345332218062210111753e+0004L, + 2.640149879297408640394163979394594318371e+0005L, + 1.542471854873199142031889093591449397995e+0006L, + 5.242272868972053374067572098992335425895e+0006L, + 1.025834487769410221329633071426044839935e+0007L, + 1.116553924239448940142230579060124209622e+0007L, + 6.318076065595910176374916303525884653514e+0006L, + 1.641218086168640408527639735915512881785e+0006L, + 1.522369793529178644168813882912134706444e+0005L, + 2.526530541062297200914180060208669584055e+0003L, +}; +static const GENERIC qs5[13] = { + 1.0e0L, + 9.998960735935075380397545659016287506660e+0001L, + 3.758767417842043742686475060540416737562e+0003L, + 7.013652806952306520121959742519780781653e+0004L, + 7.208949808818615099246529616211730446850e+0005L, + 4.272753927109614455417836186072202009252e+0006L, + 1.482524411356470699336129814111025434703e+0007L, + 2.988750366665678233425279237627700803473e+0007L, + 3.396957890261080492694709150553619185065e+0007L, + 2.050652487738593004111578091156304540386e+0007L, + 5.900504120811732547616511555946279451316e+0006L, + 6.563391409260160897024498082273183468347e+0005L, + 1.692629845012790205348966731477187041419e+0004L, +}; +static const GENERIC qr6[13] = { + 3.749999861516664133157566870858975421296e-0001L, + 
2.367863756747764863120797431599473468918e+0001L, + 5.476715802114976248882067325630793143777e+0002L, + 6.143190357869842894025012945444096170251e+0003L, + 3.716250534677997850513733595140463851730e+0004L, + 1.270883463823876752138326905022875657430e+0005L, + 2.495301449636814481646371665429083801388e+0005L, + 2.789578988212952248340486296254398601942e+0005L, + 1.718247946911109055931819087137397324634e+0005L, + 5.458973214011665714330326732204106364229e+0004L, + 7.912102686687948786048943339759596652813e+0003L, + 4.077961006160866935722030715149087938091e+0002L, + 3.765206972770245085551057237882528510428e+0000L, +}; +static const GENERIC qs6[13] = { + 1.0e0L, + 6.341646532940517305641893852673926809601e+0001L, + 1.477058277414040790932597537920671025359e+0003L, + 1.674406564031044491436044253393536487604e+0004L, + 1.028516501369755949895050806908994650768e+0005L, + 3.593620042532885295087463507733285434207e+0005L, + 7.267924991381020915185873399453724799625e+0005L, + 8.462277510768818399961191426205006083088e+0005L, + 5.514399892230892163373611895645500250514e+0005L, + 1.898084241009259353540620272932188102299e+0005L, + 3.102941242117739015721984123081026253068e+0004L, + 1.958971184431466907681440650181421086143e+0003L, + 2.878853357310495087181721613889455121867e+0001L, +}; +static GENERIC qone(x) +GENERIC x; +{ + GENERIC s,r,t,z; + int i; + if(x>huge) return 0.375L/x; + t = one/x; z = t*t; + if(x>sixteen) { + r = z*qr0[11]+qr0[10]; s = qs0[10]; + for(i=9;i>=0;i--) { + r = z*r + qr0[i]; + s = z*s + qs0[i]; + } + } else if(x>eight) { + r = qr1[11]; s = qs1[11]+z*(qs1[12]+z*qs1[13]); + for(i=10;i>=0;i--) { + r = z*r + qr1[i]; + s = z*s + qs1[i]; + } + } else if (x>five) { /* x > 5.0 */ + r = qr2[11]; s = qs2[11]+z*(qs2[12]+z*qs2[13]); + for(i=10;i>=0;i--) { + r = z*r + qr2[i]; + s = z*s + qs2[i]; + } + } else if(x>3.5L) { + r = qr3[12]; s = qs3[12]; + for(i=11;i>=0;i--) { + r = z*r + qr3[i]; + s = z*s + qs3[i]; + } + } else if(x>2.5L) { + r = qr4[12]; s = qs4[12]; 
+ for(i=11;i>=0;i--) { + r = z*r + qr4[i]; + s = z*s + qs4[i]; + } + } else if(x> (1.0L/0.5625L)) { + r = qr5[12]; s = qs5[12]; + for(i=11;i>=0;i--) { + r = z*r + qr5[i]; + s = z*s + qs5[i]; + } + } else { /* assume x > 1.28 */ + r = qr6[12]; s = qs6[12]; + for(i=11;i>=0;i--) { + r = z*r + qr6[i]; + s = z*s + qs6[i]; + } + } + return t*(r/s); +} diff --git a/usr/src/libm/src/Q/jnl.c b/usr/src/libm/src/Q/jnl.c new file mode 100644 index 0000000..3016e22 --- /dev/null +++ b/usr/src/libm/src/Q/jnl.c @@ -0,0 +1,269 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)jnl.c 1.11 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak jnl = __jnl +#pragma weak ynl = __ynl +#endif + +/* + * floating point Bessel's function of the 1st and 2nd kind + * of order n: jn(n,x),yn(n,x); + * + * Special cases: + * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; + * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. + * Note 2. 
About jn(n,x), yn(n,x) + * For n=0, j0(x) is called, + * for n=1, j1(x) is called, + * for nx, a continued fraction approximation to + * j(n,x)/j(n-1,x) is evaluated and then backward + * recursion is used starting from a supposed value + * for j(n,x). The resulting value of j(0,x) is + * compared with the actual value to correct the + * supposed value of j(n,x). + * + * yn(n,x) is similar in all respects, except + * that forward recursion is used for all + * values of n>1. + * + */ + +#include "libm.h" +#include /* LDBL_MAX */ + +#define GENERIC long double + +static const GENERIC +invsqrtpi= 5.641895835477562869480794515607725858441e-0001L, +two = 2.0L, +zero = 0.0L, +one = 1.0L; + +GENERIC +jnl(n,x) int n; GENERIC x;{ + int i, sgn; + GENERIC a, b, temp, z, w; + + /* + * J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) + * Thus, J(-n,x) = J(n,-x) + */ + if(n<0){ + n = -n; + x = -x; + } + if(n==0) return(j0l(x)); + if(n==1) return(j1l(x)); + if(x!=x) return x+x; + if((n&1)==0) + sgn=0; /* even n */ + else + sgn = signbitl(x); /* old n */ + x = fabsl(x); + if(x == zero||!finitel(x)) b = zero; + else if((GENERIC)n<=x) { /* Safe to use + J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + */ + if(x>1.0e91L) { /* x >> n**2 + Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + Let s=sin(x), c=cos(x), + xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + + n sin(xn)*sqt2 cos(xn)*sqt2 + ---------------------------------- + 0 s-c c+s + 1 -s-c -c+s + 2 -s+c -c-s + 3 s+c c-s + */ + switch(n&3) { + case 0: temp = cosl(x)+sinl(x); break; + case 1: temp = -cosl(x)+sinl(x); break; + case 2: temp = -cosl(x)-sinl(x); break; + case 3: temp = cosl(x)-sinl(x); break; + } + b = invsqrtpi*temp/sqrtl(x); + } else { + a = j0l(x); + b = j1l(x); + for(i=1;i 1e4 good for single + * When Q(k) > 1e9 good for double + * When Q(k) > 1e17 good for quaduple + */ + /* determin k */ + GENERIC t,v; + double q0,q1,h,tmp; int k,m; + w = (n+n)/(double)x; h = 2.0/(double)x; + q0 = w; z = w+h; 
q1 = w*z - 1.0; k=1; + while(q1<1.0e17) { + k += 1; z += h; + tmp = z*q1 - q0; + q0 = q1; + q1 = tmp; + } + m = n+n; + for(t=zero, i = 2*(n+k); i>=m; i -= 2) t = one/(i/x-t); + a = t; + b = one; + /* + * estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * long double 1.1356523406294143949491931077970765006170e+04 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + tmp = n; + v = two/x; + tmp = tmp*logl(fabsl(v*tmp)); + if(tmp<1.1356523406294143949491931077970765e+04L) { + for(i=n-1;i>0;i--){ + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + } + } else { + for(i=n-1;i>0;i--){ + temp = b; + b = ((i+i)/x)*b - a; + a = temp; + if(b>1e1000L) { + a /= b; + t /= b; + b = 1.0; + } + } + } + b = (t*j0l(x)/b); + } + } + if(sgn==1) return -b; else return b; +} + +GENERIC ynl(n,x) +int n; GENERIC x;{ + int i; + int sign; + GENERIC a, b, temp; + + if(x!=x) return x+x; + if (x <= zero) + if(x==zero) + return -one/zero; + else + return zero/zero; + sign = 1; + if(n<0){ + n = -n; + if((n&1) == 1) sign = -1; + } + if(n==0) return(y0l(x)); + if(n==1) return(sign*y1l(x)); + if(!finitel(x)) return zero; + + if(x>1.0e91L) { /* x >> n**2 + Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + Let s=sin(x), c=cos(x), + xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + + n sin(xn)*sqt2 cos(xn)*sqt2 + ---------------------------------- + 0 s-c c+s + 1 -s-c -c+s + 2 -s+c -c-s + 3 s+c c-s + */ + switch(n&3) { + case 0: temp = sinl(x)-cosl(x); break; + case 1: temp = -sinl(x)-cosl(x); break; + case 2: temp = -sinl(x)+cosl(x); break; + case 3: temp = sinl(x)+cosl(x); break; + } + b = invsqrtpi*temp/sqrtl(x); + } else { + a = y0l(x); + b = y1l(x); + /* + * fix 1262058 and take care of non-default rounding + */ + for (i = 1; i < n; i++) { + temp = b; + b *= (GENERIC) (i + i) / x; + if (b <= -LDBL_MAX) + break; + b -= a; + a = temp; + } + } + 
if(sign>0) return b; else return -b; +} diff --git a/usr/src/libm/src/Q/lgammal.c b/usr/src/libm/src/Q/lgammal.c new file mode 100644 index 0000000..b66052f --- /dev/null +++ b/usr/src/libm/src/Q/lgammal.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lgammal.c 1.12 06/01/31 SMI" + +#pragma weak lgammal = __lgammal + +/* + * long double lgammal(long double x); + */ + +#include "libm.h" +#include "longdouble.h" + +extern int signgam; +extern int signgaml; + +long double +lgammal(long double x) { + long double y = __k_lgammal(x, &signgaml); + + signgam = signgaml; /* SUSv3 requires the setting of signgam */ + return (y); +} diff --git a/usr/src/libm/src/Q/lgammal_r.c b/usr/src/libm/src/Q/lgammal_r.c new file mode 100644 index 0000000..6f1d8f5 --- /dev/null +++ b/usr/src/libm/src/Q/lgammal_r.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lgammal_r.c 1.5 06/01/31 SMI" + +/* + * long double lgammal_r(long double x, int *signgamlp); + */ + +#pragma weak lgammal_r = __lgammal_r + +#include "libm.h" +#include "longdouble.h" + +long double +lgammal_r(long double x, int *signgamlp) { + return (__k_lgammal(x, signgamlp)); +} diff --git a/usr/src/libm/src/Q/log10l.c b/usr/src/libm/src/Q/log10l.c new file mode 100644 index 0000000..57696fa --- /dev/null +++ b/usr/src/libm/src/Q/log10l.c @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)log10l.c 1.9 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak log10l = __log10l +#endif + +/* + * log10l(X) + * + * Method : + * Let log10_2hi = leading 98(SPARC)/49(x86) bits of log10(2) and + * log10_2lo = log10(2) - log10_2hi, + * ivln10 = 1/log(10) rounded. + * Then + * n = ilogb(x), + * if(n<0) n = n+1; + * x = scalbn(x,-n); + * LOG10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) + * + * Note1: + * For fear of destroying log10(10**n)=n, the rounding mode is + * set to Round-to-Nearest. + * + * Special cases: + * log10(x) is NaN with signal if x < 0; + * log10(+INF) is +INF with no signal; log10(0) is -INF with signal; + * log10(NaN) is that NaN with no signal; + * log10(10**N) = N for N=0,1,...,22. + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ + +#include "libm.h" + +#if defined(__i386) +#define __swapRD __swap87RD +#endif +extern enum fp_direction_type __swapRD(enum fp_direction_type); + +static const long double + zero = 0.0L, + ivln10 = 4.342944819032518276511289189166050822944e-0001L, + one = 1.0L, +#if defined(__i386) + log10_2hi = 3.010299956639803653501985536422580480576e-01L, + log10_2lo = 8.298635403410822349787106337291183585413e-16L; +#elif defined(__sparc) + log10_2hi = 3.010299956639811952137388947242098603469e-01L, + log10_2lo = 2.831664213089468167896664371953210945664e-31L; +#else +#error Unknown Architecture! 
+#endif + +long double +log10l(long double x) { + long double y, z; + enum fp_direction_type rd; + int n; + + if (!finitel(x)) + return (x + fabsl(x)); /* x is +-INF or NaN */ + else if (x > zero) { + n = ilogbl(x); + if (n < 0) + n += 1; + rd = __swapRD(fp_nearest); + y = n; + x = scalbnl(x, -n); + z = y * log10_2lo + ivln10 * logl(x); + z += y * log10_2hi; + if (rd != fp_nearest) + (void) __swapRD(rd); + return (z); + } else if (x == zero) /* -INF */ + return (-one / zero); + else /* x <0 , return NaN */ + return (zero / zero); +} diff --git a/usr/src/libm/src/Q/log1pl.c b/usr/src/libm/src/Q/log1pl.c new file mode 100644 index 0000000..7088be1 --- /dev/null +++ b/usr/src/libm/src/Q/log1pl.c @@ -0,0 +1,216 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)log1pl.c 1.18 06/01/31 SMI" + +#ifdef __LITTLE_ENDIAN +#define H0(x) *(3 + (int *) &x) +#define H1(x) *(2 + (int *) &x) +#define H2(x) *(1 + (int *) &x) +#define H3(x) *(int *) &x +#else +#define H0(x) *(int *) &x +#define H1(x) *(1 + (int *) &x) +#define H2(x) *(2 + (int *) &x) +#define H3(x) *(3 + (int *) &x) +#endif + +/* + * log1pl(x) + * Table look-up algorithm by modifying logl.c + * By K.C. Ng, July 6, 1995 + * + * (a). For 1+x in [31/33,33/31], using a special approximation: + * s = x/(2.0+x); ... here |s| <= 0.03125 + * z = s*s; + * return x-s*(x-z*(B1+z*(B2+z*(B3+z*(B4+...+z*B9)...)))); + * (i.e., x is in [-2/33,2/31]) + * + * (b). Otherwise, normalize 1+x = 2^n * 1.f. + * Here we may need a correction term for 1+x rounded. + * Use a 6-bit table look-up: find a 6 bit g that match f to 6.5 bits, + * then + * log(1+x) = n*ln2 + log(1.g) + log(1.f/1.g). + * Here the leading and trailing values of log(1.g) are obtained from + * a size-64 table. + * For log(1.f/1.g), let s = (1.f-1.g)/(1.f+1.g). Note that + * 1.f = 2^-n(1+x) + * + * then + * log(1.f/1.g) = log((1+s)/(1-s)) = 2s + 2/3 s^3 + 2/5 s^5 +... + * Note that |s|<2**-8=0.00390625. We use an odd s-polynomial + * approximation to compute log(1.f/1.g): + * s*(A1+s^2*(A2+s^2*(A3+s^2*(A4+s^2*(A5+s^2*(A6+s^2*A7)))))) + * (Precision is 2**-136.91 bits, absolute error) + * + * CAUTION: + * For x>=1, compute 1+x will lost one bit (OK). + * For x in [-0.5,-1), 1+x is exact. + * For x in (-0.5,-2/33]U[2/31,1), up to 4 last bits of x will be lost + * in 1+x. Therefore, to recover the lost bits, one need to compute + * 1.f-1.g accurately. 
+ *
+ *	Let hx = HI(x), m = (hx>>16)-0x3fff (=ilogbl(x)), note that
+ *		-2/33 = -0.0606...= 2^-5 * 1.939...,
+ *		 2/31 =  0.0645...= 2^-4 * 1.032...,
+ *	so for x in (-0.5,-2/33], -5<=m<=-2, n= -1, 1+f=2*(1+x)
+ *	   for x in [2/33,1),     -5<=m<=-1, n=  0, f=x
+ *
+ *	In short:
+ *	if x>0, let g: hg= ((hx + (0x200<<(-m)))>>(10-m))<<(10-m)
+ *	then 1.f-1.g = x-g
+ *	if x<0, let g': hg' = ((ix + (0x200<<(-m-1)))>>(9-m))<<(9-m)
+ *			(ix=hx&0x7fffffff)
+ *	then 1.f-1.g = 2*(g'+x),
+ *
+ * (c). The final result is computed by
+ *	(n*ln2_hi+_TBL_logl_hi[j]) +
+ *			( (n*ln2_lo+_TBL_logl_lo[j]) + s*(A1+...) )
+ *
+ * Note.
+ *	For ln2_hi and _TBL_logl_hi[j], we force their last 32 bits to be zero
+ *	so that n*ln2_hi + _TBL_logl_hi[j] is exact. Here
+ *	_TBL_logl_hi[j] + _TBL_logl_lo[j] match log(1+j*2**-6) to 194 bits
+ *
+ *
+ * Special cases:
+ *	log1p(x) is NaN with signal if x < -1 (including -INF) ;
+ *	log1p(+INF) is +INF; log1p(-1) is -INF with signal;
+ *	log1p(NaN) is that NaN with no signal.
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following constants.
+ * The decimal values may be used, provided that the compiler will convert
+ * from decimal to binary accurately enough to produce the hexadecimal values
+ * shown.
+ */
+
+#pragma weak log1pl = __log1pl
+
+#include "libm.h"
+
+extern const long double _TBL_logl_hi[], _TBL_logl_lo[];
+
+static const long double
+zero = 0.0L,
+one = 1.0L,
+two = 2.0L,
+ln2hi = 6.931471805599453094172319547495844850203e-0001L,
+ln2lo = 1.667085920830552208890449330400379754169e-0025L,
+A1 = 2.000000000000000000000000000000000000024e+0000L,
+A2 = 6.666666666666666666666666666666091393804e-0001L,
+A3 = 4.000000000000000000000000407167070220671e-0001L,
+A4 = 2.857142857142857142730077490612903681164e-0001L,
+A5 = 2.222222222222242577702836920812882605099e-0001L,
+A6 = 1.818181816435493395985912667105885828356e-0001L,
+A7 = 1.538537835211839751112067512805496931725e-0001L,
+B1 = 6.666666666666666666666666666666961498329e-0001L,
+B2 = 3.999999999999999999999999990037655042358e-0001L,
+B3 = 2.857142857142857142857273426428347457918e-0001L,
+B4 = 2.222222222222222221353229049747910109566e-0001L,
+B5 = 1.818181818181821503532559306309070138046e-0001L,
+B6 = 1.538461538453809210486356084587356788556e-0001L,
+B7 = 1.333333344463358756121456892645178795480e-0001L,
+B8 = 1.176460904783899064854645174603360383792e-0001L,
+B9 = 1.057293869956598995326368602518056990746e-0001L;
+/*
+ * log1pl(x): log(1+x), long double.
+ *
+ * The argument is inspected through its 32-bit words (H0 is the word
+ * holding sign and exponent, H1..H3 the remaining words):
+ *   - |x| < 2/33 uses the B1..B9 polynomial in s = x/(2+x) directly;
+ *   - +-inf and NaN return x + fabsl(x);
+ *   - x <= -1 returns x/zero after x has been replaced by zero when
+ *     x < -1 (so -1 gives -inf and anything below gives NaN);
+ *   - everything else is reduced to n*ln2 + table[j] + A-polynomial,
+ *     with t holding the reduced difference and z the table point.
+ * Local names: hx = H0(x), ix = hx without the sign bit, m = unbiased
+ * exponent of x, n = (rounded) unbiased exponent of y = 1+x, i = index
+ * word whose low 6 bits (j) select the _TBL_logl_hi/_lo entry.
+ */
+long double
+log1pl(long double x) {
+	long double f, s, z, qn, h, t, y, g;
+	int i, j, ix, iy, n, hx, m;
+
+	hx = H0(x);
+	ix = hx & 0x7fffffff;			/* drop the sign bit */
+	if (ix < 0x3ffaf07c) {	/* |x|<2/33 */
+		if (ix <= 0x3f8d0000) {	/* |x| <= 2**-114, return x */
+			/*
+			 * the conversion always yields 0 here; presumably
+			 * it is written this way to raise inexact for
+			 * nonzero x -- confirm
+			 */
+			if ((int) x == 0)
+				return (x);
+		}
+		s = x / (two + x);	/* |s|<2**-8 */
+		z = s * s;
+		return (x - s * (x - z * (B1 + z * (B2 + z * (B3 + z * (B4 +
+		    z * (B5 + z * (B6 + z * (B7 + z * (B8 + z * B9))))))))));
+	}
+	if (ix >= 0x7fff0000) {	/* x is +-inf or NaN; -inf + inf gives NaN */
+		return (x + fabsl(x));
+	}
+	if (hx < 0 && ix >= 0x3fff0000) {	/* x <= -1 */
+		/* any fraction bit set, or larger exponent: x < -1 */
+		if (ix > 0x3fff0000 || (H1(x) | H2(x) | H3(x)) != 0)
+			x = zero;
+		return (x / zero);	/* log1p(x) is NaN if x<-1 */
+					/* log1p(-1) is -inf */
+	}
+	if (ix >= 0x7ffeffff)
+		y = x;	/* avoid spurious overflow */
+	else
+		y = one + x;
+	iy = H0(y);
+	n = ((iy + 0x200) >> 16) - 0x3fff;	/* exponent of y, rounded up near 2 */
+	iy = (iy & 0x0000ffff) | 0x3fff0000;	/* scale 1+x to [1,2] */
+	H0(y) = iy;
+	z = zero;
+	m = (ix >> 16) - 0x3fff;		/* unbiased exponent of x */
+	/* HI(1+x) = (((hx&0xffff)|0x10000)>>(-m))|0x3fff0000 */
+	if (n == 0) {	/* x in [2/33,1) */
+		/* g = x rounded to the table spacing; t = x - g is 1.f - 1.g */
+		g = zero;
+		H0(g) = ((hx + (0x200 << (-m))) >> (10 - m)) << (10 - m);
+		t = x - g;
+		i = (((((hx & 0xffff) | 0x10000) >> (-m)) | 0x3fff0000) +
+		    0x200) >> 10;
+		H0(z) = i << 10;	/* z = table point built from the index word */
+
+	} else if ((1 + n) == 0 && (ix < 0x3ffe0000)) {	/* x in (-0.5,-2/33] */
+		/* g = |x| rounded; t = 2*(g + x) is 1.f - 1.g */
+		g = zero;
+		H0(g) = ((ix + (0x200 << (-m - 1))) >> (9 - m)) << (9 - m);
+		t = g + x;
+		t = t + t;
+		/*
+		 * HI(2*(1+x)) =
+		 * ((0x10000-(((hx&0xffff)|0x10000)>>(-m)))<<1)|0x3fff0000
+		 */
+		/*
+		 * i =
+		 * ((((0x10000-(((hx&0xffff)|0x10000)>>(-m)))<<1)|0x3fff0000)+
+		 * 0x200)>>10; H0(z)=i<<10;
+		 */
+		z = two * (one - g);
+		i = H0(z) >> 10;
+	} else {
+		/* general case: round the scaled y itself to a table point */
+		i = (iy + 0x200) >> 10;
+		H0(z) = i << 10;
+		t = y - z;
+	}
+
+	s = t / (y + z);
+	j = i & 0x3f;				/* 6-bit table index */
+	z = s * s;
+	qn = (long double) n;
+	t = qn * ln2lo + _TBL_logl_lo[j];	/* low-order parts */
+	h = qn * ln2hi + _TBL_logl_hi[j];	/* high-order parts */
+	f = t + s * (A1 + z * (A2 + z * (A3 + z * (A4 + z * (A5 + z * (A6 +
+	    z * A7))))));
+	return (h + f);
+}
diff --git a/usr/src/libm/src/Q/log2l.c b/usr/src/libm/src/Q/log2l.c
new file mode 100644
index 0000000..74cc06e
--- /dev/null
+++ b/usr/src/libm/src/Q/log2l.c
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)log2l.c 1.6 06/01/31 SMI" + +/* + * log2l(x) + * RETURN THE BASE 2 LOGARITHM OF X + * + * Method: + * purge off 0,INF, and NaN. + * n = ilogb(x) + * if(n<0) n+=1 + * z = scalbn(x,-n) + * LOG2(x) = n + (1/ln2)*log(x) + */ + +#pragma weak log2l = __log2l + +#include "libm.h" + +static const long double + zero = 0.0L, + half = 0.5L, + one = 1.0L, + invln2 = 1.442695040888963407359924681001892137427e+0000L; + +long double +log2l(long double x) { + int n; + + if (x == zero || !finitel(x)) + return (logl(x)); + n = ilogbl(x); + if (n < 0) + n += 1; + x = scalbnl(x, -n); + if (x == half) + return (n - one); + return (n + invln2 * logl(x)); +} diff --git a/usr/src/libm/src/Q/logbl.c b/usr/src/libm/src/Q/logbl.c new file mode 100644 index 0000000..09e3083 --- /dev/null +++ b/usr/src/libm/src/Q/logbl.c @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)logbl.c 1.9 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak logbl = __logbl +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb + +#if defined(__sparc) +#define ISNORMALL(k, x) (k != 0x7fff) /* assuming k != 0 */ +#define X86PDNRM(k, x) +#define XSCALE_OFFSET 0x406f /* 0x3fff + 112 */ +static const long double xscale = 5192296858534827628530496329220096.0L; + /* 2^112 */ +#elif defined(__i386) +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM(k, x) if (k == 0 && (((int *) &x)[1] & 0x80000000) != 0) \ + ((int *) &x)[2] |= k = 1 +#if defined(HANDLE_UNSUPPORTED) /* assuming k != 0 */ +#define ISNORMALL(k, x) (k != 0x7fff && (((int *) &x)[1] & 0x80000000) != 0) +#else +#define ISNORMALL(k, x) (k != 0x7fff) +#endif +#define XSCALE_OFFSET 0x403e /* 0x3fff + 63 */ +static const long double xscale = 9223372036854775808.0L; /* 2^63 */ +#endif + +static long double +raise_division(long double v) { +#pragma STDC FENV_ACCESS ON + static const long double zero = 0.0L; + return (v / zero); +} + +long double +logbl(long double x) { + int k = XBIASED_EXP(x); + + X86PDNRM(k, x); + if (k == 0) { + if (ISZEROL(x)) + return (raise_division(-1.0L)); + else if ((__xpg6 & _C99SUSv3_logb) != 0) { + x *= xscale; /* scale up by 2^112 or 2^63 */ + return (long double) (XBIASED_EXP(x) - XSCALE_OFFSET); + } else + return (-16382.L); + } else if (ISNORMALL(k, x)) + return ((long double) (k - 0x3fff)); + else + return (x * x); +} diff --git a/usr/src/libm/src/Q/logl.c b/usr/src/libm/src/Q/logl.c new file mode 100644 index 
0000000..9d76ca4 --- /dev/null +++ b/usr/src/libm/src/Q/logl.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)logl.c 1.8 06/01/31 SMI" + +#pragma weak logl = __logl + +/* + * logl(x) + * Table look-up algorithm + * By K.C. Ng, March 6, 1989 + * + * (a). For x in [31/33,33/31], using a special approximation: + * f = x - 1; + * s = f/(2.0+f); ... here |s| <= 0.03125 + * z = s*s; + * return f-s*(f-z*(B1+z*(B2+z*(B3+z*(B4+...+z*B9)...)))); + * + * (b). Otherwise, normalize x = 2^n * 1.f. + * Use a 6-bit table look-up: find a 6 bit g that match f to 6.5 bits, + * then + * log(x) = n*ln2 + log(1.g) + log(1.f/1.g). + * Here the leading and trailing values of log(1.g) are obtained from + * a size-64 table. + * For log(1.f/1.g), let s = (1.f-1.g)/(1.f+1.g), then + * log(1.f/1.g) = log((1+s)/(1-s)) = 2s + 2/3 s^3 + 2/5 s^5 +... + * Note that |s|<2**-8=0.00390625. 
We use an odd s-polynomial + * approximation to compute log(1.f/1.g): + * s*(A1+s^2*(A2+s^2*(A3+s^2*(A4+s^2*(A5+s^2*(A6+s^2*A7)))))) + * (Precision is 2**-136.91 bits, absolute error) + * + * (c). The final result is computed by + * (n*ln2_hi+_TBL_logl_hi[j]) + + * ( (n*ln2_lo+_TBL_logl_lo[j]) + s*(A1+...) ) + * + * Note. + * For ln2_hi and _TBL_logl_hi[j], we force their last 32 bit to be zero + * so that n*ln2_hi + _TBL_logl_hi[j] is exact. Here + * _TBL_logl_hi[j] + _TBL_logl_lo[j] match log(1+j*2**-6) to 194 bits + * + * + * Special cases: + * log(x) is NaN with signal if x < 0 (including -INF) ; + * log(+INF) is +INF; log(0) is -INF with signal; + * log(NaN) is that NaN with no signal. + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ + +#include "libm.h" + +extern const long double _TBL_logl_hi[], _TBL_logl_lo[]; + +static const long double + zero = 0.0L, + one = 1.0L, + two = 2.0L, + two113 = 10384593717069655257060992658440192.0L, + ln2hi = 6.931471805599453094172319547495844850203e-0001L, + ln2lo = 1.667085920830552208890449330400379754169e-0025L, + A1 = 2.000000000000000000000000000000000000024e+0000L, + A2 = 6.666666666666666666666666666666091393804e-0001L, + A3 = 4.000000000000000000000000407167070220671e-0001L, + A4 = 2.857142857142857142730077490612903681164e-0001L, + A5 = 2.222222222222242577702836920812882605099e-0001L, + A6 = 1.818181816435493395985912667105885828356e-0001L, + A7 = 1.538537835211839751112067512805496931725e-0001L, + B1 = 6.666666666666666666666666666666961498329e-0001L, + B2 = 3.999999999999999999999999990037655042358e-0001L, + B3 = 2.857142857142857142857273426428347457918e-0001L, + B4 = 2.222222222222222221353229049747910109566e-0001L, + B5 = 1.818181818181821503532559306309070138046e-0001L, + B6 = 
1.538461538453809210486356084587356788556e-0001L, + B7 = 1.333333344463358756121456892645178795480e-0001L, + B8 = 1.176460904783899064854645174603360383792e-0001L, + B9 = 1.057293869956598995326368602518056990746e-0001L; + +long double +logl(long double x) { + long double f, s, z, qn, h, t; + int *px = (int *) &x; + int *pz = (int *) &z; + int i, j, ix, i0, i1, n; + + /* get long double precision word ordering */ + if (*(int *) &one == 0) { + i0 = 3; + i1 = 0; + } else { + i0 = 0; + i1 = 3; + } + + n = 0; + ix = px[i0]; + if (ix > 0x3ffee0f8) { /* if x > 31/33 */ + if (ix < 0x3fff1084) { /* if x < 33/31 */ + f = x - one; + z = f * f; + if (((ix - 0x3fff0000) | px[i1] | px[2] | px[1]) == 0) { + return (zero); /* log(1)= +0 */ + } + s = f / (two + f); /* |s|<2**-8 */ + z = s * s; + return (f - s * (f - z * (B1 + z * (B2 + z * (B3 + + z * (B4 + z * (B5 + z * (B6 + z * (B7 + + z * (B8 + z * B9)))))))))); + } + if (ix >= 0x7fff0000) + return (x + x); /* x is +inf or NaN */ + goto LARGE_N; + } + if (ix >= 0x00010000) + goto LARGE_N; + i = ix & 0x7fffffff; + if ((i | px[i1] | px[2] | px[1]) == 0) { + px[i0] |= 0x80000000; + return (one / x); /* log(0.0) = -inf */ + } + if (ix < 0) { + if ((unsigned) ix >= 0xffff0000) + return (x - x); /* x is -inf or NaN */ + return (zero / zero); /* log(x<0) is NaN */ + } + /* subnormal x */ + x *= two113; + n = -113; + ix = px[i0]; +LARGE_N: + n += ((ix + 0x200) >> 16) - 0x3fff; + ix = (ix & 0x0000ffff) | 0x3fff0000; /* scale x to [1,2] */ + px[i0] = ix; + i = ix + 0x200; + pz[i0] = i & 0xfffffc00; + pz[i1] = pz[1] = pz[2] = 0; + s = (x - z) / (x + z); + j = (i >> 10) & 0x3f; + z = s * s; + qn = (long double) n; + t = qn * ln2lo + _TBL_logl_lo[j]; + h = qn * ln2hi + _TBL_logl_hi[j]; + f = t + s * (A1 + z * (A2 + z * (A3 + z * (A4 + z * (A5 + + z * (A6 + z * A7)))))); + return (h + f); +} diff --git a/usr/src/libm/src/Q/longdouble.h b/usr/src/libm/src/Q/longdouble.h new file mode 100644 index 0000000..aa34da1 --- /dev/null +++ 
b/usr/src/libm/src/Q/longdouble.h @@ -0,0 +1,155 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)longdouble.h 1.10 06/01/31 SMI" + +#include + +extern long double __k_cosl(long double, long double); +extern long double __k_lgammal(long double, int *); +extern long double __k_sincosl(long double, long double, long double *); +extern long double __k_sinl(long double, long double); +extern long double __k_tanl(long double, long double, int); +extern long double __poly_libmq(long double, int, const long double *); +extern int __rem_pio2l(long double, long double *); + +extern long double acosdl(long double); +extern long double acoshl(long double); +extern long double acosl(long double); +extern long double acospil(long double); +extern long double acospl(long double); +extern long double aintl(long double); +extern long double anintl(long double); +extern long double annuityl(long double, long double); +extern long double asindl(long double); +extern long double asinhl(long double); +extern long double asinl(long double); +extern long double asinpil(long 
double); +extern long double asinpl(long double); +extern long double atan2dl(long double, long double); +extern long double atan2l(long double, long double); +extern long double atan2pil(long double, long double); +extern long double atandl(long double); +extern long double atanhl(long double); +extern long double atanl(long double); +extern long double atanpil(long double); +extern long double atanpl(long double); +extern long double cbrtl(long double); +extern long double ceill(long double); +extern long double compoundl(long double, long double); +extern long double copysignl(long double, long double); +extern long double cosdl(long double); +extern long double coshl(long double); +extern long double cosl(long double); +extern long double cospil(long double); +extern long double cospl(long double); +extern long double erfcl(long double); +extern long double erfl(long double); +extern long double exp10l(long double); +extern long double exp2l(long double); +extern long double expl(long double); +extern long double expm1l(long double); +extern long double fabsl(long double); +extern int finitel(long double); +extern long double floorl(long double); +extern long double fmodl(long double, long double); +extern enum fp_class_type fp_classl(long double); +extern long double gammal(long double); +extern long double hypotl(long double, long double); +extern int ilogbl(long double); +extern long double infinityl(void); +extern int irintl(long double); +extern int isinfl(long double); +extern int isnanl(long double); +extern int isnormall(long double); +extern int issubnormall(long double); +extern int iszerol(long double); +extern long double j0l(long double); +extern long double j1l(long double); +extern long double jnl(int, long double); +extern long double lgammal(long double); +extern long double log10l(long double); +extern long double log1pl(long double); +extern long double log2l(long double); +extern long double logbl(long double); +extern long double logl(long 
double); +extern long double max_normall(void); +extern long double max_subnormall(void); +extern long double min_normall(void); +extern long double min_subnormall(void); +extern long double nextafterl(long double, long double); +extern int nintl(long double); +extern long double pow_li(long double *, int *); +extern long double powl(long double, long double); +extern long double quiet_nanl(long); +extern long double remainderl(long double, long double); +extern long double rintl(long double); +extern long double scalbl(long double, long double); +extern long double scalbnl(long double, int); +extern long double signaling_nanl(long); +extern int signbitl(long double); +extern long double significandl(long double); +extern void sincosdl(long double, long double *, long double *); +extern void sincosl(long double, long double *, long double *); +extern void sincospil(long double, long double *, long double *); +extern void sincospl(long double, long double *, long double *); +extern long double sindl(long double); +extern long double sinhl(long double); +extern long double sinl(long double); +extern long double sinpil(long double); +extern long double sinpl(long double); +extern long double sqrtl(long double); +extern long double tandl(long double); +extern long double tanhl(long double); +extern long double tanl(long double); +extern long double tanpil(long double); +extern long double tanpl(long double); +extern long double y0l(long double); +extern long double y1l(long double); +extern long double ynl(int, long double); + +extern long double q_copysign_(long double *, long double *); +extern long double q_fabs_(long double *); +extern int iq_finite_(long double *); +extern long double q_fmod_(long double *, long double *); +extern enum fp_class_type iq_fp_class_(long double *); +extern int iq_ilogb_(long double *); +extern long double q_infinity_(void); +extern int iq_isinf_(long double *); +extern int iq_isnan_(long double *); +extern int iq_isnormal_(long double 
*); +extern int iq_issubnormal_(long double *); +extern int iq_iszero_(long double *); +extern long double q_max_normal_(void); +extern long double q_max_subnormal_(void); +extern long double q_min_normal_(void); +extern long double q_min_subnormal_(void); +extern long double q_nextafter_(long double *, long double *); +extern long double q_quiet_nan_(long *); +extern long double q_remainder_(long double *, long double *); +extern long double q_scalbn_(long double *, int *); +extern long double q_signaling_nan_(long *); +extern int iq_signbit_(long double *); diff --git a/usr/src/libm/src/Q/nextafterl.c b/usr/src/libm/src/Q/nextafterl.c new file mode 100644 index 0000000..f1d042f --- /dev/null +++ b/usr/src/libm/src/Q/nextafterl.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)nextafterl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nextafterl = __nextafterl +#endif + +#include "libm.h" +#include /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define n0 0 +#define n1 1 +#define n2 2 +#define n3 3 +#define X86PDNRM1(x) +#define INC(px) { \ + if (++px[n3] == 0) \ + if (++px[n2] == 0) \ + if (++px[n1] == 0) \ + ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n3] == 0xffffffff) \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0xffffffff) \ + --px[n0]; \ + } +#elif defined(__i386) +#define n0 2 +#define n1 1 +#define n2 0 +#define n3 0 +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM1(x) if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \ + 0x80000000) != 0) \ + ((int *) &x)[2] |= 1 +#define INC(px) { \ + if (++px[n2] == 0) \ + if ((++px[n1] & ~0x80000000) == 0) \ + px[n1] = 0x80000000, ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0x7fffffff) \ + if ((--px[n0] & 0x7fff) != 0) \ + px[n1] |= 0x80000000; \ + } +#endif + +long double +nextafterl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); + + if (ISZEROL(x)) { /* x == 0.0 */ + px[n0] = py[n0] & XSGNMSK; + px[n1] = px[n2] = 0; + px[n3] = 1; + } else { + X86PDNRM1(x); + if ((px[n0] & XSGNMSK) == 0) { /* x > 0.0 */ + if (x > y) /* x > y */ + DEC(px) + else + INC(px) + } else { + if (x < y) /* x < y */ + DEC(px) + else + INC(px) + } + } +#ifndef lint + { + volatile long double dummy; + int k = XBIASED_EXP(x); + + if (k == 0) + dummy = LDBL_MIN * copysignl(LDBL_MIN, x); + else if (k == 0x7fff) + dummy = LDBL_MAX * copysignl(LDBL_MAX, x); + } +#endif + return (x); +} diff --git a/usr/src/libm/src/Q/powl.c b/usr/src/libm/src/Q/powl.c new file mode 100644 index 0000000..f1fdb44 --- /dev/null +++ b/usr/src/libm/src/Q/powl.c @@ -0,0 +1,319 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)powl.c 1.18 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak powl = __powl +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_pow _C99SUSv3_pow_treats_Inf_as_an_even_int + +#if defined(__sparc) +#define i0 0 +#define i1 1 +#define i2 2 +#define i3 3 + +static const long double zero = 0.0L, one = 1.0L, two = 2.0L; + +extern const long double _TBL_logl_hi[], _TBL_logl_lo[]; + +static const long double + two113 = 10384593717069655257060992658440192.0L, + ln2hi = 6.931471805599453094172319547495844850203e-0001L, + ln2lo = 1.667085920830552208890449330400379754169e-0025L, + A2 = 6.666666666666666666666666666666091393804e-0001L, + A3 = 4.000000000000000000000000407167070220671e-0001L, + A4 = 2.857142857142857142730077490612903681164e-0001L, + A5 = 2.222222222222242577702836920812882605099e-0001L, + A6 = 1.818181816435493395985912667105885828356e-0001L, + A7 = 1.538537835211839751112067512805496931725e-0001L, + B1 = 6.666666666666666666666666666666666667787e-0001L, + B2 = 
3.999999999999999999999999999999848524411e-0001L, + B3 = 2.857142857142857142857142865084581075070e-0001L, + B4 = 2.222222222222222222222010781800643808497e-0001L, + B5 = 1.818181818181818185051442171337036403674e-0001L, + B6 = 1.538461538461508363540720286292008207673e-0001L, + B7 = 1.333333333506731842033180638329317108428e-0001L, + B8 = 1.176469984587418890634302788283946761670e-0001L, + B9 = 1.053794891561452331722969901564862497132e-0001L; + +static long double +logl_x(long double x, long double *w) { + long double f, f1, v, s, z, qn, h, t; + int *px = (int *) &x; + int *pz = (int *) &z; + int i, j, ix, n; + + n = 0; + ix = px[i0]; + if (ix > 0x3ffef03f && ix < 0x3fff0820) { /* 65/63 > x > 63/65 */ + f = x - one; + z = f * f; + if (((ix - 0x3fff0000) | px[i1] | px[i2] | px[i3]) == 0) { + *w = zero; + return (zero); /* log(1)= +0 */ + } + qn = one / (two + f); + s = f * qn; /* |s|<2**-6 */ + v = s * s; + h = (long double) (2.0 * (double) s); + f1 = (long double) ((double) f); + t = ((two * (f - h) - h * f1) - h * (f - f1)) * qn + + s * (v * (B1 + v * (B2 + v * (B3 + v * (B4 + + v * (B5 + v * (B6 + v * (B7 + v * (B8 + v * B9))))))))); + s = (long double) ((double) (h + t)); + *w = t - (s - h); + return (s); + } + if (ix < 0x00010000) { /* subnormal x */ + x *= two113; + n = -113; + ix = px[i0]; + } + /* LARGE_N */ + n += ((ix + 0x200) >> 16) - 0x3fff; + ix = (ix & 0x0000ffff) | 0x3fff0000; /* scale x to [1,2] */ + px[i0] = ix; + i = ix + 0x200; + pz[i0] = i & 0xfffffc00; + pz[i1] = pz[i2] = pz[i3] = 0; + qn = one / (x + z); + f = x - z; + s = f * qn; + f1 = (long double) ((double) f); + h = (long double) (2.0 * (double) s); + t = qn * ((two * (f - z * h) - h * f1) - h * (f - f1)); + j = (i >> 10) & 0x3f; + v = s * s; + qn = (long double) n; + t += qn * ln2lo + _TBL_logl_lo[j]; + t += s * (v * (A2 + v * (A3 + v * (A4 + v * (A5 + v * (A6 + + v * A7)))))); + v = qn * ln2hi + _TBL_logl_hi[j]; + s = h + v; + t += (h - (s - v)); + z = (long double) ((double) (s + t)); 
+ *w = t - (z - s); + return (z); +} + +extern const long double _TBL_expl_hi[], _TBL_expl_lo[]; +static const long double + invln2_32 = 4.616624130844682903551758979206054839765e+1L, + ln2_32hi = 2.166084939249829091928849858592451515688e-2L, + ln2_32lo = 5.209643502595475652782654157501186731779e-27L, + ln2_64 = 1.083042469624914545964425189778400898568e-2L; + +long double +powl(long double x, long double y) { + long double z, ax; + long double y1, y2, w1, w2; + int sbx, sby, j, k, yisint, m; + int hx, lx, hy, ly, ahx, ahy; + int *pz = (int *) &z; + int *px = (int *) &x; + int *py = (int *) &y; + + hx = px[i0]; + lx = px[i1] | px[i2] | px[i3]; + hy = py[i0]; + ly = py[i1] | py[i2] | py[i3]; + ahx = hx & ~0x80000000; + ahy = hy & ~0x80000000; + + if ((ahy | ly) == 0) + return (one); /* x**+-0 = 1 */ + else if (hx == 0x3fff0000 && lx == 0 && + (__xpg6 & _C99SUSv3_pow) != 0) + return (one); /* C99: 1**anything = 1 */ + else if (ahx > 0x7fff0000 || (ahx == 0x7fff0000 && lx != 0) || + ahy > 0x7fff0000 || (ahy == 0x7fff0000 && ly != 0)) + return (x + y); /* +-NaN return x+y */ + /* includes Sun: 1**NaN = NaN */ + sbx = (unsigned) hx >> 31; + sby = (unsigned) hy >> 31; + ax = fabsl(x); + /* + * determine if y is an odd int when x < 0 + * yisint = 0 ... y is not an integer + * yisint = 1 ... y is an odd int + * yisint = 2 ... 
y is an even int + */ + yisint = 0; + if (sbx) { + if (ahy >= 0x40700000) /* if |y|>=2**113 */ + yisint = 2; /* even integer y */ + else if (ahy >= 0x3fff0000) { + k = (ahy >> 16) - 0x3fff; /* exponent */ + if (k > 80) { + j = ((unsigned) py[i3]) >> (112 - k); + if ((j << (112 - k)) == py[i3]) + yisint = 2 - (j & 1); + } else if (k > 48) { + j = ((unsigned) py[i2]) >> (80 - k); + if ((j << (80 - k)) == py[i2]) + yisint = 2 - (j & 1); + } else if (k > 16) { + j = ((unsigned) py[i1]) >> (48 - k); + if ((j << (48 - k)) == py[i1]) + yisint = 2 - (j & 1); + } else if (ly == 0) { + j = ahy >> (16 - k); + if ((j << (16 - k)) == ahy) + yisint = 2 - (j & 1); + } + } + } + + /* special value of y */ + if (ly == 0) { + if (ahy == 0x7fff0000) { /* y is +-inf */ + if (((ahx - 0x3fff0000) | lx) == 0) { + if ((__xpg6 & _C99SUSv3_pow) != 0) + return (one); + /* C99: (-1)**+-inf = 1 */ + else + return (y - y); + /* Sun: (+-1)**+-inf = NaN */ + } else if (ahx >= 0x3fff0000) + /* (|x|>1)**+,-inf = inf,0 */ + return (sby == 0 ? y : zero); + else /* (|x|<1)**-,+inf = inf,0 */ + return (sby != 0 ? 
-y : zero); + } else if (ahy == 0x3fff0000) { /* y is +-1 */ + if (sby != 0) + return (one / x); + else + return (x); + } else if (hy == 0x40000000) /* y is 2 */ + return (x * x); + else if (hy == 0x3ffe0000) { /* y is 0.5 */ + if (!((ahx | lx) == 0 || ((ahx - 0x7fff0000) | lx) == + 0)) + return (sqrtl(x)); + } + } + + /* special value of x */ + if (lx == 0) { + if (ahx == 0x7fff0000 || ahx == 0 || ahx == 0x3fff0000) { + /* x is +-0,+-inf,+-1 */ + z = ax; + if (sby == 1) + z = one / z; /* z = 1/|x| if y is negative */ + if (sbx == 1) { + if (ahx == 0x3fff0000 && yisint == 0) + z = zero / zero; + /* (-1)**non-int is NaN */ + else if (yisint == 1) + z = -z; /* (x<0)**odd = -(|x|**odd) */ + } + return (z); + } + } + + /* (x<0)**(non-int) is NaN */ + if (sbx == 1 && yisint == 0) + return (zero / zero); /* should be volatile */ + + /* Now ax is finite, y is finite */ + /* first compute log(ax) = w1+w2, with 53 bits w1 */ + w1 = logl_x(ax, &w2); + + /* split up y into y1+y2 and compute (y1+y2)*(w1+w2) */ + if (ly == 0 || ahy >= 0x43fe0000) { + y1 = y * w1; + y2 = y * w2; + } else { + y1 = (long double) ((double) y); + y2 = (y - y1) * w1 + y * w2; + y1 *= w1; + } + z = y1 + y2; + j = pz[i0]; + if ((unsigned) j >= 0xffff0000) { /* NaN or -inf */ + if (sbx == 1 && yisint == 1) + return (one / z); + else + return (-one / z); + } else if ((j & ~0x80000000) < 0x3fc30000) { /* |x|<2^-60 */ + if (sbx == 1 && yisint == 1) + return (-one - z); + else + return (one + z); + } else if (j > 0) { + if (j > 0x400d0000) { + if (sbx == 1 && yisint == 1) + return (scalbnl(-one, 20000)); + else + return (scalbnl(one, 20000)); + } + k = (int) (invln2_32 * (z + ln2_64)); + } else { + if ((unsigned) j > 0xc00d0000) { + if (sbx == 1 && yisint == 1) + return (scalbnl(-one, -20000)); + else + return (scalbnl(one, -20000)); + } + k = (int) (invln2_32 * (z - ln2_64)); + } + j = k & 0x1f; + m = k >> 5; + { + /* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */ + long double + t1 = 
1.666666666666666666666666666660876387437e-1L, + t2 = -2.777777777777777777777707812093173478756e-3L, + t3 = 6.613756613756613482074280932874221202424e-5L, + t4 = -1.653439153392139954169609822742235851120e-6L, + t5 = 4.175314851769539751387852116610973796053e-8L; + long double t = (long double) k; + + w1 = (y2 - (t * ln2_32hi - y1)) - t * ln2_32lo; + t = w1 * w1; + w2 = (w1 - t * (t1 + t * (t2 + t * (t3 + t * (t4 + t * t5))))) - + two; + z = _TBL_expl_hi[j] - ((_TBL_expl_hi[j] * (w1 + w1)) / w2 - + _TBL_expl_lo[j]); + } + j = m + (pz[i0] >> 16); + if (j && (unsigned) j < 0x7fff) + pz[i0] += m << 16; + else + z = scalbnl(z, m); + + if (sbx == 1 && yisint == 1) + z = -z; /* (-ve)**(odd int) */ + return (z); +} +#else +#error Unsupported Architecture +#endif /* defined(__sparc) */ diff --git a/usr/src/libm/src/Q/remainderl.c b/usr/src/libm/src/Q/remainderl.c new file mode 100644 index 0000000..35f94e7 --- /dev/null +++ b/usr/src/libm/src/Q/remainderl.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)remainderl.c 1.7 06/01/31 SMI" + +#pragma weak remainderl = __remainderl + +#include "libm.h" + +/* + * remainderl(x,p) + * returns x REM p = x - [x/p]*p as if in infinite + * precise arithmetic, where [x/p] is the (inifinite bit) + * integer nearest x/p (in half way case choose the even one). + * Method : + * Based on fmodl() return x-[x/p]chopped*p exactly. + */ + +#define HFMAX 5.948657476786158825428796633140035080982e+4931L +#define DBMIN 6.724206286224187012525355634643505205196e-4932L + +static const long double + zero = 0.0L, + half = 0.5L, + hfmax = HFMAX, /* half of the maximum number */ + dbmin = DBMIN; /* double of the minimum (normal) number */ + +long double +remainderl(long double x, long double p) { + long double hp; + int sx; + + if (isnanl(p)) + return (x + p); + if (!finitel(x)) + return (x - x); + p = fabsl(p); + if (p <= hfmax) + x = fmodl(x, p + p); + sx = signbitl(x); + x = fabsl(x); + if (p < dbmin) { + if (x + x > p) { + if (x == p) + x = zero; + else + x -= p; /* avoid x-x=-0 in RM mode */ + if (x + x >= p) + x -= p; + } + } else { + hp = half * p; + if (x > hp) { + if (x == p) + x = zero; + else + x -= p; /* avoid x-x=-0 in RM mode */ + if (x >= hp) + x -= p; + } + } + return (sx == 0 ? x : -x); +} diff --git a/usr/src/libm/src/Q/rintl.c b/usr/src/libm/src/Q/rintl.c new file mode 100644 index 0000000..e48a520 --- /dev/null +++ b/usr/src/libm/src/Q/rintl.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)rintl.c 1.10 06/01/31 SMI" + +#pragma weak rintl = __rintl + +/* + * rintl(long double x) return x rounded to integral according to + * the prevailing rounding direction + * + * NOTE: aintl(x), anintl(x), ceill(x), floorl(x), and rintl(x) return result + * with the same sign as x's, including 0.0L. + */ + +#include "libm.h" + +extern enum fp_precision_type __swapRP(enum fp_precision_type); + +static const double one = 1.0; +static const long double qzero = 0.0L; + +long double +rintl(long double x) { + enum fp_precision_type rp; + long double t, w, two112; + int *pt = (int *) &two112; + + if (!finitel(x)) + return (x + x); + + if (*(int *) &one != 0) { /* set two112 = 2^112 */ + pt[0] = 0x406f0000; + pt[1] = pt[2] = pt[3] = 0; + } else { + pt[3] = 0x406f0000; + pt[0] = pt[1] = pt[2] = 0; + } + + if (fabsl(x) >= two112) + return (x); /* already an integer */ + t = copysignl(two112, x); + rp = __swapRP(fp_extended); /* make sure precision is long double */ + w = x + t; /* x+sign(x)*2^112 rounded to integer */ + (void) __swapRP(rp); /* restore precision mode */ + if (w == t) + return (copysignl(qzero, x)); /* x rounded to zero */ + else + return (w - t); +} diff --git a/usr/src/libm/src/Q/rndintl.c b/usr/src/libm/src/Q/rndintl.c new file mode 100644 index 0000000..e66d24f --- /dev/null +++ b/usr/src/libm/src/Q/rndintl.c @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)rndintl.c 1.10 06/01/23 SMI" + +#pragma weak aintl = __aintl +#pragma weak anintl = __anintl +#pragma weak irintl = __irintl +#pragma weak nintl = __nintl + +/* + * aintl(x) return x chopped to integral value + * anintl(x) return sign(x)*(|x|+0.5) chopped to integral value + * irintl(x) return rint(x) in integer format + * nintl(x) return anint(x) in integer format + * + * NOTE: aintl(x), anintl(x), ceill(x), floorl(x), and rintl(x) return result + * with the same sign as x's, including 0.0. 
+ */ + +#include "libm.h" + +extern enum fp_direction_type __swapRD(enum fp_direction_type); + +static const long double qone = 1.0L, qhalf = 0.5L, qmhalf = -0.5L; + +long double +aintl(long double x) { + long double t, w; + + if (!finitel(x)) + return (x + x); + w = fabsl(x); + t = rintl(w); + if (t <= w) + return (copysignl(t, x)); /* NaN or already aint(|x|) */ + else /* |t|>|x| case */ + return (copysignl(t - qone, x)); /* |t-1|*sign(x) */ +} + +long double +anintl(long double x) { + long double t, w, z; + + if (!finitel(x)) + return (x + x); + w = fabsl(x); + t = rintl(w); + if (t == w) + return (copysignl(t, x)); + z = t - w; + if (z > qhalf) + t = t - qone; + else if (z <= qmhalf) + t = t + qone; + return (copysignl(t, x)); +} + +int +irintl(long double x) { + enum fp_direction_type rd; + + rd = __swapRD(fp_nearest); + (void) __swapRD(rd); /* restore Rounding Direction */ + switch (rd) { + case fp_nearest: + if (x < 2147483647.5L && x >= -2147483648.5L) + return ((int)rintl(x)); + break; + case fp_tozero: + if (x < 2147483648.0L && x > -2147483649.0L) + return ((int)rintl(x)); + break; + case fp_positive: + if (x <= 2147483647.0L && x > -2147483649.0L) + return ((int)rintl(x)); + break; + case fp_negative: + if (x < 2147483648.0L && x >= -2147483648.0L) + return ((int)rintl(x)); + break; + } + return ((int)copysignl(1.0e100L, x)); +} + +int +nintl(long double x) { + if ((x < 2147483647.5L) && (x > -2147483648.5L)) + return ((int)anintl(x)); + else + return ((int)copysignl(1.0e100L, x)); +} diff --git a/usr/src/libm/src/Q/scalbl.c b/usr/src/libm/src/Q/scalbl.c new file mode 100644 index 0000000..48bd811 --- /dev/null +++ b/usr/src/libm/src/Q/scalbl.c @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)scalbl.c 1.8 06/01/31 SMI" + +#pragma weak scalbl = __scalbl + +/* + * scalbl(x,fn): return x * 2^fn for integral fn; the exponent adjustment + * itself is done by scalbnl. + * + * if x or fn is NaN the result is x * fn; + * fn = +Inf gives x * fn, fn = -Inf gives x / (-fn); + * a finite, non-integral fn gives 0/0; + * |fn| above 65000 is clamped to +-65000 before calling scalbnl. + */ + +#include "libm.h" + +/* index of the 32-bit word of fn that is tested for sign and exponent. NOTE(review): 0 presumes that word is stored first in memory (big-endian quad layout) -- confirm for other targets */ +#define n0 0 + +long double +scalbl(long double x, long double fn) { + int *py = (int *) &fn, n; + long double z; + + if (isnanl(x) || isnanl(fn)) + return (x * fn); + + /* + * exponent field all ones: fn is +-Inf here (NaN was returned above) + */ + if ((py[n0] & 0x7fff0000) == 0x7fff0000) { + if ((py[n0] & 0x80000000) != 0) + return (x / (-fn)); /* fn = -Inf: divide by +Inf */ + else + return (x * fn); /* fn = +Inf */ + } + if (rintl(fn) != fn) + return ((fn - fn) / (fn - fn)); /* fn is finite but not integral: 0/0 */ + if (fn > 65000.0L) + z = scalbnl(x, 65000); + else if (-fn > 65000.0L) + z = scalbnl(x, -65000); + else { + n = (int) fn; /* exact: fn is integral and |fn| <= 65000 */ + z = scalbnl(x, n); + } + return (z); +} diff --git a/usr/src/libm/src/Q/scalbnl.c b/usr/src/libm/src/Q/scalbnl.c new file mode 100644 index 0000000..689af20 --- /dev/null +++ b/usr/src/libm/src/Q/scalbnl.c @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)scalbnl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak scalbnl = __scalbnl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ +#include <stdlib.h> /* abs */ + +/* + * Per-format helpers: + * XSET_EXP(k, x) store k in the biased exponent field of x + * ISINFNANL(k, x) true when biased exponent k marks Inf/NaN + * XTWOT_OFFSET power of two by which xtwot scales (xtwot = 2^XTWOT_OFFSET) + * twomtm1 2^-(XTWOT_OFFSET + 1) + */ +#if defined(__sparc) +#define XSET_EXP(k, x) ((int *) &x)[0] = (((int *) &x)[0] & ~0x7fff0000) | \ + (k << 16) +#define ISINFNANL(k, x) (k == 0x7fff) +#define XTWOT_OFFSET 113 +static const long double xtwot = 10384593717069655257060992658440192.0L, + /* 2^113 */ + twomtm1 = 4.814824860968089632639944856462318296E-35L; /* 2^-114 */ +#elif defined(__i386) +#define XSET_EXP(k, x) ((int *) &x)[2] = (((int *) &x)[2] & ~0x7fff) | k +#if defined(HANDLE_UNSUPPORTED) +#define ISINFNANL(k, x) (k == 0x7fff || k != 0 && \ + (((int *) &x)[1] & 0x80000000) == 0) +#else +#define ISINFNANL(k, x) (k == 0x7fff) +#endif +#define XTWOT_OFFSET 64 +static const long double xtwot = 18446744073709551616.0L, /* 2^64 */ + twomtm1 = 2.7105054312137610850186E-20L; /* 2^-65 */ +#endif + +/* + * scalbnl(x, n): return x * 2^n by rewriting the biased exponent field. + * + * Inf/NaN (per ISINFNANL) x + x + * x zero, or n == 0 x unchanged + * exponent field of x is zero x is first multiplied by xtwot and k is + * corrected by XTWOT_OFFSET + * new exponent > 0x7ffe LDBL_MAX * copysignl(LDBL_MAX, x) + * new exponent <= -XTWOT_OFFSET-1 LDBL_MIN * copysignl(LDBL_MIN, x) + * new exponent in 1..0x7ffe stored directly + * otherwise exponent k + XTWOT_OFFSET + 1 is stored + * and the value multiplied by twomtm1 + */ +long double +scalbnl(long double x, int n) { + int k = XBIASED_EXP(x); + + if (ISINFNANL(k, x)) + return (x + x); + if (ISZEROL(x) || n == 0) + return (x); + if (k == 0) { + x *= xtwot; + k = XBIASED_EXP(x) - XTWOT_OFFSET; + } + if ((unsigned) abs(n) >= 131072) /* cast to unsigned for -2^31 */ + n >>= 1; /* avoid subsequent integer overflow */ + k += n; + if (k > 0x7ffe) + return (LDBL_MAX * copysignl(LDBL_MAX, x)); + if (k <= 
-XTWOT_OFFSET - 1) + return (LDBL_MIN * copysignl(LDBL_MIN, x)); + if (k > 0) { + XSET_EXP(k, x); + return (x); + } + k += XTWOT_OFFSET + 1; /* lift into the normal range; twomtm1 below scales back down */ + XSET_EXP(k, x); + return (x * twomtm1); +} diff --git a/usr/src/libm/src/Q/signgaml.c b/usr/src/libm/src/Q/signgaml.c new file mode 100644 index 0000000..5b1a587 --- /dev/null +++ b/usr/src/libm/src/Q/signgaml.c @@ -0,0 +1,33 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)signgaml.c 1.4 06/01/31 SMI" + +#pragma weak signgaml = __signgaml + +#include "libm.h" + +int signgaml = 0; diff --git a/usr/src/libm/src/Q/significandl.c b/usr/src/libm/src/Q/significandl.c new file mode 100644 index 0000000..8c31902 --- /dev/null +++ b/usr/src/libm/src/Q/significandl.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)significandl.c 1.8 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak significandl = __significandl +#endif + +#include "libm.h" + +/* + * significandl(x): x scaled by 2^-ilogbl(x). An argument that is zero, or + * whose biased exponent is 0x7fff (+-Inf or NaN), is returned as x + x. + */ +long double +significandl(long double x) { + long double res; + + if (ISZEROL(x) || XBIASED_EXP(x) == 0x7fff) + res = x + x; /* 0/+-Inf/NaN */ + else + res = scalbnl(x, -ilogbl(x)); + return (res); +} diff --git a/usr/src/libm/src/Q/sincosl.c b/usr/src/libm/src/Q/sincosl.c new file mode 100644 index 0000000..607d9b2 --- /dev/null +++ b/usr/src/libm/src/Q/sincosl.c @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sincosl.c 1.7 06/01/31 SMI" + +/* + * sincosl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sincosl ... sin and cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
+ */ + +#pragma weak sincosl = __sincosl + +#include "libm.h" +#include "longdouble.h" + +/* + * sincosl(x, s, c): store sin(x) in *s and cos(x) in *c, using the + * combined kernel __k_sincosl (which returns the sine and stores the + * cosine through its last argument) and __rem_pio2l for reduction. + */ +void +sincosl(long double x, long double *s, long double *c) { + long double rem[2]; + int hw, quad; + + /* high word of x with the sign bit cleared */ + hw = *(int *) &x & 0x7fffffff; + if (hw <= 0x3ffe9220) { /* |x| ~< pi/4 */ + *s = __k_sincosl(x, 0.0L, c); + return; + } + if (hw >= 0x7fff0000) { /* trig(Inf or NaN) is NaN */ + *s = *c = x - x; + return; + } + + /* argument reduction needed; the low two bits of the result pick the case */ + quad = __rem_pio2l(x, rem) & 3; + if (quad == 0) { + *s = __k_sincosl(rem[0], rem[1], c); + } else if (quad == 1) { + *c = -__k_sincosl(rem[0], rem[1], s); + } else if (quad == 2) { + *s = -__k_sincosl(rem[0], rem[1], c); + *c = -*c; + } else { + *c = __k_sincosl(rem[0], rem[1], s); + *s = -*s; + } +} diff --git a/usr/src/libm/src/Q/sincospil.c b/usr/src/libm/src/Q/sincospil.c new file mode 100644 index 0000000..9cd487c --- /dev/null +++ b/usr/src/libm/src/Q/sincospil.c @@ -0,0 +1,194 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)sincospil.c 1.11 06/01/31 SMI" + +#pragma weak sincospil = __sincospil + +/* + * void sincospil(long double x, long double *s, long double *c) + * *s = sinl(pi*x); *c = cosl(pi*x); + * + * Algorithm, 10/17/2002, K.C. Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y==z, then x*pi is a multiple of pi/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants a more accurate result, one may break up pi*t into + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +/* I/U: the m-th 32-bit word of object q, viewed as int / unsigned */ +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__LITTLE_ENDIAN) || defined(__i386) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +/* NOTE: despite its name this constant is 2^(PREC-1) = 2^63 */ +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof (long double) / sizeof (int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +/* NOTE: despite its name this constant is 2^(PREC-1) = 2^112 */ +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; +#endif + +/* + * sqrth and tiny carry the L suffix: an unsuffixed constant has type double, + * which would leave sqrt(1/2) accurate to double precision only. + */ +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474L, +tiny = 1.0e-100L; + +/* + * sincospil(x, s, c): store sin(pi*x) in *s and cos(pi*x) in *c. + * + * k is the unbiased exponent of x. Inf/NaN give x - x in both outputs; + * |x| >= 2^(Prec-2) is answered from the low significand bits alone; + * |x| < 0.25 goes straight to __k_sincosl(pi*|x|); everything else is + * reduced through y = |4x| and n = floor(y) mod 8. The sign of x is applied + * to *s just before returning. + */ +void +sincospil(long double x, long double *s, long double *c) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) { + *s = *c = x - x; /* Inf or NaN */ + } else { + if (k >= PREC) { /* x is an even integer */ + *s = zero; + *c = one; + } else if (k == PRECM1) { /* x is an integer; lx bit 0 is its parity */ + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } else { + *s = -zero; + *c = -one; + } + } else { /* k = Prec - 2 */ + /* x is a multiple of 1/2: bit 0 is the half, bit 1 the parity */ + if ((lx & 1) == 0) { + *s = zero; + *c = one; + } else { + *s = one; + *c = zero; + } + if ((lx & 2) != 0) { + *s = -*s; + *c = -*c; + } + } + } + } else if (k < -2) /* |x| < 0.25 */ + *s = __k_sincosl(pi * fabsl(x), zero, c); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + /* + * For k < Prec-3, y < 2^(Prec-1), so y + twoPRECM2 has unit ulp + * and its low bits are those of y rounded to an integer. For + * k = Prec-3 that sum would need Prec+1 bits and drop the low + * bit, but y is then already an integer, so its own low bits + * are used instead. + */ + if (k < PRECM2 - 1) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; /* from here on k flags "x is exactly N/4" */ + if (t == y) + k = 1; + else if (t > y) { /* the sum rounded up: step back to floor(y) */ + n -= 1; + t = quater + (y - t) * quater; + } else + t = (y - t) * quater; + } 
else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of y, which is integral here */ + k = 1; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + *s = *c = sqrth + tiny; + else + if ((n & 2) == 0) { + *s = zero; + *c = one; + } else { + *s = one; + *c = zero; + } + if ((n & 4) != 0) + *s = -*s; + if (((n + 1) & 4) != 0) + *c = -*c; + } else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + *s = __k_sincosl(pi * t, zero, c); + else + *c = __k_sincosl(pi * t, zero, s); + if ((n & 4) != 0) + *s = -*s; + if (((n + 2) & 4) != 0) + *c = -*c; + } + } + if (hx < 0) /* sin is odd in x; cos is even */ + *s = -*s; +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/libm/src/Q/sinhl.c b/usr/src/libm/src/Q/sinhl.c new file mode 100644 index 0000000..9edea43 --- /dev/null +++ b/usr/src/libm/src/Q/sinhl.c @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinhl.c 1.8 06/01/31 SMI" + +#pragma weak sinhl = __sinhl + +#include "libm.h" + +/* + * sinhl(X) + * RETURN THE HYPERBOLIC SINE OF X + * + * Method : + * 1. 
reduce x to non-negative by sinhl(-x) = - sinhl(x). + * 2. + * + * expm1l(x) + expm1l(x)/(expm1l(x)+1) + * 0 <= x <= lnovft : sinhl(x) := -------------------------------- + * 2 + * + * lnovft <= x < INF : sinhl(x) := expl(x-MEP1*ln2)*2**ME + * + * here + * lnovft: logarithm of the overflow threshold + * = MEP1*ln2 chopped to machine precision. + * ME maximum exponent + * MEP1 maximum exponent plus 1 + * + * Special cases: + * sinhl(x) is x if x is +INF, -INF, or NaN. + * only sinhl(0)=0 is exact for finite argument. + * + */ + +#define ME 16383 +#define MEP1 16384 +#define LNOVFT 1.135652340629414394949193107797076342845e+4L + /* last 32 bits of LN2HI is zero */ +#define LN2HI 6.931471805599453094172319547495844850203e-0001L +#define LN2LO 1.667085920830552208890449330400379754169e-0025L + +static const long double + half = 0.5L, + one = 1.0L, + ln2hi = LN2HI, + ln2lo = LN2LO, + lnovftL = LNOVFT; + +long double +sinhl(long double x) { + long double ax, em1, mag; + + if (!finitel(x)) + return (x + x); /* sinh of NaN or +-INF is itself */ + ax = fabsl(x); + if (ax >= lnovftL) { + /* at or past the overflow threshold: expl(|x| - MEP1*ln2), then scale by 2**ME */ + mag = expl((ax - MEP1 * ln2hi) - MEP1 * ln2lo); + return (scalbnl(copysignl(mag, x), ME)); + } + /* below the threshold: (e + e/(e+1))/2 with e = expm1l(|x|) */ + em1 = expm1l(ax); + return (copysignl((em1 + em1 / (one + em1)) * half, x)); +} diff --git a/usr/src/libm/src/Q/sinl.c b/usr/src/libm/src/Q/sinl.c new file mode 100644 index 0000000..6a38773 --- /dev/null +++ b/usr/src/libm/src/Q/sinl.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinl.c 1.7 06/01/31 SMI" + +/* + * sinl(x) + * Table look-up algorithm by K.C. Ng, November, 1989. + * + * kernel function: + * __k_sinl ... sin function on [-pi/4,pi/4] + * __k_cosl ... cos function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. 
+ */ + +#pragma weak sinl = __sinl + +#include "libm.h" +#include "longdouble.h" + +/* + * sinl(x): small arguments go straight to __k_sinl, Inf/NaN yield x - x, + * and everything else is reduced by __rem_pio2l; the low two bits of its + * result choose between the sine and cosine kernels and the sign. + */ +long double +sinl(long double x) { + long double y[2], r; + int n, hw; + + /* high word of x with the sign bit cleared */ + hw = *(int *) &x & 0x7fffffff; + if (hw >= 0x7fff0000) + return (x - x); /* sin(Inf or NaN) is NaN */ + if (hw <= 0x3ffe9220) + return (__k_sinl(x, 0.0L)); /* |x| ~< pi/4 */ + + /* argument reduction needed */ + n = __rem_pio2l(x, y); + r = ((n & 1) != 0) ? __k_cosl(y[0], y[1]) : __k_sinl(y[0], y[1]); + return (((n & 2) != 0) ? -r : r); +} diff --git a/usr/src/libm/src/Q/sinpil.c b/usr/src/libm/src/Q/sinpil.c new file mode 100644 index 0000000..83eed49 --- /dev/null +++ b/usr/src/libm/src/Q/sinpil.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinpil.c 1.9 06/01/31 SMI" + +#pragma weak sinpil = __sinpil + +/* + * long double sinpil(long double x), + * return long double precision sinl(pi*x). + * + * Algorithm, 10/17/2002, K.C. 
Ng + * ------------------------------ + * Let y = |4x|, z = floor(y), and n = (int)(z mod 8.0) (displayed in binary). + * 1. If y==z, then x*pi is a multiple of pi/4. Return the following values: + * --------------------------------------------------- + * n x mod 2 sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 0.00 +0 ___ +1 ___ +0 + * 001 0.25 +\/0.5 +\/0.5 +1 + * 010 0.50 +1 ___ +0 ___ +inf + * 011 0.75 +\/0.5 -\/0.5 -1 + * 100 1.00 -0 ___ -1 ___ +0 + * 101 1.25 -\/0.5 -\/0.5 +1 + * 110 1.50 -1 ___ -0 ___ +inf + * 111 1.75 -\/0.5 +\/0.5 -1 + * --------------------------------------------------- + * 2. Otherwise, + * --------------------------------------------------- + * n t sin(x*pi) cos(x*pi) tan(x*pi) + * --------------------------------------------------- + * 000 (y-z)/4 sinpi(t) cospi(t) tanpi(t) + * 001 (z+1-y)/4 cospi(t) sinpi(t) 1/tanpi(t) + * 010 (y-z)/4 cospi(t) -sinpi(t) -1/tanpi(t) + * 011 (z+1-y)/4 sinpi(t) -cospi(t) -tanpi(t) + * 100 (y-z)/4 -sinpi(t) -cospi(t) tanpi(t) + * 101 (z+1-y)/4 -cospi(t) -sinpi(t) 1/tanpi(t) + * 110 (y-z)/4 -cospi(t) sinpi(t) -1/tanpi(t) + * 111 (z+1-y)/4 -sinpi(t) cospi(t) -tanpi(t) + * --------------------------------------------------- + * + * NOTE. This program computes sinpi/cospi(t<0.25) by __k_sin/cos(pi*t, 0.0). + * This will return a result with error slightly more than one ulp (but less + * than 2 ulp). If one wants a more accurate result, one may break up pi*t into + * high (tpi_h) and low (tpi_l) parts and call __k_sin/cos(tpi_h, tpi_l) + * instead. 
+ */ + +#include "libm.h" +#include "longdouble.h" + +/* I/U: the m-th 32-bit word of object q, viewed as int / unsigned */ +#define I(q, m) ((int *) &(q))[m] +#define U(q, m) ((unsigned *) &(q))[m] +#if defined(__LITTLE_ENDIAN) || defined(__i386) +#define LDBL_MOST_SIGNIF_I(ld) ((I(ld, 2) << 16) | (0xffff & (I(ld, 1) >> 15))) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, 0) +#define PREC 64 +#define PRECM1 63 +#define PRECM2 62 +/* NOTE: despite its name this constant is 2^(PREC-1) = 2^63 */ +static const long double twoPRECM2 = 9.223372036854775808000000000000000e+18L; +#else +#define LDBL_MOST_SIGNIF_I(ld) I(ld, 0) +#define LDBL_LEAST_SIGNIF_U(ld) U(ld, sizeof (long double) / sizeof (int) - 1) +#define PREC 113 +#define PRECM1 112 +#define PRECM2 111 +/* NOTE: despite its name this constant is 2^(PREC-1) = 2^112 */ +static const long double twoPRECM2 = 5.192296858534827628530496329220096e+33L; +#endif + +/* + * sqrth and tiny carry the L suffix: an unsuffixed constant has type double, + * which would leave sqrt(1/2) accurate to double precision only. + */ +static const long double +zero = 0.0L, +quater = 0.25L, +one = 1.0L, +pi = 3.141592653589793238462643383279502884197e+0000L, +sqrth = 0.707106781186547524400844362104849039284835937688474L, +tiny = 1.0e-100L; + +/* + * sinpil(x): return sin(pi*x). + * + * k is the unbiased exponent of x. Inf/NaN give x - x; |x| >= 2^(Prec-2) + * is answered from the low significand bits alone; |x| < 0.25 goes straight + * to __k_sinl(pi*|x|); everything else is reduced through y = |4x| and + * n = floor(y) mod 8. The result computed for |x| is negated for negative x. + */ +long double +sinpil(long double x) { + long double y, z, t; + int hx, n, k; + unsigned lx; + + hx = LDBL_MOST_SIGNIF_I(x); + lx = LDBL_LEAST_SIGNIF_U(x); + k = ((hx & 0x7fff0000) >> 16) - 0x3fff; + if (k >= PRECM2) { /* |x| >= 2**(Prec-2) */ + if (k >= 16384) + y = x - x; /* Inf or NaN */ + else { + if (k >= PREC) /* x is an even integer */ + y = zero; + else if (k == PRECM1) /* x is an integer; lx bit 0 is its parity */ + y = (lx & 1) == 0 ? zero: -zero; + else { /* k = Prec - 2 */ + /* x is a multiple of 1/2: bit 0 is the half, bit 1 the parity */ + y = (lx & 1) == 0 ? zero : one; + if ((lx & 2) != 0) + y = -y; + } + } + } else if (k < -2) /* |x| < 0.25 */ + y = __k_sinl(pi * fabsl(x), zero); + else { + /* y = |4x|, z = floor(y), and n = (int)(z mod 8.0) */ + y = 4.0L * fabsl(x); + /* + * For k < Prec-3, y < 2^(Prec-1), so y + twoPRECM2 has unit ulp + * and its low bits are those of y rounded to an integer. For + * k = Prec-3 that sum would need Prec+1 bits and drop the low + * bit, but y is then already an integer, so its own low bits + * are used instead. + */ + if (k < PRECM2 - 1) { + z = y + twoPRECM2; + n = LDBL_LEAST_SIGNIF_U(z) & 7; /* 3 LSb of z */ + t = z - twoPRECM2; + k = 0; /* from here on k flags "x is exactly N/4" */ + if (t == y) + k = 1; + else if (t > y) { /* the sum rounded up: step back to floor(y) */ + n -= 1; + t = quater + (y - t) * quater; + } else + t = (y - t) * quater; + } else { /* k = Prec-3 */ + n = LDBL_LEAST_SIGNIF_U(y) & 7; /* 3 LSb of y, which is integral here */ + k = 1; + } + if (k) { /* x = N/4 */ + if ((n & 1) != 0) + y = sqrth + tiny; + else + y = (n & 2) == 0 ? 
zero : one; + if ((n & 4) != 0) + y = -y; + } else { + if ((n & 1) != 0) + t = quater - t; + if (((n + (n & 1)) & 2) == 0) + y = __k_sinl(pi * t, zero); + else + y = __k_cosl(pi * t, zero); + if ((n & 4) != 0) + y = -y; + } + } + return (hx >= 0 ? y : -y); /* sin is odd in x */ +} +#undef U +#undef LDBL_LEAST_SIGNIF_U +#undef I +#undef LDBL_MOST_SIGNIF_I diff --git a/usr/src/libm/src/Q/sqrtl.c b/usr/src/libm/src/Q/sqrtl.c new file mode 100644 index 0000000..1999ab5 --- /dev/null +++ b/usr/src/libm/src/Q/sqrtl.c @@ -0,0 +1,478 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)sqrtl.c 1.11 06/01/31 SMI" + +#pragma weak sqrtl = __sqrtl + +#include "libm.h" +#include "longdouble.h" + +extern int __swapTE(int); +extern int __swapEX(int); +extern enum fp_direction_type __swapRD(enum fp_direction_type); + +/* + * in union longdouble (declared below), msw consists of + * unsigned short sgn:1; + * unsigned short exp:15; + * unsigned short frac1:16; + */ + +#ifdef __LITTLE_ENDIAN + +/* array indices used to access words within a double */ +#define HIWORD 1 +#define LOWORD 0 + +/* structure used to access words within a quad */ +union longdouble { + struct { + unsigned int frac4; + unsigned int frac3; + unsigned int frac2; + unsigned int msw; + } l; + long double d; +}; + +/* default NaN returned for sqrt(neg) */ +static const union longdouble + qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }; + +/* signalling NaN used to raise invalid */ +static const union { + unsigned u[2]; + double d; +} snan = { 0, 0x7ff00001 }; + +#else + +/* array indices used to access words within a double */ +#define HIWORD 0 +#define LOWORD 1 + +/* structure used to access words within a quad */ +union longdouble { + struct { + unsigned int msw; + unsigned int frac2; + unsigned int frac3; + unsigned int frac4; + } l; + long double d; +}; + +/* default NaN returned for sqrt(neg) */ +static const union longdouble + qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +/* signalling NaN used to raise invalid */ +static const union { + unsigned u[2]; + double d; +} snan = { 0x7ff00001, 0 }; + +#endif /* __LITTLE_ENDIAN */ + + +/* twoN is 2^N and twomN is 2^-N */ +static const double + zero = 0.0, + half = 0.5, + one = 1.0, + huge = 1.0e300, + tiny = 1.0e-300, + two36 = 6.87194767360000000000e+10, + two30 = 1.07374182400000000000e+09, + two6 = 6.40000000000000000000e+01, + two4 = 1.60000000000000000000e+01, + twom18 = 3.81469726562500000000e-06, + twom28 = 3.72529029846191406250e-09, + twom42 = 2.27373675443232059479e-13, + twom60 = 8.67361737988403547206e-19, + twom62 = 
2.16840434497100886801e-19, + twom66 = 1.35525271560688054251e-20, + twom90 = 8.07793566946316088742e-28, + twom113 = 9.62964972193617926528e-35, + twom124 = 4.70197740328915003187e-38; + + +/* +* Extract the exponent and normalized significand (represented as +* an array of five doubles) from a finite, nonzero quad. +* +* x quad to take apart; only read +* s receives five doubles, s[0] through s[4] +* +* Returns the exponent with the bias 0x3fff removed. +*/ +static int +__q_unpack( const union longdouble *x, double *s ) +{ + union { + double d; + unsigned int l[2]; + } u; + double b; + unsigned int lx, w[3]; + int ex; + + /* get the normalized significand and exponent */ + ex = (int) ( ( x->l.msw & 0x7fffffff ) >> 16 ); + lx = x->l.msw & 0xffff; + if ( ex ) + { + lx |= 0x10000; /* exponent field nonzero: make the leading bit explicit */ + w[0] = x->l.frac2; + w[1] = x->l.frac3; + w[2] = x->l.frac4; + } + else + { + /* + * exponent field is zero: pick the highest nonzero word as lx + * (moving 32 bits at a time and adjusting ex to match), then + * finish one bit at a time below + */ + if ( lx | ( x->l.frac2 & 0xfffe0000 ) ) + { + w[0] = x->l.frac2; + w[1] = x->l.frac3; + w[2] = x->l.frac4; + ex = 1; + } + else if ( x->l.frac2 | ( x->l.frac3 & 0xfffe0000 ) ) + { + lx = x->l.frac2; + w[0] = x->l.frac3; + w[1] = x->l.frac4; + w[2] = 0; + ex = -31; + } + else if ( x->l.frac3 | ( x->l.frac4 & 0xfffe0000 ) ) + { + lx = x->l.frac3; + w[0] = x->l.frac4; + w[1] = w[2] = 0; + ex = -63; + } + else + { + lx = x->l.frac4; + w[0] = w[1] = w[2] = 0; + ex = -95; + } + /* shift left until bit 16 of lx (the leading-bit position) is set */ + while ( ( lx & 0x10000 ) == 0 ) + { + lx = ( lx << 1 ) | ( w[0] >> 31 ); + w[0] = ( w[0] << 1 ) | ( w[1] >> 31 ); + w[1] = ( w[1] << 1 ) | ( w[2] >> 31 ); + w[2] <<= 1; + ex--; + } + } + + /* + * extract the significand into five doubles: each step builds a + * double from a fixed high word plus significand bits and subtracts + * the same double without those bits, leaving just the bits' value + */ + u.l[HIWORD] = 0x42300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[LOWORD] = lx; + s[0] = u.d - b; + + u.l[HIWORD] = 0x40300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[LOWORD] = w[0] & 0xffffff00; + s[1] = u.d - b; + + u.l[HIWORD] = 0x3e300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[HIWORD] |= w[0] & 0xff; + u.l[LOWORD] = w[1] & 0xffff0000; + s[2] = u.d - b; + + u.l[HIWORD] = 0x3c300000; + u.l[LOWORD] = 0; + b = u.d; + u.l[HIWORD] |= w[1] & 0xffff; + u.l[LOWORD] = w[2] & 0xff000000; + s[3] = u.d - b; + + u.l[HIWORD] = 0x3c300000; + u.l[LOWORD] = 0; + b = 
u.d; + u.l[LOWORD] = w[2] & 0xffffff; + s[4] = u.d - b; + + return ex - 0x3fff; +} + + +/* +* Pack an exponent and array of three doubles representing a finite, +* nonzero number into a quad. Assume the sign is already there and +* the rounding mode has been fudged accordingly. +* +* z three doubles, z[0] through z[2]; only read +* exp exponent without bias (0x3fff is added here) +* rm rounding direction used to decide whether to round up +* x destination; msw is OR-ed into, the frac words are overwritten +* inexact set to 1 when discarded bits are nonzero, otherwise untouched +* +* NOTE(review): the call to __swapRD( fp_negative ) is not undone in this +* function; presumably the caller restores the rounding direction -- confirm. +*/ +static void +__q_pack( const double *z, int exp, enum fp_direction_type rm, + union longdouble *x, int *inexact ) +{ + union { + double d; + unsigned int l[2]; + } u; + double s[3], t, t2; + unsigned int msw, frac2, frac3, frac4; + + /* bias exponent and strip off integer bit */ + exp += 0x3fff; + s[0] = z[0] - one; + s[1] = z[1]; + s[2] = z[2]; + + /* + * chop the significand to obtain the fraction; + * use round-to-minus-infinity to ensure chopping + */ + (void) __swapRD( fp_negative ); + + /* extract the first eighty bits of fraction */ + t = s[1] + s[2]; + u.d = two36 + ( s[0] + t ); + msw = u.l[LOWORD]; + s[0] -= ( u.d - two36 ); + + u.d = two4 + ( s[0] + t ); + frac2 = u.l[LOWORD]; + s[0] -= ( u.d - two4 ); + + u.d = twom28 + ( s[0] + t ); + frac3 = u.l[LOWORD]; + s[0] -= ( u.d - twom28 ); + + /* condense the remaining fraction; errors here won't matter */ + t = s[0] + s[1]; + s[1] = ( ( s[0] - t ) + s[1] ) + s[2]; + s[0] = t; + + /* get the last word of fraction */ + u.d = twom60 + ( s[0] + s[1] ); + frac4 = u.l[LOWORD]; + s[0] -= ( u.d - twom60 ); + + /* + * keep track of what's left for rounding; note that + * t2 will be non-negative due to rounding mode + */ + t = s[0] + s[1]; + t2 = ( s[0] - t ) + s[1]; + + if ( t != zero ) + { + *inexact = 1; + + /* decide whether to round the fraction up */ + if ( rm == fp_positive || ( rm == fp_nearest && ( t > twom113 || + ( t == twom113 && ( t2 != zero || frac4 & 1 ) ) ) ) ) + { + /* round up and renormalize if necessary */ + if ( ++frac4 == 0 ) + if ( ++frac3 == 0 ) + if ( ++frac2 == 0 ) + if ( ++msw == 0x10000 ) + { + msw = 0; + exp++; + } + } + } + + /* assemble the result */ + x->l.msw |= msw | ( exp << 16 ); + x->l.frac2 = frac2; + 
x->l.frac3 = frac3; + x->l.frac4 = frac4; +} + + +/* +* Compute the square root of x and place the TP result in s. +* +* x five doubles, x[0] through x[4]; only read +* s receives three doubles, s[0] through s[2] +* +* NOTE(review): "TP" presumably stands for triple precision, matching the +* three-double result -- confirm. +*/ +static void +__q_tp_sqrt( const double *x, double *s ) +{ + double c, rr, r[3], tt[3], t[5]; + + /* approximate the divisor for the Newton iteration */ + c = sqrt( ( x[0] + x[1] ) + x[2] ); + rr = half / c; + + /* compute the first five "digits" of the square root */ + t[0] = ( c + two30 ) - two30; + tt[0] = t[0] + t[0]; + r[0] = ( ( x[0] - t[0] * t[0] ) + x[1] ) + x[2]; + + t[1] = ( rr * ( r[0] + x[3] ) + two6 ) - two6; + tt[1] = t[1] + t[1]; + r[0] -= tt[0] * t[1]; + r[1] = x[3] - t[1] * t[1]; + c = ( r[1] + twom18 ) - twom18; + r[0] += c; + r[1] = ( r[1] - c ) + x[4]; + + t[2] = ( rr * ( r[0] + r[1] ) + twom18 ) - twom18; + tt[2] = t[2] + t[2]; + r[0] -= tt[0] * t[2]; + r[1] -= tt[1] * t[2]; + c = ( r[1] + twom42 ) - twom42; + r[0] += c; + r[1] = ( r[1] - c ) - t[2] * t[2]; + + t[3] = ( rr * ( r[0] + r[1] ) + twom42 ) - twom42; + r[0] = ( ( r[0] - tt[0] * t[3] ) + r[1] ) - tt[1] * t[3]; + r[1] = -tt[2] * t[3]; + c = ( r[1] + twom90 ) - twom90; + r[0] += c; + r[1] = ( r[1] - c ) - t[3] * t[3]; + + t[4] = ( rr * ( r[0] + r[1] ) + twom66 ) - twom66; + + /* here we just need to get the sign of the remainder */ + c = ( ( ( ( ( r[0] - tt[0] * t[4] ) - tt[1] * t[4] ) + r[1] ) + - tt[2] * t[4] ) - ( t[3] + t[3] ) * t[4] ) - t[4] * t[4]; + + /* reduce to three doubles */ + t[0] += t[1]; + t[1] = t[2] + t[3]; + t[2] = t[4]; + + /* if the third term might lie on a rounding boundary, perturb it */ + if ( c != zero && t[2] == ( twom62 + t[2] ) - twom62 ) + { + if ( c < zero ) + t[2] -= twom124; + else + t[2] += twom124; + } + + /* condense the square root */ + c = t[1] + t[2]; + t[2] += ( t[1] - c ); + t[1] = c; + c = t[0] + t[1]; + s[1] = t[1] + ( t[0] - c ); + s[0] = c; + if ( s[1] == zero ) + { + c = s[0] + t[2]; + s[1] = t[2] + ( s[0] - c ); + s[0] = c; + s[2] = zero; + } + else + { + c = s[1] + t[2]; + s[2] = t[2] + ( s[1] - c ); + s[1] = c; + } +} + + 
+long double +sqrtl( long double ldx ) +{ + union longdouble x; + volatile double t; + double xx[5], zz[3]; + enum fp_direction_type rm; + int ex, inexact, exc, traps; + + /* clear cexc */ + t = zero; + t -= zero; + + /* check for zero operand */ + x.d = ldx; + if ( !( ( x.l.msw & 0x7fffffff ) | x.l.frac2 | x.l.frac3 | x.l.frac4 ) ) + return ldx; /* the sign bit is masked out of the test, so a zero of either sign is returned unchanged */ + + /* handle nan and inf cases */ + if ( ( x.l.msw & 0x7fffffff ) >= 0x7fff0000 ) + { + if ( ( x.l.msw & 0xffff ) | x.l.frac2 | x.l.frac3 | x.l.frac4 ) + { + if ( !( x.l.msw & 0x8000 ) ) + { + /* snan, signal invalid */ + t += snan.d; + } + x.l.msw |= 0x8000; /* force the bit tested above, so the NaN is returned with it set */ + return x.d; + } + if ( x.l.msw & 0x80000000 ) + { + /* sqrt(-inf), signal invalid */ + t = -one; + t = sqrt( t ); + return qnan.d; + } + /* sqrt(inf), return inf */ + return x.d; + } + + /* handle negative numbers */ + if ( x.l.msw & 0x80000000 ) + { + t = -one; + t = sqrt( t ); /* same double sqrt of -1 as the -inf case above */ + return qnan.d; + } + + /* now x is finite, positive */ + + traps = __swapTE( 0 ); + exc = __swapEX( 0 ); + rm = __swapRD( fp_nearest ); /* the three saved values are handed back to the same __swap routines before returning */ + + ex = __q_unpack( &x, xx ); + if ( ex & 1 ) + { + /* make exponent even */ + xx[0] += xx[0]; + xx[1] += xx[1]; + xx[2] += xx[2]; + xx[3] += xx[3]; + xx[4] += xx[4]; + ex--; /* the doubling of all five words above compensates */ + } + __q_tp_sqrt( xx, zz ); + + /* put everything together */ + x.l.msw = 0; + inexact = 0; + __q_pack( zz, ex >> 1, rm, &x, &inexact ); /* exponent of the root is half the even exponent; rm is the rounding direction saved above */ + + (void) __swapRD( rm ); + (void) __swapEX( exc ); + (void) __swapTE( traps ); + if ( inexact ) + { + t = huge; + t += tiny; /* performed after the saved state is put back, and only when __q_pack reported inexact */ + } + return x.d; +} diff --git a/usr/src/libm/src/Q/tanhl.c b/usr/src/libm/src/Q/tanhl.c new file mode 100644 index 0000000..2a4bfbb --- /dev/null +++ b/usr/src/libm/src/Q/tanhl.c @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tanhl.c 1.8 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak tanhl = __tanhl +#endif + +/* + * tanhl(x) returns the hyperbolic tangent of x + * + * Method : + * 1. reduce x to non-negative: tanhl(-x) = - tanhl(x). + * 2. + * 0 < x <= small : tanhl(x) := x + * -expm1l(-2x) + * small < x <= 1 : tanhl(x) := -------------- + * expm1l(-2x) + 2 + * 2 + * 1 < x <= threshold : tanhl(x) := 1 - --------------- + * expm1l(2x) + 2 + * threshold < x <= INF : tanhl(x) := 1. + * + * where + * single : small = 1.e-5 threshold = 11.0 + * double : small = 1.e-10 threshold = 22.0 + * quad : small = 1.e-20 threshold = 45.0 + * + * Note: threshold was chosen so that + * fl(1.0+2/(expm1(2*threshold)+2)) == 1. + * + * Special cases: + * tanhl(NaN) is NaN; + * only tanhl(0.0)=0.0 is exact for finite argument.
+ */ + +#include "libm.h" + +static const long double small = 1.0e-20L, one = 1.0, two = 2.0, +#ifndef lint + big = 1.0e+20L, +#endif + threshold = 45.0L; + +long double +tanhl(long double x) { + long double t, y, z; + int signx; + + if (isnanl(x)) + return (x + x); /* x is NaN */ + signx = signbitl(x); + t = fabsl(x); + z = one; /* magnitude used when t exceeds threshold */ + if (t <= threshold) { + if (t > one) + z = one - two / (expm1l(t + t) + two); /* 1 < t <= threshold */ + else if (t > small) { /* small < t <= 1 */ + y = expm1l(-t - t); + z = -y / (y + two); + } else { +#ifndef lint + volatile long double dummy = t + big; + /* inexact if t != 0 */ +#endif + return (x); /* t <= small: the argument itself is the result, sign included */ + } + } else if (!finitel(t)) + return (copysignl(one, x)); /* infinite x: one with the sign of x */ + else + return (signx ? -z + small * small : z - small * small); /* finite t above threshold: z is still one here, adjusted toward zero by small * small */ + return (signx ? -z : z); /* reapply the sign removed by fabsl */ +} diff --git a/usr/src/libm/src/Q/tanl.c b/usr/src/libm/src/Q/tanl.c new file mode 100644 index 0000000..7e9a566 --- /dev/null +++ b/usr/src/libm/src/Q/tanl.c @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tanl.c 1.7 06/01/31 SMI" + +/* + * tanl(x) + * Table look-up algorithm by K.C. Ng, November, 1989.
+ * + * kernel function: + * __k_tanl ... tangent function on [-pi/4,pi/4] + * __rem_pio2l ... argument reduction routine + * + * Method. + * Let S and C denote the sin and cos respectively on [-PI/4, +PI/4]. + * 1. Assume the argument x is reduced to y1+y2 = x-k*pi/2 in + * [-pi/2 , +pi/2], and let n = k mod 4. + * 2. Let S=S(y1+y2), C=C(y1+y2). Depending on n, we have + * + * n sin(x) cos(x) tan(x) + * ---------------------------------------------------------- + * 0 S C S/C + * 1 C -S -C/S + * 2 -S -C S/C + * 3 -C S -C/S + * ---------------------------------------------------------- + * + * Special cases: + * Let trig be any of sin, cos, or tan. + * trig(+-INF) is NaN, with signals; + * trig(NaN) is that NaN; + * + * Accuracy: + * computer TRIG(x) returns trig(x) nearly rounded. + */ + +#pragma weak tanl = __tanl + +#include "libm.h" +#include "longdouble.h" + +long double +tanl(long double x) { + long double y[2], z = 0.0L; + int n, ix; + + ix = *(int *) &x; /* High word of x; this reads the first int-sized word of the object, so it relies on the sign/exponent word being stored first */ + ix &= 0x7fffffff; /* drop the sign bit */ + if (ix <= 0x3ffe9220) /* |x| ~< pi/4 */ + return (__k_tanl(x, z, 0)); /* no reduction: zero tail, flag 0 */ + else if (ix >= 0x7fff0000) /* trig(Inf or NaN) is NaN */ + return (x - x); + else { /* argument reduction needed */ + n = __rem_pio2l(x, y); + return (__k_tanl(y[0], y[1], (n & 1))); /* only the parity of n is passed on; in the table above odd n corresponds to -C/S */ + } +} diff --git a/usr/src/libm/src/R/_TBL_r_atan_.c b/usr/src/libm/src/R/_TBL_r_atan_.c new file mode 100644 index 0000000..67e4fcb --- /dev/null +++ b/usr/src/libm/src/R/_TBL_r_atan_.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)_TBL_r_atan_.c 1.9 06/01/31 SMI" + +/* + * Table of constants for r_atan_(). + * By K.C. Ng, March 9, 1989 + */ + +#include "libm.h" + +const float _TBL_r_atan_hi[] = { + 4.636476040e-01, 4.883339405e-01, 5.123894811e-01, 5.358112454e-01, + 5.585992932e-01, 5.807563663e-01, 6.022873521e-01, 6.231993437e-01, + 6.435011029e-01, 6.632030010e-01, 6.823165417e-01, 7.008544207e-01, + 7.188299894e-01, 7.362574339e-01, 7.531512976e-01, 7.695264816e-01, + 7.853981853e-01, 8.156919479e-01, 8.441540003e-01, 8.709034324e-01, + 8.960554004e-01, 9.197195768e-01, 9.420000315e-01, 9.629943371e-01, + 9.827937484e-01, 1.001483083e+00, 1.019141316e+00, 1.035841227e+00, + 1.051650167e+00, 1.066630363e+00, 1.080839038e+00, 1.094328880e+00, + 1.107148767e+00, 1.130953789e+00, 1.152572036e+00, 1.172273874e+00, + 1.190289974e+00, 1.206817389e+00, 1.222025275e+00, 1.236059427e+00, + 1.249045730e+00, 1.261093378e+00, 1.272297382e+00, 1.282740831e+00, + 1.292496681e+00, 1.301628828e+00, 1.310193896e+00, 1.318242073e+00, + 1.325817704e+00, 1.339705706e+00, 1.352127433e+00, 1.363300085e+00, + 1.373400807e+00, 1.382574797e+00, 1.390942812e+00, 1.398605466e+00, + 1.405647635e+00, 1.412141085e+00, 1.418146968e+00, 1.423717976e+00, + 1.428899288e+00, 1.433730125e+00, 1.438244820e+00, 1.442473054e+00, + 1.446441293e+00, +}; + +const float _TBL_r_atan_lo[] = { + +5.012158688e-09, +1.055042365e-08, -2.075691974e-08, -7.480973174e-09, + +2.211159789e-08, -1.268522887e-08, -5.950149262e-09, 
-1.374726910e-08, + +5.868937336e-09, -8.316245470e-09, +1.320299514e-08, -1.277747597e-08, + +1.018833551e-08, -4.909868068e-09, -1.660708016e-08, -1.222759671e-09, + -2.185569414e-08, -2.462078896e-08, -1.416911655e-08, +2.470642002e-08, + -1.580020736e-08, +2.851478520e-08, +8.908211058e-09, -6.400973085e-09, + -2.513142405e-08, +5.292293181e-08, +2.785247055e-08, +2.643104224e-08, + +4.603683834e-08, +1.851388043e-09, -3.735403453e-08, +2.701113111e-08, + -4.872354964e-08, -4.477816518e-08, -3.857382325e-08, +6.845639611e-09, + -2.453011483e-08, -1.824929363e-08, +4.798058129e-08, +6.221672777e-08, + +4.276110843e-08, +4.185424007e-09, +1.285398099e-08, +4.836914869e-08, + -1.342359379e-08, +5.960489879e-09, +3.875391386e-08, -2.204224536e-08, + -4.053271141e-08, -4.604370218e-08, -5.190222652e-08, +1.529194549e-08, + -4.043566193e-08, +2.481348993e-08, +1.503647518e-08, +4.638297924e-08, + +1.392036975e-08, -2.006252586e-08, +3.051175312e-08, -4.209960824e-09, + -1.598675681e-08, +2.705746205e-08, -2.514289044e-08, +4.517691110e-08, + +3.948537852e-08, +}; diff --git a/usr/src/libm/src/R/__cosf.c b/usr/src/libm/src/R/__cosf.c new file mode 100644 index 0000000..07cfbbd --- /dev/null +++ b/usr/src/libm/src/R/__cosf.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__cosf.c 1.15 06/01/31 SMI" + +#include "libm.h" + +/* INDENT OFF */ +/* + * float __k_cosf(double x); + * kernel (float) cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Method: Let z = x * x, then + * C(x) = (C0 + C1*z + C2*z*z) * (C3 + C4*z + z*z) + * where + * C0 = 1.09349482127188401868272000389539985058873853699e-0003 + * C1 = -5.03324285989964979398034700054920226866107675091e-0004 + * C2 = 2.43792880266971107750418061559602239831538067410e-0005 + * C3 = 9.14499072605666582228127405245558035523741471271e+0002 + * C4 = -3.63151270591815439197122504991683846785293207730e+0001 + * + * The Remez error is bounded by |cos(x) - C(x)| < 2**(-34.2) + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown.
+ */ +/* INDENT ON */ + +static const double q[] = { +/* C0 = */ 1.09349482127188401868272000389539985058873853699e-0003, +/* C1 = */ -5.03324285989964979398034700054920226866107675091e-0004, +/* C2 = */ 2.43792880266971107750418061559602239831538067410e-0005, +/* C3 = */ 9.14499072605666582228127405245558035523741471271e+0002, +/* C4 = */ -3.63151270591815439197122504991683846785293207730e+0001, +}; + +#define C0 q[0] +#define C1 q[1] +#define C2 q[2] +#define C3 q[3] +#define C4 q[4] + +float +__k_cosf(double x) { + float ft; + double z; + int hx; + + hx = ((int *) &x)[HIWORD]; /* hx = leading x */ + if ((hx & ~0x80000000) < 0x3f100000) { /* |x| < 2**-14 */ + ft = (float) 1; /* the result for such a small argument is 1 */ + if (((int) x) == 0) /* raise inexact if x != 0 */ + return (ft); /* always taken here: any |x| < 2**-14 truncates to 0 */ + } + z = x * x; + ft = (float) (((C0 + z * C1) + (z * z) * C2) * (C3 + z * (C4 + z))); /* product of a quadratic in z and a monic quadratic in z, evaluated in double and rounded once to float */ + return (ft); +} diff --git a/usr/src/libm/src/R/__sincosf.c b/usr/src/libm/src/R/__sincosf.c new file mode 100644 index 0000000..503cfc6 --- /dev/null +++ b/usr/src/libm/src/R/__sincosf.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)__sincosf.c 1.12 06/01/31 SMI" + +#include "libm.h" + +/* INDENT OFF */ +/* + * void __k_sincosf(double x, float *s, float *c); + * kernel (float) sincos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Method: Let z = x * x, then + * S(x) = x(S0 + S1*z)(S2 + S3*z + z*z) + * C(x) = (C0 + C1*z + C2*z*z) * (C3 + C4*z + z*z) + * where + * S0 = 1.85735322054308378716204874632872525989806770558e-0003 + * S1 = -1.95035094218403635082921458859320791358115801259e-0004 + * S2 = 5.38400550766074785970952495168558701485841707252e+0002 + * S3 = -3.31975110777873728964197739157371509422022905947e+0001 + * C0 = 1.09349482127188401868272000389539985058873853699e-0003 + * C1 = -5.03324285989964979398034700054920226866107675091e-0004 + * C2 = 2.43792880266971107750418061559602239831538067410e-0005 + * C3 = 9.14499072605666582228127405245558035523741471271e+0002 + * C4 = -3.63151270591815439197122504991683846785293207730e+0001 + * + * The Remez error in S is bounded by |(sin(x) - S(x))/x| < 2**(-28.2) + * The Remez error in C is bounded by |cos(x) - C(x)| < 2**(-34.2) + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown.
+ */ +/* INDENT ON */ + +static const double q[] = { +/* S0 = */ 1.85735322054308378716204874632872525989806770558e-0003, +/* S1 = */ -1.95035094218403635082921458859320791358115801259e-0004, +/* S2 = */ 5.38400550766074785970952495168558701485841707252e+0002, +/* S3 = */ -3.31975110777873728964197739157371509422022905947e+0001, +/* C0 = */ 1.09349482127188401868272000389539985058873853699e-0003, +/* C1 = */ -5.03324285989964979398034700054920226866107675091e-0004, +/* C2 = */ 2.43792880266971107750418061559602239831538067410e-0005, +/* C3 = */ 9.14499072605666582228127405245558035523741471271e+0002, +/* C4 = */ -3.63151270591815439197122504991683846785293207730e+0001, +}; + + +#define S0 q[0] +#define S1 q[1] +#define S2 q[2] +#define S3 q[3] +#define C0 q[4] +#define C1 q[5] +#define C2 q[6] +#define C3 q[7] +#define C4 q[8] + +void +__k_sincosf(double x, float *s, float *c) { + double z; + int hx; + + hx = ((int *) &x)[HIWORD]; /* hx = leading x */ + /* small argument */ + if ((hx & ~0x80000000) < 0x3f100000) { /* if |x| < 2**-14 */ + *s = (float) x; *c = (float) 1; /* sine result is x itself, cosine result is 1 */ + if ((int) x == 0) /* raise inexact if x!=0 */ + return; /* always taken here: any |x| < 2**-14 truncates to 0 */ + } + z = x * x; + *s = (float) ((x * (S0 + z * S1)) * (S2 + z * (S3 + z))); /* x times a linear and a monic quadratic factor in z, rounded once to float */ + *c = (float) (((C0 + z * C1) + (z * z) * C2) * (C3 + z * (C4 + z))); /* quadratic times monic quadratic in z, rounded once to float */ +} diff --git a/usr/src/libm/src/R/__sinf.c b/usr/src/libm/src/R/__sinf.c new file mode 100644 index 0000000..5b3d07c --- /dev/null +++ b/usr/src/libm/src/R/__sinf.c @@ -0,0 +1,83 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__sinf.c 1.14 06/01/31 SMI" + + +#include "libm.h" + +/* INDENT OFF */ +/* + * float __k_sinf(double x); + * kernel (float) sin function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. + * + * Method: Let z = x * x, then + * S(x) = x(S0 + S1*z)(S2 + S3*z + z*z) + * where + * S0 = 1.85735322054308378716204874632872525989806770558e-0003, + * S1 = -1.95035094218403635082921458859320791358115801259e-0004, + * S2 = 5.38400550766074785970952495168558701485841707252e+0002, + * S3 = -3.31975110777873728964197739157371509422022905947e+0001, + * + * The Remez error is bounded by |(sin(x) - S(x))/x| < 2**(-28.2) + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown.
+ */ +/* INDENT ON */ + +static const double q[] = { +/* S0 = */ 1.85735322054308378716204874632872525989806770558e-0003, +/* S1 = */ -1.95035094218403635082921458859320791358115801259e-0004, +/* S2 = */ 5.38400550766074785970952495168558701485841707252e+0002, +/* S3 = */ -3.31975110777873728964197739157371509422022905947e+0001, +}; + +#define S0 q[0] +#define S1 q[1] +#define S2 q[2] +#define S3 q[3] + +float +__k_sinf(double x) { + float ft; + double z; + int hx; + + hx = ((int *) &x)[HIWORD]; /* hx = leading x */ + if ((hx & ~0x80000000) < 0x3f100000) { /* if |x| < 2**-14 */ + ft = (float) x; /* the result for such a small argument is x itself */ + if ((int) x == 0) /* raise inexact if x!=0 */ + return (ft); /* always taken here: any |x| < 2**-14 truncates to 0 */ + } + z = x * x; + ft = (float) ((x * (S0 + z * S1)) * (S2 + z * (S3 + z))); /* x times a linear and a monic quadratic factor in z, evaluated in double and rounded once to float */ + return (ft); +} diff --git a/usr/src/libm/src/R/__tanf.c b/usr/src/libm/src/R/__tanf.c new file mode 100644 index 0000000..9255b6e --- /dev/null +++ b/usr/src/libm/src/R/__tanf.c @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)__tanf.c 1.14 06/01/31 SMI" + +#include "libm.h" + +/* INDENT OFF */ +/* + * float __k_tanf(double x, int n); + * kernel (float) tan function on [-pi/4, pi/4], pi/4 ~ 0.785398164 + * Input x is in double and assumed to be bounded by ~pi/4 in magnitude. When n is 0 the approximation itself is returned; for any other n its negative reciprocal is returned. + * + * Constants: + * The hexadecimal values are the intended ones for the following constants. + * The decimal values may be used, provided that the compiler will convert + * from decimal to binary accurately enough to produce the hexadecimal values + * shown. + */ + +static const double q[] = { +/* one */ 1.0, +/* P0 */ 4.46066928428959230679140546271810308098793029785e-0003, +/* P1 */ 4.92165316309189027066395283327437937259674072266e+0000, +/* P2 */ -7.11410648161473480044492134766187518835067749023e-0001, +/* P3 */ 4.08549808374053391446523164631798863410949707031e+0000, +/* P4 */ 2.50411070398050927821032018982805311679840087891e+0000, +/* P5 */ 1.11492064560251158411574579076841473579406738281e+0001, +/* P6 */ -1.50565540968422650891511693771462887525558471680e+0000, +/* P7 */ -1.81484378878349295050043110677506774663925170898e+0000, +/* T0 */ 3.333335997532835641297409611782510896641e-0001, +/* T1 */ 2.999997598248363761541668282006867229939e+00, +}; +/* INDENT ON */ + +#define one q[0] +#define P0 q[1] +#define P1 q[2] +#define P2 q[3] +#define P3 q[4] +#define P4 q[5] +#define P5 q[6] +#define P6 q[7] +#define P7 q[8] +#define T0 q[9] +#define T1 q[10] + +float +__k_tanf(double x, int n) { + float ft; + double z, w; + int ix; + + ix = ((int *) &x)[HIWORD] & ~0x80000000; /* ix = leading |x| */ + /* small argument */ + if (ix < 0x3f800000) { /* if |x| < 0.0078125 = 2**-7 */ + if (ix < 0x3f100000) { /* if |x| < 2**-14 */ + if ((int) x == 0) { /* raise inexact if x!=0 */ + ft = n == 0 ? (float) x : (float) (-one / x); + } + return (ft); /* ft is always assigned above: any |x| < 2**-14 truncates to 0 */ + } + z = (x * T0) * (T1 + x * x); /* short form used for 2**-14 <= |x| < 2**-7 */ + ft = n == 0 ? +
(float) z : (float) (-one / z); + return (ft); + } + z = x * x; + w = ((P0 * x) * (P1 + z * (P2 + z)) * (P3 + z * (P4 + z))) + * (P5 + z * (P6 + z * (P7 + z))); /* x times two monic quadratics and one monic cubic in z, scaled by P0 */ + ft = n == 0 ? (float) w : (float) (-one / w); /* the division is done in double; only the final value is rounded to float */ + return (ft); +} diff --git a/usr/src/libm/src/R/acosf.c b/usr/src/libm/src/R/acosf.c new file mode 100644 index 0000000..48e6a84 --- /dev/null +++ b/usr/src/libm/src/R/acosf.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)acosf.c 1.12 06/01/23 SMI" + +#pragma weak acosf = __acosf + +#include "libm.h" + +static const float zero = 0.0f; + +float +acosf(float x) { + int ix; + + ix = *(int *)&x & ~0x80000000; /* bit pattern of x with the sign bit cleared */ + if (ix > 0x3f800000) /* |x| > 1 or x is nan */ + return ((x * zero) / zero); /* dividing by zero here yields NaN for every such x */ + return ((float)acos((double)x)); /* in range: evaluate in double, then round to float */ +} diff --git a/usr/src/libm/src/R/acoshf.c b/usr/src/libm/src/R/acoshf.c new file mode 100644 index 0000000..6e95ed3 --- /dev/null +++ b/usr/src/libm/src/R/acoshf.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)acoshf.c 1.9 06/01/23 SMI" + +#pragma weak acoshf = __acoshf + +#include "libm.h" + +static const float zero = 0.0f; + +float +acoshf(float x) { + int hx; + + hx = *(int *)&x; /* raw bit pattern, sign bit kept: any x with the sign bit set is a negative int and fails the first test below */ + if (hx < 0x3f800000 || hx > 0x7f800000) /* x < 1 or x is nan */ + return ((x * zero) / zero); /* dividing by zero here yields NaN for every such x */ + return ((float)acosh((double)x)); /* x >= 1, positive infinity included: evaluate in double, then round to float */ +} diff --git a/usr/src/libm/src/R/asinf.c b/usr/src/libm/src/R/asinf.c new file mode 100644 index 0000000..3cf94b4 --- /dev/null +++ b/usr/src/libm/src/R/asinf.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)asinf.c 1.12 06/01/23 SMI" + +#pragma weak asinf = __asinf + +#include "libm.h" + +static const float zero = 0.0f; + +float +asinf(float x) { + int ix; + + ix = *(int *)&x & ~0x80000000; /* bit pattern of x with the sign bit cleared */ + if (ix > 0x3f800000) /* |x| > 1 or x is nan */ + return ((x * zero) / zero); /* dividing by zero here yields NaN for every such x */ + return ((float)asin((double)x)); /* in range: evaluate in double, then round to float */ +} diff --git a/usr/src/libm/src/R/asinhf.c b/usr/src/libm/src/R/asinhf.c new file mode 100644 index 0000000..914d299 --- /dev/null +++ b/usr/src/libm/src/R/asinhf.c @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)asinhf.c 1.8 06/01/31 SMI" + +#pragma weak asinhf = __asinhf + +#include "libm.h" + +float +asinhf(float x) { + if (isnanf(x)) { + return (x * x); /* + -> * for Cheetah */ + } else { + return ((float) asinh((double) x)); /* every non-NaN argument: evaluate in double, then round to float */ + } +} diff --git a/usr/src/libm/src/R/atan2f.c b/usr/src/libm/src/R/atan2f.c new file mode 100644 index 0000000..e8c8e04 --- /dev/null +++ b/usr/src/libm/src/R/atan2f.c @@ -0,0 +1,343 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)atan2f.c 1.5 06/01/23 SMI" + +#pragma weak atan2f = __atan2f + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +/* + * For i = 0, ..., 192, let x[i] be the double precision number whose + * high order 32 bits are 0x3f900000 + (i << 16) and whose low order + * 32 bits are zero. Then TBL[i] := atan(x[i]) to double precision.
#if defined(__i386) && !defined(__amd64)
extern int __swapRP(int);
#endif

/*
 * For i = 0, ..., 192, let x[i] be the double precision number whose
 * high order 32 bits are 0x3f900000 + (i << 16) and whose low order
 * 32 bits are zero.  Then TBL[i] := atan(x[i]) to double precision.
 */
static const double TBL[] = {
	1.56237286204768313e-02,
	1.66000375562312640e-02,
	1.75763148444955872e-02,
	1.85525586258889763e-02,
	1.95287670414137082e-02,
	2.05049382324763683e-02,
	2.14810703409090559e-02,
	2.24571615089905717e-02,
	2.34332098794675855e-02,
	2.44092135955758099e-02,
	2.53851708010611396e-02,
	2.63610796402007873e-02,
	2.73369382578244127e-02,
	2.83127447993351995e-02,
	2.92884974107309737e-02,
	3.02641942386252458e-02,
	3.12398334302682774e-02,
	3.31909314971115949e-02,
	3.51417768027967800e-02,
	3.70923545503918164e-02,
	3.90426499551669928e-02,
	4.09926482452637811e-02,
	4.29423346623621707e-02,
	4.48916944623464972e-02,
	4.68407129159696539e-02,
	4.87893753095156174e-02,
	5.07376669454602178e-02,
	5.26855731431300420e-02,
	5.46330792393594777e-02,
	5.65801705891457105e-02,
	5.85268325663017702e-02,
	6.04730505641073168e-02,
	6.24188099959573500e-02,
	6.63088949198234884e-02,
	7.01969710718705203e-02,
	7.40829225490337306e-02,
	7.79666338315423008e-02,
	8.18479898030765457e-02,
	8.57268757707448092e-02,
	8.96031774848717461e-02,
	9.34767811585894698e-02,
	9.73475734872236709e-02,
	1.01215441667466668e-01,
	1.05080273416329528e-01,
	1.08941956989865793e-01,
	1.12800381201659389e-01,
	1.16655435441069349e-01,
	1.20507009691224562e-01,
	1.24354994546761438e-01,
	1.32039761614638762e-01,
	1.39708874289163648e-01,
	1.47361481088651630e-01,
	1.54996741923940973e-01,
	1.62613828597948568e-01,
	1.70211925285474408e-01,
	1.77790228992676075e-01,
	1.85347949995694761e-01,
	1.92884312257974672e-01,
	2.00398553825878512e-01,
	2.07889927202262986e-01,
	2.15357699697738048e-01,
	2.22801153759394521e-01,
	2.30219587276843718e-01,
	2.37612313865471242e-01,
	2.44978663126864143e-01,
	2.59629629408257512e-01,
	2.74167451119658789e-01,
	2.88587361894077410e-01,
	3.02884868374971417e-01,
	3.17055753209147029e-01,
	3.31096076704132103e-01,
	3.45002177207105132e-01,
	3.58770670270572245e-01,
	3.72398446676754202e-01,
	3.85882669398073752e-01,
	3.99220769575252543e-01,
	4.12410441597387323e-01,
	4.25449637370042266e-01,
	4.38336559857957830e-01,
	4.51069655988523499e-01,
	4.63647609000806094e-01,
	4.88333951056405535e-01,
	5.12389460310737732e-01,
	5.35811237960463704e-01,
	5.58599315343562441e-01,
	5.80756353567670414e-01,
	6.02287346134964152e-01,
	6.23199329934065904e-01,
	6.43501108793284371e-01,
	6.63202992706093286e-01,
	6.82316554874748071e-01,
	7.00854407884450192e-01,
	7.18829999621624527e-01,
	7.36257428981428097e-01,
	7.53151280962194414e-01,
	7.69526480405658297e-01,
	7.85398163397448279e-01,
	8.15691923316223422e-01,
	8.44153986113171051e-01,
	8.70903457075652976e-01,
	8.96055384571343927e-01,
	9.19719605350416858e-01,
	9.42000040379463610e-01,
	9.62994330680936206e-01,
	9.82793723247329054e-01,
	1.00148313569423464e+00,
	1.01914134426634972e+00,
	1.03584125300880014e+00,
	1.05165021254837376e+00,
	1.06663036531574362e+00,
	1.08083900054116833e+00,
	1.09432890732118993e+00,
	1.10714871779409041e+00,
	1.13095374397916038e+00,
	1.15257199721566761e+00,
	1.17227388112847630e+00,
	1.19028994968253166e+00,
	1.20681737028525249e+00,
	1.22202532321098967e+00,
	1.23605948947808186e+00,
	1.24904577239825443e+00,
	1.26109338225244039e+00,
	1.27229739520871732e+00,
	1.28274087974427076e+00,
	1.29249666778978534e+00,
	1.30162883400919616e+00,
	1.31019393504755555e+00,
	1.31824205101683711e+00,
	1.32581766366803255e+00,
	1.33970565959899957e+00,
	1.35212738092095464e+00,
	1.36330010035969384e+00,
	1.37340076694501589e+00,
	1.38257482149012589e+00,
	1.39094282700241845e+00,
	1.39860551227195762e+00,
	1.40564764938026987e+00,
	1.41214106460849531e+00,
	1.41814699839963154e+00,
	1.42371797140649403e+00,
	1.42889927219073276e+00,
	1.43373015248470903e+00,
	1.43824479449822262e+00,
	1.44247309910910193e+00,
	1.44644133224813509e+00,
	1.45368758222803240e+00,
	1.46013910562100091e+00,
	1.46591938806466282e+00,
	1.47112767430373470e+00,
	1.47584462045214027e+00,
	1.48013643959415142e+00,
	1.48405798811891154e+00,
	1.48765509490645531e+00,
	1.49096634108265924e+00,
	1.49402443552511865e+00,
	1.49685728913695626e+00,
	1.49948886200960629e+00,
	1.50193983749385196e+00,
	1.50422816301907281e+00,
	1.50636948736934317e+00,
	1.50837751679893928e+00,
	1.51204050407917401e+00,
	1.51529782154917969e+00,
	1.51821326518395483e+00,
	1.52083793107295384e+00,
	1.52321322351791322e+00,
	1.52537304737331958e+00,
	1.52734543140336587e+00,
	1.52915374769630819e+00,
	1.53081763967160667e+00,
	1.53235373677370856e+00,
	1.53377621092096650e+00,
	1.53509721411557254e+00,
	1.53632722579538861e+00,
	1.53747533091664934e+00,
	1.53854944435964280e+00,
	1.53955649336462841e+00,
	1.54139303859089161e+00,
	1.54302569020147562e+00,
	1.54448660954197448e+00,
	1.54580153317597646e+00,
	1.54699130060982659e+00,
	1.54807296595325550e+00,
	1.54906061995310385e+00,
	1.54996600675867957e+00,
	1.55079899282174605e+00,
	1.55156792769518947e+00,
	1.55227992472688747e+00,
	1.55294108165534417e+00,
	1.55355665560036682e+00,
	1.55413120308095598e+00,
	1.55466869295126031e+00,
	1.55517259817441977e+00,
};

static const double
	pio4 = 7.8539816339744827900e-01,
	pio2 = 1.5707963267948965580e+00,
	negpi = -3.1415926535897931160e+00,
	q1 = -3.3333333333296428046e-01,
	q2 = 1.9999999186853752618e-01,
	zero = 0.0;

static const float two24 = 16777216.0;

/*
 * atan2f(fy, fx): single-precision arc tangent of fy/fx, using the signs
 * of both arguments to select the quadrant.
 *
 * Outline of the code below:
 *   1. Swap/negate the operands so that 0 <= y <= x, recording in `a'
 *      the multiple of pi/2 to add and in `sign' whether the final
 *      result must be negated.
 *   2. Handle NaN, infinities, and the case where x exceeds y by so
 *      much (exponent fields differ by >= 0x0c800000) that no table
 *      step is taken.
 *   3. Scale both operands by 2^24 when y is subnormal.
 *   4. Pick `base', a float sharing the exponent and the top four
 *      mantissa bits of y/x as estimated from the bit patterns, add
 *      atan(base) from TBL, and finish with a short odd polynomial in
 *      t = (y - x*base) / (x + y*base).
 *
 * Only TBL[0..96] can be indexed here: after step 1 the estimated ratio
 * never exceeds 1.0, whose index is 96.
 */
float
atan2f(float fy, float fx)
{
	/*
	 * All float <-> int reinterpretation goes through this union;
	 * casting a float pointer to an int pointer and dereferencing it
	 * violates the effective-type (strict aliasing) rule.
	 */
	union {
		float f;
		int i;
	} cvt;
	double a, t, s, dbase;
	float x, y, base;
	int i, k, hx, hy, ix, iy, sign;
#if defined(__i386) && !defined(__amd64)
	int rp;
#endif

	cvt.f = fy;
	iy = cvt.i;
	cvt.f = fx;
	ix = cvt.i;
	hy = iy & 0x7fffffff;
	hx = ix & 0x7fffffff;

	sign = 0;
	if (hy > hx) {
		/* |fy| > |fx|: work with the reciprocal ratio */
		x = fy;
		y = fx;
		i = hx;
		hx = hy;
		hy = i;
		if (iy < 0) {
			x = -x;
			sign = 1;
		}
		if (ix < 0) {
			y = -y;
			a = pio2;
		} else {
			a = -pio2;
			sign = 1 - sign;
		}
	} else {
		y = fy;
		x = fx;
		if (iy < 0) {
			y = -y;
			sign = 1;
		}
		if (ix < 0) {
			x = -x;
			a = negpi;
			sign = 1 - sign;
		} else {
			a = zero;
		}
	}

	if (hx >= 0x7f800000 || hx - hy >= 0x0c800000) {
		if (hx >= 0x7f800000) {
			if (hx > 0x7f800000)	/* nan */
				return (x * y);
			else if (hy >= 0x7f800000)
				a += pio4;
		} else if ((int)a == 0) {
			/* only a == zero truncates to 0 among the offsets */
			a = (double)y / x;
		}
		return ((float)((sign)? -a : a));
	}

	if (hy < 0x00800000) {
		if (hy == 0)
			return ((float)((sign)? -a : a));
		/* scale subnormal y */
		y *= two24;
		x *= two24;
		cvt.f = y;
		hy = cvt.i;
		cvt.f = x;
		hx = cvt.i;
	}

#if defined(__i386) && !defined(__amd64)
	rp = __swapRP(fp_extended);
#endif
	k = (hy - hx + 0x3f800000) & 0xfff80000;
	if (k >= 0x3c800000) {	/* |y/x| >= 1/64 */
		cvt.i = k;
		base = cvt.f;
		k = (k - 0x3c800000) >> 19;
		a += TBL[k];
	} else {
		/*
		 * For some reason this is faster on USIII than just
		 * doing t = y/x in this case.
		 */
		cvt.i = 0;
		base = cvt.f;
	}
	dbase = (double)base;
	t = (y - x * dbase) / (x + y * dbase);
	s = t * t;
	a = (a + t) + t * s * (q1 + s * q2);
#if defined(__i386) && !defined(__amd64)
	if (rp != fp_extended)
		(void) __swapRP(rp);
#endif
	return ((float)((sign)? -a : a));
}
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)atan2pif.c 1.8 06/01/23 SMI" + +#pragma weak atan2pif = __atan2pif + +#include "libm.h" + +static const double invpi = 0.3183098861837906715377675; + +float +atan2pif(float y, float x) { + int ix, iy, hx, hy; + + ix = *(int *)&x; + iy = *(int *)&y; + hx = ix & ~0x80000000; + hy = iy & ~0x80000000; + if (hx > 0x7f800000 || hy > 0x7f800000) /* x or y is nan */ + return (x * y); + if ((hx | hy) == 0) { + /* x and y are both zero */ + if (ix == 0) + return (y); + return ((iy == 0)? 1.0f : -1.0f); + } + return ((float)(invpi * atan2((double)y, (double)x))); +} diff --git a/usr/src/libm/src/R/atanf.c b/usr/src/libm/src/R/atanf.c new file mode 100644 index 0000000..3e029b6 --- /dev/null +++ b/usr/src/libm/src/R/atanf.c @@ -0,0 +1,195 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
/* INDENT OFF */
/*
 * float atanf(float x);
 * Table look-up algorithm
 * By K.C. Ng, March 9, 1989
 *
 * Algorithm.
 *
 * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)).
 * We use poly1(x) to approximate atan(x) for x in [0,1/8] with
 * error (relative)
 *	|(atan(x)-poly1(x))/x|<= 2^-115.94	long double
 *	|(atan(x)-poly1(x))/x|<= 2^-58.85	double
 *	|(atan(x)-poly1(x))/x|<= 2^-25.53	float
 * and use poly2(x) to approximate atan(x) for x in [0,1/65] with
 * error (absolute)
 *	|atan(x)-poly2(x)|<= 2^-122.15		long double
 *	|atan(x)-poly2(x)|<= 2^-64.79		double
 *	|atan(x)-poly2(x)|<= 2^-35.36		float
 * and use poly3(x) to approximate atan(x) for x in [1/8,7/16] with
 * error (relative, only for single precision)
 *	|(atan(x)-poly3(x))/x|<= 2^-25.53	float
 * (the code below applies poly3 for every 1/8 <= |x| < 1/2).
 *
 * Here poly1-3 are odd polynomials of the following form:
 *		x + x^3*(a1+x^2*(a2+...))
 *
 * (0). Purge off Inf and NaN and 0
 * (1). Reduce x to positive by atan(x) = -atan(-x).
 * (2). For x <= 1/8, use
 *	(2.1) if x < 2^(-prec/2-2), atan(x) = x with inexact
 *	(2.2) Otherwise
 *		atan(x) = poly1(x)
 * (3). For x >= 8 then
 *	(3.1) if x >= 2^(prec+2),   atan(x) = atan(inf) - pio2lo
 *	(3.2) if x >= 2^(prec/3+2), atan(x) = atan(inf) - 1/x
 *	(3.3) if x >  65,           atan(x) = atan(inf) - poly2(1/x)
 *	(3.4) Otherwise,            atan(x) = atan(inf) - poly1(1/x)
 *
 * (4). Now x is in (0.125, 8)
 *	Find y that matches x to 4.5 bits after the binary point (easy).
 *	If iy is the high word of y, then
 *		single : j = (iy - 0x3e000000) >> 19
 *		(single is modified to (iy-0x3f000000)>>19)
 *		double : j = (iy - 0x3fc00000) >> 16
 *		quad   : j = (iy - 0x3ffc0000) >> 12
 *
 *	Let s = (x-y)/(1+x*y). Then
 *	atan(x) = atan(y) + poly1(s)
 *		= _TBL_r_atan_hi[j] + (_TBL_r_atan_lo[j] + poly2(s) )
 *
 *	Note. |s| <= 1.5384615385e-02 = 1/65. Maximum occurs at x = 1.03125
 *
 */

extern const float _TBL_r_atan_hi[], _TBL_r_atan_lo[];
static const float
	big = 1.0e37F,
	one = 1.0F,
	p1 = -3.333185951111688247225368498733544672172e-0001F,
	p2 = 1.969352894213455405211341983203180636021e-0001F,
	q1 = -3.332921964095646819563419704110132937456e-0001F,
	a1 = -3.333323465223893614063523351509338934592e-0001F,
	a2 = 1.999425625935277805494082274808174062403e-0001F,
	a3 = -1.417547090509737780085769846290301788559e-0001F,
	a4 = 1.016250813871991983097273733227432685084e-0001F,
	a5 = -5.137023693688358515753093811791755221805e-0002F,
	pio2hi = 1.570796371e+0000F,
	pio2lo = -4.371139000e-0008F;
/* INDENT ON */

/*
 * atanf(xx): single-precision arc tangent; see the algorithm notes
 * above.  The sign of the argument is stripped into `sign' and restored
 * on each return path; NaN arguments return x * x.
 */
float
atanf(float xx)
{
	/*
	 * Float <-> int reinterpretation goes through this union; the
	 * original pointer casts violate the effective-type (strict
	 * aliasing) rule.
	 */
	union {
		float f;
		int i;
	} cvt;
	float x, y, z, r, p, s;
	volatile double dummy;
	int ix, iy, sign, j;

	x = xx;
	cvt.f = x;
	ix = cvt.i;
	sign = ix & 0x80000000;
	ix ^= sign;		/* ix = bit pattern of |x| */

	/* for |x| < 1/8 */
	if (ix < 0x3e000000) {
		if (ix < 0x38800000) {	/* if |x| < 2**(-prec/2-2) */
			dummy = big + x;	/* get inexact flag if x!=0 */
#ifdef lint
			dummy = dummy;
#endif
			return (x);
		}
		z = x * x;
		if (ix < 0x3c000000) {	/* if |x| < 2**(-prec/4-1) */
			x = x + (x * z) * p1;
			return (x);
		} else {
			x = x + (x * z) * (p1 + z * p2);
			return (x);
		}
	}

	/* for |x| >= 8.0 */
	if (ix >= 0x41000000) {
		cvt.i = ix;
		x = cvt.f;	/* x = |x| */
		if (ix < 0x42820000) {	/* x < 65 */
			r = one / x;
			z = r * r;
			y = r * (one + z * (p1 + z * p2));	/* poly1 */
			y -= pio2lo;
		} else if (ix < 0x44800000) {	/* x < 2**(prec/3+2) */
			r = one / x;
			z = r * r;
			y = r * (one + z * q1);	/* poly2 */
			y -= pio2lo;
		} else if (ix < 0x4c800000) {	/* x < 2**(prec+2) */
			y = one / x - pio2lo;
		} else if (ix < 0x7f800000) {	/* x < inf */
			y = -pio2lo;
		} else {	/* x is inf or NaN */
			if (ix > 0x7f800000) {
				return (x * x);	/* - -> * for Cheetah */
			}
			y = -pio2lo;
		}

		if (sign == 0)
			x = pio2hi - y;
		else
			x = y - pio2hi;
		return (x);
	}


	/* now x is between 1/8 and 8 */
	if (ix < 0x3f000000) {	/* between 1/8 and 1/2 */
		z = x * x;
		x = x + (x * z) * (a1 + z * (a2 + z * (a3 + z * (a4 +
		    z * a5))));
		return (x);
	}
	cvt.i = ix;
	x = cvt.f;		/* x = |x| */
	iy = (ix + 0x00040000) & 0x7ff80000;	/* round to 4 mantissa bits */
	cvt.i = iy;
	y = cvt.f;
	j = (iy - 0x3f000000) >> 19;

	if (ix == iy)
		p = x - y;	/* p=0.0 */
	else {
		if (sign == 0)
			s = (x - y) / (one + x * y);
		else
			s = (y - x) / (one + x * y);
		z = s * s;
		p = s * (one + z * q1);
	}
	if (sign == 0) {
		r = p + _TBL_r_atan_lo[j];
		x = r + _TBL_r_atan_hi[j];
	} else {
		r = p - _TBL_r_atan_lo[j];
		x = r - _TBL_r_atan_hi[j];
	}
	return (x);
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)atanhf.c 1.9 06/01/23 SMI" + +#pragma weak atanhf = __atanhf + +#include "libm.h" + +static const float zero = 0.0f; + +float +atanhf(float x) { + int ix; + + ix = *((int *)&x) & ~0x80000000; + if (ix > 0x3f800000) /* |x| > 1 or x is nan */ + return ((x * zero) / zero); + if (ix == 0x3f800000) /* |x| == 1 */ + return (x / zero); + return ((float)atanh((double)x)); +} diff --git a/usr/src/libm/src/R/besself.c b/usr/src/libm/src/R/besself.c new file mode 100644 index 0000000..38429c5 --- /dev/null +++ b/usr/src/libm/src/R/besself.c @@ -0,0 +1,806 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)besself.c 1.9 06/01/23 SMI" + +#pragma weak j0f = __j0f +#pragma weak j1f = __j1f +#pragma weak jnf = __jnf +#pragma weak y0f = __y0f +#pragma weak y1f = __y1f +#pragma weak ynf = __ynf + +#include "libm.h" +#include + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float + zerof = 0.0f, + onef = 1.0f; + +static const double C[] = { + 0.0, + -0.125, + 0.25, + 0.375, + 0.5, + 1.0, + 2.0, + 8.0, + 0.5641895835477562869480794515607725858441, /* 1/sqrt(pi) */ + 0.636619772367581343075535053490057448, /* 2/pi */ + 1.0e9, +}; + +#define zero C[0] +#define neighth C[1] +#define quarter C[2] +#define three8 C[3] +#define half C[4] +#define one C[5] +#define two C[6] +#define eight C[7] +#define isqrtpi C[8] +#define tpi C[9] +#define big C[10] + +static const double Cj0y0[] = { + 0.4861344183386052721391238447e5, /* pr */ + 0.1377662549407112278133438945e6, + 0.1222466364088289731869114004e6, + 0.4107070084315176135583353374e5, + 0.5026073801860637125889039915e4, + 0.1783193659125479654541542419e3, + 0.88010344055383421691677564e0, + 0.4861344183386052721414037058e5, /* ps */ + 0.1378196632630384670477582699e6, + 0.1223967185341006542748936787e6, + 0.4120150243795353639995862617e5, + 0.5068271181053546392490184353e4, + 0.1829817905472769960535671664e3, + 1.0, + -0.1731210995701068539185611951e3, /* qr */ + -0.5522559165936166961235240613e3, + -0.5604935606637346590614529613e3, + -0.2200430300226009379477365011e3, + -0.323869355375648849771296746e2, + -0.14294979207907956223499258e1, + -0.834690374102384988158918e-2, + 0.1107975037248683865326709645e5, /* qs */ + 0.3544581680627082674651471873e5, + 0.3619118937918394132179019059e5, + 0.1439895563565398007471485822e5, + 0.2190277023344363955930226234e4, + 0.106695157020407986137501682e3, + 1.0, +}; + +#define pr Cj0y0 +#define ps (Cj0y0+7) +#define qr (Cj0y0+14) +#define qs (Cj0y0+21) + +static const double Cj0[] = { + 
-2.500000000000003622131880894830476755537e-0001, /* r0 */ + 1.095597547334830263234433855932375353303e-0002, + -1.819734750463320921799187258987098087697e-0004, + 9.977001946806131657544212501069893930846e-0007, + 1.0, /* s0 */ + 1.867609810662950169966782360588199673741e-0002, + 1.590389206181565490878430827706972074208e-0004, + 6.520867386742583632375520147714499522721e-0007, + 9.999999999999999942156495584397047660949e-0001, /* r1 */ + -2.389887722731319130476839836908143731281e-0001, + 1.293359476138939027791270393439493640570e-0002, + -2.770985642343140122168852400228563364082e-0004, + 2.905241575772067678086738389169625218912e-0006, + -1.636846356264052597969042009265043251279e-0008, + 5.072306160724884775085431059052611737827e-0011, + -8.187060730684066824228914775146536139112e-0014, + 5.422219326959949863954297860723723423842e-0017, + 1.0, /* s1 */ + 1.101122772686807702762104741932076228349e-0002, + 6.140169310641649223411427764669143978228e-0005, + 2.292035877515152097976946119293215705250e-0007, + 6.356910426504644334558832036362219583789e-0010, + 1.366626326900219555045096999553948891401e-0012, + 2.280399586866739522891837985560481180088e-0015, + 2.801559820648939665270492520004836611187e-0018, + 2.073101088320349159764410261466350732968e-0021, +}; + +#define r0 Cj0 +#define s0 (Cj0+4) +#define r1 (Cj0+8) +#define s1 (Cj0+17) + +static const double Cy0[] = { + -7.380429510868722526754723020704317641941e-0002, /* u0 */ + 1.772607102684869924301459663049874294814e-0001, + -1.524370666542713828604078090970799356306e-0002, + 4.650819100693891757143771557629924591915e-0004, + -7.125768872339528975036316108718239946022e-0006, + 6.411017001656104598327565004771515257146e-0008, + -3.694275157433032553021246812379258781665e-0010, + 1.434364544206266624252820889648445263842e-0012, + -3.852064731859936455895036286874139896861e-0015, + 7.182052899726138381739945881914874579696e-0018, + -9.060556574619677567323741194079797987200e-0021, + 
7.124435467408860515265552217131230511455e-0024, + -2.709726774636397615328813121715432044771e-0027, + 1.0, /* v0 */ + 4.678678931512549002587702477349214886475e-0003, + 9.486828955529948534822800829497565178985e-0006, + 1.001495929158861646659010844136682454906e-0008, + 4.725338116256021660204443235685358593611e-0012, +}; + +#define u0 Cy0 +#define v0 (Cy0+13) + +static const double Cj1y1[] = { + -0.4435757816794127857114720794e7, /* pr0 */ + -0.9942246505077641195658377899e7, + -0.6603373248364939109255245434e7, + -0.1523529351181137383255105722e7, + -0.1098240554345934672737413139e6, + -0.1611616644324610116477412898e4, + -0.4435757816794127856828016962e7, /* ps0 */ + -0.9934124389934585658967556309e7, + -0.6585339479723087072826915069e7, + -0.1511809506634160881644546358e7, + -0.1072638599110382011903063867e6, + -0.1455009440190496182453565068e4, + 0.3322091340985722351859704442e5, /* qr0 */ + 0.8514516067533570196555001171e5, + 0.6617883658127083517939992166e5, + 0.1849426287322386679652009819e5, + 0.1706375429020768002061283546e4, + 0.3526513384663603218592175580e2, + 0.7087128194102874357377502472e6, /* qs0 */ + 0.1819458042243997298924553839e7, + 0.1419460669603720892855755253e7, + 0.4002944358226697511708610813e6, + 0.3789022974577220264142952256e5, + 0.8638367769604990967475517183e3, +}; + +#define pr0 Cj1y1 +#define ps0 (Cj1y1+6) +#define qr0 (Cj1y1+12) +#define qs0 (Cj1y1+18) + +static const double Cj1[] = { + -6.250000000000002203053200981413218949548e-0002, /* a0 */ + 1.600998455640072901321605101981501263762e-0003, + -1.963888815948313758552511884390162864930e-0005, + 8.263917341093549759781339713418201620998e-0008, + 1.0e0, /* b0 */ + 1.605069137643004242395356851797873766927e-0002, + 1.149454623251299996428500249509098499383e-0004, + 3.849701673735260970379681807910852327825e-0007, + 4.999999999999999995517408894340485471724e-0001, + -6.003825028120475684835384519945468075423e-0002, + 2.301719899263321828388344461995355419832e-0003, + 
-4.208494869238892934859525221654040304068e-0005, + 4.377745135188837783031540029700282443388e-0007, + -2.854106755678624335145364226735677754179e-0009, + 1.234002865443952024332943901323798413689e-0011, + -3.645498437039791058951273508838177134310e-0014, + 7.404320596071797459925377103787837414422e-0017, + -1.009457448277522275262808398517024439084e-0019, + 8.520158355824819796968771418801019930585e-0023, + -3.458159926081163274483854614601091361424e-0026, + 1.0e0, /* b1 */ + 4.923499437590484879081138588998986303306e-0003, + 1.054389489212184156499666953501976688452e-0005, + 1.180768373106166527048240364872043816050e-0008, + 5.942665743476099355323245707680648588540e-0012, +}; + +#define a0 Cj1 +#define b0 (Cj1+4) +#define a1 (Cj1+8) +#define b1 (Cj1+20) + +static const double Cy1[] = { + -1.960570906462389461018983259589655961560e-0001, /* c0 */ + 4.931824118350661953459180060007970291139e-0002, + -1.626975871565393656845930125424683008677e-0003, + 1.359657517926394132692884168082224258360e-0005, + 1.0e0, /* d0 */ + 2.565807214838390835108224713630901653793e-0002, + 3.374175208978404268650522752520906231508e-0004, + 2.840368571306070719539936935220728843177e-0006, + 1.396387402048998277638900944415752207592e-0008, + -1.960570906462389473336339614647555351626e-0001, /* c1 */ + 5.336268030335074494231369159933012844735e-0002, + -2.684137504382748094149184541866332033280e-0003, + 5.737671618979185736981543498580051903060e-0005, + -6.642696350686335339171171785557663224892e-0007, + 4.692417922568160354012347591960362101664e-0009, + -2.161728635907789319335231338621412258355e-0011, + 6.727353419738316107197644431844194668702e-0014, + -1.427502986803861372125234355906790573422e-0016, + 2.020392498726806769468143219616642940371e-0019, + -1.761371948595104156753045457888272716340e-0022, + 7.352828391941157905175042420249225115816e-0026, + 1.0e0, /* d1 */ + 5.029187436727947764916247076102283399442e-0003, + 1.102693095808242775074856548927801750627e-0005, + 
1.268035774543174837829534603830227216291e-0008, + 6.579416271766610825192542295821308730206e-0012, +}; + +#define c0 Cy1 +#define d0 (Cy1+4) +#define c1 (Cy1+9) +#define d1 (Cy1+21) + + +/* core of j0f computation; assumes fx is finite */ +static double +__k_j0f(float fx) +{ + double x, z, s, c, ss, cc, r, t, p0, q0; + int ix, i; + + ix = *(int *)&fx & ~0x80000000; + x = fabs((double)fx); + if (ix > 0x41000000) { + /* x > 8; see comments in j0.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + ss = s - c; + cc = -cos(x + x) / ss; + } else { + cc = s + c; + ss = -cos(x + x) / cc; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p0 = one; + q0 = neighth / x; + } else { + t = eight / x; + z = t * t; + p0 = (pr[0] + z * (pr[1] + z * (pr[2] + z * (pr[3] + + z * (pr[4] + z * (pr[5] + z * pr[6])))))) / + (ps[0] + z * (ps[1] + z * (ps[2] + z * (ps[3] + + z * (ps[4] + z * (ps[5] + z)))))); + q0 = ((qr[0] + z * (qr[1] + z * (qr[2] + z * (qr[3] + + z * (qr[4] + z * (qr[5] + z * qr[6])))))) / + (qs[0] + z * (qs[1] + z * (qs[2] + z * (qs[3] + + z * (qs[4] + z * (qs[5] + z))))))) * t; + } + return (isqrtpi * (p0 * cc - q0 * ss) / sqrt(x)); + } + if (ix <= 0x3727c5ac) { + /* x <= 1.0e-5 */ + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + return (one - x); + return (one - x * x * quarter); + } + z = x * x; + if (ix <= 0x3fa3d70a) { + /* x <= 1.28 */ + r = r0[0] + z * (r0[1] + z * (r0[2] + z * r0[3])); + s = s0[0] + z * (s0[1] + z * (s0[2] + z * s0[3])); + return (one + z * (r / s)); + } + r = r1[8]; + s = s1[8]; + for (i = 7; i >= 0; i--) { + r = r * z + r1[i]; + s = s * z + s1[i]; + } + return (r / s); +} + +float +j0f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx & ~0x80000000; + if (ix >= 0x7f800000) { /* nan or inf */ + if (ix > 0x7f800000) + return (fx * fx); + return (zerof); + } + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_j0f(fx); +#if 
defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* core of y0f computation; assumes fx is finite and positive */ +static double +__k_y0f(float fx) +{ + double x, z, s, c, ss, cc, t, p0, q0, u, v; + int ix, i; + + ix = *(int *)&fx; + x = (double)fx; + if (ix > 0x41000000) { + /* x > 8; see comments in j0.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + ss = s - c; + cc = -cos(x + x) / ss; + } else { + cc = s + c; + ss = -cos(x + x) / cc; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p0 = one; + q0 = neighth / x; + } else { + t = eight / x; + z = t * t; + p0 = (pr[0] + z * (pr[1] + z * (pr[2] + z * (pr[3] + + z * (pr[4] + z * (pr[5] + z * pr[6])))))) / + (ps[0] + z * (ps[1] + z * (ps[2] + z * (ps[3] + + z * (ps[4] + z * (ps[5] + z)))))); + q0 = ((qr[0] + z * (qr[1] + z * (qr[2] + z * (qr[3] + + z * (qr[4] + z * (qr[5] + z * qr[6])))))) / + (qs[0] + z * (qs[1] + z * (qs[2] + z * (qs[3] + + z * (qs[4] + z * (qs[5] + z))))))) * t; + } + return (isqrtpi * (p0 * ss + q0 * cc) / sqrt(x)); + } + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + return (u0[0] + tpi * log(x)); + z = x * x; + u = u0[12]; + for (i = 11; i >= 0; i--) + u = u * z + u0[i]; + v = v0[0] + z * (v0[1] + z * (v0[2] + z * (v0[3] + z * v0[4]))); + return (u / v + tpi * (__k_j0f(fx) * log(x))); +} + +float +y0f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx; + if ((ix & ~0x80000000) > 0x7f800000) /* nan */ + return (fx * fx); + if (ix <= 0) { /* zero or negative */ + if ((ix << 1) == 0) + return (-onef / zerof); + return (zerof / zerof); + } + if (ix == 0x7f800000) /* +inf */ + return (zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_y0f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* core of j1f computation; assumes fx is 
finite */ +static double +__k_j1f(float fx) +{ + double x, z, s, c, ss, cc, r, t, p1, q1; + int i, ix, sgn; + + ix = *(int *)&fx; + sgn = (unsigned)ix >> 31; + ix &= ~0x80000000; + x = fabs((double)fx); + if (ix > 0x41000000) { + /* x > 8; see comments in j1.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + cc = s - c; + ss = cos(x + x) / cc; + } else { + ss = -s - c; + cc = cos(x + x) / ss; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p1 = one; + q1 = three8 / x; + } else { + t = eight / x; + z = t * t; + p1 = (pr0[0] + z * (pr0[1] + z * (pr0[2] + z * + (pr0[3] + z * (pr0[4] + z * pr0[5]))))) / + (ps0[0] + z * (ps0[1] + z * (ps0[2] + z * + (ps0[3] + z * (ps0[4] + z * (ps0[5] + z)))))); + q1 = ((qr0[0] + z * (qr0[1] + z * (qr0[2] + z * + (qr0[3] + z * (qr0[4] + z * qr0[5]))))) / + (qs0[0] + z * (qs0[1] + z * (qs0[2] + z * + (qs0[3] + z * (qs0[4] + z * (qs0[5] + z))))))) * t; + } + t = isqrtpi * (p1 * cc - q1 * ss) / sqrt(x); + return ((sgn)? -t : t); + } + if (ix <= 0x3727c5ac) { + /* x <= 1.0e-5 */ + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + t = half * x; + else + t = x * (half + neighth * x * x); + return ((sgn)? -t : t); + } + z = x * x; + if (ix < 0x3fa3d70a) { + /* x < 1.28 */ + r = a0[0] + z * (a0[1] + z * (a0[2] + z * a0[3])); + s = b0[0] + z * (b0[1] + z * (b0[2] + z * b0[3])); + t = x * half + x * (z * (r / s)); + } else { + r = a1[11]; + for (i = 10; i >= 0; i--) + r = r * z + a1[i]; + s = b1[0] + z * (b1[1] + z * (b1[2] + z * (b1[3] + z * b1[4]))); + t = x * (r / s); + } + return ((sgn)? 
-t : t); +} + +float +j1f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx & ~0x80000000; + if (ix >= 0x7f800000) /* nan or inf */ + return (onef / fx); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_j1f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +/* core of y1f computation; assumes fx is finite and positive */ +static double +__k_y1f(float fx) +{ + double x, z, s, c, ss, cc, u, v, p1, q1, t; + int i, ix; + + ix = *(int *)&fx; + x = (double)fx; + if (ix > 0x41000000) { + /* x > 8; see comments in j1.c */ + s = sin(x); + c = cos(x); + if (signbit(s) != signbit(c)) { + cc = s - c; + ss = cos(x + x) / cc; + } else { + ss = -s - c; + cc = cos(x + x) / ss; + } + if (ix > 0x501502f9) { + /* x > 1.0e10 */ + p1 = one; + q1 = three8 / x; + } else { + t = eight / x; + z = t * t; + p1 = (pr0[0] + z * (pr0[1] + z * (pr0[2] + z * + (pr0[3] + z * (pr0[4] + z * pr0[5]))))) / + (ps0[0] + z * (ps0[1] + z * (ps0[2] + z * + (ps0[3] + z * (ps0[4] + z * (ps0[5] + z)))))); + q1 = ((qr0[0] + z * (qr0[1] + z * (qr0[2] + z * + (qr0[3] + z * (qr0[4] + z * qr0[5]))))) / + (qs0[0] + z * (qs0[1] + z * (qs0[2] + z * + (qs0[3] + z * (qs0[4] + z * (qs0[5] + z))))))) * t; + } + return (isqrtpi * (p1 * ss + q1 * cc) / sqrt(x)); + } + if (ix <= 0x219392ef) /* x <= 1.0e-18 */ + return (-tpi / x); + z = x * x; + if (ix < 0x3fa3d70a) { + /* x < 1.28 */ + u = c0[0] + z * (c0[1] + z * (c0[2] + z * c0[3])); + v = d0[0] + z * (d0[1] + z * (d0[2] + z * (d0[3] + z * d0[4]))); + } else { + u = c1[11]; /* Horner evaluation of c1[0..11] in z */ + for (i = 10; i >= 0; i--) + u = u * z + c1[i]; + v = d1[0] + z * (d1[1] + z * (d1[2] + z * (d1[3] + z * d1[4]))); + } + return (x * (u / v) + tpi * (__k_j1f(fx) * log(x) - one / x)); +} + +float +y1f(float fx) +{ + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&fx; +
if ((ix & ~0x80000000) > 0x7f800000) /* nan */ + return (fx * fx); + if (ix <= 0) { /* zero or negative */ + if ((ix << 1) == 0) /* every bit below the sign is clear: x is +-0 */ + return (-onef / zerof); + return (zerof / zerof); + } + if (ix == 0x7f800000) /* +inf */ + return (zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + f = (float)__k_y1f(fx); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return (f); +} + +float +jnf(int n, float fx) +{ + double a, b, temp, x, z, w, t, q0, q1, h; + float f; + int i, ix, sgn, m, k; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + if (n < 0) { + n = -n; + fx = -fx; + } + if (n == 0) + return (j0f(fx)); + if (n == 1) + return (j1f(fx)); + + ix = *(int *)&fx; + sgn = (n & 1)? ((unsigned)ix >> 31) : 0; /* odd orders are odd functions of x */ + ix &= ~0x80000000; + if (ix >= 0x7f800000) { /* nan or inf */ + if (ix > 0x7f800000) + return (fx * fx); + return ((sgn)? -zerof : zerof); + } + if ((ix << 1) == 0) /* x is +-0 */ + return ((sgn)? -zerof : zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + fx = fabsf(fx); + x = (double)fx; + if ((double)n <= x) { + /* safe to use J(n+1,x) = 2n/x * J(n,x) - J(n-1,x) */ + a = __k_j0f(fx); + b = __k_j1f(fx); + for (i = 1; i < n; i++) { + temp = b; + b = b * ((double)(i + i) / x) - a; + a = temp; + } + f = (float)b; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return ((sgn)? -f : f); + } + if (ix < 0x3089705f) { + /* x < 1.0e-9; use J(n,x) = 1/n! * (x / 2)^n */ + if (n > 6) + n = 6; /* result underflows to zero for n >= 6 */ + b = t = half * x; + a = one; + for (i = 2; i <= n; i++) { + b *= t; + a *= (double)i; + } + b /= a; + } else { + /* + * Use the backward recurrence: + * + * J(n,x)/J(n-1,x) = + * x / (2n - x^2 / (2(n+1) - x^2 / (2(n+2) - ...))) + * + * Let w = 2n/x and h = 2/x. 
Then the above quotient + * is equal to the continued fraction: + * + * 1 / (w - 1 / (w+h - 1 / (w+2h - ...))) + * + * To determine how many terms are needed, run the + * recurrence + * + * Q(0) = w, + * Q(1) = w(w+h) - 1, + * Q(k) = (w+k*h)*Q(k-1) - Q(k-2). + * + * Then when Q(k) > 1e4, k is large enough for single + * precision. + */ +/* XXX NOT DONE - rework this */ + w = (n + n) / x; + h = two / x; + q0 = w; + z = w + h; + q1 = w * z - one; + k = 1; + while (q1 < big) { + k++; + z += h; + temp = z * q1 - q0; + q0 = q1; + q1 = temp; + } + m = n + n; + t = zero; + for (i = (n + k) << 1; i >= m; i -= 2) /* evaluate the fraction from the innermost term outward */ + t = one / ((double)i / x - t); + a = t; + b = one; + /* + * estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) + * hence, if n*(log(2n/x)) > ... + * single 8.8722839355e+01 + * double 7.09782712893383973096e+02 + * then recurrent value may overflow and the result is + * likely underflow to zero + */ + temp = (double)n; + temp *= log((two / x) * temp); + if (temp < 7.09782712893383973096e+02) { + for (i = n - 1; i > 0; i--) { + temp = b; + b = b * ((double)(i + i) / x) - a; + a = temp; + } + } else { + for (i = n - 1; i > 0; i--) { + temp = b; + b = b * ((double)(i + i) / x) - a; + a = temp; + if (b > 1.0e100) { /* rescale a, t and b together to keep b finite */ + a /= b; + t /= b; + b = one; + } + } + } + b = (t * __k_j0f(fx) / b); + } + f = (float)b; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return ((sgn)? -f : f); +} + +float +ynf(int n, float fx) +{ + double a, b, temp, x; + float f; + int i, sign, ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + sign = 0; + if (n < 0) { + n = -n; + if (n & 1) /* negative odd order: negate the result */ + sign = 1; + } + if (n == 0) + return (y0f(fx)); + if (n == 1) + return ((sign)? 
-y1f(fx) : y1f(fx)); + + ix = *(int *)&fx; + if ((ix & ~0x80000000) > 0x7f800000) /* nan */ + return (fx * fx); + if (ix <= 0) { /* zero or negative */ + if ((ix << 1) == 0) /* every bit below the sign is clear: x is +-0 */ + return (-onef / zerof); + return (zerof / zerof); + } + if (ix == 0x7f800000) /* +inf */ + return (zerof); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); +#endif + a = __k_y0f(fx); + b = __k_y1f(fx); + x = (double)fx; + for (i = 1; i < n; i++) { /* forward recurrence Y(i+1) = (2i/x) * Y(i) - Y(i-1) */ + temp = b; + b *= (double)(i + i) / x; + if (b <= -DBL_MAX) /* at or beyond -DBL_MAX: stop the recurrence */ + break; + b -= a; + a = temp; + } + f = (float)b; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + return ((sign)? -f : f); +} diff --git a/usr/src/libm/src/R/cbrtf.c b/usr/src/libm/src/R/cbrtf.c new file mode 100644 index 0000000..f5387dd --- /dev/null +++ b/usr/src/libm/src/R/cbrtf.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cbrtf.c 1.8 06/01/31 SMI" + +#pragma weak cbrtf = __cbrtf + +#include "libm.h" + +float +cbrtf(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); /* NaN in, NaN out, without calling cbrt */ + else +#endif + return ((float) cbrt((double) x)); /* evaluate in double, then round to float */ +} diff --git a/usr/src/libm/src/R/copysignf.c b/usr/src/libm/src/R/copysignf.c new file mode 100644 index 0000000..85835ff --- /dev/null +++ b/usr/src/libm/src/R/copysignf.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)copysignf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak copysignf = __copysignf +#endif + +#include "libm.h" + +float +copysignf(float x, float y) { + float w; + + *(int *) &w = (*(int *) &x & ~0x80000000) | (*(int *) &y & 0x80000000); /* low 31 bits from x, sign bit from y */ + return (w); +} diff --git a/usr/src/libm/src/R/cosf.c b/usr/src/libm/src/R/cosf.c new file mode 100644 index 0000000..245ed10 --- /dev/null +++ b/usr/src/libm/src/R/cosf.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cosf.c 1.11 06/01/23 SMI" + +#pragma weak cosf = __cosf + +/* + * See sincosf.c + */ + +#include "libm.h" + +extern const int _TBL_ipio2_inf[]; +extern int __rem_pio2m(double *, double *, int, int, int, const int *); +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const double C[] = { + 1.85735322054308378716204874632872525989806770558e-0003, + -1.95035094218403635082921458859320791358115801259e-0004, + 5.38400550766074785970952495168558701485841707252e+0002, + -3.31975110777873728964197739157371509422022905947e+0001, + 1.09349482127188401868272000389539985058873853699e-0003, + -5.03324285989964979398034700054920226866107675091e-0004, + 2.43792880266971107750418061559602239831538067410e-0005, + 9.14499072605666582228127405245558035523741471271e+0002, + -3.63151270591815439197122504991683846785293207730e+0001, + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 0.5, + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define S0 C[0] +#define S1 C[1] +#define S2 C[2] +#define S3 C[3] +#define C0 C[4] +#define C1 C[5] +#define C2 C[6] +#define C3 C[7] +#define C4 C[8] +#define invpio2 C[9] +#define half C[10] +#define pio2_1 C[11] +#define pio2_t C[12] + +float +cosf(float x) +{ + double y, z, w; + float f; + int n, ix, hx, hy; + + hx = *((int *)&x); + ix = hx & 0x7fffffff; + + y = (double)x; + + if (ix <= 0x4016cbe4) { /* |x| < 3*pi/4 */ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ + if (ix <= 0x39800000) { /* |x| <= 2**-12 */ + volatile int i = (int)y; /* NOTE(review): presumably raises inexact for nonzero x - confirm */ +#ifdef lint + i = i; +#endif + return (1.0f); + } + z = y * y; + return ((float)(((C0 + z * C1) + (z * z) * C2) * + (C3 + z * (C4 + z)))); + } else if (hx > 0) { /* positive x: cos(x) = -sin(x - pi/2) */ + y = (y - pio2_1) - pio2_t; + z = y * y; + return ((float)-((y * (S0 + z * S1)) * + (S2 + z * (S3 + z)))); + } else { /* negative x: cos(x) = sin(x + pi/2) */ + y = (y + pio2_1) + pio2_t; + z = y * y; + return ((float)((y * (S0 + z * S1)) * + (S2 + z * 
(S3 + z)))); + } + } else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */ +#if defined(__i386) && !defined(__amd64) + int rp; + + rp = __swapRP(fp_extended); +#endif + w = y * invpio2; + if (hx < 0) + n = (int)(w - half); + else + n = (int)(w + half); /* n = nearest integer to x * 2/pi */ + y = (y - n * pio2_1) - n * pio2_t; /* subtract n*pi/2 in two pieces (pio2_1 + pio2_t) */ + n++; /* cos(x) = sin(x + pi/2): advance one quadrant */ +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + if (ix >= 0x7f800000) + return (x / x); /* cos(Inf or NaN) is NaN */ + hy = ((int *)&y)[HIWORD]; + n = ((hy >> 20) & 0x7ff) - 1046; + ((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000; + ((int *)&w)[LOWORD] = ((int *)&y)[LOWORD]; + n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf) + 1; /* same one-quadrant advance as above */ + } + + if (n & 1) { + /* compute cos y */ + z = y * y; + f = (float)(((C0 + z * C1) + (z * z) * C2) * + (C3 + z * (C4 + z))); + } else { + /* compute sin y */ + z = y * y; + f = (float)((y * (S0 + z * S1)) * (S2 + z * (S3 + z))); + } + + return ((n & 2)? -f : f); /* bit 1 of n selects the sign */ +} diff --git a/usr/src/libm/src/R/coshf.c b/usr/src/libm/src/R/coshf.c new file mode 100644 index 0000000..5f974a4 --- /dev/null +++ b/usr/src/libm/src/R/coshf.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)coshf.c 1.11 06/01/23 SMI" + +#pragma weak coshf = __coshf + +#include "libm.h" + +float +coshf(float x) { + double c; + float w; + int ix; + + ix = *(int *)&x & ~0x80000000; /* bit pattern of |x| */ + if (ix >= 0x7f800000) { + /* coshf(x) is |x| if x is +-Inf or NaN */ + return (x * x); + } + if (ix >= 0x43000000) /* |x| >= 128: coshf(x) trivially overflows */ + c = 1.0e100; /* outside float range, so the conversion below overflows */ + else + c = cosh((double)x); + w = (float)c; + return (w); +} diff --git a/usr/src/libm/src/R/erff.c b/usr/src/libm/src/R/erff.c new file mode 100644 index 0000000..862d21b --- /dev/null +++ b/usr/src/libm/src/R/erff.c @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)erff.c 1.8 06/01/23 SMI" + +#pragma weak erff = __erff +#pragma weak erfcf = __erfcf + +#include "libm.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +float +erff(float x) { + int ix; + + ix = *(int *)&x & ~0x80000000; + if (ix > 0x7f800000) /* x is NaN */ + return (x * x); + return ((float)erf((double)x)); /* evaluate in double, then round to float */ +} + +float +erfcf(float x) { + float f; + int ix; +#if defined(__i386) && !defined(__amd64) + int rp; +#endif + + ix = *(int *)&x & ~0x80000000; + if (ix > 0x7f800000) /* x is NaN */ + return (x * x); + +#if defined(__i386) && !defined(__amd64) + rp = __swapRP(fp_extended); /* NOTE(review): presumably selects extended x87 precision and returns the previous mode - confirm */ +#endif + f = (float)erfc((double)x); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); /* put back the mode saved above */ +#endif + return (f); +} diff --git a/usr/src/libm/src/R/exp10f.c b/usr/src/libm/src/R/exp10f.c new file mode 100644 index 0000000..3614748 --- /dev/null +++ b/usr/src/libm/src/R/exp10f.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)exp10f.c 1.8 06/01/31 SMI" + +#pragma weak exp10f = __exp10f + +#include "libm.h" + +float +exp10f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); /* NaN in, NaN out, without calling exp10 */ + else +#endif + return ((float) exp10((double) x)); /* evaluate in double, then round to float */ +} diff --git a/usr/src/libm/src/R/exp2f.c b/usr/src/libm/src/R/exp2f.c new file mode 100644 index 0000000..01a36a0 --- /dev/null +++ b/usr/src/libm/src/R/exp2f.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)exp2f.c 1.9 06/01/31 SMI" + +#pragma weak exp2f = __exp2f + +#include "libm.h" + +float +exp2f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); /* NaN in, NaN out, without calling exp2 */ + else +#endif + return ((float) exp2((double) x)); /* evaluate in double, then round to float */ +} diff --git a/usr/src/libm/src/R/expf.c b/usr/src/libm/src/R/expf.c new file mode 100644 index 0000000..c1b876c --- /dev/null +++ b/usr/src/libm/src/R/expf.c @@ -0,0 +1,400 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)expf.c 1.14 06/01/23 SMI" + +#pragma weak expf = __expf + +/* INDENT OFF */ +/* + * float expf(float x); + * Code by K.C. Ng for SUN 5.0 libmopt + * 11/5/99 + * Method : + * 1. For |x| >= 2^7, either underflow/overflow. + * More precisely: + * x > 88.722839355...(0x42B17218) => overflow; + * x < -103.97207642..(0xc2CFF1B4) => underflow. + * 2. For |x| < 2^-6, use polynomial + * exp(x) = 1 + x + p1*x^2 + p2*x^3 + * 3. Otherwise, write |x|=(1+r)*2^n, where 0<=r<1. + * Let t = 2^n * (1+r) .... x > 0; + * t = 2^n * (1-r) .... x < 0. (x= -2**(n+1)+t) + * Since -6 <= n <= 6, we may break t into + * six 6-bit chunks: + * t = j_1*2^1 + j_2*2^-5 + j_3*2^-11 + + * j_4*2^-17 + j_5*2^-23 + j_6*2^-29 + * (chunk j_i has weight 2^(7-6i)) + * + * where 0 <= j_i < 64 for i = 1,...,6. + * + * Note that since t has only 24 significant bits, + * either j_1 or j_6 must be 0. + * + * One may define j_i by + * (int)(t * 2^(6i-7)) mod 64 + * mathematically. In actual implementation, they can + * be obtained by manipulating the exponent and + * mantissa bits as follows: + * Let ix = (HEX(x)&0x007fffff)|0x00800000.
+ * If n>=0, let ix=ix<<n, then j_6 = 0 and + * j_i = (ix>>(30-6i)) mod 64 ...i=1,...,5 + * Otherwise, let ix=ix<<(n+6), then j_1 = 0 and + * j_i = (ix>>(36-6i)) mod 64 ...i=2,...,6 + * + * 4. Compute exp(t) by table look-up method. + * Precompute ET[k] = exp(j*2^(7-6i)), k=j+64*(6-i). + * Then + * exp(t) = ET[j_1+320]*ET[j_2+256]*ET[j_3+192]* + * ET[j_4+128]*ET[j_5+64]*ET[j_6] + * + * 5. If x < 0, return exp(-2^(n+1)) * exp(t). Note that + * -6 <= n <= 6. Let k = n + 6, then we can + * precompute + * EN[k] = exp(-2^(k-5)) = exp(-2^(n+1)) for k=0,1,...,12. + * + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF) = 0; + * for finite argument, only exp(0) = 1 is exact. + * + * Accuracy: + * All calculations are done in double precision except for + * the case |x| < 2^-6. When |x| < 2^-6, the error is less + * than 0.55 ulp. When |x| >= 2^-6 and the result is normal, + * the error is less than 0.51 ulp. When FDTOS_TRAPS_... is + * defined and the result is subnormal, the error can be as + * large as 0.75 ulp.
+ */ +/* INDENT ON */ + +#include "libm.h" + +/* + * ET[k] = exp(j*2^(7-6i)) , where j = k mod 64, i = 6 - k/64 + */ +static const double ET[] = { + 1.00000000000000000000e+00, 1.00000000186264514923e+00, + 1.00000000372529029846e+00, 1.00000000558793544769e+00, + 1.00000000745058059692e+00, 1.00000000931322574615e+00, + 1.00000001117587089539e+00, 1.00000001303851604462e+00, + 1.00000001490116119385e+00, 1.00000001676380656512e+00, + 1.00000001862645171435e+00, 1.00000002048909686359e+00, + 1.00000002235174201282e+00, 1.00000002421438716205e+00, + 1.00000002607703253332e+00, 1.00000002793967768255e+00, + 1.00000002980232283178e+00, 1.00000003166496798102e+00, + 1.00000003352761335229e+00, 1.00000003539025850152e+00, + 1.00000003725290365075e+00, 1.00000003911554879998e+00, + 1.00000004097819417126e+00, 1.00000004284083932049e+00, + 1.00000004470348446972e+00, 1.00000004656612984100e+00, + 1.00000004842877499023e+00, 1.00000005029142036150e+00, + 1.00000005215406551073e+00, 1.00000005401671088201e+00, + 1.00000005587935603124e+00, 1.00000005774200140252e+00, + 1.00000005960464655175e+00, 1.00000006146729192302e+00, + 1.00000006332993707225e+00, 1.00000006519258244353e+00, + 1.00000006705522759276e+00, 1.00000006891787296404e+00, + 1.00000007078051811327e+00, 1.00000007264316348454e+00, + 1.00000007450580863377e+00, 1.00000007636845400505e+00, + 1.00000007823109937632e+00, 1.00000008009374452556e+00, + 1.00000008195638989683e+00, 1.00000008381903526811e+00, + 1.00000008568168063938e+00, 1.00000008754432578861e+00, + 1.00000008940697115989e+00, 1.00000009126961653116e+00, + 1.00000009313226190244e+00, 1.00000009499490705167e+00, + 1.00000009685755242295e+00, 1.00000009872019779422e+00, + 1.00000010058284316550e+00, 1.00000010244548853677e+00, + 1.00000010430813368600e+00, 1.00000010617077905728e+00, + 1.00000010803342442856e+00, 1.00000010989606979983e+00, + 1.00000011175871517111e+00, 1.00000011362136054238e+00, + 1.00000011548400591366e+00, 1.00000011734665128493e+00, 
+ 1.00000000000000000000e+00, 1.00000011920929665621e+00, + 1.00000023841860752327e+00, 1.00000035762793260119e+00, + 1.00000047683727188996e+00, 1.00000059604662538959e+00, + 1.00000071525599310007e+00, 1.00000083446537502141e+00, + 1.00000095367477115360e+00, 1.00000107288418149665e+00, + 1.00000119209360605055e+00, 1.00000131130304481530e+00, + 1.00000143051249779091e+00, 1.00000154972196497738e+00, + 1.00000166893144637470e+00, 1.00000178814094198287e+00, + 1.00000190735045180190e+00, 1.00000202655997583179e+00, + 1.00000214576951407253e+00, 1.00000226497906652412e+00, + 1.00000238418863318657e+00, 1.00000250339821405987e+00, + 1.00000262260780914403e+00, 1.00000274181741843904e+00, + 1.00000286102704194491e+00, 1.00000298023667966163e+00, + 1.00000309944633158921e+00, 1.00000321865599772764e+00, + 1.00000333786567807692e+00, 1.00000345707537263706e+00, + 1.00000357628508140806e+00, 1.00000369549480438991e+00, + 1.00000381470454158261e+00, 1.00000393391429298617e+00, + 1.00000405312405860059e+00, 1.00000417233383842586e+00, + 1.00000429154363246198e+00, 1.00000441075344070896e+00, + 1.00000452996326316679e+00, 1.00000464917309983548e+00, + 1.00000476838295071502e+00, 1.00000488759281580542e+00, + 1.00000500680269510667e+00, 1.00000512601258861878e+00, + 1.00000524522249634174e+00, 1.00000536443241827556e+00, + 1.00000548364235442023e+00, 1.00000560285230477575e+00, + 1.00000572206226934213e+00, 1.00000584127224811937e+00, + 1.00000596048224110746e+00, 1.00000607969224830640e+00, + 1.00000619890226971620e+00, 1.00000631811230533685e+00, + 1.00000643732235516836e+00, 1.00000655653241921073e+00, + 1.00000667574249746394e+00, 1.00000679495258992802e+00, + 1.00000691416269660294e+00, 1.00000703337281748873e+00, + 1.00000715258295258536e+00, 1.00000727179310189285e+00, + 1.00000739100326541120e+00, 1.00000751021344314040e+00, + 1.00000000000000000000e+00, 1.00000762942363508046e+00, + 1.00001525890547848796e+00, 1.00002288844553022251e+00, + 
1.00003051804379095024e+00, 1.00003814770026133729e+00, + 1.00004577741494138365e+00, 1.00005340718783175546e+00, + 1.00006103701893311886e+00, 1.00006866690824547383e+00, + 1.00007629685576948653e+00, 1.00008392686150582307e+00, + 1.00009155692545448346e+00, 1.00009918704761613384e+00, + 1.00010681722799144033e+00, 1.00011444746658040295e+00, + 1.00012207776338368781e+00, 1.00012970811840196106e+00, + 1.00013733853163522269e+00, 1.00014496900308413885e+00, + 1.00015259953274937565e+00, 1.00016023012063093311e+00, + 1.00016786076672947736e+00, 1.00017549147104567453e+00, + 1.00018312223357952462e+00, 1.00019075305433191581e+00, + 1.00019838393330284809e+00, 1.00020601487049298761e+00, + 1.00021364586590300050e+00, 1.00022127691953288675e+00, + 1.00022890803138353455e+00, 1.00023653920145494389e+00, + 1.00024417042974778091e+00, 1.00025180171626271175e+00, + 1.00025943306099973640e+00, 1.00026706446395974304e+00, + 1.00027469592514273167e+00, 1.00028232744454959047e+00, + 1.00028995902218031944e+00, 1.00029759065803558471e+00, + 1.00030522235211605242e+00, 1.00031285410442172257e+00, + 1.00032048591495348333e+00, 1.00032811778371155675e+00, + 1.00033574971069616488e+00, 1.00034338169590819589e+00, + 1.00035101373934764979e+00, 1.00035864584101541475e+00, + 1.00036627800091149076e+00, 1.00037391021903676602e+00, + 1.00038154249539146257e+00, 1.00038917482997580244e+00, + 1.00039680722279067382e+00, 1.00040443967383629875e+00, + 1.00041207218311289928e+00, 1.00041970475062136359e+00, + 1.00042733737636191371e+00, 1.00043497006033499375e+00, + 1.00044260280254104778e+00, 1.00045023560298029786e+00, + 1.00045786846165363215e+00, 1.00046550137856127272e+00, + 1.00047313435370366363e+00, 1.00048076738708124900e+00, + 1.00000000000000000000e+00, 1.00048840047869447289e+00, + 1.00097703949241645383e+00, 1.00146591715766675179e+00, + 1.00195503359100279717e+00, 1.00244438890903908579e+00, + 1.00293398322844673487e+00, 1.00342381666595459322e+00, + 1.00391388933834746489e+00, 
1.00440420136246855165e+00, + 1.00489475285521656645e+00, 1.00538554393354861993e+00, + 1.00587657471447822211e+00, 1.00636784531507639251e+00, + 1.00685935585247099411e+00, 1.00735110644384739942e+00, + 1.00784309720644804642e+00, 1.00833532825757243856e+00, + 1.00882779971457803292e+00, 1.00932051169487890796e+00, + 1.00981346431594687374e+00, 1.01030665769531102782e+00, + 1.01080009195055753324e+00, 1.01129376719933050666e+00, + 1.01178768355933157430e+00, 1.01228184114831898377e+00, + 1.01277624008410960244e+00, 1.01327088048457714109e+00, + 1.01376576246765282008e+00, 1.01426088615132625748e+00, + 1.01475625165364347069e+00, 1.01525185909270931894e+00, + 1.01574770858668572693e+00, 1.01624380025379235093e+00, + 1.01674013421230657883e+00, 1.01723671058056375216e+00, + 1.01773352947695694404e+00, 1.01823059101993673714e+00, + 1.01872789532801233392e+00, 1.01922544251975000229e+00, + 1.01972323271377418585e+00, 1.02022126602876750390e+00, + 1.02071954258347008526e+00, 1.02121806249668067856e+00, + 1.02171682588725554197e+00, 1.02221583287410910934e+00, + 1.02271508357621376817e+00, 1.02321457811260052573e+00, + 1.02371431660235789884e+00, 1.02421429916463280207e+00, + 1.02471452591863054771e+00, 1.02521499698361440167e+00, + 1.02571571247890602763e+00, 1.02621667252388526492e+00, + 1.02671787723799012859e+00, 1.02721932674071725344e+00, + 1.02772102115162167202e+00, 1.02822296059031659254e+00, + 1.02872514517647339893e+00, 1.02922757502982276101e+00, + 1.02973025027015285815e+00, 1.03023317101731093359e+00, + 1.03073633739120262831e+00, 1.03123974951179242510e+00, + 1.00000000000000000000e+00, 1.03174340749910276038e+00, + 1.06449445891785954288e+00, 1.09828514030782575794e+00, + 1.13314845306682632220e+00, 1.16911844616950433284e+00, + 1.20623024942098067136e+00, 1.24452010776609522935e+00, + 1.28402541668774139438e+00, 1.32478475872886569675e+00, + 1.36683794117379631139e+00, 1.41022603492571074746e+00, + 1.45499141461820125087e+00, 1.50117780000012279729e+00, 
+ 1.54883029863413312910e+00, 1.59799544995063325104e+00, + 1.64872127070012819416e+00, 1.70105730184840076014e+00, + 1.75505465696029849809e+00, 1.81076607211938722664e+00, + 1.86824595743222232613e+00, 1.92755045016754467113e+00, + 1.98873746958229191684e+00, 2.05186677348797674725e+00, + 2.11700001661267478426e+00, 2.18420081081561789915e+00, + 2.25353478721320854561e+00, 2.32506966027712103084e+00, + 2.39887529396709808793e+00, 2.47502376996302508871e+00, + 2.55358945806292680913e+00, 2.63464908881563086851e+00, + 2.71828182845904553488e+00, 2.80456935623722669604e+00, + 2.89359594417176113623e+00, 2.98544853936535581340e+00, + 3.08021684891803104733e+00, 3.17799342753883840018e+00, + 3.27887376793867346692e+00, 3.38295639409246895468e+00, + 3.49034295746184142217e+00, 3.60113833627217561073e+00, + 3.71545073794110392029e+00, 3.83339180475841034834e+00, + 3.95507672292057721464e+00, 4.08062433502646015882e+00, + 4.21015725614395996956e+00, 4.34380199356104235164e+00, + 4.48168907033806451778e+00, 4.62395315278208052234e+00, + 4.77073318196760265408e+00, 4.92217250943229078786e+00, + 5.07841903718008147450e+00, 5.23962536212848917216e+00, + 5.40594892514116676097e+00, 5.57755216479125959239e+00, + 5.75460267600573072144e+00, 5.93727337374560715233e+00, + 6.12574266188198635064e+00, 6.32019460743274397174e+00, + 6.52081912033011246166e+00, 6.72781213889469142941e+00, + 6.94137582119703555605e+00, 7.16171874249371143151e+00, + 1.00000000000000000000e+00, 7.38905609893065040694e+00, + 5.45981500331442362040e+01, 4.03428793492735110249e+02, + 2.98095798704172830185e+03, 2.20264657948067178950e+04, + 1.62754791419003915507e+05, 1.20260428416477679275e+06, + 8.88611052050787210464e+06, 6.56599691373305097222e+07, + 4.85165195409790277481e+08, 3.58491284613159179688e+09, + 2.64891221298434715271e+10, 1.95729609428838775635e+11, + 1.44625706429147509766e+12, 1.06864745815244628906e+13, + 7.89629601826806875000e+13, 5.83461742527454875000e+14, + 
4.31123154711519500000e+15, 3.18559317571137560000e+16, + 2.35385266837020000000e+17, 1.73927494152050099200e+18, + 1.28516001143593082880e+19, 9.49611942060244828160e+19, + 7.01673591209763143680e+20, 5.18470552858707204506e+21, + 3.83100800071657691546e+22, 2.83075330327469394756e+23, + 2.09165949601299610311e+24, 1.54553893559010391826e+25, + 1.14200738981568423454e+26, 8.43835666874145383188e+26, + 6.23514908081161674391e+27, 4.60718663433129178064e+28, + 3.40427604993174075827e+29, 2.51543867091916687979e+30, + 1.85867174528412788702e+31, 1.37338297954017610775e+32, + 1.01480038811388874615e+33, 7.49841699699012090701e+33, + 5.54062238439350983445e+34, 4.09399696212745451138e+35, + 3.02507732220114256223e+36, 2.23524660373471497416e+37, + 1.65163625499400180987e+38, 1.22040329431784083418e+39, + 9.01762840503429851945e+39, 6.66317621641089618500e+40, + 4.92345828601205826106e+41, 3.63797094760880474988e+42, + 2.68811714181613560943e+43, 1.98626483613765434356e+44, + 1.46766223015544238535e+45, 1.08446385529002313207e+46, + 8.01316426400059069850e+46, 5.92097202766466993617e+47, + 4.37503944726134096988e+48, 3.23274119108485947460e+49, + 2.38869060142499127023e+50, 1.76501688569176554670e+51, + 1.30418087839363225614e+52, 9.63666567360320166416e+52, + 7.12058632688933793173e+53, 5.26144118266638596909e+54, +}; + +/* + * EN[k] = exp(-2^(k-5)) + */ +static const double EN[] = { + 9.69233234476344129860e-01, 9.39413062813475807644e-01, + 8.82496902584595455110e-01, 7.78800783071404878477e-01, + 6.06530659712633424263e-01, 3.67879441171442334024e-01, + 1.35335283236612702318e-01, 1.83156388887341786686e-02, + 3.35462627902511853224e-04, 1.12535174719259116458e-07, + 1.26641655490941755372e-14, 1.60381089054863792659e-28, +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) + 2.96555550007072683578e-38, /* exp(-128) scaled up by 2^60 */ +#else + 2.57220937264241481170e-56, +#endif +}; + +static const float F[] = { + 0.0f, + 1.0f, + 5.0000000951292138e-01F, + 
1.6666518897347284e-01F, + 3.4028234663852885981170E+38F, + 1.1754943508222875079688E-38F, +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) + 8.67361737988403547205962240695953369140625e-19F +#endif +}; + +#define zero F[0] +#define one F[1] +#define p1 F[2] +#define p2 F[3] +#define big F[4] +#define tiny F[5] +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) +#define twom60 F[6] +#endif + +float +expf(float xf) { + double w, p, q; + int hx, ix, n; + + hx = *(int *)&xf; + ix = hx & ~0x80000000; + + if (ix < 0x3c800000) { /* |x| < 2**-6 */ + if (ix < 0x38800000) /* |x| < 2**-14 */ + return (one + xf); + return (one + (xf + (xf * xf) * (p1 + xf * p2))); + } + + n = ix >> 23; /* biased exponent */ + + if (n >= 0x86) { /* |x| >= 2^7 */ + if (n >= 0xff) { /* x is nan or +-inf */ + if (hx == 0xff800000) + return (zero); /* exp(-inf)=0 */ + return (xf * xf); /* exp(nan/inf) is nan or inf */ + } + if (hx > 0) + return (big * big); /* overflow */ + else + return (tiny * tiny); /* underflow */ + } + + ix -= n << 23; + if (hx > 0) + ix += 0x800000; + else + ix = 0x800000 - ix; + if (n >= 0x7f) { /* n >= 0 */ + ix <<= n - 0x7f; + w = ET[(ix & 0x3f) + 64] * ET[((ix >> 6) & 0x3f) + 128]; + p = ET[((ix >> 12) & 0x3f) + 192] * + ET[((ix >> 18) & 0x3f) + 256]; + q = ET[((ix >> 24) & 0x3f) + 320]; + } else { + ix <<= n - 0x79; + w = ET[ix & 0x3f] * ET[((ix >> 6) & 0x3f) + 64]; + p = ET[((ix >> 12) & 0x3f) + 128] * + ET[((ix >> 18) & 0x3f) + 192]; + q = ET[((ix >> 24) & 0x3f) + 256]; + } + xf = (float)((w * p) * (hx < 0 ?
q * EN[n - 0x79] : q)); +#if defined(FDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE) + if ((unsigned)hx >= 0xc2800000u) { + if ((unsigned)hx >= 0xc2aeac50) { /* force underflow */ + volatile float t = tiny; + t *= t; + } + return (xf * twom60); + } +#endif + return (xf); +} diff --git a/usr/src/libm/src/R/expm1f.c b/usr/src/libm/src/R/expm1f.c new file mode 100644 index 0000000..2ea051c --- /dev/null +++ b/usr/src/libm/src/R/expm1f.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)expm1f.c 1.8 06/01/31 SMI" + +#pragma weak expm1f = __expm1f + +#include "libm.h" + +float +expm1f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) expm1((double) x)); +} diff --git a/usr/src/libm/src/R/fabsf.c b/usr/src/libm/src/R/fabsf.c new file mode 100644 index 0000000..e6de825 --- /dev/null +++ b/usr/src/libm/src/R/fabsf.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fabsf.c 1.7 06/01/31 SMI" + +#pragma weak fabsf = __fabsf + +#include "libm.h" + +float +fabsf(float x) { + *(int *) &x &= ~0x80000000; + return (x); +} diff --git a/usr/src/libm/src/R/floorf.c b/usr/src/libm/src/R/floorf.c new file mode 100644 index 0000000..b9c13b2 --- /dev/null +++ b/usr/src/libm/src/R/floorf.c @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)floorf.c 1.11 06/01/31 SMI" + +#pragma weak ceilf = __ceilf +#pragma weak floorf = __floorf + +/* INDENT OFF */ +/* + * ceilf(x) returns the least integral value (in float) not less than x + * floorf(x) returns the greatest integral value (in float) not greater than x + * + * NOTE: ceilf(x) and floorf(x) return result + * with the same sign as x's, including 0.0F. + */ + +#include "libm.h" + +static const float xf[] = { +/* ZEROF */ 0.0f, +/* ONEF */ 1.0f, +/* MONEF */ -1.0f, +/* HUGEF */ 1.0e30f, +}; + +#define ZEROF xf[0] +#define ONEF xf[1] +#define MONEF xf[2] +#define HUGEF xf[3] +/* INDENT ON */ + +float +ceilf(float x) { + volatile float dummy; + int hx, k, j, ix; + + hx = *(int *) &x; + ix = hx & ~0x80000000; + k = ix >> 23; + if (((k - 127) ^ (k - 150)) < 0) { + k = (1 << (150 - k)) - 1; + if ((k & hx) != 0) + dummy = HUGEF + x; /* raise inexact */ + j = k & (~(hx >> 31)); + *(int *) &x = (hx + j) & ~k; + return (x); + } else if (k <= 126) { + dummy = HUGEF + x; + if (hx > 0) + return (ONEF); + else if (ix == 0) + return (x); + else + return (-ZEROF); + } else + /* signal invalid if x is a SNaN */ + return (x * ONEF); /* +0 -> *1 for Cheetah */ +} + +float +floorf(float x) { + volatile float dummy; + int hx, k, j, ix; + + hx = *(int *) &x; + ix = hx & ~0x80000000; + k = ix >> 23; + if (((k - 127) ^ (k - 150)) < 0) { + k = (1 << (150 - k)) - 1; + if ((k & hx) != 0) + dummy = HUGEF + x; /* raise inexact */ + j = k & (hx >> 31); + *(int *) &x = (hx + j) & ~k; + return (x); + } else if (k <= 126) { + dummy = HUGEF + x; + if (hx > 0) + return (ZEROF); + else if (ix == 0) + return (x); + else + return (MONEF); + }
else + /* signal invalid if x is a SNaN */ + return (x * ONEF); /* +0 -> *1 for Cheetah */ +} diff --git a/usr/src/libm/src/R/fmodf.c b/usr/src/libm/src/R/fmodf.c new file mode 100644 index 0000000..c78df05 --- /dev/null +++ b/usr/src/libm/src/R/fmodf.c @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fmodf.c 1.12 06/01/23 SMI" + +#pragma weak fmodf = __fmodf + +#include "libm.h" + +/* INDENT OFF */ +static const int + is = (int)0x80000000, + im = 0x007fffff, + ii = 0x7f800000, + iu = 0x00800000; +/* INDENT ON */ + +static const float zero = 0.0; + +float +fmodf(float x, float y) { + float w; + int hx, ix, iy, iz, k, ny, nd; + + hx = *(int *)&x; + ix = hx & 0x7fffffff; + iy = *(int *)&y & 0x7fffffff; + + /* purge off exception values */ + if (ix >= ii || iy > ii || iy == 0) { + w = x * y; + w = w / w; + } else if (ix <= iy) { + if (ix < iy) + w = x; /* return x if |x|<|y| */ + else + w = zero * x; /* return sign(x)*0.0 */ + } else { + /* INDENT OFF */ + /* + * scale x,y to "normal" with + * ny = exponent of y + * nd = exponent of x minus exponent of y + */ + /* INDENT ON */ + ny = iy >> 23; + k = ix >> 23; + + /* special case for subnormal y or x */ + if (ny == 0) { + ny = 1; + while (iy < iu) { + ny -= 1; + iy += iy; + } + nd = k - ny; + if (k == 0) { + nd += 1; + while (ix < iu) { + nd -= 1; + ix += ix; + } + } else { + ix = iu | (ix & im); + } + } else { + nd = k - ny; + ix = iu | (ix & im); + iy = iu | (iy & im); + } + + /* fix point fmod for normalized ix and iy */ + /* INDENT OFF */ + /* + * while (nd--) { + * iz = ix - iy; + * if (iz < 0) + * ix = ix + ix; + * else if (iz == 0) { + * *(int *) &w = is & hx; + * return w; + * } + * else + * ix = iz + iz; + * } + */ + /* INDENT ON */ + /* unroll the above loop 4 times to gain performance */ + k = nd >> 2; + nd -= k << 2; + while (k--) { + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + if (iz == 0) { + *(int *)&w = is & hx; + return (w); + } + } + while (nd--) { + iz = ix - iy; + if (iz >= 0) + ix = iz + iz; + else + ix += ix; + } + /* end of unrolling */ + + iz = ix - 
iy; + if (iz >= 0) + ix = iz; + + /* convert back to floating value and restore the sign */ + if (ix == 0) { + *(int *)&w = is & hx; + return (w); + } + while (ix < iu) { + ix += ix; + ny -= 1; + } + while (ix > (iu + iu)) { + ny += 1; + ix >>= 1; + } + if (ny > 0) { + *(int *)&w = (is & hx) | (ix & im) | (ny << 23); + } else { + /* subnormal output */ + k = -ny + 1; + ix >>= k; + *(int *)&w = (is & hx) | ix; + } + } + return (w); +} diff --git a/usr/src/libm/src/R/gammaf.c b/usr/src/libm/src/R/gammaf.c new file mode 100644 index 0000000..7a9e1f2 --- /dev/null +++ b/usr/src/libm/src/R/gammaf.c @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)gammaf.c 1.11 06/01/23 SMI" + +#pragma weak gammaf = __gammaf + +#include "libm.h" + +float +gammaf(float x) { + return (lgammaf(x)); +} diff --git a/usr/src/libm/src/R/gammaf_r.c b/usr/src/libm/src/R/gammaf_r.c new file mode 100644 index 0000000..c0722a8 --- /dev/null +++ b/usr/src/libm/src/R/gammaf_r.c @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)gammaf_r.c 1.6 06/01/23 SMI" + +#pragma weak gammaf_r = __gammaf_r + +#include "libm.h" + +float +gammaf_r(float x, int *signgamfp) { + return (lgammaf_r(x, signgamfp)); +} diff --git a/usr/src/libm/src/R/hypotf.c b/usr/src/libm/src/R/hypotf.c new file mode 100644 index 0000000..8c1f23e --- /dev/null +++ b/usr/src/libm/src/R/hypotf.c @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)hypotf.c 1.8 06/01/31 SMI" + +#pragma weak hypotf = __hypotf + +#include "libm.h" + +float +hypotf(float x, float y) { + double dx, dy; + float w; + int ix, iy; + + ix = (*(int *) &x) & 0x7fffffff; + iy = (*(int *) &y) & 0x7fffffff; + if (ix >= 0x7f800000) { + if (ix == 0x7f800000) + *(int *) &w = x == y ? iy : ix; /* w = |x| = inf */ + else if (iy == 0x7f800000) + *(int *) &w = x == y ? ix : iy; /* w = |y| = inf */ + else + w = fabsf(x) * fabsf(y); /* + -> * for Cheetah */ + } else if (iy >= 0x7f800000) { + if (iy == 0x7f800000) + *(int *) &w = x == y ? ix : iy; /* w = |y| = inf */ + else + w = fabsf(x) * fabsf(y); /* + -> * for Cheetah */ + } else if (ix == 0) + *(int *) &w = iy; /* w = |y| */ + else if (iy == 0) + *(int *) &w = ix; /* w = |x| */ + else { + dx = (double) x; + dy = (double) y; + w = (float) sqrt(dx * dx + dy * dy); + } + return (w); +} diff --git a/usr/src/libm/src/R/ilogbf.c b/usr/src/libm/src/R/ilogbf.c new file mode 100644 index 0000000..3d1f77a --- /dev/null +++ b/usr/src/libm/src/R/ilogbf.c @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ilogbf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak ilogbf = __ilogbf +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ + +#if defined(USE_FPSCALE) || defined(__i386) +static const float two25 = 33554432.0F; +#else +/* + * v: a non-zero subnormal |x| + */ +static int +ilogbf_subnormal(unsigned v) { + int r = -126 - 23; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +static int +raise_invalid(int v) { /* SUSv3 requires ilogbf(0,+/-Inf,NaN) raise invalid */ +#ifndef lint + if ((__xpg6 & _C99SUSv3_ilogb_0InfNaN_raises_invalid) != 0) { + static const double zero = 0.0; + volatile double dummy; + + dummy = zero / zero; + } +#endif + return (v); +} + +int +ilogbf(float x) { + int k = *((int *) &x) & ~0x80000000; + + if (k < 0x00800000) { + if (k == 0) + return (raise_invalid(0x80000001)); + else { +#if defined(USE_FPSCALE) || defined(__i386) + x *= two25; + return (((*((int *) &x) & 0x7f800000) >> 23) - 152); +#else + return (ilogbf_subnormal(k)); +#endif + } + } else if (k < 0x7f800000) 
+ return ((k >> 23) - 127); + else + return (raise_invalid(0x7fffffff)); +} diff --git a/usr/src/libm/src/R/isnanf.c b/usr/src/libm/src/R/isnanf.c new file mode 100644 index 0000000..724fb5b --- /dev/null +++ b/usr/src/libm/src/R/isnanf.c @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)isnanf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak isnanf = __isnanf +#pragma weak _isnanf = __isnanf +#endif + +#include "libm.h" + +int +isnanf(float x) { + return ((*(int *) &x & ~0x80000000) > 0x7f800000); +} diff --git a/usr/src/libm/src/R/lgammaf.c b/usr/src/libm/src/R/lgammaf.c new file mode 100644 index 0000000..399b90d --- /dev/null +++ b/usr/src/libm/src/R/lgammaf.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lgammaf.c 1.11 06/01/23 SMI" + +#pragma weak lgammaf = __lgammaf + +#include "libm.h" + +extern int signgamf; + +float +lgammaf(float x) { + float y; + + if (isnanf(x)) + return (x * x); + y = (float)__k_lgamma((double)x, &signgamf); + signgam = signgamf; /* SUSv3 requires the setting of signgam */ + return (y); +} diff --git a/usr/src/libm/src/R/lgammaf_r.c b/usr/src/libm/src/R/lgammaf_r.c new file mode 100644 index 0000000..508aaec --- /dev/null +++ b/usr/src/libm/src/R/lgammaf_r.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lgammaf_r.c 1.6 06/01/23 SMI" + +#pragma weak lgammaf_r = __lgammaf_r + +#include "libm.h" + +float +lgammaf_r(float x, int *signgamfp) { + if (isnanf(x)) + return (x * x); + return ((float)__k_lgamma((double)x, signgamfp)); +} diff --git a/usr/src/libm/src/R/log10f.c b/usr/src/libm/src/R/log10f.c new file mode 100644 index 0000000..e693442 --- /dev/null +++ b/usr/src/libm/src/R/log10f.c @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)log10f.c 1.11 06/01/23 SMI" + +#pragma weak log10f = __log10f + +#include "libm.h" + +static const float zero = 0.0f, mone = -1.0f; + +float +log10f(float x) { + int hx, ix; + float w; + + hx = *(int *)&x; + ix = hx & ~0x80000000; + if (ix > 0x7f800000) + return (x * x); + if (ix == 0x7f800000) + return (x + x * x); + if (ix == 0) { + w = mone; + return (w / zero); + } + if (hx < 0) { + w = zero; + return (w / zero); + } + return ((float)log10((double)x)); +} diff --git a/usr/src/libm/src/R/log1pf.c b/usr/src/libm/src/R/log1pf.c new file mode 100644 index 0000000..a99e76e --- /dev/null +++ b/usr/src/libm/src/R/log1pf.c @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)log1pf.c 1.11 06/01/23 SMI" + +#pragma weak log1pf = __log1pf + +#include "libm.h" + +static const float zero = 0.0f; + +float +log1pf(float x) { + int ix; + + ix = *(int *)&x; + if (ix >= 0x7f800000) { + /* x is +inf or nan */ + return (x * x); + } + if (ix < 0) { + ix &= ~0x80000000; + if (ix == 0x3f800000) /* x is -1 */ + return (x / zero); + if (ix > 0x3f800000) /* x is < -1 or nan */ + return ((x * zero) / zero); + } + return ((float)log1p((double)x)); +} diff --git a/usr/src/libm/src/R/log2f.c b/usr/src/libm/src/R/log2f.c new file mode 100644 index 0000000..74e4546 --- /dev/null +++ b/usr/src/libm/src/R/log2f.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)log2f.c 1.8 06/01/31 SMI" + +#pragma weak log2f = __log2f + +#include "libm.h" + +float +log2f(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) log2((double) x)); +} diff --git a/usr/src/libm/src/R/logbf.c b/usr/src/libm/src/R/logbf.c new file mode 100644 index 0000000..f40c999 --- /dev/null +++ b/usr/src/libm/src/R/logbf.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)logbf.c 1.12 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak logbf = __logbf +#endif + +#include "libm.h" +#include "xpg6.h" /* __xpg6 */ +#define _C99SUSv3_logb _C99SUSv3_logb_subnormal_is_like_ilogb + +#if defined(USE_FPSCALE) || defined(__i386) +static const float two25 = 33554432.0F; +#else +/* + * v: a non-zero subnormal |x| + */ +static int +ilogbf_subnormal(unsigned v) { + int r = -126 - 23; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +static float +raise_division(float t) { +#pragma STDC FENV_ACCESS ON + static const float zero = 0.0F; + return (t / zero); +} + +float +logbf(float x) { + int k = *((int *) &x) & ~0x80000000; + + if (k < 0x00800000) { + if (k == 0) + return (raise_division(-1.0F)); + else if ((__xpg6 & _C99SUSv3_logb) != 0) { +#if defined(USE_FPSCALE) || defined(__i386) + x *= two25; + return ((float) (((*((int *) &x) & 0x7f800000) >> 23) - + 152)); +#else + return ((float) ilogbf_subnormal(k)); +#endif + } else + return (-126.F); + } else if (k < 0x7f800000) + return ((float) ((k >> 23) - 127)); + else + return (x * x); +} diff --git a/usr/src/libm/src/R/logf.c b/usr/src/libm/src/R/logf.c new file mode 100644 index 0000000..08bb034 --- /dev/null +++ b/usr/src/libm/src/R/logf.c @@ -0,0 +1,147 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)logf.c 1.14 06/01/23 SMI" + +#pragma weak logf = __logf + +/* + * Algorithm: + * + * Let y = x rounded to six significant bits. Then for any choice + * of e and z such that y = 2^e z, we have + * + * log(x) = e log(2) + log(z) + log(1+(x-y)/y) + * + * Note that (x-y)/y = (x'-y')/y' for any scaled x' = sx, y' = sy; + * in particular, we can take s to be the power of two that makes + * ulp(x') = 1. + * + * From a table, obtain l = log(z) and r = 1/y'. For |s| <= 2^-6, + * approximate log(1+s) by a polynomial p(s) where p(s) := s+s*s* + * (K1+s*(K2+s*K3)). Then we compute the expression above as + * e*ln2 + l + p(r*(x'-y')) all evaluated in double precision. + * + * When x is subnormal, we first scale it to the normal range, + * adjusting e accordingly. + * + * Accuracy: + * + * The largest error is less than 0.6 ulps. 
/*
 * For i = 0, ..., 12,
 *   TBL[2i] = log(1 + i/32) and TBL[2i+1] = 2^-23 / (1 + i/32)
 *
 * For i = 13, ..., 32,
 *   TBL[2i] = log(1/2 + i/64) and TBL[2i+1] = 2^-23 / (1 + i/32)
 */
static const double TBL[] = {
	0.000000000000000000e+00, 1.192092895507812500e-07,
	3.077165866675368733e-02, 1.155968868371212153e-07,
	6.062462181643483994e-02, 1.121969784007352926e-07,
	8.961215868968713805e-02, 1.089913504464285680e-07,
	1.177830356563834557e-01, 1.059638129340277719e-07,
	1.451820098444978890e-01, 1.030999260979729787e-07,
	1.718502569266592284e-01, 1.003867701480263102e-07,
	1.978257433299198675e-01, 9.781275040064102225e-08,
	2.231435513142097649e-01, 9.536743164062500529e-08,
	2.478361639045812692e-01, 9.304139672256097884e-08,
	2.719337154836417580e-01, 9.082612537202380448e-08,
	2.954642128938358980e-01, 8.871388989825581272e-08,
	3.184537311185345887e-01, 8.669766512784091150e-08,
	-3.522205935893520934e-01, 8.477105034722222546e-08,
	-3.302416868705768671e-01, 8.292820142663043248e-08,
	-3.087354816496132859e-01, 8.116377160904255122e-08,
	-2.876820724517809014e-01, 7.947285970052082892e-08,
	-2.670627852490452536e-01, 7.785096460459183052e-08,
	-2.468600779315257843e-01, 7.629394531250000159e-08,
	-2.270574506353460753e-01, 7.479798560049019504e-08,
	-2.076393647782444896e-01, 7.335956280048077330e-08,
	-1.885911698075500298e-01, 7.197542010613207272e-08,
	-1.698990367953974734e-01, 7.064254195601851460e-08,
	-1.515498981272009327e-01, 6.935813210227272390e-08,
	-1.335313926245226268e-01, 6.811959402901785336e-08,
	-1.158318155251217008e-01, 6.692451343201754014e-08,
	-9.844007281325252434e-02, 6.577064251077586116e-08,
	-8.134563945395240081e-02, 6.465588585805084723e-08,
	-6.453852113757117814e-02, 6.357828776041666578e-08,
	-4.800921918636060631e-02, 6.253602074795082293e-08,
	-3.174869831458029812e-02, 6.152737525201612732e-08,
	-1.574835696813916761e-02, 6.055075024801586965e-08,
	0.000000000000000000e+00, 5.960464477539062500e-08,
};

/* ln(2) followed by the coefficients of p(s) = s + s*s*(K1 + s*(K2 + s*K3)) */
static const double C[] = {
	6.931471805599452862e-01,
	-2.49887584306188944706e-01,
	3.33368809981254554946e-01,
	-5.00000008402474976565e-01
};

#define	ln2	C[0]
#define	K3	C[1]
#define	K2	C[2]
#define	K1	C[3]

/*
 * Single-precision natural logarithm.
 *
 * Returns x*x for +Inf and NaN with the sign bit clear, x*0 (NaN) for
 * -Inf and NaN with the sign bit set, -1/0 for +-0 and 0/0 for any other
 * negative argument.  All remaining arguments go through the table
 * lookup plus polynomial evaluated in double precision.
 *
 * The bit pattern of x is read through a union rather than through an
 * (int *) cast, so the access does not violate the effective-type
 * (strict aliasing) rule.
 */
float
logf(float x)
{
	union {
		float	f;
		int	i;
	} u;
	double v, t;
	float f;
	int hx, ix, i, e, iy;

	u.f = x;
	hx = u.i;
	ix = hx & ~0x80000000;

	if (ix >= 0x7f800000)	/* nan or inf */
		return ((hx < 0)? x * 0.0f : x * x);

	e = 0;
	if (hx < 0x00800000) {	/* negative, zero, or subnormal */
		if (hx <= 0) {
			f = 0.0f;
			return ((ix == 0)? -1.0f / f : f / f);
		}

		/* subnormal; scale by 2^149 */
		u.f = (float)ix;
		ix = u.i;
		e = -149;
	}

	/* exponent, bumped by one when the significand is above 1.390625 */
	e += (ix - 0x3f320000) >> 23;
	ix &= 0x007fffff;
	/* iy = significand bits rounded to a multiple of 2^18 */
	iy = (ix + 0x20000) & 0xfffc0000;
	/* TBL holds (log, reciprocal) pairs, hence the shift by 17, not 18 */
	i = iy >> 17;
	t = ln2 * (double)e + TBL[i];
	v = (double)(ix - iy) * TBL[i + 1];
	v += (v * v) * (K1 + v * (K2 + v * K3));
	f = (float)(t + v);
	return (f);
}
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Return the representable float adjacent to x in the direction of y.
 *
 * If either argument is NaN the product x*y (a NaN) is returned; if the
 * arguments are equal, or are zeros of opposite sign, y is returned.
 * A multiplication that overflows is performed when the result has the
 * maximum exponent field, and one that underflows when the result has a
 * zero exponent field, so the corresponding exception is raised.
 *
 * Bit patterns are exchanged through unions instead of (int *) casts so
 * that no object is accessed through an lvalue of incompatible type.
 */
float
nextafterf(float x, float y) {
	union {
		float	f;
		int	i;
	} ux, uy, uw, ut;
	int ix, iy, iz;

	ux.f = x;
	uy.f = y;
	ix = ux.i;
	iy = uy.i;
	if ((ix & ~0x80000000) > 0x7f800000)
		return (x * y);		/* + -> * for Cheetah */
	if ((iy & ~0x80000000) > 0x7f800000)
		return (y * x);		/* + -> * for Cheetah */
	if (ix == iy || (ix | iy) == 0x80000000)
		return (y);		/* C99 requirement */
	if ((ix & ~0x80000000) == 0) {
		/* x is +-0: smallest subnormal carrying the sign of y */
		iz = 1 | (iy & 0x80000000);
	} else if (ix > 0) {
		/* positive x: the pattern grows with the magnitude */
		if (ix > iy)
			iz = ix - 1;
		else
			iz = ix + 1;
	} else {
		/* negative x: step away from zero only if y is below x */
		if (iy < 0 && ix < iy)
			iz = ix + 1;
		else
			iz = ix - 1;
	}
	uw.i = iz;
	ix = iz & 0x7f800000;
	if (ix == 0x7f800000) {
		/* raise overflow */
		volatile float t;

		ut.i = 0x7f7fffff;
		t = ut.f;
		t *= t;
	} else if (ix == 0) {
		/* raise underflow */
		volatile float t;

		ut.i = 0x00800000;
		t = ut.f;
		t *= t;
	}
	return (uw.f);
}
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "@(#)powf.c 1.19 06/01/23 SMI"

#pragma weak powf = __powf

#include "libm.h"
#include "xpg6.h"	/* __xpg6 */
#define	_C99SUSv3_pow	_C99SUSv3_pow_treats_Inf_as_an_even_int

#if defined(__i386) && !defined(__amd64)
extern int __swapRP(int);
#endif

/* INDENT OFF */
/*
 * Scalar constants.  A0/A1 are the coefficients of the odd polynomial
 * ds*(A0 + ds^2*A1) used in the logarithm step; t0/t1 appear in the
 * rational expression used in the exponential step.
 */
static const double
	ln2 = 6.93147180559945286227e-01,	/* 0x3fe62e42, 0xfefa39ef */
	invln2 = 1.44269504088896338700e+00,	/* 0x3ff71547, 0x652b82fe */
	dtwo = 2.0,
	done = 1.0,
	dhalf = 0.5,
	d32 = 32.0,
	d1_32 = 0.03125,
	A0 = 1.999999999813723303647511146995966439250e+0000,
	A1 = 6.666910817935858533770138657139665608610e-0001,
	t0 = 2.000000000004777489262405315073203746943e+0000,
	t1 = 1.666663408349926379873111932994250726307e-0001;

/*
 * 32-entry table indexed by the low five bits of n in the exponential
 * step.  The values match 2^(i/32) (S[0] = 1, S[16] = sqrt(2)).
 */
static const double S[] = {
	1.00000000000000000000e+00,	/* 3FF0000000000000 */
	1.02189714865411662714e+00,	/* 3FF059B0D3158574 */
	1.04427378242741375480e+00,	/* 3FF0B5586CF9890F */
	1.06714040067682369717e+00,	/* 3FF11301D0125B51 */
	1.09050773266525768967e+00,	/* 3FF172B83C7D517B */
	1.11438674259589243221e+00,	/* 3FF1D4873168B9AA */
	1.13878863475669156458e+00,	/* 3FF2387A6E756238 */
	1.16372485877757747552e+00,	/* 3FF29E9DF51FDEE1 */
	1.18920711500272102690e+00,	/* 3FF306FE0A31B715 */
	1.21524735998046895524e+00,	/* 3FF371A7373AA9CB */
	1.24185781207348400201e+00,	/* 3FF3DEA64C123422 */
	1.26905095719173321989e+00,	/* 3FF44E086061892D */
	1.29683955465100964055e+00,	/* 3FF4BFDAD5362A27 */
	1.32523664315974132322e+00,	/* 3FF5342B569D4F82 */
	1.35425554693689265129e+00,	/* 3FF5AB07DD485429 */
	1.38390988196383202258e+00,	/* 3FF6247EB03A5585 */
	1.41421356237309514547e+00,	/* 3FF6A09E667F3BCD */
	1.44518080697704665027e+00,	/* 3FF71F75E8EC5F74 */
	1.47682614593949934623e+00,	/* 3FF7A11473EB0187 */
	1.50916442759342284141e+00,	/* 3FF82589994CCE13 */
	1.54221082540794074411e+00,	/* 3FF8ACE5422AA0DB */
	1.57598084510788649659e+00,	/* 3FF93737B0CDC5E5 */
	1.61049033194925428347e+00,	/* 3FF9C49182A3F090 */
	1.64575547815396494578e+00,	/* 3FFA5503B23E255D */
	1.68179283050742900407e+00,	/* 3FFAE89F995AD3AD */
	1.71861929812247793414e+00,	/* 3FFB7F76F2FB5E47 */
	1.75625216037329945351e+00,	/* 3FFC199BDD85529C */
	1.79470907500310716820e+00,	/* 3FFCB720DCEF9069 */
	1.83400808640934243066e+00,	/* 3FFD5818DCFBA487 */
	1.87416763411029996256e+00,	/* 3FFDFC97337B9B5F */
	1.91520656139714740007e+00,	/* 3FFEA4AFA2A490DA */
	1.95714412417540017941e+00,	/* 3FFF50765B6E4540 */
};

/*
 * 32-entry table indexed by the top five fraction bits of |x| in the
 * logarithm step.  The values match log(1 + i/32).
 */
static const double TBL[] = {
	0.00000000000000000e+00,
	3.07716586667536873e-02,
	6.06246218164348399e-02,
	8.96121586896871380e-02,
	1.17783035656383456e-01,
	1.45182009844497889e-01,
	1.71850256926659228e-01,
	1.97825743329919868e-01,
	2.23143551314209765e-01,
	2.47836163904581269e-01,
	2.71933715483641758e-01,
	2.95464212893835898e-01,
	3.18453731118534589e-01,
	3.40926586970593193e-01,
	3.62905493689368475e-01,
	3.84411698910332056e-01,
	4.05465108108164385e-01,
	4.26084395310900088e-01,
	4.46287102628419530e-01,
	4.66089729924599239e-01,
	4.85507815781700824e-01,
	5.04556010752395312e-01,
	5.23248143764547868e-01,
	5.41597282432744409e-01,
	5.59615787935422659e-01,
	5.77315365034823613e-01,
	5.94707107746692776e-01,
	6.11801541105992941e-01,
	6.28608659422374094e-01,
	6.45137961373584701e-01,
	6.61398482245365016e-01,
	6.77398823591806143e-01,
};

static const float zero = 0.0F, one = 1.0F, huge = 1.0e25f, tiny = 1.0e-25f;
/* INDENT ON */

/*
 * powf(x, y): x raised to the power y, in single precision.
 *
 * The function first disposes of the special cases (y = +-0, NaN
 * operands, y = +-Inf, +-1, 2, 0.5, x = +-0, +-Inf, +-1, and negative x
 * with a non-integer y), then evaluates exp(y * log(|x|)) in double
 * precision using the TBL[] and S[] tables above, and finally negates
 * the result when x is negative and y is an odd integer.
 *
 * Two results depend on the __xpg6 flag word: when the _C99SUSv3_pow bit
 * is set, 1**anything and (+-1)**+-Inf return 1; otherwise the "Sun"
 * results noted in the comments below are returned.
 */
float
powf(float x, float y) {
	float fx = x, fy = y;
	float fz;
	int ix, iy, jx, jy, k, iw, yisint;

	/* ix/iy are the raw bit patterns; jx/jy have the sign bit cleared */
	ix = *(int *)&x;
	iy = *(int *)&y;
	jx = ix & ~0x80000000;
	jy = iy & ~0x80000000;

	if (jy == 0)
		return (one);		/* x**+-0 = 1 */
	else if (ix == 0x3f800000 && (__xpg6 & _C99SUSv3_pow) != 0)
		return (one);		/* C99: 1**anything = 1 */
	else if (((0x7f800000 - jx) | (0x7f800000 - jy)) < 0)
		return (fx * fy);	/* at least one of x or y is NaN */
					/* includes Sun: 1**NaN = NaN */
	/* INDENT OFF */
	/*
	 * determine if y is an odd int
	 * yisint = 0 ... y is not an integer
	 * yisint = 1 ... y is an odd int
	 * yisint = 2 ... y is an even int
	 *
	 * The classification is only performed (and only needed) when x
	 * has its sign bit set.
	 */
	/* INDENT ON */
	yisint = 0;
	if (ix < 0) {
		if (jy >= 0x4b800000) {
			yisint = 2;	/* |y|>=2**24: y must be even */
		} else if (jy >= 0x3f800000) {
			k = (jy >> 23) - 0x7f;	/* exponent */
			/* drop the fraction bits and see whether any were set */
			iw = jy >> (23 - k);
			if ((iw << (23 - k)) == jy)
				yisint = 2 - (iw & 1);
		}
	}

	/* special value of y (all fraction bits of y are zero) */
	if ((jy & ~0x7f800000) == 0) {
		if (jy == 0x7f800000) {		/* y is +-inf */
			if (jx == 0x3f800000) {
				if ((__xpg6 & _C99SUSv3_pow) != 0)
					fz = one;
						/* C99: (-1)**+-inf is 1 */
				else
					fz = fy - fy;
						/* Sun: (+-1)**+-inf = NaN */
			} else if (jx > 0x3f800000) {
						/* (|x|>1)**+,-inf = inf,0 */
				if (iy > 0)
					fz = fy;
				else
					fz = zero;
			} else {		/* (|x|<1)**-,+inf = inf,0 */
				if (iy < 0)
					fz = -fy;
				else
					fz = zero;
			}
			return (fz);
		} else if (jy == 0x3f800000) {	/* y is +-1 */
			if (iy < 0)
				fx = one / fx;	/* y is -1 */
			return (fx);
		} else if (iy == 0x40000000) {	/* y is 2 */
			return (fx * fx);
		} else if (iy == 0x3f000000) {	/* y is 0.5 */
			/* +-0 and +-inf fall through to the x cases below */
			if (jx != 0 && jx != 0x7f800000)
				return (sqrtf(x));
		}
	}

	/* special value of x (all fraction bits of x are zero) */
	if ((jx & ~0x7f800000) == 0) {
		if (jx == 0x7f800000 || jx == 0 || jx == 0x3f800000) {
			/* x is +-0,+-inf,+-1; set fz = |x|**y */
			*(int *)&fz = jx;
			if (iy < 0)
				fz = one / fz;
			if (ix < 0) {
				if (jx == 0x3f800000 && yisint == 0) {
					/* (-1)**non-int is NaN */
					fz = zero;
					fz /= fz;
				} else if (yisint == 1) {
					/* (x<0)**odd = -(|x|**odd) */
					fz = -fz;
				}
			}
			return (fz);
		}
	}

	/* (x<0)**(non-int) is NaN */
	if (ix < 0 && yisint == 0) {
		fz = zero;
		return (fz / fz);
	}

	/*
	 * compute exp(y*log(|x|))
	 * fx = *(float *) &jx;
	 * fz = (float) exp(((double) fy) * log((double) fx));
	 */
	{
		double dx, dy, dz, ds;
		int *px = (int *)&dx, *pz = (int *)&dz, i, n, m;
#if defined(__i386) && !defined(__amd64)
		/*
		 * NOTE(review): presumably switches the x87 rounding
		 * precision to extended and returns the previous setting,
		 * which is restored before every exit from this block --
		 * confirm against the definition of __swapRP.
		 */
		int rp = __swapRP(fp_extended);
#endif

		fx = *(float *)&jx;		/* fx = |x| */
		dx = (double)fx;

		/* compute log(x)/ln2 */
		/* round the high word of dx to five fraction bits */
		i = px[HIWORD] + 0x4000;
		n = (i >> 20) - 0x3ff;		/* unbiased exponent */
		/* dz = dx with its significand rounded to five bits */
		pz[HIWORD] = i & 0xffff8000;
		pz[LOWORD] = 0;
		ds = (dx - dz) / (dx + dz);
		i = (i >> 15) & 0x1f;		/* index into TBL[] */
		dz = ds * ds;
		/* dy = log2 of the significand part: table value + polynomial */
		dy = invln2 * (TBL[i] + ds * (A0 + dz * A1));
		if (n == 0)
			dz = (double)fy * dy;
		else
			dz = (double)fy * (dy + (double)n);

		/* compute exp2(dz), where dz = y*log2(|x|) */
		i = pz[HIWORD];
		if ((i & ~0x80000000) >= 0x40640000) {	/* |z| >= 160.0 */
			/* certain overflow or underflow: square huge or tiny */
			fz = (i > 0)? huge : tiny;
			if (ix < 0 && yisint == 1)
				fz *= -fz;	/* (-ve)**(odd int) */
			else
				fz *= fz;
#if defined(__i386) && !defined(__amd64)
			if (rp != fp_extended)
				(void) __swapRP(rp);
#endif
			return (fz);
		}

		/* n = dz*32 rounded to nearest; split as n = 32*m + i */
		n = (int)(d32 * dz + (i > 0 ? dhalf : -dhalf));
		i = n & 0x1f;
		m = n >> 5;
		/* dy = ln2 * (dz - n/32), the remainder after the split */
		dy = ln2 * (dz - d1_32 * (double)n);
		/* S[i] times a rational expression in dy built from t0, t1 */
		dx = S[i] * (done - (dtwo * dy) / (dy * (done - dy * t1) - t0));
		/* scale by 2^m by adding m to the exponent field of dx */
		if (m != 0)
			px[HIWORD] += m << 20;
		fz = (float)dx;
#if defined(__i386) && !defined(__amd64)
		if (rp != fp_extended)
			(void) __swapRP(rp);
#endif
	}

	/* end of computing exp(y*log(x)) */
	if (ix < 0 && yisint == 1)
		fz = -fz;	/* (-ve)**(odd int) */
	return (fz);
}
+ */ + +#pragma ident "@(#)remainderf.c 1.12 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak remainderf = __remainderf +#endif + +#include "libm.h" + +float +remainderf(float x, float y) { + if (isnanf(x) || isnanf(y)) + return (x * y); + if (y == 0.0f || (*(int *) &x & ~0x80000000) == 0x7f800000) { + /* y is 0 or x is infinite; raise invalid and return NaN */ + y = 0.0f; + *(int *) &x = 0x7f800000; + return (x * y); + } + return ((float) remainder((double) x, (double) y)); +} diff --git a/usr/src/libm/src/R/rintf.c b/usr/src/libm/src/R/rintf.c new file mode 100644 index 0000000..945228a --- /dev/null +++ b/usr/src/libm/src/R/rintf.c @@ -0,0 +1,165 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/* INDENT OFF */
/*
 * aintf(x)	return x chopped to integral value
 * anintf(x)	return sign(x)*(|x|+0.5) chopped to integral value
 * irintf(x)	return rint(x) in integer format
 * nintf(x)	return anint(x) in integer format
 * rintf(x)	return x rounded to integral according to the rounding direction
 *
 * NOTE: rintf(x), aintf(x) and anintf(x) return results with the same sign as
 * x's, including 0.0.
 *
 * In every function below the bit pattern of x is obtained through a
 * union; casting &x to (int *) and dereferencing it accesses a float
 * object through an incompatible lvalue type.
 */

static const float xf[] = {
/* ZEROF */	0.0f,
/* TWO_23F */	8.3886080000e6f,
/* MTWO_23F */	-8.3886080000e6f,
/* ONEF */	1.0f,
/* MONEF */	-1.0f,
/* HALFF */	0.5f,
/* MHALFF */	-0.5f,
/* HUGEF */	1.0e30f,
};

#define	ZEROF		xf[0]
#define	TWO_23F		xf[1]
#define	MTWO_23F	xf[2]
#define	ONEF		xf[3]
#define	MONEF		xf[4]
#define	HALFF		xf[5]
#define	MHALFF		xf[6]
#define	HUGEF		xf[7]
/* INDENT ON */

/* Truncate x toward zero, keeping the sign of x on a zero result. */
float
aintf(float x) {
	union {
		float	f;
		int	i;
	} u;
	int hx, k;
	float y;

	u.f = x;
	hx = u.i;
	k = (hx & ~0x80000000) >> 23;	/* biased exponent */
	if (k < 150) {
		/* |x| < 2^23, so the conversion to int cannot overflow */
		y = (float) ((int) x);
		/*
		 * make sure y has the same sign of x when |x|<1
		 * (i.e., y=0.0): both k - 127 and hx are negative
		 */
		return (((k - 127) & hx) < 0 ? -y : y);
	}
	/* signal invalid if x is a SNaN */
	return (x * ONEF);		/* +0 -> *1 for Cheetah */
}

/* Round x to the nearest integral value, halfway cases away from zero. */
float
anintf(float x) {
	union {
		float	f;
		int	i;
	} u;
	volatile float dummy;
	int hx, k, j, ix;

	u.f = x;
	hx = u.i;
	ix = hx & ~0x80000000;
	k = ix >> 23;
	if (((k - 127) ^ (k - 150)) < 0) {
		/* 1 <= |x| < 2^23: j is the bit worth one half */
		j = 1 << (149 - k);
		k = j + j - 1;		/* mask of the fraction bits */
		if ((k & hx) != 0)
			dummy = HUGEF + x;	/* raise inexact */
		u.i = (hx + j) & ~k;
		return (u.f);
	} else if (k <= 126) {
		/* |x| < 1: result is +-1 when |x| >= 0.5, otherwise +-0 */
		dummy = HUGEF + x;
		u.i = (0x3f800000 & ((125 - k) >> 31)) |
		    (0x80000000 & hx);
		return (u.f);
	}
	/* signal invalid if x is a SNaN */
	return (x * ONEF);		/* +0 -> *1 for Cheetah */
}

/* Round x to an int according to the current rounding direction. */
int
irintf(float x) {
	union {
		float	f;
		int	i;
	} u;
	float v;
	int hx, k;

	u.f = x;
	hx = u.i;
	k = (hx & ~0x80000000) >> 23;
	/* v = +-2^23 with the sign of x when |x| < 2^23, otherwise 0 */
	v = xf[((k - 150) >> 31) & (1 - (hx >> 31))];
	return ((int) ((float) (x + v) - v));
}

/* Round x to the nearest int, halfway cases away from zero. */
int
nintf(float x) {
	union {
		float	f;
		int	i;
	} u;
	int hx, ix, k, j, m;
	volatile float dummy;

	u.f = x;
	hx = u.i;
	k = (hx & ~0x80000000) >> 23;
	if (((k - 126) ^ (k - 150)) < 0) {
		/* 0.5 <= |x| < 2^23: work on the 24-bit significand */
		ix = (hx & 0x00ffffff) | 0x800000;
		m = 149 - k;
		j = 1 << m;		/* the bit worth one half */
		if ((ix & (j + j - 1)) != 0)
			dummy = HUGEF + x;	/* raise inexact */
		hx = hx >> 31;		/* 0 or -1: sign mask */
		return ((((ix + j) >> (m + 1)) ^ hx) - hx);
	}
	return ((int) x);
}

/* Round x to an integral float according to the rounding direction. */
float
rintf(float x) {
	union {
		float	f;
		int	i;
	} u;
	float w, v;
	int hx, k;

	u.f = x;
	hx = u.i;
	k = (hx & ~0x80000000) >> 23;
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
	if (k >= 150)
		return (x * ONEF);
	v = xf[1 - (hx >> 31)];
#else
	v = xf[((k - 150) >> 31) & (1 - (hx >> 31))];
#endif
	w = (float) (x + v);
	/* |x| < 1 rounded to zero: return a zero with the sign of x */
	if (k < 127 && w == v)
		return (ZEROF * x);
	else
		return (w - v);
}
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * scalbf(x, y): x * 2^y for an integral y.
 *
 * If x is NaN, or y is NaN or infinite, the result is x / -y when y has
 * its sign bit set and x * y otherwise.  A y of magnitude below 1 gives
 * x when y is zero and NaN otherwise; any other non-integral y gives
 * NaN.  |y| >= 2^23 is replaced by +-65000, and the scaling itself is
 * done by scalbnf().
 *
 * The arguments are inspected through unions rather than (int *) casts
 * so that the accesses respect the effective-type rule.
 */
float
scalbf(float x, float y) {
	union {
		float	f;
		int	i;
	} ux, uy;
	int iy, hx, hy, n;

	ux.f = x;
	uy.f = y;
	iy = uy.i;
	hx = ux.i & ~0x80000000;
	hy = iy & ~0x80000000;

	if (hx > 0x7f800000 || hy >= 0x7f800000) {
		/* x is nan or y is inf or nan */
		return ((iy < 0)? x / -y : x * y);
	}

	/* see if y is an integer without raising inexact */
	if (hy >= 0x4b000000) {
		/* |y| >= 2^23, so it must be an integer */
		n = (iy < 0)? -65000 : 65000;
	} else if (hy < 0x3f800000) {
		/* |y| < 1, so it must be zero or non-integer */
		return ((hy == 0)? x : (x - x) / (x - x));
	} else {
		/* any fraction bit set below the units place: non-integer */
		if (hy & ((1 << (0x96 - (hy >> 23))) - 1))
			return ((y - y) / (y - y));
		n = (int)y;
	}
	return (scalbnf(x, n));
}
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <float.h>		/* FLT_MAX, FLT_MIN */

static const float twom25f = 2.98023223876953125e-8F;
#if defined(USE_FPSCALE) || defined(__i386)
static const float two23f = 8388608.0F;
#else
/*
 * v: a non-zero subnormal |x|; returns [-22, 0]
 *
 * The result is the position of the highest set bit of v minus 22,
 * found by a binary search followed by a 2-bit lookup packed into the
 * constant 0xffffaa50.
 */
static int
ilogbf_biased(unsigned v) {
	int r = -22;

	if (v & 0xffff0000)
		r += 16, v >>= 16;
	if (v & 0xff00)
		r += 8, v >>= 8;
	if (v & 0xf0)
		r += 4, v >>= 4;
	v <<= 1;
	return (r + ((0xffffaa50 >> v) & 0x3));
}
#endif	/* defined(USE_FPSCALE) || defined(__i386) */

/*
 * scalbnf(x, n): x * 2^n computed by exponent manipulation.
 *
 * NaN and infinity are returned through an arithmetic operation on x;
 * zero, or n == 0, returns x unchanged.  A result whose exponent would
 * exceed the largest finite one is produced as FLT_MAX * +-FLT_MAX, and
 * one that lies 25 or more binades below the smallest normal exponent
 * as FLT_MIN * +-FLT_MIN.  Results in the subnormal range are built 25
 * binades too large and then multiplied by 2^-25.
 *
 * The bit pattern of x is handled through a union instead of an (int *)
 * alias of the parameter, and the range test on n is written without
 * abs(), whose behaviour is undefined for INT_MIN.
 */
float
scalbnf(float x, int n) {
	union {
		float	f;
		int	i;
	} u;
	int ix, k;

	u.f = x;
	ix = u.i & ~0x80000000;
	k = ix >> 23;
	if (k == 0xff)
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
		return (ix > 0x7f800000 ? x * x : x);
#else
		return (x + x);
#endif
	if (ix == 0 || n == 0)
		return (x);
	if (k == 0) {
		/* subnormal x: normalise and record the effective exponent */
#if defined(USE_FPSCALE) || defined(__i386)
		u.f *= two23f;
		k = ((u.i & ~0x80000000) >> 23) - 23;
#else
		k = ilogbf_biased(ix);
		u.i = (u.i & 0x80000000) | (ix << (-k + 1));
#endif
	}
	if (n >= 131072 || n <= -131072)	/* also true for -2^31 */
		n >>= 1;	/* avoid subsequent integer overflow */
	k += n;
	if (k > 0xfe)
		return (FLT_MAX * copysignf(FLT_MAX, x));
	if (k <= -25)
		return (FLT_MIN * copysignf(FLT_MIN, x));
	if (k > 0) {
		u.i = (u.i & ~0x7f800000) | (k << 23);
		return (u.f);
	}
	k += 25;
	u.i = (u.i & ~0x7f800000) | (k << 23);
	return (u.f * twom25f);
}
+ */ + +#pragma ident "@(#)signgamf.c 1.5 06/01/31 SMI" + +#pragma weak signgamf = __signgamf + +#include "libm.h" + +int signgamf = 0; diff --git a/usr/src/libm/src/R/significandf.c b/usr/src/libm/src/R/significandf.c new file mode 100644 index 0000000..a36948b --- /dev/null +++ b/usr/src/libm/src/R/significandf.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)significandf.c 1.10 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak significandf = __significandf +#endif + +#include "libm.h" + +float +significandf(float x) { + int ix = *(int *) &x & ~0x80000000; + + if (ix == 0 || ix >= 0x7f800000) /* 0/+-Inf/NaN */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix > 0x7f800000 ? 
x * x : x); +#else + return (x + x); +#endif + else + return (scalbnf(x, -ilogbf(x))); +} diff --git a/usr/src/libm/src/R/sincosf.c b/usr/src/libm/src/R/sincosf.c new file mode 100644 index 0000000..b5ba350 --- /dev/null +++ b/usr/src/libm/src/R/sincosf.c @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
#pragma ident "@(#)sincosf.c 1.9 06/01/23 SMI"

#pragma weak sincosf = __sincosf

/* INDENT OFF */
/*
 * For |x| < pi/4, let z = x * x, and approximate sin(x) by
 *
 *	S(x) = x(S0 + S1*z)(S2 + S3*z + z*z)
 * where
 *	S0 =  1.85735322054308378716204874632872525989806770558e-0003,
 *	S1 = -1.95035094218403635082921458859320791358115801259e-0004,
 *	S2 =  5.38400550766074785970952495168558701485841707252e+0002,
 *	S3 = -3.31975110777873728964197739157371509422022905947e+0001,
 *
 * with error bounded by |(sin(x) - S(x))/x| < 2**(-28.2), and
 * cos(x) by
 *
 *	C(x) = (C0 + C1*z + C2*z*z) * (C3 + C4*z + z*z)
 * where
 *	C0 =  1.09349482127188401868272000389539985058873853699e-0003
 *	C1 = -5.03324285989964979398034700054920226866107675091e-0004
 *	C2 =  2.43792880266971107750418061559602239831538067410e-0005
 *	C3 =  9.14499072605666582228127405245558035523741471271e+0002
 *	C4 = -3.63151270591815439197122504991683846785293207730e+0001
 *
 * with error bounded by |cos(x) - C(x)| < 2**(-34.2).
 */
/* INDENT ON */

#include "libm.h"

extern const int _TBL_ipio2_inf[];
extern int __rem_pio2m(double *, double *, int, int, int, const int *);
#if defined(__i386) && !defined(__amd64)
extern int __swapRP(int);
#endif

/*
 * Coefficients S0..S3 and C0..C4 of the approximations described above,
 * followed by 2/pi, 0.5, and pi/2 split into a leading part (pio2_1) and
 * a tail (pio2_t).
 */
static const double C[] = {
	1.85735322054308378716204874632872525989806770558e-0003,
	-1.95035094218403635082921458859320791358115801259e-0004,
	5.38400550766074785970952495168558701485841707252e+0002,
	-3.31975110777873728964197739157371509422022905947e+0001,
	1.09349482127188401868272000389539985058873853699e-0003,
	-5.03324285989964979398034700054920226866107675091e-0004,
	2.43792880266971107750418061559602239831538067410e-0005,
	9.14499072605666582228127405245558035523741471271e+0002,
	-3.63151270591815439197122504991683846785293207730e+0001,
	0.636619772367581343075535,	/* 2^ -1 * 1.45F306DC9C883 */
	0.5,
	1.570796326734125614166,	/* 2^  0 * 1.921FB54400000 */
	6.077100506506192601475e-11,	/* 2^-34 * 1.0B4611A626331 */
};

#define	S0	C[0]
#define	S1	C[1]
#define	S2	C[2]
#define	S3	C[3]
#define	C0	C[4]
#define	C1	C[5]
#define	C2	C[6]
#define	C3	C[7]
#define	C4	C[8]
#define	invpio2	C[9]
#define	half	C[10]
#define	pio2_1	C[11]
#define	pio2_t	C[12]

/*
 * sincosf(x, s, c): store sin(x) in *s and cos(x) in *c.
 *
 * The argument is widened to double and handled in four ranges:
 *   - tiny |x|: *s = x and *c = 1;
 *   - |x| up to pi/4: the S() and C() approximations directly;
 *   - |x| up to 3*pi/4: one subtraction or addition of pi/2, with the
 *     roles of S() and C() exchanged;
 *   - larger finite |x|: reduction modulo pi/2, either inline or through
 *     __rem_pio2m(), followed by quadrant selection on the low two bits
 *     of n.
 * For infinity and NaN both results are x / x.
 */
void
sincosf(float x, float *s, float *c)
{
	double y, z, w;
	float f, g;
	int n, ix, hx, hy;

	/* hx = bit pattern of x, ix = bit pattern of |x| */
	hx = *((int *)&x);
	ix = hx & 0x7fffffff;

	y = (double)x;

	if (ix <= 0x4016cbe4) {		/* |x| < 3*pi/4 */
		if (ix <= 0x3f490fdb) {		/* |x| < pi/4 */
			if (ix <= 0x39800000) {	/* |x| <= 2**-12 */
				/*
				 * NOTE(review): the conversion result is
				 * unused; presumably it is here to raise
				 * inexact for non-zero x -- confirm.
				 */
				volatile int i = (int)y;
#ifdef lint
				i = i;
#endif
				*s = x;
				*c = 1.0f;
				return;
			}
			z = y * y;
			*s = (float)((y * (S0 + z * S1)) *
			    (S2 + z * (S3 + z)));
			*c = (float)(((C0 + z * C1) + (z * z) * C2) *
			    (C3 + z * (C4 + z)));
		} else if (hx > 0) {
			/* y = x - pi/2: sin(x) = C(y), cos(x) = -S(y) */
			y = (y - pio2_1) - pio2_t;
			z = y * y;
			*s = (float)(((C0 + z * C1) + (z * z) * C2) *
			    (C3 + z * (C4 + z)));
			*c = (float)-((y * (S0 + z * S1)) *
			    (S2 + z * (S3 + z)));
		} else {
			/* y = x + pi/2: sin(x) = -C(y), cos(x) = S(y) */
			y = (y + pio2_1) + pio2_t;
			z = y * y;
			*s = (float)-(((C0 + z * C1) + (z * z) * C2) *
			    (C3 + z * (C4 + z)));
			*c = (float)((y * (S0 + z * S1)) *
			    (S2 + z * (S3 + z)));
		}
		return;
	} else if (ix <= 0x49c90fdb) {	/* |x| < 2^19*pi */
#if defined(__i386) && !defined(__amd64)
		/*
		 * NOTE(review): presumably selects extended rounding
		 * precision for the reduction and restores the previous
		 * setting below -- confirm against __swapRP.
		 */
		int rp;

		rp = __swapRP(fp_extended);
#endif
		/* n = x * 2/pi rounded to nearest; y = x - n * pi/2 */
		w = y * invpio2;
		if (hx < 0)
			n = (int)(w - half);
		else
			n = (int)(w + half);
		y = (y - n * pio2_1) - n * pio2_t;
#if defined(__i386) && !defined(__amd64)
		if (rp != fp_extended)
			(void) __swapRP(rp);
#endif
	} else {
		if (ix >= 0x7f800000) {
			/* x is inf or NaN */
			*s = *c = x / x;
			return;
		}
		/*
		 * Rebuild |y| in w with its exponent field replaced by
		 * 0x416, pass the exponent difference in n, and let
		 * __rem_pio2m() store the reduced argument in y and return
		 * the quadrant count.  Both are negated afterwards when x
		 * is negative.
		 */
		hy = ((int *)&y)[HIWORD];
		n = ((hy >> 20) & 0x7ff) - 1046;
		((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000;
		((int *)&w)[LOWORD] = ((int *)&y)[LOWORD];
		n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf);
		if (hy < 0) {
			y = -y;
			n = -n;
		}
	}

	/* f = S(y), g = C(y) for the reduced argument */
	z = y * y;
	f = (float)((y * (S0 + z * S1)) * (S2 + z * (S3 + z)));
	g = (float)(((C0 + z * C1) + (z * z) * C2) *
	    (C3 + z * (C4 + z)));
	/* bit 1 of n negates both values; bit 0 exchanges them */
	if (n & 2) {
		f = -f;
		g = -g;
	}
	if (n & 1) {
		*s = g;
		*c = -f;
	} else {
		*s = f;
		*c = g;
	}
}
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Single-precision front end to sincospi(): the argument is widened to
 * double, both results are computed there and then narrowed into *s and
 * *c.  When FPADD_TRAPS_INCOMPLETE_ON_NAN is defined, a NaN argument is
 * answered directly with x * x in both outputs and sincospi() is not
 * called.
 */
void
sincospif(float x, float *s, float *c) {
	double sin_d, cos_d;

#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
	if (isnanf(x)) {
		*s = *c = x * x;
		return;
	}
#endif
	sincospi((double) x, &sin_d, &cos_d);
	*s = (float) sin_d;
	*c = (float) cos_d;
}
+ */ + +#pragma ident "@(#)sinf.c 1.12 06/01/23 SMI" + +#pragma weak sinf = __sinf + +/* + * See sincosf.c + */ + +#include "libm.h" + +extern const int _TBL_ipio2_inf[]; +extern int __rem_pio2m(double *, double *, int, int, int, const int *); +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const double C[] = { + 1.85735322054308378716204874632872525989806770558e-0003, + -1.95035094218403635082921458859320791358115801259e-0004, + 5.38400550766074785970952495168558701485841707252e+0002, + -3.31975110777873728964197739157371509422022905947e+0001, + 1.09349482127188401868272000389539985058873853699e-0003, + -5.03324285989964979398034700054920226866107675091e-0004, + 2.43792880266971107750418061559602239831538067410e-0005, + 9.14499072605666582228127405245558035523741471271e+0002, + -3.63151270591815439197122504991683846785293207730e+0001, + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 0.5, + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define S0 C[0] +#define S1 C[1] +#define S2 C[2] +#define S3 C[3] +#define C0 C[4] +#define C1 C[5] +#define C2 C[6] +#define C3 C[7] +#define C4 C[8] +#define invpio2 C[9] +#define half C[10] +#define pio2_1 C[11] +#define pio2_t C[12] + +float +sinf(float x) +{ + double y, z, w; + float f; + int n, ix, hx, hy; + + hx = *((int *)&x); + ix = hx & 0x7fffffff; + + y = (double)x; + + if (ix <= 0x4016cbe4) { /* |x| < 3*pi/4 */ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ + if (ix <= 0x39800000) { /* |x| <= 2**-12 */ + volatile int i = (int)y; +#ifdef lint + i = i; +#endif + return (x); + } + z = y * y; + return ((float)((y * (S0 + z * S1)) * + (S2 + z * (S3 + z)))); + } else if (hx > 0) { + y = (y - pio2_1) - pio2_t; + z = y * y; + return ((float)(((C0 + z * C1) + (z * z) * C2) * + (C3 + z * (C4 + z)))); + } else { + y = (y + pio2_1) + pio2_t; + z = y * y; + return ((float)-(((C0 + z * C1) + (z * z) * C2) * + (C3 
+ z * (C4 + z)))); + } + } else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */ +#if defined(__i386) && !defined(__amd64) + int rp; + + rp = __swapRP(fp_extended); +#endif + w = y * invpio2; + if (hx < 0) + n = (int)(w - half); + else + n = (int)(w + half); + y = (y - n * pio2_1) - n * pio2_t; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + if (ix >= 0x7f800000) + return (x / x); /* sin(Inf or NaN) is NaN */ + hy = ((int *)&y)[HIWORD]; + n = ((hy >> 20) & 0x7ff) - 1046; + ((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000; + ((int *)&w)[LOWORD] = ((int *)&y)[LOWORD]; + n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf); + if (hy < 0) { + y = -y; + n = -n; + } + } + + if (n & 1) { + /* compute cos y */ + z = y * y; + f = (float)(((C0 + z * C1) + (z * z) * C2) * + (C3 + z * (C4 + z))); + } else { + /* compute sin y */ + z = y * y; + f = (float)((y * (S0 + z * S1)) * (S2 + z * (S3 + z))); + } + + return ((n & 2)? -f : f); +} diff --git a/usr/src/libm/src/R/sinhf.c b/usr/src/libm/src/R/sinhf.c new file mode 100644 index 0000000..db8ad1c --- /dev/null +++ b/usr/src/libm/src/R/sinhf.c @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)sinhf.c 1.12 06/01/23 SMI" + +#pragma weak sinhf = __sinhf + +#include "libm.h" + +float +sinhf(float x) { + double s; + float w; + int hx, ix; + + hx = *(int *)&x; + ix = hx & ~0x80000000; + if (ix >= 0x7f800000) { + /* sinhf(x) is x if x is +-Inf or NaN */ + return (x * 1.0f); + } + if (ix >= 0x43000000) /* sinhf(x) trivially overflows */ + s = (hx < 0)? -1.0e100 : 1.0e100; + else + s = sinh((double)x); + w = (float)s; + return (w); +} diff --git a/usr/src/libm/src/R/sqrtf.c b/usr/src/libm/src/R/sqrtf.c new file mode 100644 index 0000000..7c6d821 --- /dev/null +++ b/usr/src/libm/src/R/sqrtf.c @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
#ifdef __INLINE

extern float __inline_sqrtf(float);

/*
 * sqrtf(x): when __INLINE is defined, simply forwards to
 * __inline_sqrtf().
 */
float
sqrtf(float x) {
	return (__inline_sqrtf(x));
}

#else	/* defined(__INLINE) */

static const float huge = 1.0e35F, tiny = 1.0e-35F, zero = 0.0f;

/*
 * sqrtf(x): software single-precision square root.
 *
 *   +-0                  returned unchanged
 *   x < 0, or a NaN with the sign bit set
 *                        returns (x * 0) / 0
 *   +Inf, or a NaN with the sign bit clear
 *                        returns x * x
 *   otherwise            x is scaled by 2**-ilogb(x), the root of the
 *                        24-bit significand is generated one bit at a
 *                        time, the last bit is rounded according to
 *                        the rounding mode probed with huge -+ tiny,
 *                        and the result is scaled back by half the
 *                        (evened) exponent.
 *
 * The bit pattern is read and written through a union rather than
 * through an (int *) alias of a float, so the accesses respect C's
 * effective-type (strict aliasing) rules.  Like the original, this
 * assumes a 32-bit int.  ir_ilogb_() and r_scalbn_() are declared
 * elsewhere (expected from libm.h).
 */
float
sqrtf(float x) {
	union {
		float f;
		int i;
	} w;
	float dz;
	int ix, j, r, q, m, n, s, t;

	w.f = x;
	ix = w.i;
	if (ix <= 0) {
		/* x is <= 0 or nan */
		j = ix & 0x7fffffff;
		if (j == 0)
			return (w.f);
		return ((w.f * zero) / zero);
	}

	if ((ix & 0x7f800000) == 0x7f800000) {
		/* x is +inf or nan */
		return (w.f * w.f);
	}

	/* scale x by 2**-m, then take the significand with its hidden bit */
	m = ir_ilogb_(&w.f);
	n = -m;
	w.f = r_scalbn_(&w.f, (int *)&n);
	ix = (w.i & 0x007fffff) | 0x00800000;
	n = m / 2;
	if ((n + n) != m) {
		/* odd exponent: fold one factor of two into the significand */
		ix = ix + ix;
		m -= 1;
		n = m / 2;
	}

	/* generate sqrt(x) bit by bit */
	ix <<= 1;
	q = s = 0;
	r = 0x01000000;
	for (j = 1; j <= 25; j++) {
		t = s + r;
		if (t <= ix) {
			s = t + r;
			ix -= t;
			q += r;
		}
		ix <<= 1;
		r >>= 1;
	}
	if (ix == 0)
		goto done;	/* zero remainder: the root is exact */

	/* raise inexact and determine the ambient rounding mode */
	dz = huge - tiny;
	if (dz < huge)
		goto done;
	dz = huge + tiny;
	if (dz > huge)
		q += 1;
	q += (q & 1);

done:
	/* drop the extra low bit of q, add the exponent bias, rescale */
	w.i = (q >> 1) + 0x3f000000;
	return (r_scalbn_(&w.f, (int *)&n));
}

#endif	/* defined(__INLINE) */
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tanf.c 1.10 06/01/23 SMI" + +#pragma weak tanf = __tanf + +#include "libm.h" + +extern const int _TBL_ipio2_inf[]; +extern int __rem_pio2m(double *, double *, int, int, int, const int *); +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const double C[] = { + 1.0, + 4.46066928428959230679140546271810308098793029785e-0003, + 4.92165316309189027066395283327437937259674072266e+0000, + -7.11410648161473480044492134766187518835067749023e-0001, + 4.08549808374053391446523164631798863410949707031e+0000, + 2.50411070398050927821032018982805311679840087891e+0000, + 1.11492064560251158411574579076841473579406738281e+0001, + -1.50565540968422650891511693771462887525558471680e+0000, + -1.81484378878349295050043110677506774663925170898e+0000, + 3.333335997532835641297409611782510896641e-0001, + 2.999997598248363761541668282006867229939e+00, + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 0.5, + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define one C[0] +#define P0 C[1] +#define P1 C[2] +#define P2 C[3] +#define P3 C[4] +#define P4 C[5] +#define P5 C[6] +#define P6 C[7] +#define P7 C[8] +#define T0 C[9] +#define T1 C[10] +#define invpio2 C[11] +#define half C[12] +#define pio2_1 C[13] +#define pio2_t C[14] + +float +tanf(float x) +{ + 
double y, z, w; + float f; + int n, ix, hx, hy; + + hx = *((int *)&x); + ix = hx & 0x7fffffff; + + y = (double)x; + + if (ix <= 0x4016cbe4) { /* |x| < 3*pi/4 */ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ + if (ix < 0x3c000000) { /* |x| < 2**-7 */ + if (ix <= 0x39800000) { /* |x| < 2**-12 */ + volatile int i = (int)y; +#ifdef lint + i = i; +#endif + return (x); + } + return ((float)((y * T0) * (T1 + y * y))); + } + z = y * y; + return ((float)(((P0 * y) * (P1 + z * (P2 + z)) * + (P3 + z * (P4 + z))) * + (P5 + z * (P6 + z * (P7 + z))))); + } + if (hx > 0) + y = (y - pio2_1) - pio2_t; + else + y = (y + pio2_1) + pio2_t; + hy = ((int *)&y)[HIWORD] & ~0x80000000; + if (hy < 0x3f800000) { /* |y| < 2**-7 */ + z = (y * T0) * (T1 + y * y); + return ((float)(-one / z)); + } + z = y * y; + w = ((P0 * y) * (P1 + z * (P2 + z)) * (P3 + z * (P4 + z))) * + (P5 + z * (P6 + z * (P7 + z))); + return ((float)(-one / w)); + } + + if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */ +#if defined(__i386) && !defined(__amd64) + int rp; + + rp = __swapRP(fp_extended); +#endif + w = y * invpio2; + if (hx < 0) + n = (int)(w - half); + else + n = (int)(w + half); + y = (y - n * pio2_1) - n * pio2_t; +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + if (ix >= 0x7f800000) + return (x / x); /* sin(Inf or NaN) is NaN */ + hy = ((int *)&y)[HIWORD]; + n = ((hy >> 20) & 0x7ff) - 1046; + ((int *)&w)[HIWORD] = (hy & 0xfffff) | 0x41600000; + ((int *)&w)[LOWORD] = ((int *)&y)[LOWORD]; + n = __rem_pio2m(&w, &y, n, 1, 0, _TBL_ipio2_inf); + if (hy < 0) { + y = -y; + n = -n; + } + } + + hy = ((int *)&y)[HIWORD] & ~0x80000000; + if (hy < 0x3f800000) { /* |y| < 2**-7 */ + z = (y * T0) * (T1 + y * y); + f = ((n & 1) == 0)? (float)z : (float)(-one / z); + return (f); + } + z = y * y; + w = ((P0 * y) * (P1 + z * (P2 + z)) * (P3 + z * (P4 + z))) * + (P5 + z * (P6 + z * (P7 + z))); + f = ((n & 1) == 0)? 
(float)w : (float)(-one / w); + return (f); +} diff --git a/usr/src/libm/src/R/tanhf.c b/usr/src/libm/src/R/tanhf.c new file mode 100644 index 0000000..83d1d14 --- /dev/null +++ b/usr/src/libm/src/R/tanhf.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tanhf.c 1.11 06/01/31 SMI" + +#pragma weak tanhf = __tanhf + +#include "libm.h" + +float +tanhf(float x) { +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + if (isnanf(x)) + return (x * x); + else +#endif + return ((float) tanh((double) x)); +} diff --git a/usr/src/libm/src/complex/cabs.c b/usr/src/libm/src/complex/cabs.c new file mode 100644 index 0000000..ae6188d --- /dev/null +++ b/usr/src/libm/src/complex/cabs.c @@ -0,0 +1,182 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cabs.c 1.3 06/01/23 SMI" + +#pragma weak cabs = __cabs + +#include "libm_synonyms.h" +#include <math.h> +#include "complex_wrapper.h" + +/* + * If C were the only standard we cared about, cabs could just call + * hypot. Unfortunately, various other standards say that hypot must + * call matherr and/or set errno to ERANGE when the result overflows. + * Since cabs should do neither of these things, we have to either + * make hypot a wrapper on another internal function or duplicate + * the hypot implementation here. I've chosen to do the latter.
/*
 * Constants for cabs(): 1 + 2**-52 and 2**-53 are used to probe the
 * rounding mode at run time; 2**-768 and 2**768 rescale operands that
 * would otherwise overflow or underflow when squared.
 */
static const double
	zero = 0.0,
	onep1u = 1.00000000000000022204e+00,	/* 0x3ff00000 1 = 1+2**-52 */
	twom53 = 1.11022302462515654042e-16,	/* 0x3ca00000 0 = 2**-53 */
	twom768 = 6.441148769597133308e-232,	/* 2^-768 */
	two768 = 1.552518092300708935e+231;	/* 2^768 */

/*
 * cabs(z): magnitude sqrt(x*x + y*y) of z = x + iy, computed without
 * calling hypot().
 *
 * The operands are ordered so that ax >= ay, then:
 *   - if the larger exponent is >= 2^500, Inf/NaN are answered directly
 *     and otherwise both operands are scaled down by 2^-768;
 *   - if the smaller exponent is < 2^-450, zeros are answered directly
 *     and otherwise both operands are scaled up by 2^768;
 *   - whenever the exponents differ by more than 32, the result is
 *     ax plus (a possibly reduced) ay with no square root;
 *   - otherwise one of three formulas is used depending on the operand
 *     bit patterns and the rounding mode, and any scaling is undone.
 *
 * NOTE(review): the word accesses go through (int *) casts on doubles
 * and rely on HIWORD/LOWORD, D_RE/D_IM and dcomplex from headers not
 * shown here; the casts assume a compiler that tolerates this aliasing.
 */
double
cabs(dcomplex z)
{
	double x, y, xh, yh, w, ax, ay;
	int i, j, nx, ny, ix, iy, iscale = 0;
	unsigned lx, ly;

	x = D_RE(z);
	y = D_IM(z);

	/* high words with the sign bit cleared, and the low words */
	ix = ((int *)&x)[HIWORD] & ~0x80000000;
	lx = ((int *)&x)[LOWORD];
	iy = ((int *)&y)[HIWORD] & ~0x80000000;
	ly = ((int *)&y)[LOWORD];

	/* force ax = |x| ~>~ ay = |y| */
	if (iy > ix) {
		ax = fabs(y);
		ay = fabs(x);
		i = ix;
		ix = iy;
		iy = i;
		i = lx;
		lx = ly;
		ly = i;
	} else {
		ax = fabs(x);
		ay = fabs(y);
	}
	/* biased exponents and their difference */
	nx = ix >> 20;
	ny = iy >> 20;
	j = nx - ny;

	if (nx >= 0x5f3) {
		/* x >= 2^500 (x*x or y*y may overflow) */
		if (nx == 0x7ff) {
			/* inf or NaN, signal of sNaN */
			if (((ix - 0x7ff00000) | lx) == 0)
				return ((ax == ay)? ay : ax);
			else if (((iy - 0x7ff00000) | ly) == 0)
				return ((ay == ax)? ax : ay);
			else
				return (ax * ay);
		} else if (j > 32) {
			/* x >> y */
			if (j <= 53)
				ay *= twom53;
			ax += ay;
			return (ax);
		}
		/* scale down; iscale == 2 means "multiply back by 2^768" */
		ax *= twom768;
		ay *= twom768;
		iscale = 2;
		ix -= 768 << 20;
		iy -= 768 << 20;
	} else if (ny < 0x23d) {
		/* y < 2^-450 (x*x or y*y may underflow) */
		if ((ix | lx) == 0)
			return (ay);
		if ((iy | ly) == 0)
			return (ax);
		if (j > 53)	/* x >> y */
			return (ax + ay);
		/* scale up; iscale == 1 means "multiply back by 2^-768" */
		iscale = 1;
		ax *= two768;
		ay *= two768;
		if (nx == 0) {
			if (ax == zero)	/* guard subnormal flush to zero */
				return (ax);
			/* subnormal input: re-read the scaled high word */
			ix = ((int *)&ax)[HIWORD];
		} else {
			ix += 768 << 20;
		}
		if (ny == 0) {
			if (ay == zero)	/* guard subnormal flush to zero */
				return (ax * twom768);
			/* subnormal input: re-read the scaled high word */
			iy = ((int *)&ay)[HIWORD];
		} else {
			iy += 768 << 20;
		}
		/* exponent gap may have changed after rescaling subnormals */
		j = (ix >> 20) - (iy >> 20);
		if (j > 32) {
			/* x >> y */
			if (j <= 53)
				ay *= twom53;
			return ((ax + ay) * twom768);
		}
	} else if (j > 32) {
		/* x >> y */
		if (j <= 53)
			ay *= twom53;
		return (ax + ay);
	}

	/*
	 * Medium range ax and ay with max{|ax/ay|,|ay/ax|} bounded by 2^32.
	 * First check rounding mode by comparing onep1u*onep1u with onep1u
	 * + twom53.  Make sure the computation is done at run-time.
	 */
	if (((lx | ly) << 5) == 0) {
		/* the low 27 bits of both low words are zero */
		ay = ay * ay;
		ax += ay / (ax + sqrt(ax * ax + ay));
	} else if (onep1u * onep1u != onep1u + twom53) {
		/* round-to-zero, positive, negative mode */
		/* magic formula with less than an ulp error */
		w = sqrt(ax * ax + ay * ay);
		ax += ay / ((ax + w) / ay);
	} else {
		/* round-to-nearest mode */
		w = ax - ay;
		if (w > ay) {
			/* xh = ax with its low word cleared */
			((int *)&xh)[HIWORD] = ix;
			((int *)&xh)[LOWORD] = 0;
			ay = ay * ay + (ax - xh) * (ax + xh);
			ax = sqrt(xh * xh + ay);
		} else {
			ax = ax + ax;
			/* xh, yh = doubled ax and ay with low words cleared */
			((int *)&xh)[HIWORD] = ix + 0x00100000;
			((int *)&xh)[LOWORD] = 0;
			((int *)&yh)[HIWORD] = iy;
			((int *)&yh)[LOWORD] = 0;
			ay = w * w + ((ax - xh) * yh + (ay - yh) * ax);
			ax = sqrt(xh * yh + ay);
		}
	}
	/* undo the scaling applied above */
	if (iscale > 0) {
		if (iscale == 1)
			ax *= twom768;
		else
			ax *= two768;	/* must generate side effect here */
	}
	return (ax);
}
+ * Use is subject to license terms. + */ + +#pragma ident "@(#)cabsf.c 1.3 06/01/31 SMI" + +#pragma weak cabsf = __cabsf + +#include "libm.h" +#include "complex_wrapper.h" + +float +cabsf(fcomplex z) { + return (hypotf(F_RE(z), F_IM(z))); +} diff --git a/usr/src/libm/src/complex/cabsl.c b/usr/src/libm/src/complex/cabsl.c new file mode 100644 index 0000000..c64d988 --- /dev/null +++ b/usr/src/libm/src/complex/cabsl.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cabsl.c 1.3 06/01/31 SMI" + +#pragma weak cabsl = __cabsl + +#include "libm.h" +#include "complex_wrapper.h" + +long double +cabsl(ldcomplex z) { + return (hypotl(LD_RE(z), LD_IM(z))); +} diff --git a/usr/src/libm/src/complex/cacos.c b/usr/src/libm/src/complex/cacos.c new file mode 100644 index 0000000..664579e --- /dev/null +++ b/usr/src/libm/src/complex/cacos.c @@ -0,0 +1,403 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cacos.c 1.6 06/01/31 SMI" + +#pragma weak cacos = __cacos + +/* INDENT OFF */ +/* + * dcomplex cacos(dcomplex z); + * + * Algorithm + * (based on T.E.Hull, Thomas F. Fairgrieve and Ping Tak Peter Tang's + * paper "Implementing the Complex Arcsine and Arccosine Functions Using + * Exception Handling", ACM TOMS, Vol 23, pp 299-335) + * + * The principal value of complex inverse cosine function cacos(z), + * where z = x+iy, can be defined by + * + * cacos(z) = acos(B) - i sign(y) log (A + sqrt(A*A-1)), + * + * where the log function is the natural log, and + * ____________ ____________ + * 1 / 2 2 1 / 2 2 + * A = --- / (x+1) + y + --- / (x-1) + y + * 2 \/ 2 \/ + * ____________ ____________ + * 1 / 2 2 1 / 2 2 + * B = --- / (x+1) + y - --- / (x-1) + y . + * 2 \/ 2 \/ + * + * The Branch cuts are on the real line from -inf to -1 and from 1 to inf. + * The real and imaginary parts are based on Abramowitz and Stegun + * [Handbook of Mathematical Functions, 1972]. The sign of the imaginary + * part is chosen to be what is generally considered the principal value of + * this function. + * + * Notes:1.
A is the average of the distances from z to the points (1,0) + * and (-1,0) in the complex z-plane, and in particular A>=1. + * 2. B is in [-1,1], and A*B = x + * + * Basic relations + * cacos(conj(z)) = conj(cacos(z)) + * cacos(-z) = pi - cacos(z) + * cacos( z) = pi/2 - casin(z) + * + * Special cases (conform to ISO/IEC 9899:1999(E)): + * cacos(+-0 + i y ) = pi/2 - i y for y is +-0, +-inf, NaN + * cacos( x + i inf) = pi/2 - i inf for all x + * cacos( x + i NaN) = NaN + i NaN with invalid for non-zero finite x + * cacos(-inf + i y ) = pi - i inf for finite +y + * cacos( inf + i y ) = 0 - i inf for finite +y + * cacos(-inf + i inf) = 3pi/4- i inf + * cacos( inf + i inf) = pi/4 - i inf + * cacos(+-inf+ i NaN) = NaN - i inf (sign of imaginary is unspecified) + * cacos(NaN + i y ) = NaN + i NaN with invalid for finite y + * cacos(NaN + i inf) = NaN - i inf + * cacos(NaN + i NaN) = NaN + i NaN + * + * Special Regions (better formula for accuracy and for avoiding spurious + * overflow or underflow) (all x and y are assumed nonnegative): + * case 1: y = 0 + * case 2: tiny y relative to x-1: y <= ulp(0.5)*|x-1| + * case 3: tiny y: y < 4 sqrt(u), where u = minimum normal number + * case 4: huge y relative to x+1: y >= (1+x)/ulp(0.5) + * case 5: huge x and y: x and y >= sqrt(M)/8, where M = maximum normal number + * case 6: tiny x: x < 4 sqrt(u) + * -------- + * case 1 & 2. y=0 or y/|x-1| is tiny. We have + * ____________ _____________ + * / 2 2 / y 2 + * / (x+-1) + y = |x+-1| / 1 + (------) + * \/ \/ |x+-1| + * + * 1 y 2 + * ~ |x+-1| ( 1 + --- (------) ) + * 2 |x+-1| + * + * 2 + * y + * = |x+-1| + --------. 
+ * 2|x+-1| + * + * Consequently, it is not difficult to see that + * 2 + * y + * [ 1 + ------------ , if x < 1, + * [ 2(1+x)(1-x) + * [ + * [ + * [ x, if x = 1 (y = 0), + * [ + * A ~= [ 2 + * [ x * y + * [ x + ------------ ~ x, if x > 1 + * [ 2(x+1)(x-1) + * + * and hence + * ______ 2 + * / 2 y y + * A + \/ A - 1 ~ 1 + ---------------- + -----------, if x < 1, + * sqrt((x+1)(1-x)) 2(x+1)(1-x) + * + * + * ~ x + sqrt((x-1)*(x+1)), if x >= 1. + * + * 2 + * y + * [ x(1 - -----------) ~ x, if x < 1, + * [ 2(1+x)(1-x) + * B = x/A ~ [ + * [ 1, if x = 1, + * [ + * [ 2 + * [ y + * [ 1 - ------------ , if x > 1, + * [ 2(x+1)(x-1) + * Thus + * [ acos(x) - i y/sqrt((x-1)*(x+1)), if x < 1, + * [ + * cacos(x+i*y)~ [ 0 - i 0, if x = 1, + * [ + * [ y/sqrt(x*x-1) - i log(x+sqrt(x*x-1)), if x > 1. + * + * Note: y/sqrt(x*x-1) ~ y/x when x >= 2**26. + * case 3. y < 4 sqrt(u), where u = minimum normal x. + * After case 1 and 2, this will only occurs when x=1. When x=1, we have + * A = (sqrt(4+y*y)+y)/2 ~ 1 + y/2 + y^2/8 + ... + * and + * B = 1/A = 1 - y/2 + y^2/8 + ... + * Since + * cos(sqrt(y)) ~ 1 - y/2 + ... + * we have, for the real part, + * acos(B) ~ acos(1 - y/2) ~ sqrt(y) + * For the imaginary part, + * log(A+sqrt(A*A-1)) ~ log(1+y/2+sqrt(2*y/2)) + * = log(1+y/2+sqrt(y)) + * = (y/2+sqrt(y)) - (y/2+sqrt(y))^2/2 + ... + * ~ sqrt(y) - y*(sqrt(y)+y/2)/2 + * ~ sqrt(y) + * + * case 4. y >= (x+1)/ulp(0.5). In this case, A ~ y and B ~ x/y. Thus + * real part = acos(B) ~ pi/2 + * and + * imag part = log(y+sqrt(y*y-one)) + * + * case 5. Both x and y are large: x and y > sqrt(M)/8, where M = maximum x + * In this case, + * A ~ sqrt(x*x+y*y) + * B ~ x/sqrt(x*x+y*y). + * Thus + * real part = acos(B) = atan(y/x), + * imag part = log(A+sqrt(A*A-1)) ~ log(2A) + * = log(2) + 0.5*log(x*x+y*y) + * = log(2) + log(y) + 0.5*log(1+(x/y)^2) + * + * case 6. x < 4 sqrt(u). In this case, we have + * A ~ sqrt(1+y*y), B = x/sqrt(1+y*y). 
/* INDENT OFF */
/*
 * Constants for cacos().  pi, pi/2, pi/4 and 3pi/4 are each stored as a
 * leading value plus a small "_l" correction that is added where the
 * constant is returned as a result.
 */
static const double
	zero = 0.0,
	one = 1.0,
	E = 1.11022302462515654042e-16,			/* 2**-53 */
	ln2 = 6.93147180559945286227e-01,
	pi = 3.1415926535897931159979634685,
	pi_l = 1.224646799147353177e-16,
	pi_2 = 1.570796326794896558e+00,
	pi_2_l = 6.123233995736765886e-17,
	pi_4 = 0.78539816339744827899949,
	pi_4_l = 3.061616997868382943e-17,
	pi3_4 = 2.356194490192344836998,
	pi3_4_l = 9.184850993605148829195e-17,
	Foursqrtu = 5.96667258496016539463e-154,	/* 2**(-509) */
	Acrossover = 1.5,
	Bcrossover = 0.6417,
	half = 0.5;
/* INDENT ON */

/*
 * cacos(z): complex arc cosine of z = x + iy.
 *
 * Order of evaluation:
 *   1. x == 0 with y zero, Inf or NaN            -> pi/2 - i y
 *   2. y is Inf or NaN                           -> special values
 *   3. x is Inf or NaN (y finite at this point)  -> special values
 *   4. finite x, y: |x| and |y| are dispatched to regions 1-6 or to
 *      the general "safe region" formulas using A and B.
 * For the finite case the result is computed for |x|, |y|; at the end
 * the real part is replaced by pi - re when the input real part had
 * its sign bit set, and the imaginary part is negated when the input
 * imaginary part did not.
 *
 * HI_WORD, LO_WORD, ISINF, D_RE, D_IM and dcomplex are not defined in
 * this block; they are expected to come from libm.h and
 * complex_wrapper.h.
 */
dcomplex
cacos(dcomplex z) {
	double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx;
	int ix, iy, hx, hy;
	unsigned lx, ly;
	dcomplex ans;

	x = D_RE(z);
	y = D_IM(z);
	hx = HI_WORD(x);
	lx = LO_WORD(x);
	hy = HI_WORD(y);
	ly = LO_WORD(y);
	/* high words with the sign bit cleared */
	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;

	/* x is 0 */
	if ((ix | lx) == 0) {
		if (((iy | ly) == 0) || (iy >= 0x7ff00000)) {
			D_RE(ans) = pi_2;
			D_IM(ans) = -y;
			return (ans);
		}
	}

	/* |y| is inf or NaN */
	if (iy >= 0x7ff00000) {
		if (ISINF(iy, ly)) {	/* cacos( x + i inf ) = pi/2 - i inf */
			D_IM(ans) = -y;
			if (ix < 0x7ff00000) {
				D_RE(ans) = pi_2 + pi_2_l;
			} else if (ISINF(ix, lx)) {
				if (hx >= 0)
					D_RE(ans) = pi_4 + pi_4_l;
				else
					D_RE(ans) = pi3_4 + pi3_4_l;
			} else {
				/* x is NaN: pass it through */
				D_RE(ans) = x;
			}
		} else {		/* cacos( x + i NaN ) = NaN + i NaN */
			D_RE(ans) = y + x;
			if (ISINF(ix, lx))
				D_IM(ans) = -fabs(x);
			else
				D_IM(ans) = y;
		}
		return (ans);
	}

	/* from here on work with |x| and |y|; signs are restored at the end */
	x = fabs(x);
	y = fabs(y);

	/* x is inf or NaN */
	if (ix >= 0x7ff00000) {	/* x is inf or NaN */
		if (ISINF(ix, lx)) {	/* x is INF */
			D_IM(ans) = -x;
			/*
			 * NOTE(review): every iy >= 0x7ff00000 input has
			 * already returned above, so this arm looks
			 * unreachable -- confirm before removing.
			 */
			if (iy >= 0x7ff00000) {
				if (ISINF(iy, ly)) {
					/* INDENT OFF */
					/* cacos(inf + i inf) = pi/4 - i inf */
					/* cacos(-inf+ i inf) =3pi/4 - i inf */
					/* INDENT ON */
					if (hx >= 0)
						D_RE(ans) = pi_4 + pi_4_l;
					else
						D_RE(ans) = pi3_4 + pi3_4_l;
				} else
					/* INDENT OFF */
					/* cacos(inf + i NaN) = NaN - i inf */
					/* INDENT ON */
					D_RE(ans) = y + y;
			} else
				/* INDENT OFF */
				/* cacos( inf + iy ) = 0 - i inf */
				/* cacos(-inf+ iy ) = pi - i inf */
				/* INDENT ON */
				if (hx >= 0)
					D_RE(ans) = zero;
				else
					D_RE(ans) = pi + pi_l;
		} else {	/* x is NaN */
			/* INDENT OFF */
			/*
			 * cacos(NaN + i inf) = NaN - i inf
			 * cacos(NaN + i y ) = NaN + i NaN
			 * cacos(NaN + i NaN) = NaN + i NaN
			 */
			/* INDENT ON */
			D_RE(ans) = x + y;
			/*
			 * NOTE(review): as above, iy >= 0x7ff00000 cannot
			 * hold here, so only the else arm appears to run.
			 */
			if (iy >= 0x7ff00000) {
				D_IM(ans) = -y;
			} else {
				D_IM(ans) = x;
			}
		}
		if (hy < 0)
			D_IM(ans) = -D_IM(ans);
		return (ans);
	}

	if ((iy | ly) == 0) {	/* region 1: y=0 */
		if (ix < 0x3ff00000) {	/* |x| < 1 */
			D_RE(ans) = acos(x);
			D_IM(ans) = zero;
		} else {
			D_RE(ans) = zero;
			if (ix >= 0x43500000)	/* |x| >= 2**54 */
				D_IM(ans) = ln2 + log(x);
			else if (ix >= 0x3ff80000)	/* x > Acrossover */
				D_IM(ans) = log(x + sqrt((x - one) * (x +
					one)));
			else {
				xm1 = x - one;
				D_IM(ans) = log1p(xm1 + sqrt(xm1 * (x + one)));
			}
		}
	} else if (y <= E * fabs(x - one)) {	/* region 2: y < tiny*|x-1| */
		if (ix < 0x3ff00000) {	/* x < 1 */
			D_RE(ans) = acos(x);
			D_IM(ans) = y / sqrt((one + x) * (one - x));
		} else if (ix >= 0x43500000) {	/* |x| >= 2**54 */
			D_RE(ans) = y / x;
			D_IM(ans) = ln2 + log(x);
		} else {
			t = sqrt((x - one) * (x + one));
			D_RE(ans) = y / t;
			if (ix >= 0x3ff80000)	/* x > Acrossover */
				D_IM(ans) = log(x + t);
			else
				D_IM(ans) = log1p((x - one) + t);
		}
	} else if (y < Foursqrtu) {	/* region 3 */
		/* both parts are approximated by sqrt(y) */
		t = sqrt(y);
		D_RE(ans) = t;
		D_IM(ans) = t;
	} else if (E * y - one >= x) {	/* region 4 */
		/* y is huge relative to x + 1 */
		D_RE(ans) = pi_2;
		D_IM(ans) = ln2 + log(y);
	} else if (ix >= 0x5fc00000 || iy >= 0x5fc00000) {	/* x,y>2**509 */
		/* region 5: x+1 or y is very large (>= sqrt(max)/8) */
		t = x / y;
		D_RE(ans) = atan(y / x);
		D_IM(ans) = ln2 + log(y) + half * log1p(t * t);
	} else if (x < Foursqrtu) {
		/* region 6: x is very small, < 4sqrt(min) */
		D_RE(ans) = pi_2;
		A = sqrt(one + y * y);
		if (iy >= 0x3ff80000)	/* if y > Acrossover */
			D_IM(ans) = log(y + A);
		else
			D_IM(ans) = half * log1p((y + y) * (y + A));
	} else {	/* safe region */
		/* R, S: distances from (x, y) to (-1, 0) and (1, 0) */
		y2 = y * y;
		xp1 = x + one;
		xm1 = x - one;
		R = sqrt(xp1 * xp1 + y2);
		S = sqrt(xm1 * xm1 + y2);
		A = half * (R + S);
		B = x / A;
		if (B <= Bcrossover)
			D_RE(ans) = acos(B);
		else {		/* use atan and an accurate approx to a-x */
			Apx = A + x;
			if (x <= one)
				D_RE(ans) = atan(sqrt(half * Apx * (y2 / (R +
					xp1) + (S - xm1))) / x);
			else
				D_RE(ans) = atan((y * sqrt(half * (Apx / (R +
					xp1) + Apx / (S + xm1)))) / x);
		}
		if (A <= Acrossover) {
			/* use log1p and an accurate approx to A-1 */
			if (x < one)
				Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1));
			else
				Am1 = half * (y2 / (R + xp1) + (S + xm1));
			D_IM(ans) = log1p(Am1 + sqrt(Am1 * (A + one)));
		} else {
			D_IM(ans) = log(A + sqrt(A * A - one));
		}
	}
	/* restore the quadrant from the signs of the original x and y */
	if (hx < 0)
		D_RE(ans) = pi - D_RE(ans);
	if (hy >= 0)
		D_IM(ans) = -D_IM(ans);
	return (ans);
}
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cacosf.c 1.3 06/01/31 SMI" + +#pragma weak cacosf = __cacosf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +cacosf(fcomplex z) { /* single precision complex arccosine, evaluated through the double precision cacos() */ + dcomplex dz, dans; + fcomplex ans; + + D_RE(dz) = (double) (F_RE(z)); /* widen the real part of z */ + D_IM(dz) = (double) (F_IM(z)); /* widen the imaginary part of z */ + dans = cacos(dz); /* all of the work happens in double precision */ + F_RE(ans) = (float) (D_RE(dans)); /* narrow both parts of the result back to float */ + F_IM(ans) = (float) (D_IM(dans)); + return (ans); +} diff --git a/usr/src/libm/src/complex/cacosh.c b/usr/src/libm/src/complex/cacosh.c new file mode 100644 index 0000000..5eb6fca --- /dev/null +++ b/usr/src/libm/src/complex/cacosh.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cacosh.c 1.5 06/01/31 SMI" + +#pragma weak cacosh = __cacosh + +/* INDENT OFF */ +/* + * dcomplex cacosh(dcomplex z); + * cacosh z = +-i cacos z . + * In order to make conj(cacosh(z))=cacosh(conj(z)), + * we define + * cacosh z = sign(Im(z))*i cacos z . + * + */ +/* INDENT ON */ + +#include "libm.h" /* fabs/isnan/isinf/signbit */ +#include "complex_wrapper.h" + +/* need to work on special cases according to spec */ + +dcomplex +cacosh(dcomplex z) { /* complex inverse hyperbolic cosine, obtained by rotating w = cacos(z) by +i or -i */ + dcomplex w, ans; + double x, y; + + w = cacos(z); /* computed up front, although the NaN branch below does not read it */ + x = D_RE(z); + y = D_IM(z); + if (isnan(y)) { /* Im(z) is NaN: the result is assembled from x and y only */ + D_IM(ans) = y + y; + if (isinf(x)) /* infinite Re(z) gives real part fabs(x); otherwise the NaN is copied */ + D_RE(ans) = fabs(x); + else + D_RE(ans) = y; + } else if (signbit(y) == 0) { /* sign bit of Im(z) clear: ans = i*w */ + D_RE(ans) = -D_IM(w); + D_IM(ans) = D_RE(w); + } else { /* sign bit of Im(z) set: ans = -i*w */ + D_RE(ans) = D_IM(w); + D_IM(ans) = -D_RE(w); + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cacoshf.c b/usr/src/libm/src/complex/cacoshf.c new file mode 100644 index 0000000..f371738 --- /dev/null +++ b/usr/src/libm/src/complex/cacoshf.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cacoshf.c 1.4 06/01/31 SMI" + +#pragma weak cacoshf = __cacoshf + +#include "libm.h" +#include "complex_wrapper.h" + +/* need to work on special cases according to spec */ + +fcomplex +cacoshf(fcomplex z) { /* single precision complex inverse hyperbolic cosine, evaluated through the double precision cacosh() */ + dcomplex dz, dans; + fcomplex ans; + + D_RE(dz) = (double) (F_RE(z)); /* widen the real part of z */ + D_IM(dz) = (double) (F_IM(z)); /* widen the imaginary part of z */ + dans = cacosh(dz); /* all of the work happens in double precision */ + F_RE(ans) = (float) (D_RE(dans)); /* narrow both parts of the result back to float */ + F_IM(ans) = (float) (D_IM(dans)); + return (ans); +} diff --git a/usr/src/libm/src/complex/cacoshl.c b/usr/src/libm/src/complex/cacoshl.c new file mode 100644 index 0000000..a64020e --- /dev/null +++ b/usr/src/libm/src/complex/cacoshl.c @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)cacoshl.c 1.5 06/01/31 SMI" + +#pragma weak cacoshl = __cacoshl + +#include "libm.h" /* fabsl/isnanl/isinfl/signbitl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +/* + * ldcomplex cacoshl(ldcomplex z); + * cacosh z = +-i cacos z . + * In order to make conj(cacosh(z))=cacosh(conj(z)), + * we define + * cacosh z = sign(Im(z))*i cacos z . + * + */ +/* INDENT ON */ + +ldcomplex +cacoshl(ldcomplex z) { /* long double complex inverse hyperbolic cosine, obtained by rotating w = cacosl(z) by +i or -i */ + ldcomplex w, ans; + long double x, y; + + w = cacosl(z); /* computed up front, although the NaN branch below does not read it */ + x = LD_RE(z); + y = LD_IM(z); + if (isnanl(y)) { /* Im(z) is NaN: the result is assembled from x and y only */ + LD_IM(ans) = y + y; + if (isinfl(x)) /* infinite Re(z) gives real part fabsl(x); otherwise the NaN is copied */ + LD_RE(ans) = fabsl(x); + else + LD_RE(ans) = y; + } else if (signbitl(y) == 0) { /* sign bit of Im(z) clear: ans = i*w */ + LD_RE(ans) = -LD_IM(w); + LD_IM(ans) = LD_RE(w); + } else { /* sign bit of Im(z) set: ans = -i*w */ + LD_RE(ans) = LD_IM(w); + LD_IM(ans) = -LD_RE(w); + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cacosl.c b/usr/src/libm/src/complex/cacosl.c new file mode 100644 index 0000000..c89b78a --- /dev/null +++ b/usr/src/libm/src/complex/cacosl.c @@ -0,0 +1,270 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)cacosl.c 1.6 06/01/31 SMI" + +#pragma weak cacosl = __cacosl + +#include "libm.h" /* acosl/atan2l/fabsl/isinfl/log1pl/logl/sqrtl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double +zero = 0.0L, +one = 1.0L, +Acrossover = 1.5L, +Bcrossover = 0.6417L, +half = 0.5L, +ln2 = 6.931471805599453094172321214581765680755e-0001L, +Foursqrtu = 7.3344154702193886624856495681939326638255e-2466L, /* 2**-8189 */ +#if defined(__i386) +E = 5.4210108624275221700372640043497085571289e-20L, /* 2**-64 */ +pi = 3.141592653589793238295968524909085317631252110004425048828125L, +pi_l = 1.666748583704175665659172893706807721468195923078e-19L, +pi_2 = 1.5707963267948966191479842624545426588156260L, +pi_2_l = 8.3337429185208783282958644685340386073409796e-20L, +pi_4 = 0.78539816339744830957399213122727132940781302750110626220703125L, +pi_4_l = 4.166871459260439164147932234267019303670489807695410e-20L, +pi3_4 = 2.35619449019234492872197639368181398822343908250331878662109375L, +pi3_4_l = 1.250061437778131749244379670280105791101146942308e-19L; +#else +E = 9.6296497219361792652798897129246365926905e-35L, /* 2**-113 */ +pi = 3.1415926535897932384626433832795027974790680981372955730045043318L, +pi_l = 8.6718101301237810247970440260433519687623233462565303417759356862e-35L, +pi_2 = 1.5707963267948966192313216916397513987395340L, +pi_2_l = 4.3359050650618905123985220130216759843811616e-35L, +pi_4 = 0.785398163397448309615660845819875699369767024534323893251126L, +pi_4_l = 2.167952532530945256199261006510837992190580836564132585443e-35L, +pi3_4 = 2.35619449019234492884698253745962709810930107360297167975337824L, +pi3_4_l = 6.503857597592835768597783019532513976571742509692397756331e-35L; +#endif +/* INDENT ON */ + +#if defined(__i386) +static const int ip1 = 0x40400000; /* 2**65 */ +#else +static const int ip1 = 0x40710000; /* 2**114 */ +#endif + +ldcomplex +cacosl(ldcomplex z) { /* long double complex arccosine of z = x + iy: inf/NaN/zero special cases first, then one formula per region */ + long double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int
ix, iy, hx, hy; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); /* hx, hy keep the sign (tested as hx >= 0, hy < 0); ix, iy are the same words with the sign bit masked off */ + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + + /* x is 0 */ + if (x == zero) { + if (y == zero || (iy >= 0x7fff0000)) { + LD_RE(ans) = pi_2 + pi_2_l; + LD_IM(ans) = -y; + return (ans); + } + } + + /* |y| is inf or NaN */ + if (iy >= 0x7fff0000) { + if (isinfl(y)) { /* cacos( x + i inf ) = pi/2 - i inf */ + LD_IM(ans) = -y; + if (ix < 0x7fff0000) { + LD_RE(ans) = pi_2 + pi_2_l; + } else if (isinfl(x)) { + if (hx >= 0) + LD_RE(ans) = pi_4 + pi_4_l; + else + LD_RE(ans) = pi3_4 + pi3_4_l; + } else { + LD_RE(ans) = x; + } + } else { /* cacos( x + i NaN ) = NaN + i NaN */ + LD_RE(ans) = y + x; + if (isinfl(x)) + LD_IM(ans) = -fabsl(x); + else + LD_IM(ans) = y; + } + return (ans); + } + + y = fabsl(y); /* y is |Im(z)| from here on; its sign is still available in hy */ + + if (ix >= 0x7fff0000) { /* x is inf or NaN */ + if (isinfl(x)) { /* x is INF */ + LD_IM(ans) = -fabsl(x); + if (iy >= 0x7fff0000) { /* NOTE(review): looks unreachable, inf or NaN y has already returned above; confirm */ + if (isinfl(y)) { + /* INDENT OFF */ + /* cacos(inf + i inf) = pi/4 - i inf */ + /* cacos(-inf+ i inf) =3pi/4 - i inf */ + /* INDENT ON */ + if (hx >= 0) + LD_RE(ans) = pi_4 + pi_4_l; + else + LD_RE(ans) = pi3_4 + pi3_4_l; + } else + /* INDENT OFF */ + /* cacos(inf + i NaN) = NaN - i inf */ + /* INDENT ON */ + LD_RE(ans) = y + y; + } else { + /* INDENT OFF */ + /* cacos( inf + iy ) = 0 - i inf */ + /* cacos(-inf+ iy ) = pi - i inf */ + /* INDENT ON */ + if (hx >= 0) + LD_RE(ans) = zero; + else + LD_RE(ans) = pi + pi_l; + } + } else { /* x is NaN */ + /* INDENT OFF */ + /* + * cacos(NaN + i inf) = NaN - i inf + * cacos(NaN + i y ) = NaN + i NaN + * cacos(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + LD_RE(ans) = x + y; + if (iy >= 0x7fff0000) { + LD_IM(ans) = -y; + } else { + LD_IM(ans) = x; + } + } + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); + } + + if (y == zero) { /* region 1: y=0 */ + if (ix < 0x3fff0000) { /* |x| < 1 */ + LD_RE(ans) = acosl(x); + LD_IM(ans) = zero; + } else { + LD_RE(ans) = (hx >= 0)? zero : pi + pi_l; /* real part is 0 for x >= 1 and pi for x <= -1; no later step in this function corrects for the sign of x */ + x =
fabsl(x); + if (ix >= ip1) /* i386 ? 2**65 : 2**114 */ + LD_IM(ans) = ln2 + logl(x); + else if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + sqrtl((x - one) * (x + + one))); + else { + xm1 = x - one; + LD_IM(ans) = log1pl(xm1 + sqrtl(xm1 * (x + + one))); + } + } + } else if (y <= E * fabsl(fabsl(x) - one)) { + /* region 2: y < tiny*||x|-1| */ + if (ix < 0x3fff0000) { /* x < 1 */ + LD_RE(ans) = acosl(x); + x = fabsl(x); + LD_IM(ans) = y / sqrtl((one + x) * (one - x)); + } else if (ix >= ip1) { /* i386 ? 2**65 : 2**114 */ + if (hx >= 0) + LD_RE(ans) = y / x; + else { + if (ix >= ip1 + 0x00040000) + LD_RE(ans) = pi + pi_l; + else { + t = pi_l + y / x; + LD_RE(ans) = pi + t; + } + } + LD_IM(ans) = ln2 + logl(fabsl(x)); + } else { + x = fabsl(x); + t = sqrtl((x - one) * (x + one)); + LD_RE(ans) = (hx >= 0)? y / t : pi - (y / t - pi_l); + if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + t); + else + LD_IM(ans) = log1pl(t - (one - x)); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrtl(y); + LD_RE(ans) = (hx >= 0)? t : pi + pi_l; + LD_IM(ans) = t; + } else if (E * y - one >= fabsl(x)) { /* region 4 */ + LD_RE(ans) = pi_2 + pi_2_l; + LD_IM(ans) = ln2 + logl(y); + } else if (ix >= 0x5ffb0000 || iy >= 0x5ffb0000) { + /* region 5: x+1 or y is very large (>= sqrt(max)/8) i.e. 2**8188 */ + t = x / y; + LD_RE(ans) = atan2l(y, x); + LD_IM(ans) = ln2 + logl(y) + half * log1pl(t * t); + } else if (fabsl(x) < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + LD_RE(ans) = pi_2 + pi_2_l; + A = sqrtl(one + y * y); + if (iy >= 0x3fff8000) /* if y > Acrossover */ + LD_IM(ans) = logl(y + A); + else + LD_IM(ans) = half * log1pl((y + y) * (y + A)); + } else { /* safe region */ + t = fabsl(x); + y2 = y * y; + xp1 = t + one; + xm1 = t - one; + R = sqrtl(xp1 * xp1 + y2); + S = sqrtl(xm1 * xm1 + y2); + A = half * (R + S); + B = t / A; + + if (B <= Bcrossover) + LD_RE(ans) = (hx >= 0)?
acosl(B) : acosl(-B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + t; + if (t <= one) + LD_RE(ans) = atan2l(sqrtl(half * Apx * (y2 / + (R + xp1) + (S - xm1))), x); + else + LD_RE(ans) = atan2l((y * sqrtl(half * (Apx / + (R + xp1) + Apx / (S + xm1)))), x); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (ix < 0x3fff0000) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + LD_IM(ans) = log1pl(Am1 + sqrtl(Am1 * (A + one))); + } else { + LD_IM(ans) = logl(A + sqrtl(A * A - one)); + } + } + + if (hy >= 0) /* the regions above produce a nonnegative imaginary magnitude; it is negated unless the sign bit of Im(z) was set */ + LD_IM(ans) = -LD_IM(ans); + + return (ans); +} diff --git a/usr/src/libm/src/complex/carg.c b/usr/src/libm/src/complex/carg.c new file mode 100644 index 0000000..6854365 --- /dev/null +++ b/usr/src/libm/src/complex/carg.c @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)carg.c 1.3 06/01/23 SMI" + +#pragma weak carg = __carg + +#include "libm_synonyms.h" +#include <math.h> /* atan2 */ +#include "complex_wrapper.h" + +static const double + pi = 3.14159265358979311600e+00, + pi_lo = 1.22464679914735320717e-16; + +double +carg(dcomplex z) { /* argument (phase angle) of z: atan2(Im z, Re z), with the case of two zeros answered directly */ + int ix, iy; + + ix = ((int *)&(D_RE(z)))[HIWORD]; /* high words of Re(z) and Im(z), sign bit included */ + iy = ((int *)&(D_IM(z)))[HIWORD]; + if ((((ix | iy) & ~0x80000000) | ((int *)&(D_RE(z)))[LOWORD] | + ((int *)&(D_IM(z)))[LOWORD]) == 0) { + /* x and y are both zero */ + if (ix == 0) /* Re(z) is +0: return Im(z) itself, a zero carrying its own sign */ + return (D_IM(z)); + return ((iy == 0)? pi + pi_lo : -pi - pi_lo); /* Re(z) is -0: pi when Im(z) is +0, -pi when Im(z) is -0 */ + } + return (atan2(D_IM(z), D_RE(z))); +} diff --git a/usr/src/libm/src/complex/cargf.c b/usr/src/libm/src/complex/cargf.c new file mode 100644 index 0000000..2d30776 --- /dev/null +++ b/usr/src/libm/src/complex/cargf.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)cargf.c 1.3 06/01/31 SMI" + +#pragma weak cargf = __cargf + +#include "libm.h" /* atan2f */ +#include "complex_wrapper.h" + +float +cargf(fcomplex z) { /* argument (phase angle) of z, delegated entirely to atan2f(Im z, Re z) */ + return (atan2f(F_IM(z), F_RE(z))); +} diff --git a/usr/src/libm/src/complex/cargl.c b/usr/src/libm/src/complex/cargl.c new file mode 100644 index 0000000..8e419ec --- /dev/null +++ b/usr/src/libm/src/complex/cargl.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cargl.c 1.3 06/01/31 SMI" + +#pragma weak cargl = __cargl + +#include "libm.h" +#include "complex_wrapper.h" + +long double +cargl(ldcomplex z) { /* argument (phase angle) of z, delegated entirely to atan2l(Im z, Re z) */ + return (atan2l(LD_IM(z), LD_RE(z))); +} diff --git a/usr/src/libm/src/complex/casin.c b/usr/src/libm/src/complex/casin.c new file mode 100644 index 0000000..bb66acd --- /dev/null +++ b/usr/src/libm/src/complex/casin.c @@ -0,0 +1,378 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)casin.c 1.4 06/01/31 SMI" + +#pragma weak casin = __casin + +/* INDENT OFF */ +/* + * dcomplex casin(dcomplex z); + * + * Algorithm + * (based on T.E.Hull, Thomas F. Fairgrieve and Ping Tak Peter Tang's + * paper "Implementing the Complex Arcsine and Arccosine Functions Using + * Exception Handling", ACM TOMS, Vol 23, pp 299-335) + * + * The principal value of the complex inverse sine function casin(z), + * where z = x+iy, can be defined by + * + * casin(z) = asin(B) + i sign(y) log (A + sqrt(A*A-1)), + * + * where the log function is the natural log, and + * + * A = \frac{1}{2}\sqrt{(x+1)^2+y^2} + * +\frac{1}{2}\sqrt{(x-1)^2+y^2}, + * + * B = \frac{1}{2}\sqrt{(x+1)^2+y^2} + * -\frac{1}{2}\sqrt{(x-1)^2+y^2} + * + * (A and B are written in LaTeX notation). + * + * The branch cuts are on the real line from -inf to -1 and from 1 to inf. + * The real and imaginary parts are based on Abramowitz and Stegun + * [Handbook of Mathematical Functions, 1972]. The sign of the imaginary + * part is chosen to be what is generally considered the principal value of + * this function. + * + * Notes:1. A is the average of the distances from z to the points (1,0) + * and (-1,0) in the complex z-plane, and in particular A>=1. + * 2.
B is in [-1,1], and A*B = x. + * + * Special notes: if casin( x, y) = ( u, v), then + * casin(-x, y) = (-u, v), + * casin( x,-y) = ( u,-v), + * in general, we have casin(conj(z)) = conj(casin(z)) + * casin(-z) = -casin(z) + * casin(z) = pi/2 - cacos(z) + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * casin( 0 + i 0 ) = 0 + i 0 + * casin( 0 + i NaN ) = 0 + i NaN + * casin( x + i inf ) = 0 + i inf for finite x + * casin( x + i NaN ) = NaN + i NaN with invalid for finite x!=0 + * casin(inf + iy ) = pi/2 + i inf for finite y + * casin(inf + i inf) = pi/4 + i inf + * casin(inf + i NaN) = NaN + i inf + * casin(NaN + i y ) = NaN + i NaN for finite y + * casin(NaN + i inf) = NaN + i inf + * casin(NaN + i NaN) = NaN + i NaN + * + * Special Regions (better formula for accuracy and for avoiding spurious + * overflow or underflow) (all x and y are assumed nonnegative): + * case 1: y = 0 + * case 2: tiny y relative to x-1: y <= ulp(0.5)*|x-1| + * case 3: tiny y: y < 4 sqrt(u), where u = minimum normal number + * case 4: huge y relative to x+1: y >= (1+x)/ulp(0.5) + * case 5: huge x and y: x and y >= sqrt(M)/8, where M = maximum normal number + * case 6: tiny x: x < 4 sqrt(u) + * -------- + * case 1 & 2. y=0 or y/|x-1| is tiny. We have + * (LaTeX notation; x \pm 1 stands for either x+1 or x-1) + * + * \sqrt{(x \pm 1)^2+y^2} + * + * = |x \pm 1| \sqrt{1+(y/|x \pm 1|)^2} + * + * \approx |x \pm 1| (1+\frac{1}{2}(y/|x \pm 1|)^2) + * + * = |x \pm 1|+\frac{y^2}{2|x \pm 1|}, + * + * where the approximation keeps the first two terms of the + * expansion \sqrt{1+t} = 1+t/2-... with t = (y/|x \pm 1|)^2, + * which is tiny here. + * + * Consequently, it is not difficult to see that + * + * A \approx 1+\frac{y^2}{2(1+x)(1-x)}, + * if x < 1, + * + * A = x, + * if x = 1 (y = 0), + * + * A \approx x+\frac{x y^2}{2(1+x)(x-1)}, + * if x > 1 + * + * (half the sum of the two expansions above, + * one for x+1 and one for x-1), + * + * and hence + * + * A+\sqrt{A^2-1} + * + * \approx 1+\frac{y}{\sqrt{(x+1)(1-x)}}+\frac{y^2}{2(x+1)(1-x)}, if x < 1, + * + * + * \approx x+\sqrt{(x-1)(x+1)}, if x >= 1.
+ * + * B = x/A \approx x(1-\frac{y^2}{2(1+x)(1-x)}), + * if x < 1, + * + * B = x/A = 1, + * if x = 1, + * + * B = x/A \approx 1-\frac{y^2}{2(1+x)(x-1)}, + * if x > 1 + * + * (for x > 1 the denominator is 2(1+x)(x-1), which is positive, + * so that B stays below 1). + * Thus + * casin(x+i*y) = asin(x) + i y/sqrt((1-x)*(1+x)), if x < 1, + * + * casin(x+i*y) = pi/2 + i log(x+sqrt(x*x-1)), if x >= 1. + * + * case 3. y < 4 sqrt(u), where u = minimum normal number. + * After case 1 and 2, this will only occur when x=1. When x=1, we have + * A = (sqrt(4+y*y)+y)/2 ~ 1 + y/2 + y^2/8 + ... + * and + * B = 1/A = 1 - y/2 + y^2/8 + ... + * Since + * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) + * asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... + * we have, for the real part asin(B), + * asin(1-y/2) ~ pi/2 - 2 asin(sqrt(y/4)) + * ~ pi/2 - sqrt(y) + * For the imaginary part, + * log(A+sqrt(A*A-1)) ~ log(1+y/2+sqrt(2*y/2)) + * = log(1+y/2+sqrt(y)) + * = (y/2+sqrt(y)) - (y/2+sqrt(y))^2/2 + ... + * ~ sqrt(y) - y*(sqrt(y)+y/2)/2 + * ~ sqrt(y) + * + * case 4. y >= (x+1)/ulp(0.5). In this case, A ~ y and B ~ x/y. Thus + * real part = asin(B) ~ x/y (be careful, x/y may underflow) + * and + * imag part = log(y+sqrt(y*y-one)) + * + * + * case 5. Both x and y are large: x and y > sqrt(M)/8, where M = maximum normal number + * In this case, + * A ~ sqrt(x*x+y*y) + * B ~ x/sqrt(x*x+y*y). + * Thus + * real part = asin(B) = atan(x/y), + * imag part = log(A+sqrt(A*A-1)) ~ log(2A) + * = log(2) + 0.5*log(x*x+y*y) + * = log(2) + log(y) + 0.5*log(1+(x/y)^2) + * + * case 6. x < 4 sqrt(u). In this case, we have + * A ~ sqrt(1+y*y), B = x/sqrt(1+y*y).
+ * Since B is tiny, we have + * real part = asin(B) ~ B = x/sqrt(1+y*y) + * imag part = log(A+sqrt(A*A-1)) = log (A+sqrt(y*y)) + * = log(y+sqrt(1+y*y)) + * = 0.5*log(y^2+2ysqrt(1+y^2)+1+y^2) + * = 0.5*log(1+2y(y+sqrt(1+y^2))); + * = 0.5*log1p(2y(y+A)); + * + * casin(z) = asin(B) + i sign(y) log (A + sqrt(A*A-1)), + */ +/* INDENT ON */ + +#include "libm.h" /* asin/atan/fabs/log/log1p/sqrt */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + zero = 0.0, + one = 1.0, + E = 1.11022302462515654042e-16, /* 2**-53 */ + ln2 = 6.93147180559945286227e-01, + pi_2 = 1.570796326794896558e+00, + pi_2_l = 6.123233995736765886e-17, + pi_4 = 7.85398163397448278999e-01, + Foursqrtu = 5.96667258496016539463e-154, /* 2**(-509) */ + Acrossover = 1.5, + Bcrossover = 0.6417, + half = 0.5; +/* INDENT ON */ + +dcomplex +casin(dcomplex z) { /* double precision complex arcsine of z = x + iy: special cases first, then one formula per region */ + double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int ix, iy, hx, hy; + unsigned lx, ly; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); /* hx, hy keep the sign (tested as hx < 0, hy < 0); ix, iy are the same words with the sign bit masked off */ + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + x = fabs(x); /* from here on x and y are magnitudes; every return path reapplies the signs from hx and hy */ + y = fabs(y); + + /* special cases */ + + /* x is inf or NaN */ + if (ix >= 0x7ff00000) { /* x is inf or NaN */ + if (ISINF(ix, lx)) { /* x is INF */ + D_IM(ans) = x; + if (iy >= 0x7ff00000) { + if (ISINF(iy, ly)) + /* casin(inf + i inf) = pi/4 + i inf */ + D_RE(ans) = pi_4; + else /* casin(inf + i NaN) = NaN + i inf */ + D_RE(ans) = y + y; + } else /* casin(inf + iy) = pi/2 + i inf */ + D_RE(ans) = pi_2; + } else { /* x is NaN */ + if (iy >= 0x7ff00000) { + /* INDENT OFF */ + /* + * casin(NaN + i inf) = NaN + i inf + * casin(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + D_IM(ans) = y + y; + D_RE(ans) = x + x; + } else { + /* casin(NaN + i y ) = NaN + i NaN */ + D_IM(ans) = D_RE(ans) = x + y; + } + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); + } + + /* casin(+0 + i 0 ) = 0 + i 0.
*/ + if ((ix | lx | iy | ly) == 0) + return (z); + + if (iy >= 0x7ff00000) { /* y is inf or NaN */ + if (ISINF(iy, ly)) { /* casin( x + i inf ) = 0 + i inf */ + D_IM(ans) = y; + D_RE(ans) = zero; + } else { /* casin( x + i NaN ) = NaN + i NaN */ + D_IM(ans) = x + y; + if ((ix | lx) == 0) /* zero x keeps a zero real part, as in the casin( 0 + i NaN ) entry of the header comment */ + D_RE(ans) = x; + else + D_RE(ans) = y; + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); + } + + if ((iy | ly) == 0) { /* region 1: y=0 */ + if (ix < 0x3ff00000) { /* |x| < 1 */ + D_RE(ans) = asin(x); + D_IM(ans) = zero; + } else { + D_RE(ans) = pi_2; + if (ix >= 0x43500000) /* |x| >= 2**54 */ + D_IM(ans) = ln2 + log(x); + else if (ix >= 0x3ff80000) /* x > Acrossover */ + D_IM(ans) = log(x + sqrt((x - one) * (x + + one))); + else { + xm1 = x - one; + D_IM(ans) = log1p(xm1 + sqrt(xm1 * (x + one))); + } + } + } else if (y <= E * fabs(x - one)) { /* region 2: y <= E*|x-1|, i.e. y is tiny relative to |x-1| */ + if (ix < 0x3ff00000) { /* x < 1 */ + D_RE(ans) = asin(x); + D_IM(ans) = y / sqrt((one + x) * (one - x)); + } else { + D_RE(ans) = pi_2; + if (ix >= 0x43500000) { /* |x| >= 2**54 */ + D_IM(ans) = ln2 + log(x); + } else if (ix >= 0x3ff80000) /* x > Acrossover */ + D_IM(ans) = log(x + sqrt((x - one) * (x + + one))); + else + D_IM(ans) = log1p((x - one) + sqrt((x - one) * + (x + one))); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrt(y); + D_RE(ans) = pi_2 - (t - pi_2_l); + D_IM(ans) = t; + } else if (E * y - one >= x) { /* region 4 */ + D_RE(ans) = x / y; /* need to fix underflow cases */ + D_IM(ans) = ln2 + log(y); + } else if (ix >= 0x5fc00000 || iy >= 0x5fc00000) { /* x,y>2**509 */ + /* region 5: x+1 or y is very large (>= sqrt(max)/8) */ + t = x / y; + D_RE(ans) = atan(t); + D_IM(ans) = ln2 + log(y) + half * log1p(t * t); + } else if (x < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + A = sqrt(one + y * y); + D_RE(ans) = x / A; /* may underflow */ + if (iy >= 0x3ff80000) /* if y > Acrossover */ + D_IM(ans) = log(y + A); + else +
D_IM(ans) = half * log1p((y + y) * (y + A)); + } else { /* safe region */ + y2 = y * y; + xp1 = x + one; + xm1 = x - one; + R = sqrt(xp1 * xp1 + y2); /* R and S are the two square roots whose half sum is A */ + S = sqrt(xm1 * xm1 + y2); + A = half * (R + S); + B = x / A; + + if (B <= Bcrossover) + D_RE(ans) = asin(B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + x; + if (x <= one) + D_RE(ans) = atan(x / sqrt(half * Apx * (y2 / + (R + xp1) + (S - xm1)))); + else + D_RE(ans) = atan(x / (y * sqrt(half * (Apx / + (R + xp1) + Apx / (S + xm1))))); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (x < one) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + D_IM(ans) = log1p(Am1 + sqrt(Am1 * (A + one))); + } else { + D_IM(ans) = log(A + sqrt(A * A - one)); + } + } + + if (hx < 0) /* reapply the signs of the original Re(z) and Im(z) */ + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + + return (ans); +} diff --git a/usr/src/libm/src/complex/casinf.c b/usr/src/libm/src/complex/casinf.c new file mode 100644 index 0000000..c3e8aca --- /dev/null +++ b/usr/src/libm/src/complex/casinf.c @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)casinf.c 1.3 06/01/31 SMI" + +#pragma weak casinf = __casinf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +casinf(fcomplex z) { /* single precision complex arcsine, evaluated through the double precision casin() */ + dcomplex dz, dans; + fcomplex ans; + + D_RE(dz) = (double) (F_RE(z)); /* widen the real part of z */ + D_IM(dz) = (double) (F_IM(z)); /* widen the imaginary part of z */ + dans = casin(dz); /* all of the work happens in double precision */ + F_RE(ans) = (float) (D_RE(dans)); /* narrow both parts of the result back to float */ + F_IM(ans) = (float) (D_IM(dans)); + return (ans); +} diff --git a/usr/src/libm/src/complex/casinh.c b/usr/src/libm/src/complex/casinh.c new file mode 100644 index 0000000..7698223 --- /dev/null +++ b/usr/src/libm/src/complex/casinh.c @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)casinh.c 1.3 06/01/31 SMI" + +#pragma weak casinh = __casinh + +/* INDENT OFF */ +/* + * dcomplex casinh(dcomplex z); + * + * Complex inverse hyperbolic sine via casinh(z) = -i * casin(i * z). + * For z = x + iy the argument handed to casin() is i*z = -y + ix, and + * multiplying its result u + iv by -i gives v - iu. + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +casinh(dcomplex z) { + dcomplex iz, asin_iz, result; + + /* iz = i * z */ + D_IM(iz) = D_RE(z); + D_RE(iz) = -D_IM(z); + asin_iz = casin(iz); + /* result = -i * asin_iz */ + D_IM(result) = -D_RE(asin_iz); + D_RE(result) = D_IM(asin_iz); + return (result); +} diff --git a/usr/src/libm/src/complex/casinhf.c b/usr/src/libm/src/complex/casinhf.c new file mode 100644 index 0000000..6a29716 --- /dev/null +++ b/usr/src/libm/src/complex/casinhf.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)casinhf.c 1.3 06/01/31 SMI" + +#pragma weak casinhf = __casinhf + +#include "libm.h" +#include "complex_wrapper.h" + +/* + * fcomplex casinhf(fcomplex z); + * + * Single-precision complex inverse hyperbolic sine, obtained from + * casinf() through casinh(z) = -i * casin(i * z). + */ +fcomplex +casinhf(fcomplex z) { + fcomplex iz, asin_iz, result; + + /* iz = i * z = -Im(z) + i Re(z) */ + F_IM(iz) = F_RE(z); + F_RE(iz) = -F_IM(z); + asin_iz = casinf(iz); + /* result = -i * asin_iz = Im(asin_iz) - i Re(asin_iz) */ + F_IM(result) = -F_RE(asin_iz); + F_RE(result) = F_IM(asin_iz); + return (result); +} diff --git a/usr/src/libm/src/complex/casinhl.c b/usr/src/libm/src/complex/casinhl.c new file mode 100644 index 0000000..c88a6fa --- /dev/null +++ b/usr/src/libm/src/complex/casinhl.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)casinhl.c 1.3 06/01/31 SMI" + +#pragma weak casinhl = __casinhl + +#include "libm.h" +#include "complex_wrapper.h" + +/* + * ldcomplex casinhl(ldcomplex z); + * + * Long double complex inverse hyperbolic sine, obtained from + * casinl() through casinh(z) = -i * casin(i * z). + */ +ldcomplex +casinhl(ldcomplex z) { + ldcomplex iz, asin_iz, result; + + /* iz = i * z = -Im(z) + i Re(z) */ + LD_IM(iz) = LD_RE(z); + LD_RE(iz) = -LD_IM(z); + asin_iz = casinl(iz); + /* result = -i * asin_iz = Im(asin_iz) - i Re(asin_iz) */ + LD_IM(result) = -LD_RE(asin_iz); + LD_RE(result) = LD_IM(asin_iz); + return (result); +} diff --git a/usr/src/libm/src/complex/casinl.c b/usr/src/libm/src/complex/casinl.c new file mode 100644 index 0000000..dfe44a6 --- /dev/null +++ b/usr/src/libm/src/complex/casinl.c @@ -0,0 +1,230 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)casinl.c 1.4 06/01/31 SMI" + +#pragma weak casinl = __casinl + +#include "libm.h" /* asinl/atanl/fabsl/isinfl/log1pl/logl/sqrtl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double +zero = 0.0L, +one = 1.0L, +Acrossover = 1.5L, +Bcrossover = 0.6417L, +half = 0.5L, +ln2 = 6.931471805599453094172321214581765680755e-0001L, +Foursqrtu = 7.3344154702193886624856495681939326638255e-2466L, /* 2**-8189 */ +#if defined(__i386) +E = 5.4210108624275221700372640043497085571289e-20L, /* 2**-64 */ +pi_4 = 0.7853981633974483095739921312272713294078130L, +pi_4_l = 4.1668714592604391641479322342670193036704898e-20L, +pi_2 = 1.5707963267948966191479842624545426588156260L, +pi_2_l = 8.3337429185208783282958644685340386073409796e-20L; + +#else +E = 9.6296497219361792652798897129246365926905e-35L, /* 2**-113 */ +pi_4 = 0.7853981633974483096156608458198756993697670L, +pi_4_l = 2.1679525325309452561992610065108379921905808e-35L, +pi_2 = 1.5707963267948966192313216916397513987395340L, +pi_2_l = 4.3359050650618905123985220130216759843811616e-35L; + +#endif +/* INDENT ON */ + +#if defined(__i386) +static const int ip1 = 0x40400000; /* 2**65 */ +#else +static const int ip1 = 0x40710000; /* 2**114 */ +#endif +/* + * ldcomplex casinl(ldcomplex z); + * + * Long double complex arcsine of z = x + iy. The work is done on + * |x| and |y|; the real part of the result is negated when hx (the + * value HI_XWORD gives for the original x) is negative, and the + * imaginary part when hy is negative. + * + * Inf/NaN operands and z = 0 are answered first. Finite arguments are + * then dispatched in this order: y = 0 (region 1), y tiny relative to + * |x - 1| (region 2), y below Foursqrtu (region 3), y huge relative to + * x (region 4), x or y with exponent word at or above 0x5ffb0000 + * (region 5), x below Foursqrtu (region 6), and finally the "safe" + * region, which forms A = (R + S)/2 and B = x/A from + * R = sqrt((x+1)^2 + y^2) and S = sqrt((x-1)^2 + y^2), then picks + * asinl/atanl for the real part by comparing B with Bcrossover and + * log1pl/logl for the imaginary part by comparing A with Acrossover. + */ +ldcomplex +casinl(ldcomplex z) { + long double x, y, t, R, S, A, Am1, B, y2, xm1, xp1, Apx; + int ix, iy, hx, hy; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; /* hx with the sign bit cleared */ + iy = hy & 0x7fffffff; /* hy with the sign bit cleared */ + x = fabsl(x); + y = fabsl(y); /* x, y now hold magnitudes; hx, hy keep the signs */ + + /* special cases */ + + /* x is inf or NaN */ + if (ix >= 0x7fff0000) { /* x is inf or NaN */ + if (isinfl(x)) { /* x is INF */ + LD_IM(ans) = x; + if (iy >= 0x7fff0000) { + if (isinfl(y)) + /* casin(inf + i inf) = pi/4 + i inf */ + LD_RE(ans) = pi_4 + pi_4_l; + else /* casin(inf + i NaN) = NaN + i inf */ + LD_RE(ans) = y + y; + } else /* casin(inf + iy) = pi/2 + i inf */ + LD_RE(ans) = pi_2 + pi_2_l; + } else { /* x is NaN */ + if (iy >= 0x7fff0000)
{ + /* INDENT OFF */ + /* + * casin(NaN + i inf) = NaN + i inf + * casin(NaN + i NaN) = NaN + i NaN + */ + /* INDENT ON */ + LD_IM(ans) = y + y; + LD_RE(ans) = x + x; + } else { + /* INDENT OFF */ + /* casin(NaN + i y ) = NaN + i NaN */ + /* INDENT ON */ + LD_IM(ans) = LD_RE(ans) = x + y; + } + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); + } + + /* casin(+0 + i 0) = 0 + i 0. */ + if (x == zero && y == zero) + return (z); /* the argument itself is returned, so its zero signs carry over */ + + if (iy >= 0x7fff0000) { /* y is inf or NaN */ + if (isinfl(y)) { /* casin( x + i inf ) = 0 + i inf */ + LD_IM(ans) = y; + LD_RE(ans) = zero; + } else { /* casin( x + i NaN ) = NaN + i NaN */ + LD_IM(ans) = x + y; + if (x == zero) + LD_RE(ans) = x; + else + LD_RE(ans) = y; + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); + } + + if (y == zero) { /* region 1: y=0 */ + if (ix < 0x3fff0000) { /* |x| < 1 */ + LD_RE(ans) = asinl(x); + LD_IM(ans) = zero; + } else { + LD_RE(ans) = pi_2 + pi_2_l; + if (ix >= ip1) /* |x| >= i386 ? 2**65 : 2**114 */ + LD_IM(ans) = ln2 + logl(x); + else if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + sqrtl((x - one) * (x + + one))); + else { + xm1 = x - one; + LD_IM(ans) = log1pl(xm1 + sqrtl(xm1 * (x + + one))); + } + } + } else if (y <= E * fabsl(x - one)) { /* region 2: y < tiny*|x-1| */ + if (ix < 0x3fff0000) { /* x < 1 */ + LD_RE(ans) = asinl(x); + LD_IM(ans) = y / sqrtl((one + x) * (one - x)); + } else { + LD_RE(ans) = pi_2 + pi_2_l; + if (ix >= ip1) /* i386 ?
2**65 : 2**114 */ + LD_IM(ans) = ln2 + logl(x); + else if (ix >= 0x3fff8000) /* x > Acrossover */ + LD_IM(ans) = logl(x + sqrtl((x - one) * (x + + one))); + else + LD_IM(ans) = log1pl((x - one) + sqrtl((x - + one) * (x + one))); + } + } else if (y < Foursqrtu) { /* region 3 */ + t = sqrtl(y); + LD_RE(ans) = pi_2 - (t - pi_2_l); + LD_IM(ans) = t; + } else if (E * y - one >= x) { /* region 4 */ + LD_RE(ans) = x / y; /* need to fix underflow cases */ + LD_IM(ans) = ln2 + logl(y); + } else if (ix >= 0x5ffb0000 || iy >= 0x5ffb0000) { + /* region 5: x+1 and y are both (>= sqrt(max)/8) i.e. 2**8188 */ + t = x / y; + LD_RE(ans) = atanl(t); + LD_IM(ans) = ln2 + logl(y) + half * log1pl(t * t); + } else if (x < Foursqrtu) { + /* region 6: x is very small, < 4sqrt(min) */ + A = sqrtl(one + y * y); + LD_RE(ans) = x / A; /* may underflow */ + if (iy >= 0x3fff8000) /* if y > Acrossover */ + LD_IM(ans) = logl(y + A); + else + LD_IM(ans) = half * log1pl((y + y) * (y + A)); + } else { /* safe region */ + y2 = y * y; + xp1 = x + one; + xm1 = x - one; + R = sqrtl(xp1 * xp1 + y2); + S = sqrtl(xm1 * xm1 + y2); + A = half * (R + S); + B = x / A; + if (B <= Bcrossover) + LD_RE(ans) = asinl(B); + else { /* use atan and an accurate approx to a-x */ + Apx = A + x; + if (x <= one) + LD_RE(ans) = atanl(x / sqrtl(half * Apx * (y2 / + (R + xp1) + (S - xm1)))); + else + LD_RE(ans) = atanl(x / (y * sqrtl(half * (Apx / + (R + xp1) + Apx / (S + xm1))))); + } + if (A <= Acrossover) { + /* use log1p and an accurate approx to A-1 */ + if (x < one) + Am1 = half * (y2 / (R + xp1) + y2 / (S - xm1)); + else + Am1 = half * (y2 / (R + xp1) + (S + xm1)); + LD_IM(ans) = log1pl(Am1 + sqrtl(Am1 * (A + one))); + } else { + LD_IM(ans) = logl(A + sqrtl(A * A - one)); + } + } + + if (hx < 0) /* reapply the sign of the original real part */ + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) /* reapply the sign of the original imaginary part */ + LD_IM(ans) = -LD_IM(ans); + + return (ans); +} diff --git a/usr/src/libm/src/complex/catan.c b/usr/src/libm/src/complex/catan.c new file mode 100644 index 0000000..fd4836f ---
/dev/null +++ b/usr/src/libm/src/complex/catan.c @@ -0,0 +1,291 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)catan.c 1.3 06/01/31 SMI" + +#pragma weak catan = __catan + +/* INDENT OFF */ +/* + * dcomplex catan(dcomplex z); + * + * If + * z = x + iy, + * + * then + * 1 ( 2x ) 1 2 2 + * Re w = - arctan(-----------) = - ATAN2(2x, 1 - x - y ) + * 2 ( 2 2) 2 + * (1 - x - y ) + * + * ( 2 2) + * 1 (x + (y+1) ) 1 4y + * Im w = - log(------------) .= --- log [ 1 + ------------- ] + * 4 ( 2 2) 4 2 2 + * (x + (y-1) ) x + (y-1) + * + * 2 16 3 y + * = t - 2t + -- t - ..., where t = ----------------- + * 3 x*x + (y-1)*(y-1) + * + * Note that: if catan( x, y) = ( u, v), then + * catan(-x, y) = (-u, v) + * catan( x,-y) = ( u,-v) + * + * Also, catan(x,y) = -i*catanh(-y,x), or + * catanh(x,y) = i*catan(-y,x) + * So, if catanh(y,x) = (v,u), then catan(x,y) = -i*(-v,u) = (u,v), i.e., + * catan(x,y) = (u,v) + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * catan( 0 , 0 ) = (0 , 0 ) + * catan( NaN, 0 ) = (NaN , 0 ) + * catan( 0 , 1 ) = (0 , +inf) with divide-by-zero + * catan( inf, y ) = (pi/2 , 0 ) for finite +y + * catan( NaN, y ) = (NaN , NaN ) with invalid for finite y!=0 + * catan( x , inf ) = (pi/2 , 0 ) for finite +x + * catan( inf, inf ) = (pi/2 , 0 ) + * catan( NaN, inf ) = (NaN , 0 ) + * catan( x , NaN ) = (NaN , NaN ) with invalid for finite x + * catan( inf, NaN ) = (pi/2 , +-0 ) + */ +/* INDENT ON */ + +#include "libm.h" /* atan/atan2/fabs/log/log1p */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + pi_2 = 1.570796326794896558e+00, + zero = 0.0, + half = 0.5, + two = 2.0, + ln2 = 6.931471805599453094172321214581765680755e-0001, + one = 1.0; +/* INDENT ON */ + +/* + * catan(z) for z = x + iy, returned as A + Bi. Each branch below picks + * a formula for A and B suited to the magnitudes of |x| and |y|. All + * branches work on ax = |x| and ay = |y|; A is negated when hx < 0 and + * B when hy < 0 just before returning. + */ +dcomplex +catan(dcomplex z) { + dcomplex ans; + double x, y, ax, ay, t; + int hx, hy, ix, iy; + unsigned lx, ly; + + x = D_RE(z); + y = D_IM(z); + ax = fabs(x); + ay = fabs(y); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; /* hx with the sign bit cleared */ + iy = hy & 0x7fffffff; /* hy with the sign bit cleared */ + + /* x is inf or NaN */ + if (ix >= 0x7ff00000) { + if
(ISINF(ix, lx)) { + D_RE(ans) = pi_2; + D_IM(ans) = zero; + } else { + D_RE(ans) = x + x; + if ((iy | ly) == 0 || (ISINF(iy, ly))) + D_IM(ans) = zero; + else + D_IM(ans) = (fabs(y) - ay) / (fabs(y) - ay); + } + } else if (iy >= 0x7ff00000) { + /* y is inf or NaN */ + if (ISINF(iy, ly)) { + D_RE(ans) = pi_2; + D_IM(ans) = zero; + } else { + D_RE(ans) = (fabs(x) - ax) / (fabs(x) - ax); + D_IM(ans) = y; + } + } else if ((ix | lx) == 0) { + /* INDENT OFF */ + /* + * x = 0 + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) = --- atan2(0,1-|y|) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ------------ ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + t = one - ay; + if (((iy - 0x3ff00000) | ly) == 0) { + /* y=1: catan(0,1)=(0,+inf) with 1/0 signal */ + D_IM(ans) = ay / ax; + D_RE(ans) = zero; + } else if (iy >= 0x3ff00000) { /* y>1 */ + D_IM(ans) = half * log1p(two / (-t)); + D_RE(ans) = pi_2; + } else { /* y<1 */ + D_IM(ans) = half * log1p((ay + ay) / t); + D_RE(ans) = zero; + } + } else if (iy < 0x3e200000 || ((ix - iy) >> 20) >= 30) { + /* INDENT OFF */ + /* + * Tiny y (relative to 1+|x|) + * |y| < E*(1+|x|) + * where E=2**-29, -35, -60 for double, double extended, quad precision + * + * 1 [ x<=1: atan(x) + * A = --- * atan2(2x, 1-x*x-y*y) ~ [ 1 1+x + * 2 [ x>=1: - atan2(2,(1-x)*(-----)) + * 2 x + * + * y/x + * B ~ t*(1-2t), where t = ----------------- is tiny + * x + (y-1)*(y-1)/x + */ + /* INDENT ON */ + if (ix < 0x3ff00000) + D_RE(ans) = atan(ax); + else + D_RE(ans) = half * atan2(two, (one - ax) * (one + + one / ax)); + if ((iy | ly) == 0) { + D_IM(ans) = ay; + } else { + if (ix < 0x3e200000) + t = ay / ((ay - one) * (ay - one)); + else if (ix > 0x41c00000) + t = (ay / ax) / ax; + else + t = ay / (ax * ax + (ay - one) * (ay - one)); + D_IM(ans) = t * (one - (t + t)); + } + } else if (iy >= 0x41c00000 && ((iy - ix) >> 20) >= 30) { + /* INDENT OFF */ + /* + * Huge y relative to 1+|x| + * |y| > Einv*(1+|x|), where
Einv~2**(prec/2+3), + * 1 + * A ~ --- * atan2(2x, -y*y) ~ pi/2 + * 2 + * y + * B ~ t*(1-2t), where t = --------------- is tiny + * (y-1)*(y-1) + */ + /* INDENT ON */ + D_RE(ans) = pi_2; + t = (ay / (ay - one)) / (ay - one); + D_IM(ans) = t * (one - (t + t)); + } else if (((iy - 0x3ff00000) | ly) == 0) { + /* INDENT OFF */ + /* + * y = 1 + * 1 1 + * A = --- * atan2(2x, -x*x) = --- atan2(2,-x) + * 2 2 + * + * 1 [x*x + 4] 1 4 [ 0.5(log2-logx) if + * B = - log [-------] = - log (1+ ---) = [ |x| < E, else 0.25* + * 4 [ x*x ] 4 x*x [ log1p((2/x)*(2/x)) + */ + /* INDENT ON */ + D_RE(ans) = half * atan2(two, -ax); + if (ix < 0x3e200000) /* |x| < 2**-29: log1p(4/x^2) ~ 2*(ln2 - log|x|) */ + D_IM(ans) = half * (ln2 - log(ax)); + else { + t = two / ax; + D_IM(ans) = 0.25 * log1p(t * t); + } + } else if (ix >= 0x43900000) { + /* INDENT OFF */ + /* + * Huge x: + * when |x| > 1/E^2, + * 1 pi + * A ~ --- * atan2(2x, -x*x-y*y) ~ --- + * 2 2 + * y y/x + * B ~ t*(1-2t), where t = --------------- = (-------------- )/x + * x*x+(y-1)*(y-1) 1+((y-1)/x)^2 + */ + /* INDENT ON */ + D_RE(ans) = pi_2; + t = ((ay / ax) / (one + ((ay - one) / ax) * ((ay - one) / + ax))) / ax; + D_IM(ans) = t * (one - (t + t)); + } else if (ix < 0x38b00000) { + /* INDENT OFF */ + /* + * Tiny x: + * when |x| < E^4, (note that y!=1) + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) ~ --- * atan2(2x,(1-y)*(1+y)) + * 2 2 + * + * 1 [(y+1)*(y+1)] 1 2 1 2y + * B = - log [-----------] = - log (1+ ---) or - log(1+ ----) + * 4 [(y-1)*(y-1)] 2 y-1 2 1-y + */ + /* INDENT ON */ + D_RE(ans) = half * atan2(ax + ax, (one - ay) * (one + ay)); + if (iy >= 0x3ff00000) + D_IM(ans) = half * log1p(two / (ay - one)); + else + D_IM(ans) = half * log1p((ay + ay) / (one - ay)); + } else { + /* INDENT OFF */ + /* + * normal x,y + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [x*x+(y+1)*(y+1)] 1 4y + * B = - log [---------------] = - log (1+ -----------------) + * 4 [x*x+(y-1)*(y-1)] 4 x*x + (y-1)*(y-1) + */ + /* INDENT ON */ + t = one - ay; + if (iy >= 0x3fe00000 && iy < 0x40000000) { + /* y close to 1 */ + D_RE(ans) = half * (atan2((ax + ax), (t * (one + ay) - + ax * ax))); + } else if (ix >= 0x3fe00000 && ix < 0x40000000) { + /* x close to 1 */ + D_RE(ans) = half * atan2((ax + ax), ((one - ax) * + (one + ax) - ay * ay)); + } else +
D_RE(ans) = half * atan2((ax + ax), ((one - ax * ax) - + ay * ay)); + D_IM(ans) = 0.25 * log1p((4.0 * ay) / (ax * ax + t * t)); + } + if (hx < 0) /* reapply the sign of x to the real part */ + D_RE(ans) = -D_RE(ans); + if (hy < 0) /* reapply the sign of y to the imaginary part */ + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/catanf.c b/usr/src/libm/src/complex/catanf.c new file mode 100644 index 0000000..36b7a81 --- /dev/null +++ b/usr/src/libm/src/complex/catanf.c @@ -0,0 +1,137 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)catanf.c 1.5 06/01/23 SMI" + +#pragma weak catanf = __catanf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float + pi_2 = 1.570796326794896558e+00F, + zero = 0.0F, + half = 0.5F, + two = 2.0F, + one = 1.0F; +/* + * fcomplex catanf(fcomplex z); + * + * Single-precision complex arctangent of z = x + iy. Inf/NaN operands + * and x == 0 are handled with float arithmetic; every other argument is + * evaluated in double using + * Re = 0.5 * atan2(2|x|, 1 - x*x - y*y) + * Im = 0.25 * log1p(4|y| / (x*x + (|y| - 1)^2)) + * and converted to float. The result is computed for (|x|, |y|); the + * real part is negated when hx < 0 and the imaginary part when hy < 0. + */ +fcomplex +catanf(fcomplex z) { + fcomplex ans; + float x, y, ax, ay, t; + double dx, dy, dt; + int hx, hy, ix, iy; + + x = F_RE(z); + y = F_IM(z); + ax = fabsf(x); + ay = fabsf(y); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; /* hx with the sign bit cleared */ + iy = hy & 0x7fffffff; /* hy with the sign bit cleared */ + + if (ix >= 0x7f800000) { /* x is inf or NaN */ + if (ix == 0x7f800000) { + F_RE(ans) = pi_2; + F_IM(ans) = zero; + } else { + F_RE(ans) = x * x; + if (iy == 0 || iy == 0x7f800000) + F_IM(ans) = zero; + else + F_IM(ans) = (fabsf(y) - ay) / (fabsf(y) - ay); /* 0/0 for finite y, NaN stays NaN */ + } + } else if (iy >= 0x7f800000) { /* y is inf or NaN */ + if (iy == 0x7f800000) { + F_RE(ans) = pi_2; + F_IM(ans) = zero; + } else { + F_RE(ans) = (fabsf(x) - ax) / (fabsf(x) - ax); /* x is finite here, so this is 0/0 */ + F_IM(ans) = y * y; + } + } else if (ix == 0) { + /* INDENT OFF */ + /* + * x = 0 + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) = --- atan2(0,1-|y|) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ----------- ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + t = one - ay; + if (iy == 0x3f800000) { + /* y=1: catan(0,1)=(0,+inf) with 1/0 signal */ + F_IM(ans) = ay / ax; + F_RE(ans) = zero; + } else if (iy > 0x3f800000) { /* y>1 */ + F_IM(ans) = half * log1pf(two / (-t)); + F_RE(ans) = pi_2; + } else { /* y<1 */ + F_IM(ans) = half * log1pf((ay + ay) / t); + F_RE(ans) = zero; + } + } else { + /* INDENT OFF */ + /* + * use double precision x,y + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [ x*x+(y+1)*(y+1) ] 1 4y + * B = - log [ --------------- ] = - log (1+ -----------------) + * 4 [ x*x+(y-1)*(y-1) ] 4 x*x + (y-1)*(y-1) + */ + /* INDENT ON */ +#if defined(__i386) &&
!defined(__amd64) + int rp = __swapRP(fp_extended); /* NOTE(review): presumably selects extended rounding precision and returns the previous setting -- confirm against __swapRP */ +#endif + dx = (double)ax; + dy = (double)ay; + F_RE(ans) = (float)(0.5 * atan2(dx + dx, + 1.0 - dx * dx - dy * dy)); + dt = dy - 1.0; + F_IM(ans) = (float)(0.25 * log1p(4.0 * dy / + (dx * dx + dt * dt))); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); /* hand the saved setting back */ +#endif + } + if (hx < 0) /* reapply the sign of x to the real part */ + F_RE(ans) = -F_RE(ans); + if (hy < 0) /* reapply the sign of y to the imaginary part */ + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/catanh.c b/usr/src/libm/src/complex/catanh.c new file mode 100644 index 0000000..404ae22 --- /dev/null +++ b/usr/src/libm/src/complex/catanh.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)catanh.c 1.3 06/01/31 SMI" + +#pragma weak catanh = __catanh + +/* INDENT OFF */ +/* + * z := x + iy + * catanh(z) = -i catan(iz) + * = -i catan(-y+ix) + * = (Im(catan(-y+ix)), -Re(catan(-y+ix))) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +catanh(dcomplex z) { + double x, y; + dcomplex ans, ct; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = -y; + D_IM(z) = x; + ct = catan(z); + D_RE(ans) = D_IM(ct); + D_IM(ans) = -D_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/catanhf.c b/usr/src/libm/src/complex/catanhf.c new file mode 100644 index 0000000..aa62a8f --- /dev/null +++ b/usr/src/libm/src/complex/catanhf.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)catanhf.c 1.3 06/01/31 SMI" + +#pragma weak catanhf = __catanhf + +#include "libm.h" +#include "complex_wrapper.h" + +/* + * fcomplex catanhf(fcomplex z); + * + * Single-precision complex inverse hyperbolic tangent, obtained from + * catanf() through catanh(z) = -i * catan(i * z). + */ +fcomplex +catanhf(fcomplex z) { + fcomplex iz, atan_iz, result; + + /* iz = i * z = -Im(z) + i Re(z) */ + F_IM(iz) = F_RE(z); + F_RE(iz) = -F_IM(z); + atan_iz = catanf(iz); + /* result = -i * atan_iz = Im(atan_iz) - i Re(atan_iz) */ + F_IM(result) = -F_RE(atan_iz); + F_RE(result) = F_IM(atan_iz); + return (result); +} diff --git a/usr/src/libm/src/complex/catanhl.c b/usr/src/libm/src/complex/catanhl.c new file mode 100644 index 0000000..c7821c3 --- /dev/null +++ b/usr/src/libm/src/complex/catanhl.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)catanhl.c 1.3 06/01/31 SMI" + +#pragma weak catanhl = __catanhl + +#include "libm.h" +#include "complex_wrapper.h" + +/* + * ldcomplex catanhl(ldcomplex z); + * + * Long double complex inverse hyperbolic tangent, obtained from + * catanl() through catanh(z) = -i * catan(i * z). + */ +ldcomplex +catanhl(ldcomplex z) { + ldcomplex iz, atan_iz, result; + + /* iz = i * z = -Im(z) + i Re(z) */ + LD_IM(iz) = LD_RE(z); + LD_RE(iz) = -LD_IM(z); + atan_iz = catanl(iz); + /* result = -i * atan_iz = Im(atan_iz) - i Re(atan_iz) */ + LD_IM(result) = -LD_RE(atan_iz); + LD_RE(result) = LD_IM(atan_iz); + return (result); +} diff --git a/usr/src/libm/src/complex/catanl.c b/usr/src/libm/src/complex/catanl.c new file mode 100644 index 0000000..8c7c31b --- /dev/null +++ b/usr/src/libm/src/complex/catanl.c @@ -0,0 +1,327 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)catanl.c 1.4 06/01/31 SMI" + +#pragma weak catanl = __catanl + +/* INDENT OFF */ +/* + * ldcomplex catanl(ldcomplex z); + * + * Atan(z) return A + Bi where, + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [ x*x + (y+1)*(y+1) ] 1 4y + * B = --- log [ ----------------- ] = - log (1+ -----------------) + * 4 [ x*x + (y-1)*(y-1) ] 4 x*x + (y-1)*(y-1) + * + * 2 16 3 y + * = t - 2t + -- t - ..., where t = ----------------- + * 3 x*x + (y-1)*(y-1) + * Proof: + * Let w = atan(z=x+yi) = A + B i. Then tan(w) = z. + * Since sin(w) = (exp(iw)-exp(-iw))/(2i), cos(w)=(exp(iw)+exp(-iw))/(2), + * Let p = exp(iw), then z = tan(w) = ((p-1/p)/(p+1/p))/i, or + * iz = (p*p-1)/(p*p+1), or, after simplification, + * p*p = (1+iz)/(1-iz) ... (1) + * LHS of (1) = exp(2iw) = exp(2i(A+Bi)) = exp(-2B)*exp(2iA) + * = exp(-2B)*(cos(2A)+i*sin(2A)) ... (2) + * 1-y+ix (1-y+ix)*(1+y+ix) 1-x*x-y*y + 2xi + * RHS of (1) = ------ = ----------------- = --------------- ... (3) + * 1+y-ix (1+y)**2 + x**2 (1+y)**2 + x**2 + * + * Comparing the real and imaginary parts of (2) and (3), we have: + * cos(2A) : 1-x*x-y*y = sin(2A) : 2x + * and hence + * tan(2A) = 2x/(1-x*x-y*y), or + * A = 0.5 * atan2(2x, 1-x*x-y*y) ... (4) + * + * For the imaginary part B, Note that |p*p| = exp(-2B), and + * |1+iz| |i-z| hypot(x,(y-1)) + * |----| = |---| = -------------- + * |1-iz| |i+z| hypot(x,(y+1)) + * Thus + * x*x + (y+1)*(y+1) + * exp(4B) = -----------------, or + * x*x + (y-1)*(y-1) + * + * 1 [x^2+(y+1)^2] 1 4y + * B = - log [-----------] = - log(1+ -------------) ... (5) + * 4 [x^2+(y-1)^2] 4 x^2+(y-1)^2 + * + * QED.
+ * + * Note that: if catan( x, y) = ( u, v), then + * catan(-x, y) = (-u, v) + * catan( x,-y) = ( u,-v) + * + * Also, catan(x,y) = -i*catanh(-y,x), or + * catanh(x,y) = i*catan(-y,x) + * So, if catanh(y,x) = (v,u), then catan(x,y) = -i*(-v,u) = (u,v), i.e., + * catan(x,y) = (u,v) + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * catan( 0 , 0 ) = (0 , 0 ) + * catan( NaN, 0 ) = (NaN , 0 ) + * catan( 0 , 1 ) = (0 , +inf) with divide-by-zero + * catan( inf, y ) = (pi/2 , 0 ) for finite +y + * catan( NaN, y ) = (NaN , NaN ) with invalid for finite y!=0 + * catan( x , inf ) = (pi/2 , 0 ) for finite +x + * catan( inf, inf ) = (pi/2 , 0 ) + * catan( NaN, inf ) = (NaN , 0 ) + * catan( x , NaN ) = (NaN , NaN ) with invalid for finite x + * catan( inf, NaN ) = (pi/2 , +-0 ) + */ +/* INDENT ON */ + +#include "libm.h" /* atan2l/atanl/fabsl/isinfl/iszerol/log1pl/logl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double +zero = 0.0L, +one = 1.0L, +two = 2.0L, +half = 0.5L, +ln2 = 6.931471805599453094172321214581765680755e-0001L, +pi_2 = 1.570796326794896619231321691639751442098584699687552910487472L, +#if defined(__i386) +E = 2.910383045673370361328125000000000000000e-11L, /* 2**-35 */ +Einv = 3.435973836800000000000000000000000000000e+10L; /* 2**+35 */ +#else +E = 8.673617379884035472059622406959533691406e-19L, /* 2**-60 */ +Einv = 1.152921504606846976000000000000000000000e18L; /* 2**+60 */ +#endif +/* INDENT ON */ + +/* + * catanl(z) for z = x + iy, returned as A + Bi. Each branch below picks + * a formula for A and B suited to the magnitudes of |x| and |y|. All + * branches work on ax = |x| and ay = |y|; A is negated when hx < 0 and + * B when hy < 0 just before returning. + */ +ldcomplex +catanl(ldcomplex z) { + ldcomplex ans; + long double x, y, t1, ax, ay, t; + int hx, hy, ix, iy; + + x = LD_RE(z); + y = LD_IM(z); + ax = fabsl(x); + ay = fabsl(y); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; /* hx with the sign bit cleared */ + iy = hy & 0x7fffffff; /* hy with the sign bit cleared */ + + /* x is inf or NaN */ + if (ix >= 0x7fff0000) { + if (isinfl(x)) { + LD_RE(ans) = pi_2; + LD_IM(ans) = zero; + } else { + LD_RE(ans) = x + x; + if (iszerol(y) || (isinfl(y))) + LD_IM(ans) = zero; + else + LD_IM(ans) = (fabsl(y) - ay) / (fabsl(y) - ay); + } + }
else if (iy >= 0x7fff0000) { + /* y is inf or NaN */ + if (isinfl(y)) { + LD_RE(ans) = pi_2; + LD_IM(ans) = zero; + } else { + LD_RE(ans) = (fabsl(x) - ax) / (fabsl(x) - ax); + LD_IM(ans) = y; + } + } else if (iszerol(x)) { + /* INDENT OFF */ + /* + * x = 0 + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) = --- atan2(0,1-|y|) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ----------- ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + t = one - ay; + if (ay == one) { + /* y=1: catan(0,1)=(0,+inf) with 1/0 signal */ + LD_IM(ans) = ay / ax; + LD_RE(ans) = zero; + } else if (ay > one) { /* y>1 */ + LD_IM(ans) = half * log1pl(two / (-t)); + LD_RE(ans) = pi_2; + } else { /* y<1 */ + LD_IM(ans) = half * log1pl((ay + ay) / t); + LD_RE(ans) = zero; + } + } else if (ay < E * (one + ax)) { + /* INDENT OFF */ + /* + * Tiny y (relative to 1+|x|) + * |y| < E*(1+|x|) + * where E=2**-29, -35, -60 for double, extended, quad precision + * + * 1 [x<=1: atan(x) + * A = - * atan2(2x,1-x*x-y*y) ~ [ 1 1+x + * 2 [x>=1: - atan2(2,(1-x)*(-----)) + * 2 x + * + * y/x + * B ~ t*(1-2t), where t = ----------------- is tiny + * x + (y-1)*(y-1)/x + * + * y + * (when x < 2**-60, t = ----------- ) + * (y-1)*(y-1) + */ + /* INDENT ON */ + if (ay == zero) + LD_IM(ans) = ay; + else { + t1 = ay - one; + if (ix < 0x3fc30000) + t = ay / (t1 * t1); + else if (ix > 0x403b0000) + t = (ay / ax) / ax; + else + t = ay / (ax * ax + t1 * t1); + LD_IM(ans) = t * (one - two * t); + } + if (ix < 0x3fff0000) + LD_RE(ans) = atanl(ax); + else + LD_RE(ans) = half * atan2l(two, (one - ax) * (one + + one / ax)); + + } else if (ay > Einv * (one + ax)) { + /* INDENT OFF */ + /* + * Huge y relative to 1+|x| + * |y| > Einv*(1+|x|), where Einv~2**(prec/2+3), + * 1 + * A ~ --- * atan2(2x, -y*y) ~ pi/2 + * 2 + * y + * B ~ t*(1-2t), where t = --------------- is tiny + * (y-1)*(y-1) + */ + /* INDENT ON */ + LD_RE(ans) = pi_2; + t = (ay / (ay - one)) / (ay - one); + LD_IM(ans) = t
* (one - (t + t)); + } else if (ay == one) { + /* INDENT OFF */ + /* + * y=1 + * 1 1 + * A = - * atan2(2x, -x*x) = --- atan2(2,-x) + * 2 2 + * + * 1 [ x*x+4] 1 4 [ 0.5(log2-logx) if + * B = - log [ -----] = - log (1+ ---) = [ |x| < E, else 0.25* + * 4 [ x*x ] 4 x*x [ log1p((2/x)*(2/x)) + */ + /* INDENT ON */ + LD_RE(ans) = half * atan2l(two, -ax); + if (ax < E) /* 4/x^2 dominates: log1p(4/x^2) ~ 2*(ln2 - log|x|) */ + LD_IM(ans) = half * (ln2 - logl(ax)); + else { + t = two / ax; + LD_IM(ans) = 0.25L * log1pl(t * t); + } + } else if (ax > Einv * Einv) { + /* INDENT OFF */ + /* + * Huge x: + * when |x| > 1/E^2, + * 1 pi + * A ~ --- * atan2(2x, -x*x-y*y) ~ --- + * 2 2 + * y y/x + * B ~ t*(1-2t), where t = --------------- = (-------------- )/x + * x*x+(y-1)*(y-1) 1+((y-1)/x)^2 + */ + /* INDENT ON */ + LD_RE(ans) = pi_2; + t = ((ay / ax) / (one + ((ay - one) / ax) * ((ay - one) / + ax))) / ax; + LD_IM(ans) = t * (one - (t + t)); + } else if (ax < E * E * E * E) { + /* INDENT OFF */ + /* + * Tiny x: + * when |x| < E^4, (note that y!=1) + * 1 1 + * A = --- * atan2(2x, 1-x*x-y*y) ~ --- * atan2(2x,1-y*y) + * 2 2 + * + * 1 [ (y+1)*(y+1) ] 1 2 1 2y + * B = - log [ ----------- ] = - log (1+ ---) or - log(1+ ----) + * 4 [ (y-1)*(y-1) ] 2 y-1 2 1-y + */ + /* INDENT ON */ + LD_RE(ans) = half * atan2l(ax + ax, (one - ay) * (one + ay)); + if (ay > one) /* y>1 */ + LD_IM(ans) = half * log1pl(two / (ay - one)); + else /* y<1 */ + LD_IM(ans) = half * log1pl((ay + ay) / (one - ay)); + } else { + /* INDENT OFF */ + /* + * normal x,y + * 1 + * A = --- * atan2(2x, 1-x*x-y*y) + * 2 + * + * 1 [ x*x+(y+1)*(y+1) ] 1 4y + * B = - log [ --------------- ] = - log (1+ -----------------) + * 4 [ x*x+(y-1)*(y-1) ] 4 x*x + (y-1)*(y-1) + */ + /* INDENT ON */ + t = one - ay; + if (iy >= 0x3ffe0000 && iy < 0x40000000) { + /* y close to 1 */ + LD_RE(ans) = half * (atan2l((ax + ax), (t * (one + + ay) - ax * ax))); + } else if (ix >= 0x3ffe0000 && ix < 0x40000000) { + /* x close to 1 */ + LD_RE(ans) = half * atan2l((ax + ax), ((one - ax) * + (one + ax) - ay * ay)); + } else + LD_RE(ans) = half * atan2l((ax + ax), ((one - ax * + ax) - ay * ay)); + LD_IM(ans) = 0.25L * log1pl((4.0L * ay) / (ax * ax + t * t)); + } + if (hx < 0) /* reapply the sign of x to the real part */ + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) /* reapply the sign of y to the imaginary part */ + LD_IM(ans) = -LD_IM(ans); + return (ans); +}
diff --git a/usr/src/libm/src/complex/ccos.c b/usr/src/libm/src/complex/ccos.c new file mode 100644 index 0000000..055cdf6 --- /dev/null +++ b/usr/src/libm/src/complex/ccos.c @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ccos.c 1.3 06/01/31 SMI" + +#pragma weak ccos = __ccos + +/* INDENT OFF */ +/* + * dcomplex ccos(dcomplex z); + * + * z := x+iy; since ccos(iz) = cosh(z), we have + * ccos(z) = ccos((-1)*(-z)) = ccos(i*i*(-z)) + * = ccosh(i*(-z)) = ccosh(i*(-x-yi)) + * = ccosh(y-ix) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +ccos(dcomplex z) { + double x, y; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = y; + D_IM(z) = -x; + return (ccosh(z)); +} diff --git a/usr/src/libm/src/complex/ccosf.c b/usr/src/libm/src/complex/ccosf.c new file mode 100644 index 0000000..f56f1b5 --- /dev/null +++ b/usr/src/libm/src/complex/ccosf.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ccosf.c 1.3 06/01/31 SMI" + +#pragma weak ccosf = __ccosf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +ccosf(fcomplex z) { + float x, y; + + x = F_RE(z); + y = F_IM(z); + F_RE(z) = y; + F_IM(z) = -x; + return (ccoshf(z)); +} diff --git a/usr/src/libm/src/complex/ccosh.c b/usr/src/libm/src/complex/ccosh.c new file mode 100644 index 0000000..476ad99 --- /dev/null +++ b/usr/src/libm/src/complex/ccosh.c @@ -0,0 +1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ccosh.c 1.3 06/01/31 SMI" + +#pragma weak ccosh = __ccosh + +/* INDENT OFF */ +/* + * dcomplex ccosh(dcomplex z); + * + * z -z x -x + * e + e e (cos(y)+i*sin(y)) + e (cos(-y)+i*sin(-y)) + * cosh z = -------------- = --------------------------------------------- + * 2 2 + * x -x x -x + * cos(y) ( e + e ) + i*sin(y) (e - e ) + * = -------------------------------------------- + * 2 + * + * = cos(y) cosh(x) + i sin(y) sinh(x) + * + * Implementation Note + * ------------------- + * + * |x| -|x| |x| -2|x| -2|x| -P-4 + * Note that e +- e = e ( 1 +- e ). If e < 2 , where + * + * P stands for the number of significant bits of the machine precision, + * |x| + * then the result will be rounded to e . 
Therefore, we have + * + * z + * e + * cosh z = ----- if |x| >= (P/2 + 2)*ln2 + * 2 + * + * EXCEPTION (conform to ISO/IEC 9899:1999(E)): + * ccosh(0,0)=(1,0) + * ccosh(0,inf)=(NaN,+-0) + * ccosh(0,NaN)=(NaN,+-0) + * ccosh(x,inf) = (NaN,NaN) for finite non-zero x + * ccosh(x,NaN) = (NaN,NaN) for finite non-zero x + * ccosh(inf,0) = (inf, 0) + * ccosh(inf,y) = (inf*cos(y),inf*sin(y)) for finite non-zero y + * ccosh(inf,inf) = (+-inf,NaN) + * ccosh(inf,NaN) = (+inf,NaN) + * ccosh(NaN,0) = (NaN,+-0) + * ccosh(NaN,y) = (NaN,NaN) for non-zero y + * ccosh(NaN,NaN) = (NaN,NaN) + */ +/* INDENT ON */ + +#include "libm.h" /* cosh/exp/fabs/scalbn/sinh/sincos/__k_cexp */ +#include "complex_wrapper.h" + +dcomplex +ccosh(dcomplex z) { + double t, x, y, S, C; + int hx, ix, lx, hy, iy, ly, n; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + ix = hx & 0x7fffffff; + hy = HI_WORD(y); + ly = LO_WORD(y); + iy = hy & 0x7fffffff; + x = fabs(x); + y = fabs(y); + + (void) sincos(y, &S, &C); + if (ix >= 0x403c0000) { /* |x| > 28 = prec/2 (14,28,34,60) */ + if (ix >= 0x40862E42) { /* |x| > 709.78... 
~ log(2**1024) */ + if (ix >= 0x7ff00000) { /* |x| is inf or NaN */ + if ((iy | ly) == 0) { + D_RE(ans) = x; + D_IM(ans) = y; + } else if (iy >= 0x7ff00000) { + D_RE(ans) = x; + D_IM(ans) = x - y; + } else { + D_RE(ans) = C * x; + D_IM(ans) = S * x; + } + } else { + t = __k_cexp(x, &n); + /* return exp(x)=t*2**n */ + D_RE(ans) = scalbn(C * t, n - 1); + D_IM(ans) = scalbn(S * t, n - 1); + } + } else { + t = exp(x) * 0.5; + D_RE(ans) = C * t; + D_IM(ans) = S * t; + } + } else { + if ((ix | lx) == 0) { /* x = 0, return (C,0) */ + D_RE(ans) = C; + D_IM(ans) = 0.0; + } else { + D_RE(ans) = C * cosh(x); + D_IM(ans) = S * sinh(x); + } + } + if ((hx ^ hy) < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ccoshf.c b/usr/src/libm/src/complex/ccoshf.c new file mode 100644 index 0000000..f96dd5f --- /dev/null +++ b/usr/src/libm/src/complex/ccoshf.c @@ -0,0 +1,99 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ccoshf.c 1.5 06/01/23 SMI" + +#pragma weak ccoshf = __ccoshf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float zero = 0.0F, half = 0.5F; + +fcomplex +ccoshf(fcomplex z) { + float t, x, y, S, C; + double w; + int hx, ix, hy, iy, n; + fcomplex ans; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + ix = hx & 0x7fffffff; + hy = THE_WORD(y); + iy = hy & 0x7fffffff; + x = fabsf(x); + y = fabsf(y); + + sincosf(y, &S, &C); + if (ix >= 0x41600000) { /* |x| > 14 = prec/2 (14,28,34,60) */ + if (ix >= 0x42B171AA) { /* |x| > 88.722... ~ log(2**128) */ + if (ix >= 0x7f800000) { /* |x| is inf or NaN */ + if (iy == 0) { + F_RE(ans) = x; + F_IM(ans) = y; + } else if (iy >= 0x7f800000) { + F_RE(ans) = x; + F_IM(ans) = x - y; + } else { + F_RE(ans) = C * x; + F_IM(ans) = S * x; + } + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* return (C, S) * exp(x) / 2 */ + w = __k_cexp((double)x, &n); + F_RE(ans) = (float)scalbn(C * w, n - 1); + F_IM(ans) = (float)scalbn(S * w, n - 1); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + } else { + t = expf(x) * half; + F_RE(ans) = C * t; + F_IM(ans) = S * t; + } + } else { + if (ix == 0) { /* x = 0, return (C,0) */ + F_RE(ans) = C; + F_IM(ans) = zero; + } else { + F_RE(ans) = C * coshf(x); + F_IM(ans) = S * sinhf(x); + } + } + if ((hx ^ hy) < 0) + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ccoshl.c b/usr/src/libm/src/complex/ccoshl.c new file mode 100644 index 0000000..e8e2765 --- /dev/null +++ b/usr/src/libm/src/complex/ccoshl.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ccoshl.c 1.3 06/01/31 SMI" + +#pragma weak ccoshl = __ccoshl + +#include "libm.h" /* coshl/expl/fabsl/scalbnl/sincosl/sinhl/__k_cexpl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double zero = 0.0L, half = 0.5L; +/* INDENT ON */ + +ldcomplex +ccoshl(ldcomplex z) { + long double t, x, y, S, C; + int hx, ix, hy, iy, n; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + ix = hx & 0x7fffffff; + hy = HI_XWORD(y); + iy = hy & 0x7fffffff; + x = fabsl(x); + y = fabsl(y); + + (void) sincosl(y, &S, &C); + if (ix >= 0x4004e000) { /* |x| > 60 = prec/2 (14,28,34,60) */ + if (ix >= 0x400C62E4) { /* |x| > 11356.52... 
~ log(2**16384) */ + if (ix >= 0x7fff0000) { /* |x| is inf or NaN */ + if (y == zero) { + LD_RE(ans) = x; + LD_IM(ans) = y; + } else if (iy >= 0x7fff0000) { + LD_RE(ans) = x; + LD_IM(ans) = x - y; + } else { + LD_RE(ans) = C * x; + LD_IM(ans) = S * x; + } + } else { + t = __k_cexpl(x, &n); + /* return exp(x)=t*2**n */ + LD_RE(ans) = scalbnl(C * t, n - 1); + LD_IM(ans) = scalbnl(S * t, n - 1); + } + } else { + t = expl(x) * half; + LD_RE(ans) = C * t; + LD_IM(ans) = S * t; + } + } else { + if (x == zero) { /* x = 0, return (C,0) */ + LD_RE(ans) = C; + LD_IM(ans) = zero; + } else { + LD_RE(ans) = C * coshl(x); + LD_IM(ans) = S * sinhl(x); + } + } + if ((hx ^ hy) < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ccosl.c b/usr/src/libm/src/complex/ccosl.c new file mode 100644 index 0000000..df8c9ab --- /dev/null +++ b/usr/src/libm/src/complex/ccosl.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ccosl.c 1.3 06/01/31 SMI" + +#pragma weak ccosl = __ccosl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +ccosl(ldcomplex z) { + long double x, y; + + x = LD_RE(z); + y = LD_IM(z); + LD_RE(z) = y; + LD_IM(z) = -x; + return (ccoshl(z)); +} diff --git a/usr/src/libm/src/complex/cexp.c b/usr/src/libm/src/complex/cexp.c new file mode 100644 index 0000000..2bf6c66 --- /dev/null +++ b/usr/src/libm/src/complex/cexp.c @@ -0,0 +1,115 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cexp.c 1.3 06/01/31 SMI" + +#pragma weak cexp = __cexp + +/* INDENT OFF */ +/* + * dcomplex cexp(dcomplex z); + * + * x+iy x + * e = e (cos(y)+i*sin(y)) + * + * Over/underflow issue + * -------------------- + * exp(x) may be huge but cos(y) or sin(y) may be tiny. So we use + * function __k_cexp(x,&n) to return exp(x) = __k_cexp(x,&n)*2**n. 
+ * Thus if exp(x+iy) = A + Bi and t = __k_cexp(x,&n), then + * A = t*cos(y)*2**n, B = t*sin(y)*2**n + * + * Purge off all exceptional arguments: + * (x,0) --> (exp(x),0) for all x, include inf and NaN + * (+inf, y) --> (+inf, NaN) for inf, nan + * (-inf, y) --> (+-0, +-0) for y = inf, nan + * (x,+-inf/NaN) --> (NaN,NaN) for finite x + * For all other cases, return + * (x,y) --> exp(x)*cos(y)+i*exp(x)*sin(y)) + * + * Algorithm for out of range x and finite y + * 1. compute exp(x) in factor form (t=__k_cexp(x,&n))*2**n + * 2. compute sincos(y,&s,&c) + * 3. compute t*s+i*(t*c), then scale back to 2**n and return. + */ +/* INDENT ON */ + +#include "libm.h" /* exp/scalbn/sincos/__k_cexp */ +#include "complex_wrapper.h" + +static const double zero = 0.0; + +dcomplex +cexp(dcomplex z) { + dcomplex ans; + double x, y, t, c, s; + int n, ix, iy, hx, hy, lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if ((iy | ly) == 0) { /* y = 0 */ + D_RE(ans) = exp(x); + D_IM(ans) = y; + } else if (ISINF(ix, lx)) { /* x is +-inf */ + if (hx < 0) { + if (iy >= 0x7ff00000) { + D_RE(ans) = zero; + D_IM(ans) = zero; + } else { + sincos(y, &s, &c); + D_RE(ans) = zero * c; + D_IM(ans) = zero * s; + } + } else { + if (iy >= 0x7ff00000) { + D_RE(ans) = x; + D_IM(ans) = y - y; + } else { + (void) sincos(y, &s, &c); + D_RE(ans) = x * c; + D_IM(ans) = x * s; + } + } + } else { + (void) sincos(y, &s, &c); + if (ix >= 0x40862E42) { /* |x| > 709.78... 
~ log(2**1024) */ + t = __k_cexp(x, &n); + D_RE(ans) = scalbn(t * c, n); + D_IM(ans) = scalbn(t * s, n); + } else { + t = exp(x); + D_RE(ans) = t * c; + D_IM(ans) = t * s; + } + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cexpf.c b/usr/src/libm/src/complex/cexpf.c new file mode 100644 index 0000000..f4ca00c --- /dev/null +++ b/usr/src/libm/src/complex/cexpf.c @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cexpf.c 1.3 06/01/23 SMI" + +#pragma weak cexpf = __cexpf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float zero = 0.0F; + +fcomplex +cexpf(fcomplex z) { + fcomplex ans; + float x, y, c, s; + double t; + int n, ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (iy == 0) { /* y = 0 */ + F_RE(ans) = expf(x); + F_IM(ans) = y; + } else if (ix == 0x7f800000) { /* x is +-inf */ + if (hx < 0) { + if (iy >= 0x7f800000) { + F_RE(ans) = zero; + F_IM(ans) = zero; + } else { + sincosf(y, &s, &c); + F_RE(ans) = zero * c; + F_IM(ans) = zero * s; + } + } else { + if (iy >= 0x7f800000) { + F_RE(ans) = x; + F_IM(ans) = y - y; + } else { + sincosf(y, &s, &c); + F_RE(ans) = x * c; + F_IM(ans) = x * s; + } + } + } else { + sincosf(y, &s, &c); + if (ix >= 0x42B171AA) { /* |x| > 88.722... ~ log(2**128) */ +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + t = __k_cexp(x, &n); + F_RE(ans) = (float)scalbn(t * (double)c, n); + F_IM(ans) = (float)scalbn(t * (double)s, n); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + t = expf(x); + F_RE(ans) = t * c; + F_IM(ans) = t * s; + } + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cexpl.c b/usr/src/libm/src/complex/cexpl.c new file mode 100644 index 0000000..e311c6a --- /dev/null +++ b/usr/src/libm/src/complex/cexpl.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cexpl.c 1.3 06/01/31 SMI" + +#pragma weak cexpl = __cexpl + +#include "libm.h" /* expl/isinfl/iszerol/scalbnl/sincosl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double zero = 0.0L; +/* INDENT ON */ + +ldcomplex +cexpl(ldcomplex z) { + ldcomplex ans; + long double x, y, t, c, s; + int n, ix, iy, hx, hy; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (iszerol(y)) { /* y = 0 */ + LD_RE(ans) = expl(x); + LD_IM(ans) = y; + } else if (isinfl(x)) { /* x is +-inf */ + if (hx < 0) { + if (iy >= 0x7fff0000) { + LD_RE(ans) = zero; + LD_IM(ans) = zero; + } else { + sincosl(y, &s, &c); + LD_RE(ans) = zero * c; + LD_IM(ans) = zero * s; + } + } else { + if (iy >= 0x7fff0000) { + LD_RE(ans) = x; + LD_IM(ans) = y - y; + } else { + (void) sincosl(y, &s, &c); + LD_RE(ans) = x * c; + LD_IM(ans) = x * s; + } + } + } else { + (void) sincosl(y, &s, &c); + if (ix >= 0x400C62E4) { /* |x| > 11356.52... 
~ log(2**16384) */ + t = __k_cexpl(x, &n); + LD_RE(ans) = scalbnl(t * c, n); + LD_IM(ans) = scalbnl(t * s, n); + } else { + t = expl(x); + LD_RE(ans) = t * c; + LD_IM(ans) = t * s; + } + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cimag.c b/usr/src/libm/src/complex/cimag.c new file mode 100644 index 0000000..8d4ecf4 --- /dev/null +++ b/usr/src/libm/src/complex/cimag.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cimag.c 1.3 06/01/31 SMI" + +#pragma weak cimag = __cimag + +#include "libm.h" +#include "complex_wrapper.h" + +double +cimag(dcomplex z) { + return (D_IM(z)); +} diff --git a/usr/src/libm/src/complex/cimagf.c b/usr/src/libm/src/complex/cimagf.c new file mode 100644 index 0000000..596ee4e --- /dev/null +++ b/usr/src/libm/src/complex/cimagf.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cimagf.c 1.3 06/01/31 SMI" + +#pragma weak cimagf = __cimagf + +#include "libm.h" +#include "complex_wrapper.h" + +float +cimagf(fcomplex z) { + return (F_IM(z)); +} diff --git a/usr/src/libm/src/complex/cimagl.c b/usr/src/libm/src/complex/cimagl.c new file mode 100644 index 0000000..e872c01 --- /dev/null +++ b/usr/src/libm/src/complex/cimagl.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cimagl.c 1.3 06/01/31 SMI" + +#pragma weak cimagl = __cimagl + +#include "libm.h" +#include "complex_wrapper.h" + +long double +cimagl(ldcomplex z) { + return (LD_IM(z)); +} diff --git a/usr/src/libm/src/complex/clog.c b/usr/src/libm/src/complex/clog.c new file mode 100644 index 0000000..03fe3b3 --- /dev/null +++ b/usr/src/libm/src/complex/clog.c @@ -0,0 +1,133 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)clog.c 1.8 06/01/23 SMI" + +#pragma weak clog = __clog + +/* INDENT OFF */ +/* + * dcomplex clog(dcomplex z); + * + * _________ + * / 2 2 -1 y + * log(x+iy) = log(\/ x + y ) + i tan (---) + * x + * + * 1 2 2 -1 y + * = --- log(x + y ) + i tan (---) + * 2 x + * + * Note that the arctangent ranges from -PI to +PI, thus the imaginary + * part of clog is atan2(y,x). 
+ * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * clog(-0 + i 0 ) = -inf + i pi + * clog( 0 + i 0 ) = -inf + i 0 + * clog( x + i inf ) = -inf + i pi/2, for finite x + * clog( x + i NaN ) = NaN + i NaN with invalid for finite x + * clog(-inf + iy )= +inf + i pi, for finite positive-signed y + * clog(+inf + iy )= +inf + i 0 , for finite positive-signed y + * clog(-inf + i inf)= inf + i 3pi/4 + * clog(+inf + i inf)= inf + i pi/4 + * clog(+-inf+ i NaN)= inf + i NaN + * clog(NaN + i y )= NaN + i NaN for finite y + * clog(NaN + i inf)= inf + i NaN + * clog(NaN + i NaN)= NaN + i NaN + */ +/* INDENT ON */ + +#include "libm_synonyms.h" +#include /* atan2/fabs/log/log1p */ +#include "complex_wrapper.h" +#include "libm_protos.h" /* __k_clog_r */ + + +static const double half = 0.5, one = 1.0; + +dcomplex +clog(dcomplex z) { + dcomplex ans; + double x, y, t, ax, ay, w; + int n, ix, iy, hx, hy; + unsigned lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + ay = fabs(y); + ax = fabs(x); + D_IM(ans) = carg(z); + if (ix < iy || (ix == iy && lx < ly)) { + /* swap x and y to force ax >= ay */ + t = ax; + ax = ay; + ay = t; + n = ix, ix = iy; + iy = n; + n = lx, lx = ly; + ly = n; + } + n = (ix - iy) >> 20; + if (ix >= 0x7ff00000) { /* x or y is Inf or NaN */ + if (ISINF(ix, lx)) + D_RE(ans) = ax; + else if (ISINF(iy, ly)) + D_RE(ans) = ay; + else + D_RE(ans) = ax * ay; + } else if ((iy | ly) == 0) { + D_RE(ans) = ((ix | lx) == 0)? 
-one / ax : log(ax); + } else if (((0x3fffffff - ix) ^ (ix - 0x3fe00000)) >= 0) { + /* 0.5 <= x < 2 */ + if (ix >= 0x3ff00000) { + if (((ix - 0x3ff00000) | lx) == 0) + D_RE(ans) = half * log1p(ay * ay); + else if (n >= 60) + D_RE(ans) = log(ax); + else + D_RE(ans) = half * (log1p(ay * ay + (ax - + one) * (ax + one))); + } else if (n >= 60) { + D_RE(ans) = log(ax); + } else { + D_RE(ans) = __k_clog_r(ax, ay, &w); + } + } else if (n >= 30) { + D_RE(ans) = log(ax); + } else if (ix < 0x5f300000 && iy >= 0x20b00000) { + /* 2**-500< y < x < 2**500 */ + D_RE(ans) = half * log(ax * ax + ay * ay); + } else { + t = ay / ax; + D_RE(ans) = log(ax) + half * log1p(t * t); + } + return (ans); +} diff --git a/usr/src/libm/src/complex/clogf.c b/usr/src/libm/src/complex/clogf.c new file mode 100644 index 0000000..4b69a60 --- /dev/null +++ b/usr/src/libm/src/complex/clogf.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)clogf.c 1.5 06/01/23 SMI" + +#pragma weak clogf = __clogf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +fcomplex +clogf(fcomplex z) { + fcomplex ans; + float x, y, ax, ay; + double dx, dy; + int ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + ay = fabsf(y); + ax = fabsf(x); + F_IM(ans) = atan2f(y, x); + if (ix >= 0x7f800000 || iy >= 0x7f800000) { + /* x or y is Inf or NaN */ + if (iy == 0x7f800000) + F_RE(ans) = ay; + else if (ix == 0x7f800000) + F_RE(ans) = ax; + else + F_RE(ans) = ax + ay; + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + dx = (double)ax; + dy = (double)ay; + if (ix == 0x3f800000) + F_RE(ans) = (float)(0.5 * log1p(dy * dy)); + else if (iy == 0x3f800000) + F_RE(ans) = (float)(0.5 * log1p(dx * dx)); + else if ((ix | iy) == 0) + F_RE(ans) = -1.0f / ax; + else + F_RE(ans) = (float)(0.5 * log(dx * dx + dy * dy)); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + return (ans); +} diff --git a/usr/src/libm/src/complex/clogl.c b/usr/src/libm/src/complex/clogl.c new file mode 100644 index 0000000..579c22b --- /dev/null +++ b/usr/src/libm/src/complex/clogl.c @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)clogl.c 1.8 06/01/31 SMI" + +#pragma weak clogl = __clogl + +#include "libm.h" /* atan2l/fabsl/isinfl/log1pl/logl/__k_clog_rl */ +#include "complex_wrapper.h" + +#if defined(__sparc) +#define SIGP7 120 +#define HSIGP7 60 +#elif defined(__i386) +#define SIGP7 70 +#define HSIGP7 35 +#endif + +/* INDENT OFF */ +static const long double zero = 0.0L, half = 0.5L, one = 1.0L; +/* INDENT ON */ + +ldcomplex +clogl(ldcomplex z) { + ldcomplex ans; + long double x, y, t, ax, ay; + int n, ix, iy, hx, hy; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + ay = fabsl(y); + ax = fabsl(x); + LD_IM(ans) = atan2l(y, x); + if (ix < iy || (ix == iy && ix < 0x7fff0000 && ax < ay)) { + /* swap x and y to force ax>=ay */ + t = ax; + ax = ay; + ay = t; + n = ix, ix = iy; + iy = n; + } + n = (ix - iy) >> 16; + if (ix >= 0x7fff0000) { /* x or y is Inf or NaN */ + if (isinfl(ax)) + LD_RE(ans) = ax; + else if (isinfl(ay)) + LD_RE(ans) = ay; + else + LD_RE(ans) = ax + ay; + } else if (ay == zero) + LD_RE(ans) = logl(ax); + else if (((0x3fffffff - ix) ^ (ix - 0x3ffe0000)) >= 0) { + /* 0.5 <= x < 2 */ + if (ix >= 0x3fff0000) { + if (ax == one) + LD_RE(ans) = half * log1pl(ay * ay); + else if (n >= SIGP7) + LD_RE(ans) = logl(ax); + else + LD_RE(ans) = half * (log1pl(ay * ay + (ax - + one) * (ax + one))); + } else if (n >= SIGP7) + LD_RE(ans) = logl(ax); + else + LD_RE(ans) = __k_clog_rl(x, y, &t); + } else if (n >= 
HSIGP7) + LD_RE(ans) = logl(ax); + else if (ix < 0x5f3f0000 && iy >= 0x20bf0000) + /* 2**-8000 < y < x < 2**8000 */ + LD_RE(ans) = half * logl(ax * ax + ay * ay); + else { + t = ay / ax; + LD_RE(ans) = logl(ax) + half * log1pl(t * t); + } + return (ans); +} diff --git a/usr/src/libm/src/complex/complex_wrapper.h b/usr/src/libm/src/complex/complex_wrapper.h new file mode 100644 index 0000000..535507c --- /dev/null +++ b/usr/src/libm/src/complex/complex_wrapper.h @@ -0,0 +1,176 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _COMPLEX_WRAPPER_H +#define _COMPLEX_WRAPPER_H + +#pragma ident "@(#)complex_wrapper.h 1.7 06/01/31 SMI" + +#if !defined(__cplusplus) && (__STDC_VERSION__ >= 199901L || defined(_STDC_C99)) + +#define dcomplex double complex +#define fcomplex float complex +#define ldcomplex long double complex +#define _X_RE(__t, __z) ((__t *) &__z)[0] +#define _X_IM(__t, __z) ((__t *) &__z)[1] +#define D_RE(__z) _X_RE(double, __z) +#define D_IM(__z) _X_IM(double, __z) +#define F_RE(__z) _X_RE(float, __z) +#define F_IM(__z) _X_IM(float, __z) +#define LD_RE(__z) _X_RE(long double, __z) +#define LD_IM(__z) _X_IM(long double, __z) + +#include <complex.h> + +#else /* !defined(__cplusplus) && (__STDC_VERSION__ >= 199901L || ...) */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + double __re; + double __im; +} dcomplex; + +typedef struct { + float __re; + float __im; +} fcomplex; + +typedef struct { + long double __re; + long double __im; +} ldcomplex; + +#define D_RE(__z) (__z).__re +#define D_IM(__z) (__z).__im +#define F_RE(__z) (__z).__re +#define F_IM(__z) (__z).__im +#define LD_RE(__z) (__z).__re +#define LD_IM(__z) (__z).__im + +extern float cabsf(fcomplex); +extern float cargf(fcomplex); +extern float cimagf(fcomplex); +extern float crealf(fcomplex); +extern fcomplex cacosf(fcomplex); +extern fcomplex cacoshf(fcomplex); +extern fcomplex casinf(fcomplex); +extern fcomplex casinhf(fcomplex); +extern fcomplex catanf(fcomplex); +extern fcomplex catanhf(fcomplex); +extern fcomplex ccosf(fcomplex); +extern fcomplex ccoshf(fcomplex); +extern fcomplex cexpf(fcomplex); +extern fcomplex clogf(fcomplex); +extern fcomplex conjf(fcomplex); +extern fcomplex cpowf(fcomplex, fcomplex); +extern fcomplex cprojf(fcomplex); +extern fcomplex csinf(fcomplex); +extern fcomplex csinhf(fcomplex); +extern fcomplex csqrtf(fcomplex); +extern fcomplex ctanf(fcomplex); +extern fcomplex ctanhf(fcomplex); + +extern double cabs(dcomplex); +extern double carg(dcomplex); +extern double 
cimag(dcomplex); +extern double creal(dcomplex); +extern dcomplex cacos(dcomplex); +extern dcomplex cacosh(dcomplex); +extern dcomplex casin(dcomplex); +extern dcomplex casinh(dcomplex); +extern dcomplex catan(dcomplex); +extern dcomplex catanh(dcomplex); +extern dcomplex ccos(dcomplex); +extern dcomplex ccosh(dcomplex); +extern dcomplex cexp(dcomplex); +extern dcomplex clog(dcomplex); +extern dcomplex conj(dcomplex); +extern dcomplex cpow(dcomplex, dcomplex); +extern dcomplex cproj(dcomplex); +extern dcomplex csin(dcomplex); +extern dcomplex csinh(dcomplex); +extern dcomplex csqrt(dcomplex); +extern dcomplex ctan(dcomplex); +extern dcomplex ctanh(dcomplex); + +extern long double cabsl(ldcomplex); +extern long double cargl(ldcomplex); +extern long double cimagl(ldcomplex); +extern long double creall(ldcomplex); +extern ldcomplex cacoshl(ldcomplex); +extern ldcomplex cacosl(ldcomplex); +extern ldcomplex casinhl(ldcomplex); +extern ldcomplex casinl(ldcomplex); +extern ldcomplex catanhl(ldcomplex); +extern ldcomplex catanl(ldcomplex); +extern ldcomplex ccoshl(ldcomplex); +extern ldcomplex ccosl(ldcomplex); +extern ldcomplex cexpl(ldcomplex); +extern ldcomplex clogl(ldcomplex); +extern ldcomplex conjl(ldcomplex); +extern ldcomplex cpowl(ldcomplex, ldcomplex); +extern ldcomplex cprojl(ldcomplex); +extern ldcomplex csinhl(ldcomplex); +extern ldcomplex csinl(ldcomplex); +extern ldcomplex csqrtl(ldcomplex); +extern ldcomplex ctanhl(ldcomplex); +extern ldcomplex ctanl(ldcomplex); + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(__cplusplus) && (__STDC_VERSION__ >= 199901L || ...) 
*/ + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#define HI_XWORD(x) ((unsigned *) &x)[0] +#define XFSCALE(x, n) ((unsigned *) &x)[0] += n << 16 /* signbitl(x) == 0 */ +#define CHOPPED(x) ((long double) ((double) (x))) +#elif defined(__i386) || defined(__LITTLE_ENDIAN) +#define HIWORD 1 +#define LOWORD 0 +#define HI_XWORD(x) ((((int *) &x)[2] << 16) | \ + (0xffff & ((unsigned *) &x)[1] >> 15)) +#define XFSCALE(x, n) ((unsigned short *) &x)[4] += n /* signbitl(x) == 0 */ +#define CHOPPED(x) ((long double) ((float) (x))) +#else +#error Unknown architecture +#endif +#define HI_WORD(x) ((int *) &x)[HIWORD] /* for double */ +#define LO_WORD(x) ((int *) &x)[LOWORD] /* for double */ +#define THE_WORD(x) ((int *) &x)[0] /* for float */ + +/* + * iy:ly must have the sign bit already cleared + */ +#define ISINF(iy, ly) (((iy - 0x7ff00000) | ly) == 0) + +#endif /* _COMPLEX_WRAPPER_H */ diff --git a/usr/src/libm/src/complex/conj.c b/usr/src/libm/src/complex/conj.c new file mode 100644 index 0000000..d06b30e --- /dev/null +++ b/usr/src/libm/src/complex/conj.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)conj.c 1.3 06/01/31 SMI" + +#pragma weak conj = __conj + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +conj(dcomplex z) { + D_IM(z) = -D_IM(z); + return (z); +} diff --git a/usr/src/libm/src/complex/conjf.c b/usr/src/libm/src/complex/conjf.c new file mode 100644 index 0000000..857ad32 --- /dev/null +++ b/usr/src/libm/src/complex/conjf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)conjf.c 1.3 06/01/31 SMI" + +#pragma weak conjf = __conjf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +conjf(fcomplex z) { + F_IM(z) = -F_IM(z); + return (z); +} diff --git a/usr/src/libm/src/complex/conjl.c b/usr/src/libm/src/complex/conjl.c new file mode 100644 index 0000000..32ef3dc --- /dev/null +++ b/usr/src/libm/src/complex/conjl.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)conjl.c 1.3 06/01/31 SMI" + +#pragma weak conjl = __conjl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +conjl(ldcomplex z) { + LD_IM(z) = -LD_IM(z); + return (z); +} diff --git a/usr/src/libm/src/complex/cpow.c b/usr/src/libm/src/complex/cpow.c new file mode 100644 index 0000000..099ff24 --- /dev/null +++ b/usr/src/libm/src/complex/cpow.c @@ -0,0 +1,333 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cpow.c 1.8 06/01/31 SMI" + +#pragma weak cpow = __cpow + +/* INDENT OFF */ +/* + * dcomplex cpow(dcomplex z); + * + * z**w analytically equivalent to + * + * cpow(z,w) = cexp(w clog(z)) + * + * Let z = x+iy, w = u+iv. + * Since + * _________ + * / 2 2 -1 y + * log(x+iy) = log(\/ x + y ) + i tan (---) + * x + * + * 1 2 2 -1 y + * = --- log(x + y ) + i tan (---) + * 2 x + * u 2 2 -1 y + * (u+iv)* log(x+iy) = --- log(x + y ) - v tan (---) + (1) + * 2 x + * + * v 2 2 -1 y + * i * [ --- log(x + y ) + u tan (---) ] (2) + * 2 x + * + * = r + i q + * + * Therefore, + * w r+iq r + * z = e = e (cos(q)+i*sin(q)) + * _______ + * / 2 2 + * r \/ x + y -v*atan2(y,x) + * Here e can be expressed as: u * e + * + * Special cases (in the order of appearance): + * 1. (anything) ** 0 is 1 + * 2. (anything) ** 1 is itself + * 3. When v = 0, y = 0: + * If x is finite and negative, and u is finite, then + * x ** u = exp(u*pi i) * pow(|x|, u); + * otherwise, + * x ** u = pow(x, u); + * 4. When v = 0, x = 0 or |x| = |y| or x is inf or y is inf: + * (x + y i) ** u = r * exp(q i) + * where + * r = hypot(x,y) ** u + * q = u * atan2pi(y, x) + * + * 5. otherwise, z**w is NAN if any x, y, u, v is a Nan or inf + * + * Note: many results of special cases are obtained in terms of + * polar coordinate. In the conversion from polar to rectangle: + * r exp(q i) = r * cos(q) + r * sin(q) i, + * we regard r * 0 is 0 except when r is a NaN. 
+ */ +/* INDENT ON */ + +#include "libm.h" /* atan2/exp/fabs/hypot/log/pow/scalbn */ + /* atan2pi/exp2/sincos/sincospi/__k_clog_r/__k_atan2 */ +#include "complex_wrapper.h" + +static const double + huge = 1e300, + tiny = 1e-300, + invln2 = 1.44269504088896338700e+00, + ln2hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ + ln2lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ + one = 1.0, + zero = 0.0; + +static const int hiinf = 0x7ff00000; + +/* + * Assuming |t[0]| > |t[1]| and |t[2]| > |t[3]|, sum4fp subroutine + * compute t[0] + t[1] + t[2] + t[3] into two double fp numbers. + */ +static double +sum4fp(double ta[], double *w) { + double t1, t2, t3, t4, w1, w2, t; + t1 = ta[0]; t2 = ta[1]; t3 = ta[2]; t4 = ta[3]; + /* + * Rearrange ti so that |t1| >= |t2| >= |t3| >= |t4| + */ + if (fabs(t4) > fabs(t1)) { + t = t1; t1 = t3; t3 = t; + t = t2; t2 = t4; t4 = t; + } else if (fabs(t3) > fabs(t1)) { + t = t1; t1 = t3; + if (fabs(t4) > fabs(t2)) { + t3 = t4; t4 = t2; t2 = t; + } else { + t3 = t2; t2 = t; + } + } else if (fabs(t3) > fabs(t2)) { + t = t2; t2 = t3; + if (fabs(t4) > fabs(t2)) { + t3 = t4; t4 = t; + } else + t3 = t; + } + /* summing r = t1 + t2 + t3 + t4 to w1 + w2 */ + w1 = t3 + t4; + w2 = t4 - (w1 - t3); + t = t2 + w1; + w2 += w1 - (t - t2); + w1 = t + w2; + w2 += t - w1; + t = t1 + w1; + w2 += w1 - (t - t1); + w1 = t + w2; + *w = w2 - (w1 - t); + return (w1); +} + +dcomplex +cpow(dcomplex z, dcomplex w) { + dcomplex ans; + double x, y, u, v, t, c, s, r, x2, y2; + double b[4], t1, t2, t3, t4, w1, w2, u1, v1, x1, y1; + int ix, iy, hx, lx, hy, ly, hv, hu, iu, iv, lu, lv; + int i, j, k; + + x = D_RE(z); + y = D_IM(z); + u = D_RE(w); + v = D_IM(w); + hx = ((int *) &x)[HIWORD]; + lx = ((int *) &x)[LOWORD]; + hy = ((int *) &y)[HIWORD]; + ly = ((int *) &y)[LOWORD]; + hu = ((int *) &u)[HIWORD]; + lu = ((int *) &u)[LOWORD]; + hv = ((int *) &v)[HIWORD]; + lv = ((int *) &v)[LOWORD]; + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + iu = hu & 
0x7fffffff; + iv = hv & 0x7fffffff; + + j = 0; + if ((iv | lv) == 0) { /* z**(real) */ + if (((hu - 0x3ff00000) | lu) == 0) { /* z ** 1 = z */ + D_RE(ans) = x; + D_IM(ans) = y; + } else if ((iu | lu) == 0) { /* z ** 0 = 1 */ + D_RE(ans) = one; + D_IM(ans) = zero; + } else if ((iy | ly) == 0) { /* (real)**(real) */ + D_IM(ans) = zero; + if (hx < 0 && ix < hiinf && iu < hiinf) { + /* -x ** u is exp(i*pi*u)*pow(x,u) */ + r = pow(-x, u); + sincospi(u, &s, &c); + D_RE(ans) = (c == zero)? c: c * r; + D_IM(ans) = (s == zero)? s: s * r; + } else + D_RE(ans) = pow(x, u); + } else if (((ix | lx) == 0) || ix >= hiinf || iy >= hiinf) { + if (isnan(x) || isnan(y) || isnan(u)) + D_RE(ans) = D_IM(ans) = x + y + u; + else { + if ((ix | lx) == 0) /* r = hypot(x,y) ** u */ + r = pow(fabs(y), u); + else + r = pow(fabs(x) + fabs(y), u); + t = atan2pi(y, x); + sincospi(t * u, &s, &c); + D_RE(ans) = (c == zero)? c: c * r; + D_IM(ans) = (s == zero)? s: s * r; + } + } else if (((ix - iy) | (lx - ly)) == 0) { /* |x| = |y| */ + if (hx >= 0) { + t = (hy >= 0)? 0.25 : -0.25; + sincospi(t * u, &s, &c); + } else if ((lu & 3) == 0) { + t = (hy >= 0)? 0.75 : -0.75; + sincospi(t * u, &s, &c); + } else { + r = (hy >= 0)? u : -u; + t = -0.25 * r; + w1 = r + t; + w2 = t - (w1 - r); + sincospi(w1, &t1, &t2); + sincospi(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + } + if (ix < 0x3fe00000) /* |x| < 1/2 */ + r = pow(fabs(x + x), u) * exp2(-0.5 * u); + else if (ix >= 0x3ff00000 || iu < 0x408ff800) + /* |x| >= 1 or |u| < 1023 */ + r = pow(fabs(x), u) * exp2(0.5 * u); + else /* special treatment */ + j = 2; + if (j == 0) { + D_RE(ans) = (c == zero)? c: c * r; + D_IM(ans) = (s == zero)? 
s: s * r; + } + } else + j = 1; + if (j == 0) + return (ans); + } + if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) { + /* + * non-zero imag part(s) with inf component(s) yields NaN + */ + t = fabs(x) + fabs(y) + fabs(u) + fabs(v); + D_RE(ans) = D_IM(ans) = t - t; + } else { + k = 0; /* no scaling */ + if (iu > 0x7f000000 || iv > 0x7f000000) { + u *= .0009765625; /* scale 2**-10 to avoid overflow */ + v *= .0009765625; + k = 1; /* scale by 2**-10 */ + } + /* + * Use simulated higher precision arithmetic to compute: + * r = u * log(hypot(x, y)) - v * atan2(y, x) + * q = u * atan2(y, x) + v * log(hypot(x, y)) + */ + t1 = __k_clog_r(x, y, &t2); + t3 = __k_atan2(y, x, &t4); + x1 = t1; + y1 = t3; + u1 = u; + v1 = v; + ((int *) &u1)[LOWORD] &= 0xf8000000; + ((int *) &v1)[LOWORD] &= 0xf8000000; + ((int *) &x1)[LOWORD] &= 0xf8000000; + ((int *) &y1)[LOWORD] &= 0xf8000000; + x2 = t2 - (x1 - t1); /* log(hypot(x,y)) = x1 + x2 */ + y2 = t4 - (y1 - t3); /* atan2(y,x) = y1 + y2 */ + /* compute q = u * atan2(y, x) + v * log(hypot(x, y)) */ + if (j != 2) { + b[0] = u1 * y1; + b[1] = (u - u1) * y1 + u * y2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = v1 * x1; + b[3] = (v - v1) * x1 + v * x2; + w1 = sum4fp(b, &w2); + } + sincos(w1, &t1, &t2); + sincos(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + if (k == 1) + /* + * square (cos(q) + i sin(q)) 10 times to get + * cos(2^10 * q) + i sin(2^10 * q) + */ + for (i = 0; i < 10; i++) { + t1 = s * c; + c = (c + s) * (c - s); + s = t1 + t1; + } + } + /* compute r = u * (x1 + x2) - v * (y1 + y2) */ + b[0] = u1 * x1; + b[1] = (u - u1) * x1 + u * x2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = -v1 * y1; + b[3] = (v1 - v) * y1 - v * y2; + w1 = sum4fp(b, &w2); + } + /* check over/underflow for exp(w1 + w2) */ + if (k && fabs(w1) < 1000.0) { + w1 *= 1024; w2 *= 1024; k = 0; + } + hx = ((int *) &w1)[HIWORD]; + lx = 
((int *) &w1)[LOWORD]; + ix = hx & 0x7fffffff; + /* compute exp(w1 + w2) */ + if (ix < 0x3c900000) /* exp(tiny < 2**-54) = 1 */ + r = one; + else if (ix >= 0x40880000) /* overflow/underflow */ + r = (hx < 0)? tiny * tiny : huge * huge; + else { /* compute exp(w1 + w2) */ + k = (int) (invln2 * w1 + ((hx >= 0)? 0.5 : -0.5)); + t1 = (double) k; + t2 = w1 - t1 * ln2hi; + t3 = w2 - t1 * ln2lo; + r = exp(t2 + t3); + } + if (c != zero) c *= r; + if (s != zero) s *= r; + if (k != 0) { + c = scalbn(c, k); + s = scalbn(s, k); + } + D_RE(ans) = c; + D_IM(ans) = s; + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cpowf.c b/usr/src/libm/src/complex/cpowf.c new file mode 100644 index 0000000..1063be0 --- /dev/null +++ b/usr/src/libm/src/complex/cpowf.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cpowf.c 1.6 06/01/23 SMI" + +#pragma weak cpowf = __cpowf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const double + dpi = 3.1415926535897931160E0, /* Hex 2^ 1 * 1.921FB54442D18 */ + dhalf = 0.5, + dsqrt2 = 1.41421356237309514547, /* 3FF6A09E 667F3BCD */ + dinvpi = 0.3183098861837906715377675; + +static const float one = 1.0F, zero = 0.0F; + +#define hiinf 0x7f800000 + +fcomplex +cpowf(fcomplex z, fcomplex w) { + fcomplex ans; + float x, y, u, v, t, c, s; + double dx, dy, du, dv, dt, dc, ds, dp, dq, dr; + int ix, iy, hx, hy, hv, hu, iu, iv, j; + + x = F_RE(z); + y = F_IM(z); + u = F_RE(w); + v = F_IM(w); + hx = THE_WORD(x); + hy = THE_WORD(y); + hu = THE_WORD(u); + hv = THE_WORD(v); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + iu = hu & 0x7fffffff; + iv = hv & 0x7fffffff; + + j = 0; + if (iv == 0) { /* z**(real) */ + if (hu == 0x3f800000) { /* (anything) ** 1 is itself */ + F_RE(ans) = x; + F_IM(ans) = y; + } else if (iu == 0) { /* (anything) ** 0 is 1 */ + F_RE(ans) = one; + F_IM(ans) = zero; + } else if (iy == 0) { /* (real)**(real) */ + F_IM(ans) = zero; + if (hx < 0 && ix < hiinf && iu < hiinf) { + /* -x ** u is exp(i*pi*u)*pow(x,u) */ + t = powf(-x, u); + sincospif(u, &s, &c); + F_RE(ans) = (c == zero)? c: c * t; + F_IM(ans) = (s == zero)? s: s * t; + } else { + F_RE(ans) = powf(x, u); + } + } else if (ix == 0 || ix >= hiinf || iy >= hiinf) { + if (ix > hiinf || iy > hiinf || iu > hiinf) { + F_RE(ans) = F_IM(ans) = x + y + u; + } else { + v = fabsf(y); + if (ix != 0) + v += fabsf(x); + v = powf(v, u); /* modulus is hypot(x,y) ** u */ + sincospif(atan2pif(y, x) * u, &s, &c); + F_RE(ans) = (c == zero)? c: c * v; + F_IM(ans) = (s == zero)? s: s * v; + } + } else if (ix == iy) { /* if |x| == |y| */ +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + dx = (double)x; + du = (double)u; + dt = (hx >= 0)? 
0.25 : 0.75; + if (hy < 0) + dt = -dt; + dr = pow(dsqrt2 * fabs(dx), du); /* modulus: base must be |x|*sqrt(2) */ + sincospi(dt * du, &ds, &dc); + F_RE(ans) = (float)(dr * dc); + F_IM(ans) = (float)(dr * ds); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } else { + j = 1; + } + if (j == 0) + return (ans); + } + if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) { + /* + * non-zero imaginary part(s) with inf component(s) yields NaN + */ + t = fabsf(x) + fabsf(y) + fabsf(u) + fabsf(v); + F_RE(ans) = F_IM(ans) = t - t; + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* INDENT OFF */ + /* + * r = u*log(hypot(x,y))-v*atan2(y,x), + * q = u*atan2(y,x)+v*log(hypot(x,y)) + * or + * r = u*log(hypot(x,y))-v*pi*atan2pi(y,x), + * q/pi = u*atan2pi(y,x)+v*log(hypot(x,y))/pi + * ans = exp(r)*(cospi(q/pi) + i sinpi(q/pi)) + */ + /* INDENT ON */ + dx = (double)x; + dy = (double)y; + du = (double)u; + dv = (double)v; + if (ix > 0x3f000000 && ix < 0x40000000) /* .5 < |x| < 2 */ + dt = dhalf * log1p((dx - 1.0) * (dx + 1.0) + dy * dy); + else if (iy > 0x3f000000 && iy < 0x40000000) /* .5 < |y| < 2 */ + dt = dhalf * log1p((dy - 1.0) * (dy + 1.0) + dx * dx); + else + dt = dhalf * log(dx * dx + dy * dy); + dp = atan2pi(dy, dx); + if (iv == 0) { /* dv = 0 */ + dr = exp(du * dt); + dq = du * dp; + } else { + dr = exp(du * dt - dv * dp * dpi); + dq = du * dp + dv * dt * dinvpi; + } + sincospi(dq, &ds, &dc); + F_RE(ans) = (float)(dr * dc); + F_IM(ans) = (float)(dr * ds); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cpowl.c b/usr/src/libm/src/complex/cpowl.c new file mode 100644 index 0000000..7b21b7e --- /dev/null +++ b/usr/src/libm/src/complex/cpowl.c @@ -0,0 +1,278 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the 
"License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)cpowl.c 1.7 06/01/31 SMI" + +#pragma weak cpowl = __cpowl + +#include "libm.h" /* __k_clog_rl/__k_atan2l */ +/* atan2l/atan2pil/exp2l/expl/fabsl/hypotl/isinfl/logl/powl/sincosl/sincospil */ +#include "complex_wrapper.h" + +#if defined(__sparc) +#define HALF(x) ((int *) &x)[3] = 0; ((int *) &x)[2] &= 0xfe000000 +#define LAST(x) ((int *) &x)[3] +#elif defined(__i386) || defined(__LITTLE_ENDIAN) +#define HALF(x) ((int *) &x)[0] = 0 +#define LAST(x) ((int *) &x)[0] +#endif + +/* INDENT OFF */ +static const int hiinf = 0x7fff0000; +static const long double + tiny = 1.0e-4000L, + huge = 1.0e4000L, +#if defined(__i386) + /* 43 significant bits, 21 trailing zeros */ + ln2hil = 0.693147180559890330187045037746429443359375L, + ln2lol = 5.497923018708371174712471612513436025525412068e-14L, +#else /* sparc */ + /* 0x3FF962E4 2FEFA39E F35793C7 00000000 */ + ln2hil = 0.693147180559945309417231592858066493070671489074L, + ln2lol = 5.28600110075004828645286235820646730106802446566153e-25L, +#endif + invln2 = 1.442695040888963407359924681001892137427e+0000L, + one = 1.0L, + zero = 0.0L; +/* INDENT ON */ + +/* + * Assuming |t[0]| > |t[1]| and |t[2]| > |t[3]|, sum4fpl 
subroutine + * compute t[0] + t[1] + t[2] + t[3] into two long double fp numbers. + */ +static long double sum4fpl(long double ta[], long double *w) +{ + long double t1, t2, t3, t4, w1, w2, t; + t1 = ta[0]; t2 = ta[1]; t3 = ta[2]; t4 = ta[3]; + /* + * Rearrange ti so that |t1| >= |t2| >= |t3| >= |t4| + */ + if (fabsl(t4) > fabsl(t1)) { + t = t1; t1 = t3; t3 = t; + t = t2; t2 = t4; t4 = t; + } else if (fabsl(t3) > fabsl(t1)) { + t = t1; t1 = t3; + if (fabsl(t4) > fabsl(t2)) { + t3 = t4; t4 = t2; t2 = t; + } else { + t3 = t2; t2 = t; + } + } else if (fabsl(t3) > fabsl(t2)) { + t = t2; t2 = t3; + if (fabsl(t4) > fabsl(t2)) { + t3 = t4; t4 = t; + } else + t3 = t; + } + /* summing r = t1 + t2 + t3 + t4 to w1 + w2 */ + w1 = t3 + t4; + w2 = t4 - (w1 - t3); + t = t2 + w1; + w2 += w1 - (t - t2); + w1 = t + w2; + w2 += t - w1; + t = t1 + w1; + w2 += w1 - (t - t1); + w1 = t + w2; + *w = w2 - (w1 - t); + return (w1); +} + +ldcomplex +cpowl(ldcomplex z, ldcomplex w) { + ldcomplex ans; + long double x, y, u, v, t, c, s, r; + long double t1, t2, t3, t4, x1, x2, y1, y2, u1, v1, b[4], w1, w2; + int ix, iy, hx, hy, hv, hu, iu, iv, i, j, k; + + x = LD_RE(z); + y = LD_IM(z); + u = LD_RE(w); + v = LD_IM(w); + hx = HI_XWORD(x); + hy = HI_XWORD(y); + hu = HI_XWORD(u); + hv = HI_XWORD(v); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + iu = hu & 0x7fffffff; + iv = hv & 0x7fffffff; + + j = 0; + if (v == zero) { /* z**(real) */ + if (u == one) { /* (anything) ** 1 is itself */ + LD_RE(ans) = x; + LD_IM(ans) = y; + } else if (u == zero) { /* (anything) ** 0 is 1 */ + LD_RE(ans) = one; + LD_IM(ans) = zero; + } else if (y == zero) { /* real ** real */ + LD_IM(ans) = zero; + if (hx < 0 && ix < hiinf && iu < hiinf) { + /* -x ** u is exp(i*pi*u)*pow(x,u) */ + r = powl(-x, u); + sincospil(u, &s, &c); + LD_RE(ans) = (c == zero)? c: c * r; + LD_IM(ans) = (s == zero)? 
s: s * r; + } else + LD_RE(ans) = powl(x, u); + } else if (x == zero || ix >= hiinf || iy >= hiinf) { + if (isnanl(x) || isnanl(y) || isnanl(u)) + LD_RE(ans) = LD_IM(ans) = x + y + u; + else { + if (x == zero) /* r = hypot(x,y) ** u */ + r = powl(fabsl(y), u); + else + r = powl(fabsl(x) + fabsl(y), u); + t = atan2pil(y, x); + sincospil(t * u, &s, &c); + LD_RE(ans) = (c == zero)? c: c * r; + LD_IM(ans) = (s == zero)? s: s * r; + } + } else if (fabsl(x) == fabsl(y)) { /* |x| = |y| */ + if (hx >= 0) { + t = (hy >= 0)? 0.25L : -0.25L; + sincospil(t * u, &s, &c); + } else if ((LAST(u) & 3) == 0) { + t = (hy >= 0)? 0.75L : -0.75L; + sincospil(t * u, &s, &c); + } else { + r = (hy >= 0)? u : -u; + t = -0.25L * r; + w1 = r + t; + w2 = t - (w1 - r); + sincospil(w1, &t1, &t2); + sincospil(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + } + if (ix < 0x3ffe0000) /* |x| < 1/2 */ + r = powl(fabsl(x + x), u) * exp2l(-0.5L * u); + else if (ix >= 0x3fff0000 || iu < 0x400cfff8) + /* |x| >= 1 or |u| < 16383 */ + r = powl(fabsl(x), u) * exp2l(0.5L * u); + else /* special treatment */ + j = 2; + if (j == 0) { + LD_RE(ans) = (c == zero)? c: c * r; + LD_IM(ans) = (s == zero)? 
s: s * r; + } + } else + j = 1; + if (j == 0) + return (ans); + } + if (iu >= hiinf || iv >= hiinf || ix >= hiinf || iy >= hiinf) { + /* + * non-zero imag part(s) with inf component(s) yields NaN + */ + t = fabsl(x) + fabsl(y) + fabsl(u) + fabsl(v); + LD_RE(ans) = LD_IM(ans) = t - t; + } else { + k = 0; /* no scaling */ + if (iu > 0x7ffe0000 || iv > 0x7ffe0000) { + u *= 1.52587890625000000000e-05L; + v *= 1.52587890625000000000e-05L; + k = 1; /* scale u and v by 2**-16 */ + } + /* + * Use similated higher precision arithmetic to compute: + * r = u * log(hypot(x, y)) - v * atan2(y, x) + * q = u * atan2(y, x) + v * log(hypot(x, y)) + */ + + t1 = __k_clog_rl(x, y, &t2); + t3 = __k_atan2l(y, x, &t4); + x1 = t1; HALF(x1); + y1 = t3; HALF(y1); + u1 = u; HALF(u1); + v1 = v; HALF(v1); + x2 = t2 - (x1 - t1); /* log(hypot(x,y)) = x1 + x2 */ + y2 = t4 - (y1 - t3); /* atan2(y,x) = y1 + y2 */ + /* compute q = u * atan2(y, x) + v * log(hypot(x, y)) */ + if (j != 2) { + b[0] = u1 * y1; + b[1] = (u - u1) * y1 + u * y2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = v1 * x1; + b[3] = (v - v1) * x1 + v * x2; + w1 = sum4fpl(b, &w2); + } + sincosl(w1, &t1, &t2); + sincosl(w2, &t3, &t4); + s = t1 * t4 + t3 * t2; + c = t2 * t4 - t1 * t3; + if (k == 1) /* square j times */ + for (i = 0; i < 10; i++) { + t1 = s * c; + c = (c + s) * (c - s); + s = t1 + t1; + } + } + /* compute r = u * (t1, t2) - v * (t3, t4) */ + b[0] = u1 * x1; + b[1] = (u - u1) * x1 + u * x2; + if (j == 1) { /* v = 0 */ + w1 = b[0] + b[1]; + w2 = b[1] - (w1 - b[0]); + } else { + b[2] = -v1 * y1; + b[3] = (v1 - v) * y1 - v * y2; + w1 = sum4fpl(b, &w2); + } + /* scale back unless w1 is large enough to cause exception */ + if (k != 0 && fabsl(w1) < 20000.0L) { + w1 *= 65536.0L; w2 *= 65536.0L; + } + hx = HI_XWORD(w1); + ix = hx & 0x7fffffff; + /* compute exp(w1 + w2) */ + k = 0; + if (ix < 0x3f8c0000) /* exp(tiny < 2**-115) = 1 */ + r = one; + else if (ix >= 0x400c6760) /* 
overflow/underflow */ + r = (hx < 0)? tiny * tiny : huge * huge; + else { /* compute exp(w1 + w2) */ + k = (int) (invln2 * w1 + ((hx >= 0)? 0.5L : -0.5L)); + t1 = (long double) k; + t2 = w1 - t1 * ln2hil; + t3 = w2 - t1 * ln2lol; + r = expl(t2 + t3); + } + if (c != zero) c *= r; + if (s != zero) s *= r; + if (k != 0) { + c = scalbnl(c, k); + s = scalbnl(s, k); + } + LD_RE(ans) = c; + LD_IM(ans) = s; + } + return (ans); +} diff --git a/usr/src/libm/src/complex/cproj.c b/usr/src/libm/src/complex/cproj.c new file mode 100644 index 0000000..fa00293 --- /dev/null +++ b/usr/src/libm/src/complex/cproj.c @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cproj.c 1.3 06/01/31 SMI" + +#pragma weak cproj = __cproj + +/* INDENT OFF */ +/* + * dcomplex cproj(dcomplex z); + * + * If one of the component of z = (x,y) is an inf, then + * cproj(z) = (+inf, copysign(0,y)); + * otherwise, + * cproj(z) = z + */ +/* INDENT ON */ + +#include "libm.h" /* fabs */ +#include "complex_wrapper.h" + +static const double zero = 0.0; + +dcomplex +cproj(dcomplex z) { + double x, y; + int ix, iy, hx, hy, lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (ISINF(iy, ly)) { + D_RE(z) = fabs(y); + D_IM(z) = hy >= 0 ? zero : -zero; + } else if (ISINF(ix, lx)) { + D_RE(z) = fabs(x); + D_IM(z) = hy >= 0 ? zero : -zero; + } + return (z); +} diff --git a/usr/src/libm/src/complex/cprojf.c b/usr/src/libm/src/complex/cprojf.c new file mode 100644 index 0000000..5fb69c1 --- /dev/null +++ b/usr/src/libm/src/complex/cprojf.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cprojf.c 1.3 06/01/31 SMI" + +#pragma weak cprojf = __cprojf + +#include "libm.h" +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const float zero = 0.0F; +/* INDENT ON */ + +fcomplex +cprojf(fcomplex z) { + float x, y; + int ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + hy = THE_WORD(y); + ix = hx & 0x7fffffff; + iy = hy & 0x7fffffff; + if (iy == 0x7f800000) { + F_RE(z) = fabsf(y); + F_IM(z) = hy >= 0 ? zero : -zero; + } else if (ix == 0x7f800000) { + F_RE(z) = fabsf(x); + F_IM(z) = hy >= 0 ? zero : -zero; + } + return (z); +} diff --git a/usr/src/libm/src/complex/cprojl.c b/usr/src/libm/src/complex/cprojl.c new file mode 100644 index 0000000..43afc81 --- /dev/null +++ b/usr/src/libm/src/complex/cprojl.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)cprojl.c 1.3 06/01/31 SMI" + +#pragma weak cprojl = __cprojl + +#include "libm.h" /* fabsl */ +#include "complex_wrapper.h" +/* INDENT OFF */ +static const long double zero = 0.0L; +/* INDENT ON */ + +ldcomplex +cprojl(ldcomplex z) { + long double x, y; + int hy; + + x = LD_RE(z); + y = LD_IM(z); +#if defined(__i386) + hy = ((int *) &y)[2] << 16; +#else + hy = ((int *) &y)[0]; +#endif + if (isinfl(y)) { + LD_RE(z) = fabsl(y); + LD_IM(z) = hy >= 0 ? zero : -zero; + } else if (isinfl(x)) { + LD_RE(z) = fabsl(x); + LD_IM(z) = hy >= 0 ? zero : -zero; + } + return (z); +} diff --git a/usr/src/libm/src/complex/creal.c b/usr/src/libm/src/complex/creal.c new file mode 100644 index 0000000..ae46160 --- /dev/null +++ b/usr/src/libm/src/complex/creal.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)creal.c 1.3 06/01/31 SMI" + +#pragma weak creal = __creal + +#include "libm.h" +#include "complex_wrapper.h" + +double +creal(dcomplex z) { + return (D_RE(z)); +} diff --git a/usr/src/libm/src/complex/crealf.c b/usr/src/libm/src/complex/crealf.c new file mode 100644 index 0000000..b079979 --- /dev/null +++ b/usr/src/libm/src/complex/crealf.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)crealf.c 1.3 06/01/31 SMI" + +#pragma weak crealf = __crealf + +#include "libm.h" +#include "complex_wrapper.h" + +float +crealf(fcomplex z) { + return (F_RE(z)); +} diff --git a/usr/src/libm/src/complex/creall.c b/usr/src/libm/src/complex/creall.c new file mode 100644 index 0000000..2eaf999 --- /dev/null +++ b/usr/src/libm/src/complex/creall.c @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)creall.c 1.3 06/01/31 SMI" + +#pragma weak creall = __creall + +#include "libm.h" +#include "complex_wrapper.h" + +long double +creall(ldcomplex z) { + return (LD_RE(z)); +} diff --git a/usr/src/libm/src/complex/csin.c b/usr/src/libm/src/complex/csin.c new file mode 100644 index 0000000..04164ff --- /dev/null +++ b/usr/src/libm/src/complex/csin.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)csin.c 1.3 06/01/31 SMI" + +#pragma weak csin = __csin + +/* INDENT OFF */ +/* + * dcomplex csin(dcomplex z); + * + * If z = x+iy, then since csin(iz) = i*csinh(z), we have + * + * csin(z) = csin((-1)*(-z)) = csin(i*i*(-z)) + * = i*csinh(i*(-z)) = i*csinh(i*(-x-yi)) + * = i*csinh(y-ix) + * = -Im(csinh(y-ix))+i*Re(csinh(y-ix)) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +csin(dcomplex z) { + double x, y; + dcomplex ans, ct; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = y; + D_IM(z) = -x; + ct = csinh(z); + D_RE(ans) = -D_IM(ct); + D_IM(ans) = D_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/csinf.c b/usr/src/libm/src/complex/csinf.c new file mode 100644 index 0000000..a899b7f --- /dev/null +++ b/usr/src/libm/src/complex/csinf.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)csinf.c 1.3 06/01/31 SMI" + +#pragma weak csinf = __csinf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +csinf(fcomplex z) { + float x, y; + fcomplex ans, ct; + + x = F_RE(z); + y = F_IM(z); + F_RE(z) = y; + F_IM(z) = -x; + ct = csinhf(z); + F_RE(ans) = -F_IM(ct); + F_IM(ans) = F_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/csinh.c b/usr/src/libm/src/complex/csinh.c new file mode 100644 index 0000000..fd29de6 --- /dev/null +++ b/usr/src/libm/src/complex/csinh.c @@ -0,0 +1,136 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)csinh.c 1.3 06/01/31 SMI" + +#pragma weak csinh = __csinh + +/* INDENT OFF */ +/* + * dcomplex csinh(dcomplex z); + * + * z -z x -x + * e - e e (cos(y)+i*sin(y)) - e (cos(-y)+i*sin(-y)) + * sinh z = -------------- = --------------------------------------------- + * 2 2 + * x -x x -x + * cos(y) ( e - e ) + i*sin(y) (e + e ) + * = -------------------------------------------- + * 2 + * + * = cos(y) sinh(x) + i sin(y) cosh(x) + * + * Implementation Note + * ------------------- + * + * |x| -|x| |x| -2|x| -2|x| -P-4 + * Note that e +- e = e ( 1 +- e ). If e < 2 , where + * + * P stands for the number of significant bits of the machine precision, + * |x| + * then the result will be rounded to e . Therefore, we have + * + * z + * e + * sinh z = ----- if |x| >= (P/2 + 2)*ln2 + * 2 + * + * EXCEPTION (conform to ISO/IEC 9899:1999(E)): + * csinh(0,0)=(0,0) + * csinh(0,inf)=(+-0,NaN) + * csinh(0,NaN)=(+-0,NaN) + * csinh(x,inf) = (NaN,NaN) for finite positive x + * csinh(x,NaN) = (NaN,NaN) for finite non-zero x + * csinh(inf,0) = (inf, 0) + * csinh(inf,y) = (inf*cos(y),inf*sin(y)) for positive finite y + * csinh(inf,inf) = (+-inf,NaN) + * csinh(inf,NaN) = (+-inf,NaN) + * csinh(NaN,0) = (NaN,0) + * csinh(NaN,y) = (NaN,NaN) for non-zero y + * csinh(NaN,NaN) = (NaN,NaN) + */ +/* INDENT ON */ + +#include "libm.h" /* cosh/exp/fabs/scalbn/sinh/sincos/__k_cexp */ +#include "complex_wrapper.h" + +dcomplex +csinh(dcomplex z) { + double t, x, y, S, C; + int hx, ix, lx, hy, iy, ly, n; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + ix = hx & 0x7fffffff; + hy = HI_WORD(y); + ly = LO_WORD(y); + iy = hy & 0x7fffffff; + x = fabs(x); + y = fabs(y); + + (void) sincos(y, &S, &C); + if (ix >= 0x403c0000) { /* |x| > 28 = prec/2 (14,28,34,60) */ + if (ix >= 0x40862E42) { /* |x| > 709.78... 
~ log(2**1024) */ + if (ix >= 0x7ff00000) { /* |x| is inf or NaN */ + if ((iy | ly) == 0) { + D_RE(ans) = x; + D_IM(ans) = y; + } else if (iy >= 0x7ff00000) { + D_RE(ans) = x; + D_IM(ans) = x - y; + } else { + D_RE(ans) = C * x; + D_IM(ans) = S * x; + } + } else { + /* return exp(x)=t*2**n */ + t = __k_cexp(x, &n); + D_RE(ans) = scalbn(C * t, n - 1); + D_IM(ans) = scalbn(S * t, n - 1); + } + } else { + t = exp(x) * 0.5; + D_RE(ans) = C * t; + D_IM(ans) = S * t; + } + } else { + if ((ix | lx) == 0) { /* x = 0, return (0,S) */ + D_RE(ans) = 0.0; + D_IM(ans) = S; + } else { + D_RE(ans) = C * sinh(x); + D_IM(ans) = S * cosh(x); + } + } + if (hx < 0) + D_RE(ans) = -D_RE(ans); + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/csinhf.c b/usr/src/libm/src/complex/csinhf.c new file mode 100644 index 0000000..b5b6166 --- /dev/null +++ b/usr/src/libm/src/complex/csinhf.c @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)csinhf.c 1.4 06/01/23 SMI" + +#pragma weak csinhf = __csinhf + +#include "libm.h" +#include "complex_wrapper.h" + +#if defined(__i386) && !defined(__amd64) +extern int __swapRP(int); +#endif + +static const float zero = 0.0F, half = 0.5F; + +fcomplex +csinhf(fcomplex z) { + float x, y, S, C; + double t; + int hx, ix, hy, iy, n; + fcomplex ans; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + ix = hx & 0x7fffffff; + hy = THE_WORD(y); + iy = hy & 0x7fffffff; + x = fabsf(x); + y = fabsf(y); + + sincosf(y, &S, &C); + if (ix >= 0x41600000) { /* |x| > 14 = prec/2 (14,28,34,60) */ + if (ix >= 0x42B171AA) { /* |x| > 88.722... ~ log(2**128) */ + if (ix >= 0x7f800000) { /* |x| is inf or NaN */ + if (iy == 0) { + F_RE(ans) = x; + F_IM(ans) = y; + } else if (iy >= 0x7f800000) { + F_RE(ans) = x; + F_IM(ans) = x - y; + } else { + F_RE(ans) = C * x; + F_IM(ans) = S * x; + } + } else { +#if defined(__i386) && !defined(__amd64) + int rp = __swapRP(fp_extended); +#endif + /* return (C, S) * exp(x) / 2 */ + t = __k_cexp((double)x, &n); + F_RE(ans) = (float)scalbn(C * t, n - 1); + F_IM(ans) = (float)scalbn(S * t, n - 1); +#if defined(__i386) && !defined(__amd64) + if (rp != fp_extended) + (void) __swapRP(rp); +#endif + } + } else { + t = expf(x) * half; + F_RE(ans) = C * t; + F_IM(ans) = S * t; + } + } else { + if (ix == 0) { /* x = 0, return (0,S) */ + F_RE(ans) = zero; + F_IM(ans) = S; + } else { + F_RE(ans) = C * sinhf(x); + F_IM(ans) = S * coshf(x); + } + } + if (hx < 0) + F_RE(ans) = -F_RE(ans); + if (hy < 0) + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/csinhl.c b/usr/src/libm/src/complex/csinhl.c new file mode 100644 index 0000000..cdc6e9b --- /dev/null +++ b/usr/src/libm/src/complex/csinhl.c @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)csinhl.c 1.3 06/01/31 SMI" + +#pragma weak csinhl = __csinhl + +#include "libm.h" /* coshl/expl/fabsl/scalbnl/sincosl/sinhl/__k_cexpl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double zero = 0.0L, half = 0.5L; +/* INDENT ON */ + +ldcomplex +csinhl(ldcomplex z) { + long double t, x, y, S, C; + int hx, ix, hy, iy, n; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + ix = hx & 0x7fffffff; + hy = HI_XWORD(y); + iy = hy & 0x7fffffff; + x = fabsl(x); + y = fabsl(y); + + (void) sincosl(y, &S, &C); + if (ix >= 0x4004e000) { /* |x| > 60 = prec/2 (14,28,34,60) */ + if (ix >= 0x400C62E4) { /* |x| > 11356.52... 
~ log(2**16384) */ + if (ix >= 0x7fff0000) { /* |x| is inf or NaN */ + if (y == zero) { + LD_RE(ans) = x; + LD_IM(ans) = y; + } else if (iy >= 0x7fff0000) { + LD_RE(ans) = x; + LD_IM(ans) = x - y; + } else { + LD_RE(ans) = C * x; + LD_IM(ans) = S * x; + } + } else { + /* return exp(x)=t*2**n */ + t = __k_cexpl(x, &n); + LD_RE(ans) = scalbnl(C * t, n - 1); + LD_IM(ans) = scalbnl(S * t, n - 1); + } + } else { + t = expl(x) * half; + LD_RE(ans) = C * t; + LD_IM(ans) = S * t; + } + } else { + if (x == zero) { /* x = 0, return (0,S) */ + LD_RE(ans) = zero; + LD_IM(ans) = S; + } else { + LD_RE(ans) = C * sinhl(x); + LD_IM(ans) = S * coshl(x); + } + } + if (hx < 0) + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/csinl.c b/usr/src/libm/src/complex/csinl.c new file mode 100644 index 0000000..860be7e --- /dev/null +++ b/usr/src/libm/src/complex/csinl.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)csinl.c 1.3 06/01/31 SMI" + +#pragma weak csinl = __csinl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +csinl(ldcomplex z) { + long double x, y; + ldcomplex ans, ct; + + x = LD_RE(z); + y = LD_IM(z); + LD_RE(z) = y; + LD_IM(z) = -x; + ct = csinhl(z); + LD_RE(ans) = -LD_IM(ct); + LD_IM(ans) = LD_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/csqrt.c b/usr/src/libm/src/complex/csqrt.c new file mode 100644 index 0000000..40a2cfe --- /dev/null +++ b/usr/src/libm/src/complex/csqrt.c @@ -0,0 +1,209 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)csqrt.c 1.4 06/01/31 SMI" + +#pragma weak csqrt = __csqrt + +/* INDENT OFF */ +/* + * dcomplex csqrt(dcomplex z); + * + * 2 2 2 + * Let w=r+i*s = sqrt(x+iy). Then (r + i s) = r - s + i 2sr = x + i y. + * + * Hence x = r*r-s*s, y = 2sr. + * + * Note that x*x+y*y = (s*s+r*r)**2. Thus, we have + * ________ + * 2 2 / 2 2 + * (1) r + s = \/ x + y , + * + * 2 2 + * (2) r - s = x + * + * (3) 2sr = y. 
+ * + * Perform (1)-(2) and (1)+(2), we obtain + * + * 2 + * (4) 2 r = hypot(x,y)+x, + * + * 2 + * (5) 2*s = hypot(x,y)-x + * ________ + * / 2 2 + * where hypot(x,y) = \/ x + y . + * + * In order to avoid numerical cancellation, we use formula (4) for + * positive x, and (5) for negative x. The other component is then + * computed by formula (3). + * + * + * ALGORITHM + * ------------------ + * + * (assume x and y are of medium size, i.e., no over/underflow in squaring) + * + * If x >=0 then + * ________ + * / 2 2 + * 2 \/ x + y + x y + * r = ---------------------, s = -------; (6) + * 2 2 r + * + * (note that we choose sign(s) = sign(y) to force r >=0). + * Otherwise, + * ________ + * / 2 2 + * 2 \/ x + y - x y + * s = ---------------------, r = -------; (7) + * 2 2 s + * + * EXCEPTION: + * + * One may use the polar coordinate of a complex number to justify the + * following exception cases: + * + * EXCEPTION CASES (conform to ISO/IEC 9899:1999(E)): + * csqrt(+-0+ i 0 ) = 0 + i 0 + * csqrt( x + i inf ) = inf + i inf for all x (including NaN) + * csqrt( x + i NaN ) = NaN + i NaN with invalid for finite x + * csqrt(-inf+ iy ) = 0 + i inf for finite positive-signed y + * csqrt(+inf+ iy ) = inf + i 0 for finite positive-signed y + * csqrt(-inf+ i NaN) = NaN +-i inf + * csqrt(+inf+ i NaN) = inf + i NaN + * csqrt(NaN + i y ) = NaN + i NaN for finite y + * csqrt(NaN + i NaN) = NaN + i NaN + */ +/* INDENT ON */ + +#include "libm.h" /* fabs/sqrt */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const double + two300 = 2.03703597633448608627e+90, + twom300 = 4.90909346529772655310e-91, + two599 = 2.07475778444049647926e+180, + twom601 = 1.20495993255144205887e-181, + two = 2.0, + zero = 0.0, + half = 0.5; +/* INDENT ON */ + +dcomplex +csqrt(dcomplex z) { + dcomplex ans; + double x, y, t, ax, ay; + int n, ix, iy, hx, hy, lx, ly; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + hy = HI_WORD(y); + ly = LO_WORD(y); + ix = hx & 0x7fffffff; + iy 
= hy & 0x7fffffff; + ay = fabs(y); + ax = fabs(x); + if (ix >= 0x7ff00000 || iy >= 0x7ff00000) { + /* x or y is Inf or NaN */ + if (ISINF(iy, ly)) + D_IM(ans) = D_RE(ans) = ay; + else if (ISINF(ix, lx)) { + if (hx > 0) { + D_RE(ans) = ax; + D_IM(ans) = ay * zero; + } else { + D_RE(ans) = ay * zero; + D_IM(ans) = ax; + } + } else + D_IM(ans) = D_RE(ans) = ax + ay; + } else if ((iy | ly) == 0) { /* y = 0 */ + if (hx >= 0) { + D_RE(ans) = sqrt(ax); + D_IM(ans) = zero; + } else { + D_IM(ans) = sqrt(ax); + D_RE(ans) = zero; + } + } else if (ix >= iy) { + n = (ix - iy) >> 20; + if (n >= 30) { /* x >> y or y=0 */ + t = sqrt(ax); + } else if (ix >= 0x5f300000) { /* x > 2**500 */ + ax *= twom601; + y *= twom601; + t = two300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else if (iy < 0x20b00000) { /* y < 2**-500 */ + ax *= two599; + y *= two599; + t = twom300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else + t = sqrt(half * (ax + sqrt(ax * ax + ay * ay))); + if (hx >= 0) { + D_RE(ans) = t; + D_IM(ans) = ay / (t + t); + } else { + D_IM(ans) = t; + D_RE(ans) = ay / (t + t); + } + } else { + n = (iy - ix) >> 20; + if (n >= 30) { /* y >> x */ + if (n >= 60) + t = sqrt(half * ay); + else if (iy >= 0x7fe00000) + t = sqrt(half * ay + half * ax); + else if (ix <= 0x00100000) + t = half * sqrt(two * (ay + ax)); + else + t = sqrt(half * (ay + ax)); + } else if (iy >= 0x5f300000) { /* y > 2**500 */ + ax *= twom601; + y *= twom601; + t = two300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else if (ix < 0x20b00000) { /* x < 2**-500 */ + ax *= two599; + y *= two599; + t = twom300 * sqrt(ax + sqrt(ax * ax + y * y)); + } else + t = sqrt(half * (ax + sqrt(ax * ax + ay * ay))); + if (hx >= 0) { + D_RE(ans) = t; + D_IM(ans) = ay / (t + t); + } else { + D_IM(ans) = t; + D_RE(ans) = ay / (t + t); + } + } + if (hy < 0) + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/csqrtf.c b/usr/src/libm/src/complex/csqrtf.c new file mode 100644 index 0000000..1cee77f --- /dev/null +++ 
b/usr/src/libm/src/complex/csqrtf.c @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)csqrtf.c 1.3 06/01/31 SMI" + +#pragma weak csqrtf = __csqrtf + +#include "libm.h" /* sqrt/fabsf/sqrtf */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const float zero = 0.0F; +/* INDENT ON */ + +fcomplex +csqrtf(fcomplex z) { /* square root of z = x+iy: non-negative real part, imaginary part signed like y */ + fcomplex ans; + double dt, dx, dy; + float x, y, t, ax, ay, w; + int ix, iy, hx, hy; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); /* presumably the raw bit pattern of x; the masks below rely on that */ + hy = THE_WORD(y); + ix = hx & 0x7fffffff; /* drop the sign bit */ + iy = hy & 0x7fffffff; + ay = fabsf(y); + ax = fabsf(x); + if (ix >= 0x7f800000 || iy >= 0x7f800000) { + /* x or y is Inf or NaN */ + if (iy == 0x7f800000) /* y is Inf: both parts become Inf whatever x is */ + F_IM(ans) = F_RE(ans) = ay; + else if (ix == 0x7f800000) { /* x is Inf, y is finite or NaN */ + if (hx > 0) { /* x = +Inf */ + F_RE(ans) = ax; + F_IM(ans) = ay * zero; + } else { /* x = -Inf */ + F_RE(ans) = ay * zero; + F_IM(ans) = ax; + } + } else /* a NaN with no Inf present: it propagates to both parts */ + F_IM(ans) = F_RE(ans) = ax + ay; + } else if (iy == 0) { /* y is zero: only a real square root is needed */ + if (hx >= 0) { + F_RE(ans) = sqrtf(ax); + F_IM(ans) = zero; + } else { + F_IM(ans) = sqrtf(ax); + F_RE(ans) = zero; + } + } else { /* x finite, y finite and nonzero: evaluate in double */ + dx = (double) 
ax; + dy = (double) ay; + dt = sqrt(0.5 * (sqrt(dx * dx + dy * dy) + dx)); /* sqrt((|z|+|x|)/2) */ + t = (float) dt; + w = (float) (dy / (dt + dt)); /* |y|/(2*dt) */ + if (hx >= 0) { /* x >= 0: root is (t, w) */ + F_RE(ans) = t; + F_IM(ans) = w; + } else { /* x < 0: the two parts trade places */ + F_IM(ans) = t; + F_RE(ans) = w; + } + } + if (hy < 0) /* imaginary part takes the sign of y */ + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/csqrtl.c b/usr/src/libm/src/complex/csqrtl.c new file mode 100644 index 0000000..349cc6c --- /dev/null +++ b/usr/src/libm/src/complex/csqrtl.c @@ -0,0 +1,144 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)csqrtl.c 1.3 06/01/31 SMI" + +#pragma weak csqrtl = __csqrtl + +#include "libm.h" /* fabsl/isinfl/sqrtl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double + twom9001 = 2.6854002716003034957421765100615693043656e-2710L, /* 2**-9001 */ + twom4500 = 2.3174987687592429423263242862381544149252e-1355L, /* 2**-4500 */ + two8999 = 9.3095991180122343502582347372163290310934e+2708L, /* 2**8999 */ + two4500 = 4.3149968987270974283777803545571722250806e+1354L, /* 2**4500 */ + zero = 0.0L, + half = 0.5L, + two = 2.0L; +/* INDENT ON */ + +ldcomplex +csqrtl(ldcomplex z) { /* long double square root of z = x+iy: non-negative real part, imaginary part signed like y */ + ldcomplex ans; + long double x, y, t, ax, ay; + int n, ix, iy, hx, hy; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); /* presumably sign and exponent in the top 16 bits; the 0x7fff0000 tests rely on that */ + hy = HI_XWORD(y); + ix = hx & 0x7fffffff; /* drop the sign bit */ + iy = hy & 0x7fffffff; + ay = fabsl(y); + ax = fabsl(x); + if (ix >= 0x7fff0000 || iy >= 0x7fff0000) { + /* x or y is Inf or NaN */ + if (isinfl(y)) /* y is Inf: both parts become Inf whatever x is */ + LD_IM(ans) = LD_RE(ans) = ay; + else if (isinfl(x)) { + if (hx > 0) { /* x = +Inf */ + LD_RE(ans) = ax; + LD_IM(ans) = ay * zero; + } else { /* x = -Inf */ + LD_RE(ans) = ay * zero; + LD_IM(ans) = ax; + } + } else /* a NaN with no Inf present: it propagates to both parts */ + LD_IM(ans) = LD_RE(ans) = ax + ay; + } else if (y == zero) { /* only a real square root is needed */ + if (hx >= 0) { + LD_RE(ans) = sqrtl(ax); + LD_IM(ans) = zero; + } else { + LD_IM(ans) = sqrtl(ax); + LD_RE(ans) = zero; + } + } else if (ix >= iy) { /* |x| is the larger operand, judged on the high words */ + n = (ix - iy) >> 16; /* gap between the two exponent fields */ +#if defined(__i386) /* 64 significant bits */ + if (n >= 35) +#else /* 113 significant bits */ + if (n >= 60) +#endif + t = sqrtl(ax); /* presumably y is too small to affect the result at this precision */ + else if (ix >= 0x5f3f0000) { /* x > 2**8000 */ + ax *= twom9001; /* scale down so the squares stay finite */ + y *= twom9001; + t = two4500 * sqrtl(ax + sqrtl(ax * ax + y * y)); /* 2**4500 undoes the scaling and supplies sqrt(1/2) */ + } else if (iy <= 0x20bf0000) { /* y < 2**-8000 */ + ax *= two8999; /* scale up so y*y does not underflow */ + y *= two8999; + t = twom4500 * sqrtl(ax + sqrtl(ax * ax + y * y)); /* 2**-4500 undoes the scaling and supplies sqrt(1/2) */ + } else + t = sqrtl(half * (ax + sqrtl(ax * ax + y * y))); /* sqrt((|x|+|z|)/2) */ + + if (hx >= 0) { /* x >= 0: root is (t, |y|/(2t)) */ + LD_RE(ans) = t; + LD_IM(ans) = ay / (t + t); + } else { /* x < 0: the two parts trade places */ + LD_IM(ans) = t; + LD_RE(ans) = ay / (t + t); + } + } else { /* |y| is the larger operand */ + n = (iy - ix) >> 16; +#if defined(__i386) /* 64 significant bits */ + if (n >= 35) 
{ /* } */ +#else /* 113 significant bits */ + if (n >= 60) { +#endif + if (n >= 120) /* x dropped entirely */ + t = sqrtl(half * ay); + else if (iy >= 0x7ffe0000) /* presumably halved term by term so the sum cannot overflow */ + t = sqrtl(half * ay + half * ax); + else if (ix <= 0x00010000) /* presumably doubled first so a tiny x is not lost when halving */ + t = half * (sqrtl(two * (ax + ay))); + else + t = sqrtl(half * (ax + ay)); + } else if (iy >= 0x5f3f0000) { /* y > 2**8000 */ + ax *= twom9001; /* same scaling as in the |x| >= |y| case */ + y *= twom9001; + t = two4500 * sqrtl(ax + sqrtl(ax * ax + y * y)); + } else if (ix <= 0x20bf0000) { /* x < 2**-8000 */ + ax *= two8999; + y *= two8999; + t = twom4500 * sqrtl(ax + sqrtl(ax * ax + y * y)); + } else + t = sqrtl(half * (ax + sqrtl(ax * ax + y * y))); + + if (hx >= 0) { + LD_RE(ans) = t; + LD_IM(ans) = ay / (t + t); + } else { + LD_IM(ans) = t; + LD_RE(ans) = ay / (t + t); + } + } + if (hy < 0) /* imaginary part takes the sign of y */ + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ctan.c b/usr/src/libm/src/complex/ctan.c new file mode 100644 index 0000000..1dd8f6b --- /dev/null +++ b/usr/src/libm/src/complex/ctan.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ctan.c 1.3 06/01/31 SMI" + +#pragma weak ctan = __ctan + +/* INDENT OFF */ +/* + * dcomplex ctan(dcomplex z); + * + * If z = x+iy, then since ctan(iz) = i*ctanh(z), we have + * + * ctan(z) = ctan((-1)*(-z)) = ctan(i*i*(-z)) + * = i*ctanh(i*(-z)) = i*ctanh(i*(-x-yi)) + * = i*ctanh(y-ix) + * = -Im(ctanh(y-ix))+i*Re(ctanh(y-ix)) + */ +/* INDENT ON */ + +#include "libm.h" +#include "complex_wrapper.h" + +dcomplex +ctan(dcomplex z) { /* complex tangent, obtained from ctanh(y-ix) as derived above */ + double x, y; + dcomplex ans, ct; + + x = D_RE(z); + y = D_IM(z); + D_RE(z) = y; /* the by-value argument is reused to hold y-ix */ + D_IM(z) = -x; + ct = ctanh(z); + D_RE(ans) = -D_IM(ct); /* multiply ct by i: (a,b) becomes (-b,a) */ + D_IM(ans) = D_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/ctanf.c b/usr/src/libm/src/complex/ctanf.c new file mode 100644 index 0000000..3c9c09c --- /dev/null +++ b/usr/src/libm/src/complex/ctanf.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ctanf.c 1.3 06/01/31 SMI" + +#pragma weak ctanf = __ctanf + +#include "libm.h" +#include "complex_wrapper.h" + +fcomplex +ctanf(fcomplex z) { /* float complex tangent: i times ctanhf(y-ix) for z = x+iy */ + float x, y; + fcomplex ans, ct; + + x = F_RE(z); + y = F_IM(z); + F_RE(z) = y; /* the by-value argument is reused to hold y-ix */ + F_IM(z) = -x; + ct = ctanhf(z); + F_RE(ans) = -F_IM(ct); /* multiply ct by i: (a,b) becomes (-b,a) */ + F_IM(ans) = F_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/ctanh.c b/usr/src/libm/src/complex/ctanh.c new file mode 100644 index 0000000..5366a81 --- /dev/null +++ b/usr/src/libm/src/complex/ctanh.c @@ -0,0 +1,175 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ctanh.c 1.3 06/01/31 SMI" + +#pragma weak ctanh = __ctanh + +/* INDENT OFF */ +/* + * dcomplex ctanh(dcomplex z); + * + * tanh x + i tan y sinh 2x + i sin 2y + * ctanh z = --------------------- = -------------------- + * 1 + i tanh(x)tan(y) cosh 2x + cos 2y + * + * For |x| >= prec/2 (14,28,34,60 for single, double, double extended, quad), + * we use + * + * 1 2x 2 sin 2y + * cosh 2x = sinh 2x = --- e and hence ctanh z = 1 + i -----------; + * 2 2x + * e + * + * otherwise, to avoid cancellation, for |x| < prec/2, + * 2x 2 + * (e - 1) 2 2 + * cosh 2x + cos 2y = 1 + ------------ + cos y - sin y + * 2x + * 2 e + * + * 1 2x 2 -2x 2 + * = --- (e - 1) e + 2 cos y + * 2 + * and + * + * [ 2x ] + * 1 [ 2x e - 1 ] + * sinh 2x = --- [ e - 1 + --------- ] + * 2 [ 2x ] + * [ e ] + * 2x + * Implementation notes: let t = expm1(2x) = e - 1, then + * + * 1 [ t*t 2 ] 1 [ t ] + * cosh 2x + cos 2y = --- * [ ----- + 4 cos y ]; sinh 2x = --- * [ t + --- ] + * 2 [ t+1 ] 2 [ t+1 ] + * + * Hence, + * + * + * t*t+2t [4(t+1)(cos y)]*(sin y) + * ctanh z = --------------------------- + i -------------------------- + * t*t+[4(t+1)(cos y)](cos y) t*t+[4(t+1)(cos y)](cos y) + * + * EXCEPTION (conform to ISO/IEC 9899:1999(E)): + * ctanh(0,0)=(0,0) + * ctanh(x,inf) = (NaN,NaN) for finite x + * ctanh(x,NaN) = (NaN,NaN) for finite x + * ctanh(inf,y) = 1+ i*0*sin(2y) for positive-signed finite y + * ctanh(inf,inf) = (1, +-0) + * ctanh(inf,NaN) = (1, +-0) + * ctanh(NaN,0) = (NaN,0) + * ctanh(NaN,y) = (NaN,NaN) for non-zero y + * ctanh(NaN,NaN) = (NaN,NaN) + */ +/* INDENT ON */ + +#include "libm.h" /* exp/expm1/fabs/sin/tanh/sincos */ +#include "complex_wrapper.h" + +static const double four = 4.0, two = 2.0, one = 1.0, zero = 0.0; + +dcomplex +ctanh(dcomplex z) { /* hyperbolic tangent of z = x+iy; works on |x|,|y| and restores the signs at the end */ + double t, r, v, u, x, y, S, C; + int hx, ix, lx, hy, iy, ly; + dcomplex ans; + + x = D_RE(z); + y = D_IM(z); + hx = HI_WORD(x); + lx = LO_WORD(x); + ix = hx & 0x7fffffff; /* high word of x without its sign bit */ + hy = HI_WORD(y); + ly = LO_WORD(y); + iy = 
hy & 0x7fffffff; + x = fabs(x); + y = fabs(y); + + if ((iy | ly) == 0) { /* y = 0: result is (tanh x, 0), i.e. (x,0) for x = 0 or NaN */ + D_RE(ans) = tanh(x); + D_IM(ans) = zero; + } else if (iy >= 0x7ff00000) { /* y is inf or NaN */ + if (ix < 0x7ff00000) /* ctanh(finite x,inf/nan) is nan */ + D_RE(ans) = D_IM(ans) = y - y; + else if (((ix - 0x7ff00000) | lx) == 0) { /* x is inf */ + D_RE(ans) = one; + D_IM(ans) = zero; + } else { /* x is NaN */ + D_RE(ans) = x + y; + D_IM(ans) = y - y; + } + } else if (ix >= 0x403c0000) { + /* + * |x| > 28 = prec/2 (14,28,34,60) + * ctanh z ~ 1 + i (sin2y)/(exp(2x)) + */ + D_RE(ans) = one; + if (iy < 0x7fe00000) /* S = sin(2y) */ + S = sin(y + y); + else { /* y is above max/2, so use 2 sin y cos y instead of forming y+y */ + (void) sincos(y, &S, &C); + S = (S + S) * C; + } + if (ix >= 0x7fe00000) { /* |x| > max/2 */ + if (ix >= 0x7ff00000) { /* |x| is inf or NaN */ + if (((ix - 0x7ff00000) | lx) != 0) + D_RE(ans) = D_IM(ans) = x + y; + /* x is NaN */ + else + D_IM(ans) = zero * S; /* x is inf */ + } else + D_IM(ans) = S * exp(-x); /* underflow */ + } else + D_IM(ans) = (S + S) * exp(-(x + x)); + /* 2 sin 2y / exp(2x) */ + } else { + /* INDENT OFF */ + /* + * t*t+2t + * ctanh z = --------------------------- + + * t*t+[4(t+1)(cos y)](cos y) + * + * [4(t+1)(cos y)]*(sin y) + * i -------------------------- + * t*t+[4(t+1)(cos y)](cos y) + */ + /* INDENT ON */ + (void) sincos(y, &S, &C); + t = expm1(x + x); + r = (four * C) * (t + one); /* 4(t+1) cos y */ + u = t * t; + v = one / (u + r * C); /* reciprocal of the shared denominator */ + D_RE(ans) = (u + two * t) * v; + D_IM(ans) = (r * S) * v; + } + if (hx < 0) /* restore the sign of x on the real part */ + D_RE(ans) = -D_RE(ans); + if (hy < 0) /* restore the sign of y on the imaginary part */ + D_IM(ans) = -D_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ctanhf.c b/usr/src/libm/src/complex/ctanhf.c new file mode 100644 index 0000000..f6f24be --- /dev/null +++ b/usr/src/libm/src/complex/ctanhf.c @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ctanhf.c 1.3 06/01/31 SMI" + +#pragma weak ctanhf = __ctanhf + +#include "libm.h" /* expf/expm1f/fabsf/sincosf/sinf/tanhf */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const float four = 4.0F, two = 2.0F, one = 1.0F, zero = 0.0F; +/* INDENT ON */ + +fcomplex +ctanhf(fcomplex z) { /* float hyperbolic tangent of z = x+iy; works on |x|,|y| and restores the signs at the end */ + float r, u, v, t, x, y, S, C; + int hx, ix, hy, iy; + fcomplex ans; + + x = F_RE(z); + y = F_IM(z); + hx = THE_WORD(x); + ix = hx & 0x7fffffff; /* word of x without its sign bit */ + hy = THE_WORD(y); + iy = hy & 0x7fffffff; + x = fabsf(x); + y = fabsf(y); + + if (iy == 0) { /* y = 0: result is (tanh x, 0), i.e. (x,0) for x = 0 or NaN */ + F_RE(ans) = tanhf(x); + F_IM(ans) = zero; + } else if (iy >= 0x7f800000) { /* y is inf or NaN */ + if (ix < 0x7f800000) /* ctanh(finite x,inf/nan) is nan */ + F_RE(ans) = F_IM(ans) = y - y; + else if (ix == 0x7f800000) { /* x is inf */ + F_RE(ans) = one; + F_IM(ans) = zero; + } else { /* x is NaN */ + F_RE(ans) = x + y; + F_IM(ans) = y - y; + } + } else if (ix >= 0x41600000) { + /* + * |x| > 14 = prec/2 (14,28,34,60) + * ctanh z ~ 1 + i (sin2y)/(exp(2x)) + */ + F_RE(ans) = one; + if (iy < 0x7f000000) /* S = sin(2y) */ + S = sinf(y + y); + else { /* y is above max/2, so use 2 sin y cos y instead of forming y+y */ + (void) sincosf(y, &S, &C); + S = (S + S) * C; + } + if (ix >= 
0x7f000000) { /* |x| > max/2 */ + if (ix >= 0x7f800000) { /* |x| is inf or NaN */ + if (ix > 0x7f800000) /* x is NaN */ + F_RE(ans) = F_IM(ans) = x + y; + else + F_IM(ans) = zero * S; /* x is inf */ + } else + F_IM(ans) = S * expf(-x); /* underflow */ + } else + F_IM(ans) = (S + S) * expf(-(x + x)); + /* 2 sin 2y / exp(2x) */ + } else { + /* INDENT OFF */ + /* + * t*t+2t + * ctanh z = --------------------------- + * t*t+[4(t+1)(cos y)](cos y) + * + * [4(t+1)(cos y)]*(sin y) + * i -------------------------- + * t*t+[4(t+1)(cos y)](cos y) + */ + /* INDENT ON */ + (void) sincosf(y, &S, &C); + t = expm1f(x + x); + r = (four * C) * (t + one); /* 4(t+1) cos y */ + u = t * t; + v = one / (u + r * C); /* reciprocal of the shared denominator */ + F_RE(ans) = (u + two * t) * v; + F_IM(ans) = (r * S) * v; + } + if (hx < 0) /* restore the sign of x on the real part */ + F_RE(ans) = -F_RE(ans); + if (hy < 0) /* restore the sign of y on the imaginary part */ + F_IM(ans) = -F_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ctanhl.c b/usr/src/libm/src/complex/ctanhl.c new file mode 100644 index 0000000..8b95696 --- /dev/null +++ b/usr/src/libm/src/complex/ctanhl.c @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ctanhl.c 1.3 06/01/31 SMI" + +#pragma weak ctanhl = __ctanhl + +#include "libm.h" /* expl/expm1l/fabsl/isinfl/isnanl/sincosl/sinl/tanhl */ +#include "complex_wrapper.h" + +/* INDENT OFF */ +static const long double four = 4.0L, two = 2.0L, one = 1.0L, zero = 0.0L; +/* INDENT ON */ + +ldcomplex +ctanhl(ldcomplex z) { /* long double hyperbolic tangent of z = x+iy; works on |x|,|y| and restores the signs at the end */ + long double r, u, v, t, x, y, S, C; + int hx, ix, hy, iy; + ldcomplex ans; + + x = LD_RE(z); + y = LD_IM(z); + hx = HI_XWORD(x); + ix = hx & 0x7fffffff; /* high word of x without its sign bit */ + hy = HI_XWORD(y); + iy = hy & 0x7fffffff; + x = fabsl(x); + y = fabsl(y); + + if (y == zero) { /* y = 0: result is (tanh x, 0), i.e. (x,0) for x = 0 or NaN */ + LD_RE(ans) = tanhl(x); + LD_IM(ans) = zero; + } else if (iy >= 0x7fff0000) { /* y is inf or NaN */ + if (ix < 0x7fff0000) /* ctanh(finite x,inf/nan) is nan */ + LD_RE(ans) = LD_IM(ans) = y - y; + else if (isinfl(x)) { /* x is inf */ + LD_RE(ans) = one; + LD_IM(ans) = zero; + } else { /* x is NaN */ + LD_RE(ans) = x + y; + LD_IM(ans) = y - y; + } + } else if (ix >= 0x4004e000) { + /* INDENT OFF */ + /* + * |x| > 60 = prec/2 (14,28,34,60) + * ctanh z ~ 1 + i (sin2y)/(exp(2x)) + */ + /* INDENT ON */ + LD_RE(ans) = one; + if (iy < 0x7ffe0000) /* S = sin(2y) */ + S = sinl(y + y); + else { /* y is above max/2, so use 2 sin y cos y instead of forming y+y */ + (void) sincosl(y, &S, &C); + S = (S + S) * C; + } + if (ix >= 0x7ffe0000) { /* |x| > max/2 */ + if (ix >= 0x7fff0000) { /* |x| is inf or NaN */ + if (isnanl(x)) /* x is NaN */ + LD_RE(ans) = LD_IM(ans) = x + y; + else + LD_IM(ans) = zero * S; /* x is inf */ + } else + LD_IM(ans) = S * expl(-x); /* underflow */ + } else + LD_IM(ans) = (S + S) * expl(-(x + x)); + /* 2 sin 2y / exp(2x) */ + } else { + /* INDENT OFF */ + /* + * t*t+2t + * ctanh z = --------------------------- + * t*t+[4(t+1)(cos y)](cos y) + * + * [4(t+1)(cos y)]*(sin y) + * i -------------------------- + * t*t+[4(t+1)(cos y)](cos y) + */ + /* INDENT ON */ + sincosl(y, &S, &C); + t = expm1l(x + x); + r = (four * C) * (t + one); /* 4(t+1) cos y */ + u = t * t; + v = one / (u + r * C); /* reciprocal of the shared denominator */ + LD_RE(ans) = (u + two * t) * v; + 
LD_IM(ans) = (r * S) * v; + } + if (hx < 0) /* restore the sign of x on the real part */ + LD_RE(ans) = -LD_RE(ans); + if (hy < 0) /* restore the sign of y on the imaginary part */ + LD_IM(ans) = -LD_IM(ans); + return (ans); +} diff --git a/usr/src/libm/src/complex/ctanl.c b/usr/src/libm/src/complex/ctanl.c new file mode 100644 index 0000000..016af44 --- /dev/null +++ b/usr/src/libm/src/complex/ctanl.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)ctanl.c 1.3 06/01/31 SMI" + +#pragma weak ctanl = __ctanl + +#include "libm.h" +#include "complex_wrapper.h" + +ldcomplex +ctanl(ldcomplex z) { /* long double complex tangent: i times ctanhl(y-ix) for z = x+iy */ + long double x, y; + ldcomplex ans, ct; + + x = LD_RE(z); + y = LD_IM(z); + LD_RE(z) = y; /* the by-value argument is reused to hold y-ix */ + LD_IM(z) = -x; + ct = ctanhl(z); + LD_RE(ans) = -LD_IM(ct); /* multiply ct by i: (a,b) becomes (-b,a) */ + LD_IM(ans) = LD_RE(ct); + return (ans); +} diff --git a/usr/src/libm/src/complex/k_atan2.c b/usr/src/libm/src/complex/k_atan2.c new file mode 100644 index 0000000..c63f014 --- /dev/null +++ b/usr/src/libm/src/complex/k_atan2.c @@ -0,0 +1,549 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)k_atan2.c 1.5 06/01/23 SMI" + +#include "libm.h" /* __k_atan2 */ +#include "complex_wrapper.h" + +/* + * double __k_atan2(double y, double x, double *e) + * + * Compute atan2 with error terms. + * + * Important formula: + * 3 5 + * x x + * atan(x) = x - ----- + ----- - ... (for x <= 1) + * 3 5 + * + * pi 1 1 + * = --- - --- + --- - ... 
(for x > 1) + * 3 + * 2 x 3x + * + * Arg(x + y i) = sign(y) * atan2(|y|, x) + * = sign(y) * atan(|y|/x) (for x > 0) + * sign(y) * (PI - atan(|y|/|x|)) (for x < 0) + * Thus if x >> y (IEEE double: EXP(x) - EXP(y) >= 60): + * 1. (x > 0): atan2(y,x) ~ y/x + * 2. (x < 0): atan2(y,x) ~ sign(y) (PI - |y/x|) + * Otherwise if x << y: + * atan2(y,x) ~ sign(y)*PI/2 - x/y + * + * __k_atan2 calls the static functions mx_poly, mx_atan + */ + +/* + * (void) mx_poly (const double *z, const double *a, double *e, int n) + * return + * e = a + z*(a + z*(a + ... z*(a + e)...)) + * 0 2 4 2n + * Note: + * 1. e and coefficient ai are represented by two double numbers. + * For e, the first one contains the leading 24 bits rounded, and the + * second one contains the remaining 53 bits (total 77 bits accuracy). + * For ai, the first one contains the leading 53 bits rounded, and the + * second is the remaining 53 bits (total 106 bits accuracy). + * 2. z is an array of three doubles. + * z[0] : the rounded value of Z (the intended value of z) + * z[1] : the leading 24 bits of Z rounded + * z[2] : the remaining 53 bits of Z + * Note that z[0] = z[1]+z[2] rounded. + * + */ + +static void +mx_poly(const double *z, const double *a, double *e, int n) { /* on entry only e[0] is read; on exit e[0] is the float-rounded head and e[1] the tail */ + double r, s, t, p_h, p_l, z_h, z_l, p; + int i; + + n = n + n; /* each coefficient occupies two slots, so term n starts at a[2n] */ + p = e[0] + a[n]; + p_l = a[n + 1]; + p_h = (double) ((float) p); /* head of p, rounded to float precision */ + p = a[n - 2] + z[0] * p; /* plain double estimate of the next Horner step */ + z_h = z[1]; z_l = z[2]; + p_l += e[0] - (p_h - a[n]); /* tail picks up what the float rounding dropped */ + + for (i = n - 2; i >= 2; i -= 2) { + /* compute p = ai + z * p */ + t = z_h * p_h; /* product of the two float-precision heads */ + s = z[0] * p_l + p_h * z_l; /* cross terms */ + p_h = (double) ((float) p); + s += a[i + 1]; + r = t - (p_h - a[i]); + p = a[i - 2] + z[0] * p; + p_l = r + s; + } + e[0] = (double)((float) p); + t = z_h * p_h; + s = z[0] * p_l + p_h * z_l; + r = t - (e[0] - a[0]); /* the tail of a[0], a[1], is not added in this last step */ + e[1] = r + s; +} + +/* + * Table of constants for atan from 0.125 to 8 + * 0.125 -- 0x3fc00000 --- (increment at bit 16) + * 0x3fc10000 + * 0x3fc20000 + * ... ... + * 0x401f0000 + * 8.000 -- 0x40200000 (total: 97) + * By K.C. 
Ng, March 9, 1989 + */ + +static const double TBL_atan_hi[] = { +1.243549945467614382e-01, 1.320397616146387620e-01, 1.397088742891636204e-01, +1.473614810886516302e-01, 1.549967419239409727e-01, 1.626138285979485676e-01, +1.702119252854744080e-01, 1.777902289926760471e-01, 1.853479499956947607e-01, +1.928843122579746439e-01, 2.003985538258785115e-01, 2.078899272022629863e-01, +2.153576996977380476e-01, 2.228011537593945213e-01, 2.302195872768437179e-01, +2.376123138654712419e-01, 2.449786631268641435e-01, 2.596296294082575118e-01, +2.741674511196587893e-01, 2.885873618940774099e-01, 3.028848683749714166e-01, +3.170557532091470287e-01, 3.310960767041321029e-01, 3.450021772071051318e-01, +3.587706702705721895e-01, 3.723984466767542023e-01, 3.858826693980737521e-01, +3.992207695752525431e-01, 4.124104415973872673e-01, 4.254496373700422662e-01, +4.383365598579578304e-01, 4.510696559885234436e-01, 4.636476090008060935e-01, +4.883339510564055352e-01, 5.123894603107377321e-01, 5.358112379604637043e-01, +5.585993153435624414e-01, 5.807563535676704136e-01, 6.022873461349641522e-01, +6.231993299340659043e-01, 6.435011087932843710e-01, 6.632029927060932861e-01, +6.823165548747480713e-01, 7.008544078844501923e-01, 7.188299996216245269e-01, +7.362574289814280970e-01, 7.531512809621944138e-01, 7.695264804056582975e-01, +7.853981633974482790e-01, 8.156919233162234217e-01, 8.441539861131710509e-01, +8.709034570756529758e-01, 8.960553845713439269e-01, 9.197196053504168578e-01, +9.420000403794636101e-01, 9.629943306809362058e-01, 9.827937232473290541e-01, +1.001483135694234639e+00, 1.019141344266349725e+00, 1.035841253008800145e+00, +1.051650212548373764e+00, 1.066630365315743623e+00, 1.080839000541168327e+00, +1.094328907321189925e+00, 1.107148717794090409e+00, 1.130953743979160375e+00, +1.152571997215667610e+00, 1.172273881128476303e+00, 1.190289949682531656e+00, +1.206817370285252489e+00, 1.222025323210989667e+00, 1.236059489478081863e+00, +1.249045772398254428e+00, 
1.261093382252440387e+00, 1.272297395208717319e+00, +1.282740879744270757e+00, 1.292496667789785336e+00, 1.301628834009196156e+00, +1.310193935047555547e+00, 1.318242051016837113e+00, 1.325817663668032553e+00, +1.339705659598999565e+00, 1.352127380920954636e+00, 1.363300100359693845e+00, +1.373400766945015894e+00, 1.382574821490125894e+00, 1.390942827002418447e+00, +1.398605512271957618e+00, 1.405647649380269870e+00, 1.412141064608495311e+00, +1.418146998399631542e+00, 1.423717971406494032e+00, 1.428899272190732761e+00, +1.433730152484709031e+00, 1.438244794498222623e+00, 1.442473099109101931e+00, +1.446441332248135092e+00, +}; + +static const double TBL_atan_lo[] = { +-3.125324142453938311e-18, -1.276925400709959526e-17, 2.479758919089733066e-17, +5.409599147666297957e-18, 9.585415594114323829e-18, 7.784470643106252464e-18, +-3.541164079802125137e-18, 2.372599351477449041e-17, 4.180692268843078977e-18, +2.034098543938166622e-17, 3.139954287184449286e-18, 7.333160666520898500e-18, +4.738160130078732886e-19, -5.498822172446843173e-18, 1.231340452914270316e-17, +1.058231431371112987e-17, 1.069875561873445139e-17, 1.923875492461530410e-17, +8.261353575163771936e-18, -1.428369957377257085e-17, -1.101082790300136900e-17, +-1.893928924292642146e-17, -7.952610375793798701e-18, -2.293880475557830393e-17, +3.088733564861919217e-17, 1.961231150484565340e-17, 2.378822732491940868e-17, +2.246598105617042065e-17, 3.963462895355093301e-17, 2.331553074189288466e-17, +-2.494277030626540909e-17, 3.280735600183735558e-17, 2.269877745296168709e-17, +-1.137323618932958456e-17, -2.546278147285580353e-17, -4.063795683482557497e-18, +-5.455630548591626394e-18, -1.441464378193066908e-17, 2.950430737228402307e-17, +2.672403885140095079e-17, 1.583478505144428617e-17, -3.076054864429649001e-17, +6.943223671560007740e-18, -1.987626234335816123e-17, -2.147838844445698302e-17, +3.473937648299456719e-17, -2.425693465918206812e-17, -3.704991905602721293e-17, +3.061616997868383018e-17, 
-1.071456562778743077e-17, -4.841337011934916763e-17, +-2.269823590747287052e-17, 2.923876285774304890e-17, -4.057439412852767923e-17, +5.460837485846687627e-17, -3.986660595210752445e-18, 1.390331103123099845e-17, +9.438308023545392000e-17, 1.000401886936679889e-17, 3.194313981784503706e-17, +-9.650564731467513515e-17, -5.956589637160374564e-17, -1.567632251135907253e-17, +-5.490676155022364226e-18, 9.404471373566379412e-17, 7.123833804538446299e-17, +-9.159738508900378819e-17, 8.385188614028674371e-17, 7.683333629842068806e-17, +4.172467638861439118e-17, -2.979162864892849274e-17, 7.879752739459421280e-17, +-2.196203799612310905e-18, 3.242139621534960503e-17, 2.245875015034507026e-17, +-9.283188754266129476e-18, -6.830804768926660334e-17, -1.236918499824626670e-17, +8.745413734780278834e-17, -6.319394031144676258e-17, -8.824429373951136321e-17, +-2.599011860304134377e-17, 2.147674250751150961e-17, 1.093246171526936217e-16, +-3.307710355769516504e-17, -3.561490438648230100e-17, -9.843712133488842595e-17, +-2.324061182591627982e-17, -8.922630138234492386e-17, -9.573807110557223276e-17, +-8.263883782511013632e-17, 8.721870922223967507e-17, -6.457134743238754385e-17, +-4.396204466767636187e-17, -2.493019910264565554e-17, -1.105119435430315713e-16, +9.211323971545051565e-17, +}; + +/* + * mx_atan(x,err) + * Table look-up algorithm + * By K.C. Ng, March 9, 1989 + * + * Algorithm. + * + * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)). + * We use poly1(x) to approximate atan(x) for x in [0,1/8] with + * error (relative) + * |(atan(x)-poly1(x))/x|<= 2^-83.41 + * + * and use poly2(x) to approximate atan(x) for x in [0,1/65] with + * error + * |atan(x)-poly2(x)|<= 2^-86.8 + * + * Here poly1 and poly2 are odd polynomial with the following form: + * x + x^3*(a1+x^2*(a2+...)) + * + * (0). Purge off Inf and NaN and 0 + * (1). Reduce x to positive by atan(x) = -atan(-x). + * (2). 
For x <= 1/8, use + * (2.1) if x < 2^(-prec/2), atan(x) = x with inexact flag raised + * (2.2) Otherwise + * atan(x) = poly1(x) + * (3). For x >= 8 then (prec = 78) + * (3.1) if x >= 2^prec, atan(x) = atan(inf) - pio2lo + * (3.2) if x >= 2^(prec/3), atan(x) = atan(inf) - 1/x + * (3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x) + * (3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x) + * + * (4). Now x is in (0.125, 8) + * Find y that match x to 4.5 bit after binary (easy). + * If iy is the high word of y, then + * single : j = (iy - 0x3e000000) >> 19 + * double : j = (iy - 0x3fc00000) >> 16 + * quad : j = (iy - 0x3ffc0000) >> 12 + * + * Let s = (x-y)/(1+x*y). Then + * atan(x) = atan(y) + poly1(s) + * = _TBL_atan_hi[j] + (_TBL_atan_lo[j] + poly2(s) ) + * + * Note. |s| <= 1.5384615385e-02 = 1/65. Maxium occurs at x = 1.03125 + * + */ + +#define P1 p[2] +#define P4 p[8] +#define P5 p[9] +#define P6 p[10] +#define P7 p[11] +#define P8 p[12] +#define P9 p[13] +static const double p[] = { + 1.0, + 0.0, + -3.33333333333333314830e-01, /* p1 = BFD55555 55555555 */ + -1.85030852238476921863e-17, /* p1_l = BC755525 9783A49C */ + 2.00000000000000011102e-01, /* p2 = 3FC99999 9999999A */ + -1.27263196576150347368e-17, /* p2_l = BC6D584B 0D874007 */ + -1.42857142857141405923e-01, /* p3 = BFC24924 9249245E */ + -1.34258204847170493327e-17, /* p3_l = BC6EF534 A112500D */ + 1.11111111110486909803e-01, /* p4 = 3FBC71C7 1C71176A */ + -9.09090907557387889470e-02, /* p5 = BFB745D1 73B47A7D */ + 7.69230541541713053189e-02, /* p6 = 3FB3B13A B1E68DE6 */ + -6.66645815401964159097e-02, /* p7 = BFB110EE 1584446A */ + 5.87081768778560317279e-02, /* p8 = 3FAE0EFF 87657733 */ + -4.90818147456113240690e-02, /* p9 = BFA92140 6A524B5C */ +}; +#define Q1 q[2] +#define Q3 q[6] +#define Q4 q[7] +#define Q5 q[8] +static const double q[] = { + 1.0, + 0.0, + -3.33333333333333314830e-01, /* q1 = BFD55555 55555555 */ + -1.85022941571278638733e-17, /* q1_l = BC7554E9 D20EFA66 */ + 1.99999999999999927836e-01, 
/* q2 = 3FC99999 99999997 */ + -1.28782564407438833398e-17, /* q2_l = BC6DB1FB 17217417 */ + -1.42857142855492280642e-01, /* q3 = BFC24924 92483C46 */ + 1.11111097130183356096e-01, /* q4 = 3FBC71C6 E06595CC */ + -9.08553303569109294013e-02, /* q5 = BFB7424B 808CDA76 */ +};
+static const double
+one = 1.0,
+pio2hi = 1.570796326794896558e+00,
+pio2lo = 6.123233995736765886e-17;
+/*
+ * mx_atan(x, err)
+ *
+ * Return w, an approximation of atan(x), and store in *err the low-order
+ * part that was lost when the final sum was rounded to w (for example
+ * *err = t - (w - s) after w = s + t).  The path is selected by ix, the
+ * word ((int *) &x)[HIWORD] with its sign bit removed:
+ *   ix < 0x3d800000 (|x| < 2**-39)  return x, *err = (int) x
+ *   ix < 0x3f300000 (|x| < 2**-12)  x + x*z*(q[2] + z*(q[4] + z*q[6])),
+ *                                   z = x*x
+ *   ix < 0x3fc00000 (|x| < 1/8)     x * (1 + z*(p1 + ...)) through mx_poly
+ *   ix < 0x40200000 (|x| < 8)       TBL_atan_hi[j] + TBL_atan_lo[j] +
+ *                                   s*(1 + ...), s = (|x| - y)/(1 + |x|*y),
+ *                                   where y is |x| with its high word
+ *                                   rounded to 4 mantissa bits and its low
+ *                                   word cleared
+ *   ix < 0x40504000 (|x| < 65)      pio2 - r*(1 + z*(p1 + ...)),
+ *                                   r = 1/|x|, z = r*r
+ *   ix < 0x42600000 (|x| < 2**39)   pio2 - r*(1 + z*(q1 + ...))
+ *   ix < 0x44c00000 (|x| < 2**77)   pio2hi - (1/|x| - pio2lo)
+ *   otherwise                       pio2hi + pio2lo
+ * From the |x| < 8 row down the value is computed for |x| and both w and
+ * *err are negated when x is negative; the first three rows use x with
+ * its sign.
+ */
+static double
+mx_atan(double x, double *err) {
+	double y, z, r, s, t, w, s_h, s_l, x_h, x_l, zz[3], ee[2], z_h,
+		z_l, r_h, r_l, u, v;
+	int ix, iy, sign, j;
+
+	ix = ((int *) &x)[HIWORD];
+	sign = ix & 0x80000000;
+	ix ^= sign;	/* ix is now the high word with the sign bit cleared */
+
+	/* for |x| < 1/8 */
+	if (ix < 0x3fc00000) {
+		if (ix < 0x3f300000) {	/* when |x| < 2**-12 */
+			if (ix < 0x3d800000) {	/* if |x| < 2**-39 */
+				*err = (double) ((int) x);
+				return (x);
+			}
+			z = x * x;
+			t = x * z * (q[2] + z * (q[4] + z * q[6]));
+			r = x + t;
+			*err = t - (r - x);
+			return (r);
+		}
+		z = x * x;
+
+		/* use double precision at p4 and on */
+		ee[0] = z *
+		    (P4 + z *
+		    (P5 + z * (P6 + z * (P7 + z * (P8 + z * P9)))));
+
+		x_h = (double) ((float) x);
+		z_h = (double) ((float) z);
+		x_l = x - x_h;
+		z_l = (x_h * x_h - z_h);
+		zz[0] = z;
+		zz[1] = z_h;
+		zz[2] = z_l + x_l * (x + x_h);
+
+		/*
+		 * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call
+		 * mx_poly
+		 */
+
+		mx_poly(zz, p, ee, 3);
+
+		/* finally x*(1+z*(p1+...)) */
+		r = x_h * ee[0];
+		t = x * ee[1] + x_l * ee[0];
+		s = t + r;
+		*err = t - (s - r);
+		return (s);
+	}
+	/* for |x| >= 8.0 */
+	if (ix >= 0x40200000) {	/* x >= 8 */
+		x = fabs(x);
+		if (ix >= 0x42600000) {	/* x >= 2**39 */
+			if (ix >= 0x44c00000) {	/* x >= 2**77 */
+				y = -pio2lo;
+			} else
+				y = one / x - pio2lo;
+			if (sign == 0) {
+				t = pio2hi - y;
+				*err = -(y - (pio2hi - t));
+			} else {
+				t = y - pio2hi;
+				*err = y - (pio2hi + t);
+			}
+			return (t);
+		} else {
+			/* compute r = 1/x */
+			r = one / x;
+			z = r * r;
+			if (ix < 0x40504000) {	/* 8 <= x < 65 */
+
+				/* use double precision at p4 and on */
+				ee[0] = z *
+				    (P4 + z *
+				    (P5 + z *
+				    (P6 + z * (P7 + z * (P8 + z * P9)))));
+				x_h = (double) ((float) x);
+				r_h = (double) ((float) r);
+				z_h = (double) ((float) z);
+				r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
+				z_l = (r_h * r_h - z_h);
+				zz[0] = z;
+				zz[1] = z_h;
+				zz[2] = z_l + r_l * (r + r_h);
+				/*
+				 * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call
+				 * mx_poly
+				 */
+				mx_poly(zz, p, ee, 3);
+			} else {	/* 65 <= x < 2**39 */
+				/* use double precision at q3 and on */
+				ee[0] = z * (Q3 + z * (Q4 + z * Q5));
+				x_h = (double) ((float) x);
+				r_h = (double) ((float) r);
+				z_h = (double) ((float) z);
+				r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
+				z_l = (r_h * r_h - z_h);
+				zz[0] = z;
+				zz[1] = z_h;
+				zz[2] = z_l + r_l * (r + r_h);
+				/*
+				 * compute (1+z*(q1+z*(q2+e))) by call
+				 * mx_poly
+				 */
+				mx_poly(zz, q, ee, 2);
+			}
+			/* pio2 - r*(1+...) */
+			v = r_h * ee[0];
+			t = pio2lo - (r * ee[1] + r_l * ee[0]);
+			if (sign == 0) {
+				s = pio2hi - v;
+				t -= (v - (pio2hi - s));
+			} else {
+				s = v - pio2hi;
+				t = -(t - (v - (s + pio2hi)));
+			}
+			w = s + t;
+			*err = t - (w - s);
+			return (w);
+		}
+	}
+	/* now x is between 1/8 and 8 */
+	((int *) &x)[HIWORD] = ix;	/* x = |x| */
+	iy = (ix + 0x00008000) & 0x7fff0000;	/* round high word to 4 mantissa bits */
+	((int *) &y)[HIWORD] = iy;
+	((int *) &y)[LOWORD] = 0;
+	j = (iy - 0x3fc00000) >> 16;	/* table index; 0 when iy is the 1/8 pattern */
+
+	w = (x - y);
+	v = 1 / (one + x * y);
+	s = w * v;
+	z = s * s;
+	/* use double precision at q3 and on */
+	ee[0] = z * (Q3 + z * (Q4 + z * Q5));
+	s_h = (double) ((float) s);
+	z_h = (double) ((float) z);
+	x_h = (double) ((float) x);
+	t = (double) ((float) (one + x * y));
+	r = -((x_h - x) * y - (x_h * y - (t - one)));
+	s_l = -v * (s_h * r - (w - s_h * t));
+	z_l = (s_h * s_h - z_h);
+	zz[0] = z;
+	zz[1] = z_h;
+	zz[2] = z_l + s_l * (s + s_h);
+	/* compute (1+z*(q1+z*(q2+e))) by call mx_poly */
+	mx_poly(zz, q, ee, 2);
+	v = s_h * ee[0];
+	t = TBL_atan_lo[j] + (s * ee[1] + s_l * ee[0]);
+	u = TBL_atan_hi[j];
+	s = u + v;
+	t += (v - (s - u));
+	w = s + t;
+	*err = t - (w - s);
+	if (sign != 0) {
+		w = -w;
+		*err = -*err;
+	}
+	return (w);
+}
+
+static const double
+	twom768 = 6.441148769597133308e-232,	/* 2^-768 */
+	two768 = 1.552518092300708935e+231,	/* 2^768 */
+	pi = 3.1415926535897931159979634685,
+	pi_lo = 1.224646799147353177e-16,
+	pio2 = 1.570796326794896558e+00,
+	pio2_lo = 6.123233995736765886e-17,
+	pio4 = 0.78539816339744827899949,
+	pio4_lo = 3.061616997868382943e-17,
+	pi3o4 = 2.356194490192344836998,
+	pi3o4_lo = 9.184850993605148829195e-17;
+/*
+ * __k_atan2(y, x, w)
+ *
+ * Return atan2(y, x) and store a low-order correction term in *w.
+ * *w starts at 0 and is then set per case:
+ *   - x or y NaN: return x * y.
+ *   - x or y infinite, or x or y zero: return the matching constant
+ *     (+-0, +-pi/4, +-pi/2, +-3pi/4 or +-pi) with the sign of y; *w gets
+ *     the corresponding *_lo constant with the same sign (it stays 0, or
+ *     becomes 0 * y, when the result is +-0).
+ *   - high words differ by more than 0x06400000: return +-pi/2 (|y| far
+ *     larger), +-pi (|x| far larger, x < 0), or t = y / x with
+ *     *w = (y - x*t) / x (|x| far larger, x > 0).
+ *   - otherwise: x and y are both scaled by 2^-768 when ix >= 0x5f300000
+ *     or by 2^768 when ix < 0x23d00000, then t = |y| / |x| and
+ *     w1 = mx_atan(t, &w2).  t2 - t1 evaluates y - x*t using xh and th
+ *     (x and t with the low 27 bits of their LOWORD cleared); that
+ *     remainder, divided by x + y*t, is added to w2.  For x < 0 the pair
+ *     is reflected to pi - w1, and finally the sign of y is applied.
+ */
+double
+__k_atan2(double y, double x, double *w) {
+	double t, xh, th, t1, t2, w1, w2;
+	int ix, iy, hx, hy, lx, ly;
+
+	hy = ((int *) &y)[HIWORD];
+	ly = ((int *) &y)[LOWORD];
+	iy = hy & ~0x80000000;
+
+	hx = ((int *) &x)[HIWORD];
+	lx = ((int *) &x)[LOWORD];
+	ix = hx & ~0x80000000;
+
+	*w = 0.0;
+	if (ix >= 0x7ff00000 || iy >= 0x7ff00000) {	/* ignore inexact */
+		if (isnan(x) || isnan(y))
+			return (x * y);
+		else if (iy < 0x7ff00000) {
+			if (hx >= 0) {	/* ATAN2(+-finite, +inf) is +-0 */
+				*w *= y;
+				return (*w);
+			} else {	/* ATAN2(+-finite, -inf) is +-pi */
+				*w = copysign(pi_lo, y);
+				return (copysign(pi, y));
+			}
+		} else if (ix < 0x7ff00000) {
+			/* ATAN2(+-inf, finite) is +-pi/2 */
+			*w = (hy >= 0)? pio2_lo : -pio2_lo;
+			return ((hy >= 0)? pio2 : -pio2);
+		} else if (hx > 0) {	/* ATAN2(+-INF,+INF) = +-pi/4 */
+			*w = (hy >= 0)? pio4_lo : -pio4_lo;
+			return ((hy >= 0)? pio4 : -pio4);
+		} else {	/* ATAN2(+-INF,-INF) = +-3pi/4 */
+			*w = (hy >= 0)? pi3o4_lo : -pi3o4_lo;
+			return ((hy >= 0)? pi3o4 : -pi3o4);
+		}
+	} else if ((ix | lx) == 0 || (iy | ly) == 0) {
+		if ((iy | ly) == 0) {
+			if (hx >= 0)	/* ATAN2(+-0, +(0 <= x <= inf)) is +-0 */
+				return (y);
+			else {	/* ATAN2(+-0, -(0 <= x <= inf)) is +-pi */
+				*w = (hy >= 0)? pi_lo : -pi_lo;
+				return ((hy >= 0)? pi : -pi);
+			}
+		} else {	/* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2 */
+			*w = (hy >= 0)? pio2_lo : -pio2_lo;
+			return ((hy >= 0)? pio2 : -pio2);
+		}
+	} else if (iy - ix > 0x06400000) {	/* |x/y| < 2 ** -100 */
+		*w = (hy >= 0)? pio2_lo : -pio2_lo;
+		return ((hy >= 0)? pio2 : -pio2);
+	} else if (ix - iy > 0x06400000) {	/* |y/x| < 2 ** -100 */
+		if (hx < 0) {
+			*w = (hy >= 0)? pi_lo : -pi_lo;
+			return ((hy >= 0)? pi : -pi);
+		} else {
+			t = y / x;
+			th = t;
+			((int *) &th)[LOWORD] &= 0xf8000000;
+			xh = x;
+			((int *) &xh)[LOWORD] &= 0xf8000000;
+			t1 = (x - xh) * t + xh * (t - th);
+			t2 = y - xh * th;
+			*w = (t2 - t1) / x;	/* t2 - t1 is y - x*t */
+			return (t);
+		}
+	} else {
+		if (ix >= 0x5f300000) {
+			x *= twom768;
+			y *= twom768;
+		} else if (ix < 0x23d00000) {
+			x *= two768;
+			y *= two768;
+		}
+		y = fabs(y);
+		x = fabs(x);
+		t = y / x;
+		th = t;
+		((int *) &th)[LOWORD] &= 0xf8000000;
+		xh = x;
+		((int *) &xh)[LOWORD] &= 0xf8000000;
+		t1 = (x - xh) * t + xh * (t - th);
+		t2 = y - xh * th;
+		w1 = mx_atan(t, &w2);
+		w2 += (t2 - t1) / (x + y * t);
+		if (hx < 0) {
+			t1 = pi - w1;
+			t2 = pi - t1;
+			w2 = (pi_lo - w2) - (w1 - t2);
+			w1 = t1;
+		}
+		*w = (hy >= 0)? w2 : -w2;
+		return ((hy >= 0)? w1 : -w1);
+	}
+}
diff --git a/usr/src/libm/src/complex/k_atan2l.c b/usr/src/libm/src/complex/k_atan2l.c
new file mode 100644
index 0000000..1903e48
--- /dev/null
+++ b/usr/src/libm/src/complex/k_atan2l.c
@@ -0,0 +1,808 @@
+/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)k_atan2l.c 1.5 06/01/23 SMI"
+
+#include "libm.h" /* __k_atan2l */
+#include "complex_wrapper.h"
+/*
+ * HALF(x) truncates the long double lvalue x in place by zeroing 32-bit
+ * words of its representation: on sparc word [3] is cleared and word [2]
+ * is masked with 0xfe000000; on i386/little-endian word [0] is cleared.
+ * The sparc form expands to two statements and the argument is used
+ * unparenthesized, so use it only as a full statement on a plain
+ * variable, as in "w = p; HALF(w);".
+ */
+#if defined(__sparc)
+#define HALF(x) ((int *) &x)[3] = 0; ((int *) &x)[2] &= 0xfe000000
+#elif defined(__i386) || defined(__LITTLE_ENDIAN)
+#define HALF(x) ((int *) &x)[0] = 0
+#endif
+
+/*
+ * long double __k_atan2l(long double y, long double x, long double *e)
+ *
+ * Compute atan2l with error terms.
+ *
+ * Important formulas:
+ *   atan(x) = x - x^3/3 + x^5/5 - ...          (for x <= 1)
+ *           = pi/2 - 1/x + 1/(3 x^3) - ...     (for x > 1)
+ *
+ *   Arg(x + y i) = sign(y) * atan2(|y|, x)
+ *                = sign(y) * atan(|y|/x)            (for x > 0)
+ *                  sign(y) * (PI - atan(|y|/|x|))   (for x < 0)
+ * Thus if x >> y (IEEE double: EXP(x) - EXP(y) >= 60):
+ *   1. (x > 0): atan2(y,x) ~ y/x
+ *   2. (x < 0): atan2(y,x) ~ sign(y) (PI - |y/x|)
+ * Otherwise if x << y:
+ *   atan2(y,x) ~ sign(y)*PI/2 - x/y
+ *
+ * __k_atan2l calls the static functions mx_polyl and mx_atanl.
+ */
+
+
+/*
+ * (void) mx_polyl (const long double *z, const long double *a,
+ *                  long double *e, int n)
+ * return
+ *   e = a_0 + z*(a_2 + z*(a_4 + ... z*(a_2n + e)...))
+ * where a_2k denotes the coefficient stored at a[2k] (leading part) and
+ * a[2k+1] (trailing part).
+ * Note:
+ * 1. e and coefficient ai are represented by two long double numbers.
+ *    For e, the first one contains the leading 53 bits (30 for x86 extended)
+ *    and the second one contains the remaining 113 bits (64 for x86 extended).
+ *    For ai, the first one contains the leading 53 bits (or 30 for x86)
+ *    rounded, and the second is the remaining 113 bits (or 64 for x86).
+ *    On entry only e[0] is read; on return e[0] and e[1] hold the leading
+ *    and trailing parts of the result.
+ * 2. z is an array of three long doubles.
+ *    z[0] : the rounded value of Z (the intended value of z)
+ *    z[1] : the leading 32 (or 56) bits of Z rounded
+ *    z[2] : the remaining 113 (or 64) bits of Z
+ *    Note that z[0] = z[1]+z[2] rounded.
+ *
+ */
+
+static void
+mx_polyl(const long double *z, const long double *a, long double *e, int n) {
+	long double r, s, t, p_h, p_l, z_h, z_l, p, w;
+	int i;
+	n = n + n;	/* a[n], a[n + 1] are now the pair of the last coefficient */
+	p = e[0] + a[n];
+	p_l = a[n + 1];
+	w = p; HALF(w);	/* w = p truncated in place by HALF */
+	p_h = w;
+	p = a[n - 2] + z[0] * p;
+	z_h = z[1]; z_l = z[2];
+	p_l += e[0] - (p_h - a[n]);	/* p_l = a[n + 1] + ((e[0] + a[n]) - p_h) */
+
+	for (i = n - 2; i >= 2; i -= 2) {
+
+		/*
+		 * compute p = ai + z * p
+		 * On entry p already holds the rounded a[i] + z*p while p_h
+		 * and p_l still describe the previous level; t and s are
+		 * formed from those, then p is split into the new p_h (by
+		 * HALF) with the correction p_l = r + s, and p is advanced
+		 * to the next level.
+		 */
+		t = z_h * p_h;
+		s = z[0] * p_l + p_h * z_l;
+		w = p; HALF(w);
+		p_h = w;
+		s += a[i + 1];
+		r = t - (p_h - a[i]);
+		p = a[i - 2] + z[0] * p;
+		p_l = r + s;
+	}
+	w = p; HALF(w);
+	e[0] = w;	/* leading part of the result */
+	t = z_h * p_h;
+	s = z[0] * p_l + p_h * z_l;
+	r = t - (e[0] - a[0]);
+	e[1] = r + s;	/* trailing part of the result */
+}
+
+/* + * Table of constants for atan from 0.125 to 8 + * 0.125 -- 0x3ffc0000 --- (increment at bit 12) + * 0x3ffc1000 + * 0x3ffc2000 + * ... ... 
+ * 0x4001f000 + * 8.000 -- 0x40020000 (total: 97) + */ + +static const long double TBL_atan_hil[] = { +#if defined(__sparc) +1.2435499454676143503135484916387102416568e-01L, +1.3203976161463874927468440652656953226250e-01L, +1.3970887428916364518336777673909505681607e-01L, +1.4736148108865163560980276039684551821066e-01L, +1.5499674192394098230371437493349219133371e-01L, +1.6261382859794857537364156376155780062019e-01L, +1.7021192528547440449049660709976171369543e-01L, +1.7779022899267607079662479921582468899456e-01L, +1.8534794999569476488602596122854464667261e-01L, +1.9288431225797466419705871069022730349878e-01L, +2.0039855382587851465394578503437838446153e-01L, +2.0788992720226299360533498310299432475629e-01L, +2.1535769969773804802445962716648964165745e-01L, +2.2280115375939451577103212214043255525024e-01L, +2.3021958727684373024017095967980299065551e-01L, +2.3761231386547125247388363432563777919892e-01L, +2.4497866312686415417208248121127580641959e-01L, +2.5962962940825753102994644318397190560106e-01L, +2.7416745111965879759937189834217578592444e-01L, +2.8858736189407739562361141995821834504332e-01L, +3.0288486837497140556055609450555821812277e-01L, +3.1705575320914700980901557667446732975852e-01L, +3.3109607670413209494433878775694455421259e-01L, +3.4500217720710510886768128690005168408290e-01L, +3.5877067027057222039592006392646052215363e-01L, +3.7239844667675422192365503828370182641413e-01L, +3.8588266939807377589769548460723139638186e-01L, +3.9922076957525256561471669615886476491104e-01L, +4.1241044159738730689979128966712694260920e-01L, +4.2544963737004228954226360518079233013817e-01L, +4.3833655985795780544561604921477130895882e-01L, +4.5106965598852347637563925728219344073798e-01L, +4.6364760900080611621425623146121439713344e-01L, +4.8833395105640552386716496074706484459644e-01L, +5.1238946031073770666660102058425923805558e-01L, +5.3581123796046370026908506870769144698471e-01L, +5.5859931534356243597150821640166122875873e-01L, 
+5.8075635356767039920327447500150082375122e-01L, +6.0228734613496418168212269420423291922459e-01L, +6.2319932993406593099247534906037459367793e-01L, +6.4350110879328438680280922871732260447265e-01L, +6.6320299270609325536325431023827583417226e-01L, +6.8231655487474807825642998171115298784729e-01L, +7.0085440788445017245795128178675127318623e-01L, +7.1882999962162450541701415152590469891043e-01L, +7.3625742898142813174283527108914662479274e-01L, +7.5315128096219438952473937026902888600575e-01L, +7.6952648040565826040682003598565401726598e-01L, +7.8539816339744830961566084581987569936977e-01L, +8.1569192331622341102146083874564582672284e-01L, +8.4415398611317100251784414827164746738632e-01L, +8.7090345707565295314017311259781407291650e-01L, +8.9605538457134395617480071802993779546602e-01L, +9.1971960535041681722860345482108940969311e-01L, +9.4200004037946366473793717053459362115891e-01L, +9.6299433068093620181519583599709989677298e-01L, +9.8279372324732906798571061101466603762572e-01L, +1.0014831356942347329183295953014374896343e+00L, +1.0191413442663497346383429170230636212354e+00L, +1.0358412530088001765846944703254440735476e+00L, +1.0516502125483736674598673120862999026920e+00L, +1.0666303653157435630791763474202799086015e+00L, +1.0808390005411683108871567292171997859003e+00L, +1.0943289073211899198927883146102352763033e+00L, +1.1071487177940905030170654601785370497543e+00L, +1.1309537439791604464709335155363277560026e+00L, +1.1525719972156675180401498626127514672834e+00L, +1.1722738811284763866005949441337046006865e+00L, +1.1902899496825317329277337748293182803384e+00L, +1.2068173702852525303955115800565576625682e+00L, +1.2220253232109896370417417439225704120294e+00L, +1.2360594894780819419094519711090786146210e+00L, +1.2490457723982544258299170772810900483550e+00L, +1.2610933822524404193139408812473357640124e+00L, +1.2722973952087173412961937498224805746463e+00L, +1.2827408797442707473628852511364955164072e+00L, +1.2924966677897852679030914214070816723528e+00L, 
+1.3016288340091961438047858503666855024453e+00L, +1.3101939350475556342564376891719053437537e+00L, +1.3182420510168370498593302023271363040427e+00L, +1.3258176636680324650592392104284756886164e+00L, +1.3397056595989995393283037525895557850243e+00L, +1.3521273809209546571891479413898127598774e+00L, +1.3633001003596939542892985278250991560269e+00L, +1.3734007669450158608612719264449610604836e+00L, +1.3825748214901258580599674177685685163955e+00L, +1.3909428270024183486427686943836432395486e+00L, +1.3986055122719575950126700816114282727858e+00L, +1.4056476493802697809521934019958080664406e+00L, +1.4121410646084952153676136718584890852820e+00L, +1.4181469983996314594038603039700988632607e+00L, +1.4237179714064941189018190466107297108905e+00L, +1.4288992721907326964184700745371984001389e+00L, +1.4337301524847089866404719096698873880264e+00L, +1.4382447944982225979614042479354816039669e+00L, +1.4424730991091018200252920599377291810352e+00L, +1.4464413322481351841999668424758803866109e+00L, +#elif defined(__i386) || defined(__LITTLE_ENDIAN) +1.243549945356789976358413696289e-01L, 1.320397615781985223293304443359e-01L, +1.397088742814958095550537109375e-01L, 1.473614810383878648281097412109e-01L, +1.549967419123277068138122558594e-01L, 1.626138285500928759574890136719e-01L, +1.702119252295233309268951416016e-01L, 1.777902289759367704391479492188e-01L, +1.853479499695822596549987792969e-01L, 1.928843122441321611404418945312e-01L, +2.003985538030974566936492919922e-01L, 2.078899272019043564796447753906e-01L, +2.153576996643096208572387695312e-01L, 2.228011537226848304271697998047e-01L, +2.302195872762240469455718994141e-01L, 2.376123138237744569778442382812e-01L, +2.449786631041206419467926025391e-01L, 2.596296293195337057113647460938e-01L, +2.741674510762095451354980468750e-01L, 2.885873618070036172866821289062e-01L, +3.028848683461546897888183593750e-01L, 3.170557531993836164474487304688e-01L, +3.310960766393691301345825195312e-01L, 3.450021771714091300964355468750e-01L, 
+3.587706702528521418571472167969e-01L, 3.723984466632828116416931152344e-01L, +3.858826693613082170486450195312e-01L, 3.992207695264369249343872070312e-01L, +4.124104415532201528549194335938e-01L, 4.254496373469009995460510253906e-01L, +4.383365598041564226150512695312e-01L, 4.510696559445932507514953613281e-01L, +4.636476089945062994956970214844e-01L, 4.883339509833604097366333007812e-01L, +5.123894601128995418548583984375e-01L, 5.358112377580255270004272460938e-01L, +5.585993151180446147918701171875e-01L, 5.807563534472137689590454101562e-01L, +6.022873460315167903900146484375e-01L, 6.231993297114968299865722656250e-01L, +6.435011087451130151748657226562e-01L, 6.632029926404356956481933593750e-01L, +6.823165547102689743041992187500e-01L, 7.008544078562408685684204101562e-01L, +7.188299994450062513351440429688e-01L, 7.362574287690222263336181640625e-01L, +7.531512808054685592651367187500e-01L, 7.695264802314341068267822265625e-01L, +7.853981633670628070831298828125e-01L, 8.156919232569634914398193359375e-01L, +8.441539860796183347702026367188e-01L, 8.709034570492804050445556640625e-01L, +8.960553845390677452087402343750e-01L, 9.197196052409708499908447265625e-01L, +9.420000403188169002532958984375e-01L, 9.629943305626511573791503906250e-01L, +9.827937232330441474914550781250e-01L, 1.001483135391026735305786132812e+00L, +1.019141343887895345687866210938e+00L, 1.035841252654790878295898437500e+00L, +1.051650212146341800689697265625e+00L, 1.066630364861339330673217773438e+00L, +1.080839000176638364791870117188e+00L, 1.094328907318413257598876953125e+00L, +1.107148717623203992843627929688e+00L, 1.130953743588179349899291992188e+00L, +1.152571997139602899551391601562e+00L, 1.172273880802094936370849609375e+00L, +1.190289949532598257064819335938e+00L, 1.206817369908094406127929687500e+00L, +1.222025323193520307540893554688e+00L, 1.236059489194303750991821289062e+00L, +1.249045772012323141098022460938e+00L, 1.261093381792306900024414062500e+00L, 
+1.272297394927591085433959960938e+00L, 1.282740879338234663009643554688e+00L, +1.292496667709201574325561523438e+00L, 1.301628833636641502380371093750e+00L, +1.310193934943526983261108398438e+00L, 1.318242050707340240478515625000e+00L, +1.325817663222551345825195312500e+00L, 1.339705659542232751846313476562e+00L, +1.352127380669116973876953125000e+00L, 1.363300099968910217285156250000e+00L, +1.373400766868144273757934570312e+00L, 1.382574821356683969497680664062e+00L, +1.390942826867103576660156250000e+00L, 1.398605511989444494247436523438e+00L, +1.405647648964077234268188476562e+00L, 1.412141064181923866271972656250e+00L, +1.418146998155862092971801757812e+00L, 1.423717970959842205047607421875e+00L, +1.428899271879345178604125976562e+00L, 1.433730152435600757598876953125e+00L, +1.438244794495403766632080078125e+00L, 1.442473099101334810256958007812e+00L, +1.446441331878304481506347656250e+00L, +#endif +}; +static const long double TBL_atan_lol[] = { +#if defined(__sparc) +1.4074869197628063802317202820414310039556e-36L, +-4.9596961594739925555730439437999675295505e-36L, +8.9527745625194648873931213446361849472788e-36L, +1.1880437423207895718180765843544965589427e-35L, +-2.7810278112045145378425375128234365381448e-37L, +1.4797220377023800327295536234315147262387e-36L, +-4.2169561400548198732870384801849639863829e-36L, +7.2431229666913484649930323656316023494680e-36L, +-2.1573430089839170299895679353790663182462e-36L, +-9.9515745405126723554452367298128605186305e-36L, +-3.9065558992324838181617569730397882363067e-36L, +5.5260292271793726813211980664661124518807e-36L, +8.8415722215914321807682254318036452043689e-36L, +-8.1767728791586179254193323628285599800711e-36L, +-1.3344123034656142243797113823028330070762e-36L, +-4.4927331207813382908930733924681325892188e-36L, +4.4945511471812490393201824336762495687730e-36L, +-1.6688081504279223555776724459648440567274e-35L, +1.5629757586107955769461086568937329684113e-35L, +-2.2389835563308078552507970385331510848109e-35L, 
+-4.8312321745547311551870450671182151367050e-36L, +-1.4336172352905832876958926610980698844309e-35L, +-8.7440181998899932802989174170960593316080e-36L, +5.9284636008529837445780360785464550143016e-36L, +-2.2376651248436241276061055295043514993630e-35L, +6.0745837599336105414280310756677442136480e-36L, +1.5372187110451949677792344762029967023093e-35L, +2.0976068056751156241657121582478790247159e-35L, +-5.5623956405495438060726862202622807523700e-36L, +1.9697366707832471841858411934897351901523e-35L, +2.1070311964479488509034733639424887543697e-35L, +-2.3027356362982001602256518510854229844561e-35L, +4.8950964225733349266861843522029764772843e-36L, +-7.2380143477794458213872723050820253166391e-36L, +1.6365648865703614031637443396049568858105e-35L, +-3.9885811958234530793729129919803234197399e-35L, +4.1587722120912613510417783923227421336929e-35L, +3.8347421454556472153684687377337135027394e-35L, +-9.2251178933638721723515896465489002497864e-36L, +1.4094619690455989526175736741854656192178e-36L, +3.3568857805472235270612851425810803679451e-35L, +3.9090991055522552395018106803232118803401e-35L, +5.2956416979654208140521862707297033857956e-36L, +-5.0960846819945514367847063923662507136721e-36L, +-4.4959014425277615858329680393918315204998e-35L, +3.8039226544551634266566857615962609653834e-35L, +-4.4056522872895512108308642196611689657618e-36L, +1.6025024192482161076223807753425619076948e-36L, +2.1679525325309452561992610065108380635264e-35L, +1.9844038013515422125715362925736754104066e-35L, +3.9139619471799746834505227353568432457241e-35L, +2.1113443807975453505518453436799561854730e-35L, +3.1558557277444692755039816944392770185432e-35L, +1.6295044520355461408265585619500238335614e-35L, +-3.5087245209270305856151230356171213582305e-35L, +2.9041041864282855679591055270946117300088e-35L, +-2.3128843453818356590931995209806627233282e-35L, +-7.7124923181471578439967973820714857839953e-35L, +2.7539027829886922429092063590445808781462e-35L, 
+-9.4500899453181308951084545990839335972452e-35L, +-7.3061755302032092337594946001641651543473e-35L, +-4.1736144813953752193952770157406952602798e-35L, +3.4369948356256407045344855262863733571105e-35L, +-6.3790243492298090907302084924276831116460e-35L, +-9.6842943816353261291004127866079538980649e-36L, +4.8746757539138870909275958326700072821615e-35L, +-8.7533886477084190884511601368582548254655e-35L, +1.4284743992327918892692551138086727754845e-35L, +5.7262776211073389542565625693479173445042e-35L, +-3.2254883148780411245594822270747948565684e-35L, +7.8853548190609877325965525252380833808405e-35L, +8.4081736739037194097515038365370730251333e-35L, +7.4722870357563683815078242981933587273670e-35L, +7.9977202825793435289434813600890494256112e-36L, +-8.0577840773362139054848492346292673645405e-35L, +1.4217746753670583065490040209048757624336e-35L, +1.2232486914221205004109743560319090913328e-35L, +8.9696055070830036447361957217943988339065e-35L, +-3.1480394435081884410686066739846269858951e-35L, +-5.0927146040715345013240642517608928352977e-35L, +-5.7431997715924136568133859432702789493569e-35L, +-4.3920451405083770279099766080476485439987e-35L, +9.1106753984907715563018666776308759323326e-35L, +-3.7032569014272841009512400773061537538358e-35L, +8.8167419429746714276909825405131416764489e-35L, +-3.8389341696028352503752312861740895209678e-36L, +-3.3462959341960891546340895508017603408404e-35L, +-3.9212626776786074383916188498955828634947e-35L, +-7.8340397396377867255864494568594088378648e-35L, +7.4681018632456986520600640340627309824469e-35L, +8.9110918618956918451135594876165314884113e-35L, +3.9418160632271890530431797145664308529115e-35L, +-4.1048114088580104820193435638327617443913e-35L, +-2.3165419451582153326383944756220900454330e-35L, +-1.8428312581525319409399330203703211113843e-35L, +7.1477316546709482345411712017906842769961e-35L, +2.9914501578435874662153637707016094237004e-35L, +#elif defined(__i386) || defined(__LITTLE_ENDIAN) 
+1.108243739551347953496477557317e-11L, 3.644022694535396219063202730280e-11L, +7.667835628314065801595065768845e-12L, 5.026377078169301918590803009109e-11L, +1.161327548990211907411719105561e-11L, 4.785569941615255008968280209991e-11L, +5.595107356360146549819920947848e-11L, 1.673930035747684999707469623769e-11L, +2.611250523102718193166964451527e-11L, 1.384250305661681615897729354721e-11L, +2.278105796029649304219088055497e-11L, 3.586371256902077123693302823191e-13L, +3.342842716722085763523965049902e-11L, 3.670968534386232233574504707347e-11L, +6.196832945990602657404893210974e-13L, 4.169679549603939604438777470618e-11L, +2.274351222528987867221331091414e-11L, 8.872382531858169709022188891298e-11L, +4.344925246387385146717580155420e-11L, 8.707377833692929105196832265348e-11L, +2.881671577173773513055821329154e-11L, 9.763393361566846205717315422347e-12L, +6.476296480975626822569454546857e-11L, 3.569597877124574002505169001136e-11L, +1.772007853877284712958549977698e-11L, 1.347141028196192304932683248872e-11L, +3.676555884905046507598141175404e-11L, 4.881564068032948912761478588710e-11L, +4.416715404487185607337693704681e-11L, 2.314128999621257979016734983553e-11L, +5.380138283056477968352133002913e-11L, 4.393022562414389595406841771063e-11L, +6.299816718559209976839402028537e-12L, 7.304511413053165996581483735843e-11L, +1.978381648117426221467592544212e-10L, 2.024381732686578226139414070989e-10L, +2.255178211796380992141612703464e-10L, 1.204566302442290648452508620986e-10L, +1.034473912921080457667329099995e-10L, 2.225691010059030834353745950874e-10L, +4.817137162794350606107263804151e-11L, 6.565755971506095086327587326326e-11L, +1.644791039522307629611529931429e-10L, 2.820930388953087163050126809014e-11L, +1.766182540818701085571546539514e-10L, 2.124059054092171070266466628320e-10L, +1.567258302596026515190288816001e-10L, 1.742241535800378094231540188685e-10L, +3.038550253253096300737572104929e-11L, 5.925991958164150280814584656688e-11L, 
+3.355266774764151155289750652594e-11L, 2.637254809561744853531409402995e-11L, +3.227621096606048365493782702458e-11L, 1.094459672377587282585894259882e-10L, +6.064676448464127209709358607166e-11L, 1.182850444360454453720999258140e-10L, +1.428492049425553288966601449688e-11L, 3.032079976125434624889374125094e-10L, +3.784543889504767060855636487744e-10L, 3.540092982887960328254439790467e-10L, +4.020318667701700464612998296302e-10L, 4.544042324059585739827798668654e-10L, +3.645299460952866120296998202703e-10L, 2.776662293911361485235212513020e-12L, +1.708865101734375304910370400700e-10L, 3.909810965716415233488278047493e-10L, +7.606461848875826105025137974947e-11L, 3.263814502297453347587046149712e-10L, +1.499334758629144388918183376012e-10L, 3.771581242675818925565576303133e-10L, +1.746932950084818923507049088298e-11L, 2.837781909176306820465786987027e-10L, +3.859312847318946163435901230778e-10L, 4.601335192895268187473357720101e-10L, +2.811262558622337888849804940684e-10L, 4.060360843532416964489955306249e-10L, +8.058369357752989796958168458531e-11L, 3.725546414244147566166855921414e-10L, +1.040286509953292907344053122733e-10L, 3.094968093808145773271362531155e-10L, +4.454811192340438979284756311844e-10L, 5.676678748199027602705574110388e-11L, +2.518376833121948163898128509842e-10L, 3.907837370041422778250991189943e-10L, +7.687158710333735613246114865100e-11L, 1.334418885622867537060685125566e-10L, +1.353147719826124443836432060856e-10L, 2.825131007652335581739282335732e-10L, +4.161925466840049254333079881002e-10L, 4.265713490956410156084891599630e-10L, +2.437693664320585461575989523716e-10L, 4.466519138542116247357297503086e-10L, +3.113875178143440979746983590908e-10L, 4.910822904159495654488736486097e-11L, +2.818831329324169810481585538618e-12L, 7.767009768334052125229252512543e-12L, +3.698307026936191862258804165254e-10L, +#endif +}; + +/* + * mx_atanl(x, err) + * Table look-up algorithm + * By K.C. Ng, March 9, 1989 + * + * Algorithm. 
+ * + * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)). + * We use poly1(x) to approximate atan(x) for x in [0,1/8] with + * error (relative) + * |(atan(x)-poly1(x))/x|<= 2^-140 + * + * and use poly2(x) to approximate atan(x) for x in [0,1/65] with + * error + * |atan(x)-poly2(x)|<= 2^-143.7 + * + * Here poly1 and poly2 are odd polynomials of the following form: + * x + x^3*(a1+x^2*(a2+...)) + * + * (0). Purge off Inf and NaN and 0 + * (1). Reduce x to positive by atan(x) = -atan(-x). + * (2). For x <= 1/8, use + * (2.1) if x < 2^(-prec/2), atan(x) = x with inexact flag raised + * (2.2) Otherwise + * atan(x) = poly1(x) + * (3). For x >= 8 then (prec = 78) + * (3.1) if x >= 2^prec, atan(x) = atan(inf) - pio2_lo + * (3.2) if x >= 2^(prec/3), atan(x) = atan(inf) - 1/x + * (3.3) if x > 65, atan(x) = atan(inf) - poly2(1/x) + * (3.4) Otherwise, atan(x) = atan(inf) - poly1(1/x) + * + * (4). Now x is in (0.125, 8) + * Find y that matches x to 4.5 bits after the binary point (easy). + * If iy is the high word of y, then + * single : j = (iy - 0x3e000000) >> 19 + * double : j = (iy - 0x3fc00000) >> 16 + * quad : j = (iy - 0x3ffc0000) >> 12 + * + * Let s = (x-y)/(1+x*y). Then + * atan(x) = atan(y) + atan(s) + * = _TBL_atan_hi[j] + (_TBL_atan_lo[j] + poly2(s) ) + * + * Note. |s| <= 1.5384615385e-02 = 1/65. 
Maxium occurs at x = 1.03125 + * + */
+
+/*
+ * p[0] - p[16] for atan(x) =
+ *   x + x^3*(p1+x^2*(p2+...))
+ *
+ * pe[] holds p0 .. p3 split into (leading, trailing) pairs, the layout
+ * mx_polyl takes for its coefficient array; p[] below holds all seventeen
+ * coefficients as single long doubles.
+ */
+static const long double pe[] = {
+	1.0L,
+	0.0L,
+#if defined(__sparc)
+	-0.33333333333333332870740406406184774823L,
+	-4.62592926927148558508441072595508240609e-18L,
+	0.19999999999999999722444243843710864894L,
+	2.77555756156289124602047010782090464486e-18L,
+	-0.14285714285714285615158658515611023176L,
+	-9.91270557700756738621231719241800559409e-19L,
+#elif defined(__i386) || defined(__LITTLE_ENDIAN)
+	-0.33333333325572311878204345703125L,
+	-7.76102145512898763020833333192787755766644373e-11L,
+	0.19999999995343387126922607421875L,
+	4.65661287307739257812498949613909375938538636e-11L,
+	-0.142857142840512096881866455078125L,
+	-1.66307602609906877787419703858463013035681375e-11L,
+#endif
+};
+
+static const long double p[] = {	/* p[0] - p[16] */
+	1.0L,
+	-3.33333333333333333333333333333333333319278775586e-0001L,
+	1.99999999999999999999999999999999894961390937601e-0001L,
+	-1.42857142857142857142857142856866970385846301312e-0001L,
+	1.11111111111111111111111110742899094415954427738e-0001L,
+	-9.09090909090909090909087972707015549231951421806e-0002L,
+	7.69230769230769230767699003016385628597359717046e-0002L,
+	-6.66666666666666666113842763495291228025226575259e-0002L,
+	5.88235294117646915706902204947653640091126695962e-0002L,
+	-5.26315789473657016886225044679594035524579379810e-0002L,
+	4.76190476186633969331771169790375592681525481267e-0002L,
+	-4.34782608290146274616081389793141896576997370161e-0002L,
+	3.99999968161267722260103962788865225205057218988e-0002L,
+	-3.70368536844778256320786172745225703228683638328e-0002L,
+	3.44752320396524479494062858284036892703898522150e-0002L,
+	-3.20491216046653214683721787776813360591233428081e-0002L,
+	2.67632651033434456758550618122802167256870856514e-0002L,
+};
+
+/*
+ * q[0] - q[8] for atan(x) = x + x^3*(q0+x^2*(q1+...))
+ *
+ * qe[] holds 1, q0 and q1 split into (leading, trailing) pairs, the
+ * layout mx_polyl takes for its coefficient array.
+ */
+static const long double qe[] = {
+	1.0L,
+	0.0L,
+#if defined(__sparc)
+	-0.33333333333333332870740406406184774823486804962158203125L,
+	-4.625929269271485585069345465471207312531868714634217630e-18L,
+	0.19999999999999999722444243843710864894092082977294921875L,
+	2.7755575615628864268260553912956813621977220359134667560e-18L,
+#elif defined(__i386) || defined(__LITTLE_ENDIAN)
+	-0.33333333325572311878204345703125L,
+	-7.76102145512898763020833333042135150927893e-11L,
+	0.19999999995343387126922607421875L,
+	4.656612873077392578124507576697622106863058e-11L,
+#endif
+};
+
+static const long double q[] = {	/* q[0] - q[8] */
+	-3.33333333333333333333333333333333333304213515094e-0001L,
+	1.99999999999999999999999999999995075766976221077e-0001L,
+	-1.42857142857142857142857142570379604317921113079e-0001L,
+	1.11111111111111111111102923861900979127978214077e-0001L,
+	-9.09090909090909089586854075816999506863320031460e-0002L,
+	7.69230769230756334929213246003824644696974730368e-0002L,
+	-6.66666666589192433974402013508912138168133579856e-0002L,
+	5.88235013696778007696800252045588307023299350858e-0002L,
+	-5.25754959898164576495303840687699583228444695685e-0002L,
+};
+
+static const long double +two8700 = 9.140338438955067659002088492701e+2618L, /* 2^8700 */ +twom8700 = 1.094051392821643668051436593760e-2619L, /* 2^-8700 */ +one = 1.0L, +zero = 0.0L, +pi = 3.1415926535897932384626433832795028841971693993751L, +pio2 = 1.57079632679489661923132169163975144209858469968755L, +pio4 = 0.785398163397448309615660845819875721049292349843776L, +pi3o4 = 2.356194490192344928846982537459627163147877049531329L, +#if defined(__sparc) +pi_lo = 8.67181013012378102479704402604335196876232e-35L, +pio2_lo = 4.33590506506189051239852201302167598438116e-35L, +pio4_lo = 2.16795253253094525619926100651083799219058e-35L, +pi3o4_lo = 6.50385759759283576859778301953251397657174e-35L; +#elif defined(__i386) || defined(__LITTLE_ENDIAN) +pi_lo = -5.01655761266833202355732708e-20L, +pio2_lo = -2.50827880633416601177866354e-20L, +pio4_lo = 
-1.25413940316708300588933177e-20L,
+pi3o4_lo = -9.18342907192877118770525931e-20L;
+#endif
+/* mx_atanl(x, err): returns an approximation of atan(x); *err gets the residual of the final sum. */
+static long double
+mx_atanl(long double x, long double *err) {
+	long double y, z, r, s, t, w, s_h, s_l, x_h, x_l, zz[3], ee[2], z_h,
+		z_l, r_h, r_l, u, v;
+	int ix, iy, hx, i, j;
+	float fx;
+
+	hx = HI_XWORD(x);
+	ix = hx & (~0x80000000);	/* high word of |x| */
+
+	/* for |x| < 1/8 */
+	if (ix < 0x3ffc0000) {
+		if (ix < 0x3ff30000) {	/* when |x| < 2**-12 */
+			if (ix < 0x3fc60000) {	/* if |x| < 2**-prec/2 */
+				*err = (long double) ((int) x);	/* 0 here; presumably the cast is there to raise inexact */
+				return (x);
+			}
+			z = x * x;
+			t = q[8];
+			for (i = 7; i >= 0; i--) t = q[i] + z * t;	/* Horner evaluation in z */
+			t *= x * z;
+			r = x + t;
+			*err = t - (r - x);	/* rounding error of r = x + t */
+			return (r);
+		}
+		z = x * x;
+
+		/* use long double precision at p4 and on */
+		t = p[16];
+		for (i = 15; i >= 4; i--) t = p[i] + z * t;
+		ee[0] = z * t;
+
+		x_h = x; HALF(x_h);
+		z_h = z; HALF(z_h);
+		x_l = x - x_h;
+		z_l = (x_h * x_h - z_h);
+		zz[0] = z;
+		zz[1] = z_h;
+		zz[2] = z_l + x_l * (x + x_h);
+
+		/* compute (1+z*(p1+z*(p2+z*(p3+e)))) */
+
+		mx_polyl(zz, pe, ee, 3);
+
+		/* finally x*(1+z*(p1+...)) */
+		r = x_h * ee[0];
+		t = x * ee[1] + x_l * ee[0];
+		s = t + r;
+		*err = t - (s - r);
+		return (s);
+	}
+	/* for |x| >= 8.0 */
+	if (ix >= 0x40020000) {	/* x >= 8 */
+		x = fabsl(x);
+		if (ix >= 0x402e0000) {	/* x >= 2**47 */
+			if (ix >= 0x408b0000) {	/* x >= 2**140 */
+				y = -pio2_lo;
+			} else
+				y = one / x - pio2_lo;
+			if (hx >= 0) {
+				t = pio2 - y;
+				*err = -(y - (pio2 - t));
+			} else {
+				t = y - pio2;
+				*err = y - (pio2 + t);
+			}
+			return (t);
+		} else {
+			/* compute r = 1/x */
+			r = one / x;
+			z = r * r;
+			x_h = x; HALF(x_h);
+			r_h = r; HALF(r_h);
+			z_h = z; HALF(z_h);
+			r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
+			z_l = (r_h * r_h - z_h);
+			zz[0] = z;
+			zz[1] = z_h;
+			zz[2] = z_l + r_l * (r + r_h);
+			if (ix < 0x40050400) {	/* 8 < x < 65 */
+				/* use double precision at p4 and on */
+				t = p[16];
+				for (i = 15; i >= 4; i--) t = p[i] + z * t;
+				ee[0] = z * t;
+				/* compute (1+z*(p1+z*(p2+z*(p3+e)))) */
+				mx_polyl(zz, pe, ee, 3);
+			} else {	/* 65 <= x < 2**47 */
+				/* use long double at q3 and on */
+				t = q[8];
+				for (i = 7; i >= 2; i--) t = q[i] + z * t;
+				ee[0] = z * t;
+				/* compute (1+z*(q1+z*(q2+e))) */
+				mx_polyl(zz, qe, ee, 2);
+			}
+			/* pio2 - r*(1+...) */
+			v = r_h * ee[0];
+			t = pio2_lo - (r * ee[1] + r_l * ee[0]);
+			if (hx >= 0) {
+				s = pio2 - v;
+				t -= (v - (pio2 - s));
+			} else {
+				s = v - pio2;
+				t = -(t - (v - (s + pio2)));
+			}
+			w = s + t;
+			*err = t - (w - s);
+			return (w);
+		}
+	}
+	/* now x is between 1/8 and 8 */
+	iy = (ix + 0x00000800) & 0x7ffff000;	/* round the high word to a multiple of 0x1000 */
+	j = (iy - 0x3ffc0000) >> 12;	/* index used for fx and the TBL_atan_* tables */
+	((int *) &fx)[0] = 0x3e000000 + (j << 19);	/* float bit pattern: 0.125 advanced by j steps of 2**19 */
+	y = (long double) fx;
+	x = fabsl(x);
+
+	w = (x - y);
+	v = 1.0L / (one + x * y);
+	s = w * v;	/* s = (x-y)/(1+x*y), i.e. atan(x) = atan(y) + atan(s) */
+	z = s * s;
+	/* use long double precision at q3 and on */
+	t = q[8];
+	for (i = 7; i >= 2; i--) t = q[i] + z * t;
+	ee[0] = z * t;
+	s_h = s; HALF(s_h);
+	z_h = z; HALF(z_h);
+	x_h = x; HALF(x_h);
+	t = one + x * y; HALF(t);
+	r = -((x_h - x) * y - (x_h * y - (t - one)));
+	s_l = -v * (s_h * r - (w - s_h * t));
+	z_l = (s_h * s_h - z_h);
+	zz[0] = z;
+	zz[1] = z_h;
+	zz[2] = z_l + s_l * (s + s_h);
+	/* compute (1+z*(q1+z*(q2+e))) by call mx_poly */
+	mx_polyl(zz, qe, ee, 2);
+	v = s_h * ee[0];
+	t = TBL_atan_lol[j] + (s * ee[1] + s_l * ee[0]);
+	u = TBL_atan_hil[j];
+	s = u + v;
+	t += (v - (s - u));
+	w = s + t;
+	*err = t - (w - s);
+	if (hx < 0) {	/* negative x: negate both the result and its tail */
+		w = -w;
+		*err = -*err;
+	}
+	return (w);
+}
+/* __k_atan2l(y, x, w): returns atan2(y, x); *w receives the low-order tail of the result. */
+long double
+__k_atan2l(long double y, long double x, long double *w) {
+	long double t, xh, th, t1, t2, w1, w2;
+	int ix, iy, hx, hy;
+
+	hy = HI_XWORD(y);
+	hx = HI_XWORD(x);
+	iy = hy & ~0x80000000;
+	ix = hx & ~0x80000000;
+
+	*w = 0.0;
+	if (ix >= 0x7fff0000 || iy >= 0x7fff0000) {	/* ignore inexact */
+		if (isnanl(x) || isnanl(y))
+			return (x * y);
+		else if (iy < 0x7fff0000) {
+			if (hx >= 0) {	/* ATAN2(+-finite, +inf) is +-0 */
+				*w *= y;	/* 0 * y: zero carrying the sign of y */
+				return (*w);
+			} else {	/* ATAN2(+-finite, -inf) is +-pi */
+				*w = copysignl(pi_lo, y);
+
return (copysignl(pi, y));
+			}
+		} else if (ix < 0x7fff0000) {
+			/* ATAN2(+-inf, finite) is +-pi/2 */
+			*w = (hy >= 0)? pio2_lo : -pio2_lo;
+			return ((hy >= 0)? pio2 : -pio2);
+		} else if (hx > 0) {	/* ATAN2(+-INF,+INF) = +-pi/4 */
+			*w = (hy >= 0)? pio4_lo : -pio4_lo;
+			return ((hy >= 0)? pio4 : -pio4);
+		} else {	/* ATAN2(+-INF,-INF) = +-3pi/4 */
+			*w = (hy >= 0)? pi3o4_lo : -pi3o4_lo;
+			return ((hy >= 0)? pi3o4 : -pi3o4);
+		}
+	} else if (x == zero || y == zero) {
+		if (y == zero) {
+			if (hx >= 0)	/* ATAN2(+-0, +(0 <= x <= inf)) is +-0 */
+				return (y);
+			else {	/* ATAN2(+-0, -(0 <= x <= inf)) is +-pi */
+				*w = (hy >= 0)? pi_lo : -pi_lo;
+				return ((hy >= 0)? pi : -pi);
+			}
+		} else {	/* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2 */
+			*w = (hy >= 0)? pio2_lo : -pio2_lo;
+			return ((hy >= 0)? pio2 : -pio2);
+		}
+	} else if (iy - ix > 0x00640000) {	/* |x/y| < 2 ** -100 */
+		*w = (hy >= 0)? pio2_lo : -pio2_lo;
+		return ((hy >= 0)? pio2 : -pio2);
+	} else if (ix - iy > 0x00640000) {	/* |y/x| < 2 ** -100 */
+		if (hx < 0) {
+			*w = (hy >= 0)? pi_lo : -pi_lo;
+			return ((hy >= 0)? pi : -pi);
+		} else {
+			t = y / x;
+			th = t; HALF(th);
+			xh = x; HALF(xh);
+			t1 = (x - xh) * t + xh * (t - th);
+			t2 = y - xh * th;
+			*w = (t2 - t1) / x;	/* t2 - t1 = y - x*t, so *w is the tail of y/x */
+			return (t);
+		}
+	} else {
+		if (ix >= 0x5fff3000) {	/* large |x|: scale both operands down by 2^-8700 */
+			x *= twom8700;
+			y *= twom8700;
+		} else if (ix < 0x203d0000) {	/* small |x|: scale both operands up by 2^8700 */
+			x *= two8700;
+			y *= two8700;
+		}
+		y = fabsl(y);
+		x = fabsl(x);
+		t = y / x;
+		th = t; HALF(th);
+		xh = x; HALF(xh);
+		t1 = (x - xh) * t + xh * (t - th);
+		t2 = y - xh * th;
+		w1 = mx_atanl(t, &w2);
+		w2 += (t2 - t1) / (x + y * t);	/* (y - x*t)/(x + y*t): first-order fix for the rounding in t */
+		if (hx < 0) {	/* x < 0: result is pi - atan(|y/x|), tail updated to match */
+			t1 = pi - w1;
+			t2 = pi - t1;
+			w2 = (pi_lo - w2) - (w1 - t2);
+			w1 = t1;
+		}
+		*w = (hy >= 0)? w2 : -w2;
+		return ((hy >= 0)? w1 : -w1);
+	}
+}
diff --git a/usr/src/libm/src/complex/k_cexp.c b/usr/src/libm/src/complex/k_cexp.c
new file mode 100644
index 0000000..a219e99
--- /dev/null
+++ b/usr/src/libm/src/complex/k_cexp.c
@@ -0,0 +1,179 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)k_cexp.c 1.3 06/01/31 SMI"
+
+/* INDENT OFF */
+/*
+ * double __k_cexp(double x, int *n);
+ * Returns the exponential of x in the form of 2**n * y, y=__k_cexp(x,&n).
+ *
+ * Method
+ *   1. Argument reduction:
+ *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
+ *      Given x, find r and integer k such that
+ *
+ *          x = k*ln2 + r, |r| <= 0.5*ln2.
+ *
+ *      Here r will be represented as r = hi-lo for better
+ *      accuracy.
+ *
+ *   2. Approximation of exp(r) by a special rational function on
+ *      the interval [0,0.34658]:
+ *      Write
+ *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
+ *      We use a special Remez algorithm on [0,0.34658] to generate
+ *      a polynomial of degree 5 to approximate R. The maximum error
+ *      of this polynomial approximation is bounded by 2**-59.
In
+ *      other words,
+ *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
+ *      (where z=r*r, and the values of P1 to P5 are listed below)
+ *      and
+ *          |                  5          |     -59
+ *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
+ *          |                             |
+ *      The computation of exp(r) thus becomes
+ *                         2*r
+ *          exp(r) = 1 + -------
+ *                        R - r
+ *                             r*R1(r)
+ *                 = 1 + r + ----------- (for better accuracy)
+ *                            2 - R1(r)
+ *      where
+ *                           2       4             10
+ *          R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
+ *
+ *   3. Return n and y with 2**n * y = 2**k * exp(r) (part of 2**k may be folded into y).
+ *
+ * Special cases:
+ *      exp(INF) is INF, exp(NaN) is NaN;
+ *      exp(-INF) is 0, and
+ *      for finite argument, only exp(0)=1 is exact.
+ *
+ * Range and Accuracy:
+ *      When |x| is really big, say |x| > 50000, the accuracy
+ *      is not important because the ultimate result will over or under
+ *      flow. So we will simply replace n = 50000 and r = 0.0. For
+ *      moderate size x, according to an error analysis, the error is
+ *      always less than 1 ulp (unit in the last place).
+ *
+ * Constants:
+ * The hexadecimal values are the intended ones for the following
+ * constants. The decimal values may be used, provided that the
+ * compiler will convert from decimal to binary accurately enough
+ * to produce the hexadecimal values shown.
+ */
+/* INDENT ON */
+
+#include "libm.h"		/* __k_cexp */
+#include "complex_wrapper.h"	/* HI_WORD/LO_WORD */
+
+/* INDENT OFF */
+static const double
+one = 1.0,
+two128 = 3.40282366920938463463e+38,	/* 2**128 */
+halF[2] = {
+	0.5, -0.5,
+},
+ln2HI[2] = {
+	6.93147180369123816490e-01,	/* 0x3fe62e42, 0xfee00000 */
+	-6.93147180369123816490e-01,	/* 0xbfe62e42, 0xfee00000 */
+},
+ln2LO[2] = {
+	1.90821492927058770002e-10,	/* 0x3dea39ef, 0x35793c76 */
+	-1.90821492927058770002e-10,	/* 0xbdea39ef, 0x35793c76 */
+},
+invln2 = 1.44269504088896338700e+00,	/* 0x3ff71547, 0x652b82fe */
+P1 = 1.66666666666666019037e-01,	/* 0x3FC55555, 0x5555553E */
+P2 = -2.77777777770155933842e-03,	/* 0xBF66C16C, 0x16BEBD93 */
+P3 = 6.61375632143793436117e-05,	/* 0x3F11566A, 0xAF25DE2C */
+P4 = -1.65339022054652515390e-06,	/* 0xBEBBBD41, 0xC5D26BF1 */
+P5 = 4.13813679705723846039e-08;	/* 0x3E663769, 0x72BEA4D0 */
+/* INDENT ON */
+/* __k_cexp(x, n): returns y and stores *n; per the file header, exp(x) is 2**(*n) * y. */
+double
+__k_cexp(double x, int *n) {
+	double hi, lo, c, t;
+	int k, xsb;
+	unsigned hx, lx;
+
+	hx = HI_WORD(x);	/* high word of x */
+	lx = LO_WORD(x);	/* low word of x */
+	xsb = (hx >> 31) & 1;	/* sign bit of x */
+	hx &= 0x7fffffff;	/* high word of |x| */
+
+	/* filter out huge and non-finite arguments */
+	if (hx >= 0x40e86a00) {	/* if |x| > 50000 */
+		if (hx >= 0x7ff00000) {
+			*n = 1;
+			if (((hx & 0xfffff) | lx) != 0)
+				return (x + x);	/* NaN */
+			else
+				return ((xsb == 0) ? x : 0.0);
+						/* exp(+-inf)={inf,0} */
+		}
+		*n = (xsb == 0) ? 50000 : -50000;
+		return (one + ln2LO[1] * ln2LO[1]);	/* generate inexact */
+	}
+
+	*n = 0;
+	/* argument reduction */
+	if (hx > 0x3fd62e42) {	/* if |x| > 0.5 ln2 */
+		if (hx < 0x3FF0A2B2) {	/* and |x| < 1.5 ln2 */
+			hi = x - ln2HI[xsb];
+			lo = ln2LO[xsb];
+			k = 1 - xsb - xsb;	/* k = +1 for x > 0, -1 for x < 0 */
+		} else {
+			k = (int) (invln2 * x + halF[xsb]);
+			t = k;
+			hi = x - t * ln2HI[0];
+			/* t*ln2HI is exact for t<2**20 */
+			lo = t * ln2LO[0];
+		}
+		x = hi - lo;
+		*n = k;
+	} else if (hx < 0x3e300000) {	/* when |x|<2**-28 */
+		return (one + x);
+	} else
+		k = 0;
+
+	/* x is now in primary range */
+	t = x * x;
+	c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
+	if (k == 0)
+		return (one - ((x * c) / (c - 2.0) - x));
+	else {
+		t = one - ((lo - (x * c) / (2.0 - c)) - hi);
+		if (k > 128) {
+			t *= two128;	/* fold 2**128 into the result */
+			*n = k - 128;	/* the rest of the scale is left to the caller */
+		} else if (k > 0) {
+			HI_WORD(t) += (k << 20);	/* add k to the exponent field: t *= 2**k */
+			*n = 0;
+		}
+		return (t);	/* for k < 0, *n keeps the value k set above */
+	}
+}
diff --git a/usr/src/libm/src/complex/k_cexpl.c b/usr/src/libm/src/complex/k_cexpl.c
new file mode 100644
index 0000000..25b7eb2
--- /dev/null
+++ b/usr/src/libm/src/complex/k_cexpl.c
@@ -0,0 +1,282 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.
All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)k_cexpl.c 1.4 06/01/31 SMI"
+
+/* INDENT OFF */
+/*
+ * long double __k_cexpl(long double x, int *n);
+ * Returns the exponential of x in the form of 2**n * y, y=__k_cexpl(x,&n).
+ *
+ *   1. Argument Reduction: given the input x, find r and integer k
+ *      and j such that
+ *          x = (32k+j)*(ln2/32) + r, |r| <= (1/64)*ln2 .
+ *
+ *   2. expl(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r))
+ *      Note:
+ *      a. expm1(r) = (2r)/(2-R), R = r - r^2*(t1 + t2*r^2 + ... + t5*r^8)
+ *      b. 2^(j/32) is represented as
+ *              exp2_32_hi[j]+exp2_32_lo[j]
+ *         where
+ *              exp2_32_hi[j] = 2^(j/32) rounded
+ *              exp2_32_lo[j] = 2^(j/32) - exp2_32_hi[j].
+ *
+ * Special cases:
+ *      expl(INF) is INF, expl(NaN) is NaN;
+ *      expl(-INF)= 0;
+ *      for finite argument, only expl(0)=1 is exact.
+ *
+ * Accuracy:
+ *      according to an error analysis, the error is always less than
+ *      an ulp (unit in the last place).
+ *
+ * Misc. info.
+ *      When |x| is really big, say |x| > 200000, the accuracy
+ *      is not important because the ultimate result will over or under
+ *      flow. So we will simply replace n = +-200000 and r = 0.0. For
+ *      moderate size x, according to an error analysis, the error is
+ *      always less than 1 ulp (unit in the last place).
+ *
+ * Constants:
+ * Only decimal values are given. We assume that the compiler will convert
+ * from decimal to binary accurately enough to produce the correct
+ * hexadecimal values.
+ */ +/* INDENT ON */ + +#include "libm.h" /* __k_cexpl */ +#include "complex_wrapper.h" /* HI_XWORD */ + +/* INDENT OFF */ +/* ln2/32 = 0.0216608493924982909192885037955680177523593791987579766912713 */ +#if defined(__i386) +static const long double + /* 43 significant bits, 21 trailing zeros */ +ln2_32hi = 2.166084939249657281834515742957592010498046875e-2L, +ln2_32lo = 1.7181009433463659920976473789104487579766912713e-15L; +static const long double exp2_32_hi[] = { /* exp2_32[j] = 2^(j/32) */ + 1.0000000000000000000000000e+00L, + 1.0218971486541166782081522e+00L, + 1.0442737824274138402382006e+00L, + 1.0671404006768236181297224e+00L, + 1.0905077326652576591003302e+00L, + 1.1143867425958925362894369e+00L, + 1.1387886347566916536971221e+00L, + 1.1637248587775775137938619e+00L, + 1.1892071150027210666875674e+00L, + 1.2152473599804688780476325e+00L, + 1.2418578120734840485256747e+00L, + 1.2690509571917332224885722e+00L, + 1.2968395546510096659215822e+00L, + 1.3252366431597412945939118e+00L, + 1.3542555469368927282668852e+00L, + 1.3839098819638319548151403e+00L, + 1.4142135623730950487637881e+00L, + 1.4451808069770466200253470e+00L, + 1.4768261459394993113155431e+00L, + 1.5091644275934227397133885e+00L, + 1.5422108254079408235859630e+00L, + 1.5759808451078864864006862e+00L, + 1.6104903319492543080837174e+00L, + 1.6457554781539648445110730e+00L, + 1.6817928305074290860378350e+00L, + 1.7186192981224779156032914e+00L, + 1.7562521603732994831094730e+00L, + 1.7947090750031071864148413e+00L, + 1.8340080864093424633989166e+00L, + 1.8741676341102999013002103e+00L, + 1.9152065613971472938202589e+00L, + 1.9571441241754002689657438e+00L, +}; +static const long double exp2_32_lo[] = { + 0.0000000000000000000000000e+00L, + 2.6327965667180882569382524e-20L, + 8.3765863521895191129661899e-20L, + 3.9798705777454504249209575e-20L, + 1.0668046596651558640993042e-19L, + 1.9376009847285360448117114e-20L, + 6.7081819456112953751277576e-21L, + 1.9711680502629186462729727e-20L, + 
2.9932584438449523689104569e-20L, + 6.8887754153039109411061914e-20L, + 6.8002718741225378942847820e-20L, + 6.5846917376975403439742349e-20L, + 1.2171958727511372194876001e-20L, + 3.5625253228704087115438260e-20L, + 3.1129551559077560956309179e-20L, + 5.7519192396164779846216492e-20L, + 3.7900651177865141593101239e-20L, + 1.1659262405698741798080115e-20L, + 7.1364385105284695967172478e-20L, + 5.2631003710812203588788949e-20L, + 2.6328853788732632868460580e-20L, + 5.4583950085438242788190141e-20L, + 9.5803254376938269960718656e-20L, + 7.6837733983874245823512279e-21L, + 2.4415965910835093824202087e-20L, + 2.6052966871016580981769728e-20L, + 2.6876456344632553875309579e-21L, + 1.2861930155613700201703279e-20L, + 8.8166633394037485606572294e-20L, + 2.9788615389580190940837037e-20L, + 5.2352341619805098677422139e-20L, + 5.2578463064010463732242363e-20L, +}; +#else /* sparc */ +static const long double + /* 0x3FF962E4 2FEFA39E F35793C7 00000000 */ +ln2_32hi = 2.166084939249829091928849858592451515688e-2L, +ln2_32lo = 5.209643502595475652782654157501186731779e-27L; +static const long double exp2_32_hi[] = { /* exp2_32[j] = 2^(j/32) */ + 1.000000000000000000000000000000000000000e+0000L, + 1.021897148654116678234480134783299439782e+0000L, + 1.044273782427413840321966478739929008785e+0000L, + 1.067140400676823618169521120992809162607e+0000L, + 1.090507732665257659207010655760707978993e+0000L, + 1.114386742595892536308812956919603067800e+0000L, + 1.138788634756691653703830283841511254720e+0000L, + 1.163724858777577513813573599092185312343e+0000L, + 1.189207115002721066717499970560475915293e+0000L, + 1.215247359980468878116520251338798457624e+0000L, + 1.241857812073484048593677468726595605511e+0000L, + 1.269050957191733222554419081032338004715e+0000L, + 1.296839554651009665933754117792451159835e+0000L, + 1.325236643159741294629537095498721674113e+0000L, + 1.354255546936892728298014740140702804343e+0000L, + 1.383909881963831954872659527265192818002e+0000L, + 
1.414213562373095048801688724209698078570e+0000L, + 1.445180806977046620037006241471670905678e+0000L, + 1.476826145939499311386907480374049923924e+0000L, + 1.509164427593422739766019551033193531420e+0000L, + 1.542210825407940823612291862090734841307e+0000L, + 1.575980845107886486455270160181905008906e+0000L, + 1.610490331949254308179520667357400583459e+0000L, + 1.645755478153964844518756724725822445667e+0000L, + 1.681792830507429086062250952466429790080e+0000L, + 1.718619298122477915629344376456312504516e+0000L, + 1.756252160373299483112160619375313221294e+0000L, + 1.794709075003107186427703242127781814354e+0000L, + 1.834008086409342463487083189588288856077e+0000L, + 1.874167634110299901329998949954446534439e+0000L, + 1.915206561397147293872611270295830887850e+0000L, + 1.957144124175400269018322251626871491190e+0000L, +}; + +static const long double exp2_32_lo[] = { + +0.000000000000000000000000000000000000000e+0000L, + +1.805067874203309547455733330545737864651e-0035L, + -9.374520292280427421957567419730832143843e-0035L, + -1.596968447292758770712909630231499971233e-0035L, + +9.112493410125022978511686101672486662119e-0035L, + -6.504228206978548287230374775259388710985e-0035L, + -8.148468844525851137325691767488155323605e-0035L, + -5.066214576721800313372330745142903350963e-0035L, + -1.359830974688816973749875638245919118924e-0035L, + +9.497427635563196470307710566433246597109e-0035L, + -3.283170523176998601615065965333915261932e-0036L, + -5.017235709387190410290186530458428950862e-0035L, + -2.391474797689109171622834301602640139258e-0035L, + -8.350571357633908815298890737944083853080e-0036L, + +7.036756889073265042421737190671876440729e-0035L, + -5.182484853064646457536893018566956189817e-0035L, + +9.422242548621832065692116736394064879758e-0035L, + -3.967500825398862309167306130216418281103e-0035L, + +7.143528991563300614523273615092767243521e-0035L, + +1.159871252867985124246517834100444327747e-0035L, + +4.696933478358115495309739213201874466685e-0035L, + 
-3.386513175995004710799241984999819165197e-0035L,
+	-8.587318774298247068868655935103874453522e-0035L,
+	-9.605951548749350503185499362246069088835e-0035L,
+	+9.609733932128012784507558697141785813655e-0035L,
+	+6.378397921440028439244761449780848545957e-0035L,
+	+7.792430785695864249456461125169277701177e-0035L,
+	+7.361337767588456524131930836633932195088e-0035L,
+	-6.472995147913347230035214575612170525266e-0035L,
+	+8.587474417953698694278798062295229624207e-0035L,
+	+2.371815422825174835691651228302690977951e-0035L,
+	-3.026891682096118773004597373421900314256e-0037L,
+};
+#endif
+
+static const long double
+	one = 1.0L,
+	two = 2.0L,
+	ln2_64 = 1.083042469624914545964425189778400898568e-2L,
+	invln2_32 = 4.616624130844682903551758979206054839765e+1L;
+
+/* rational approximation coeffs for [-(ln2)/64,(ln2)/64] */
+static const long double
+	t1 = 1.666666666666666666666666666660876387437e-1L,
+	t2 = -2.777777777777777777777707812093173478756e-3L,
+	t3 = 6.613756613756613482074280932874221202424e-5L,
+	t4 = -1.653439153392139954169609822742235851120e-6L,
+	t5 = 4.175314851769539751387852116610973796053e-8L;
+/* INDENT ON */
+/* __k_cexpl(x, n): returns y and stores *n; per the file header, expl(x) is 2**(*n) * y. */
+long double
+__k_cexpl(long double x, int *n) {
+	int hx, ix, j, k;
+	long double t, r;
+
+	*n = 0;
+	hx = HI_XWORD(x);
+	ix = hx & 0x7fffffff;
+	if (hx >= 0x7fff0000)
+		return (x + x);	/* NaN or +inf */
+	if (((unsigned) hx) >= 0xffff0000)
+		return (-one / x);	/* NaN or -inf */
+	if (ix < 0x3fc30000)
+		return (one + x);	/* |x|<2^-60 */
+	if (hx > 0) {
+		if (hx > 0x401086a0) {	/* x > 200000 */
+			*n = 200000;
+			return (one);
+		}
+		k = (int) (invln2_32 * (x + ln2_64));
+	} else {
+		if (ix > 0x401086a0) {	/* x < -200000 */
+			*n = -200000;
+			return (one);
+		}
+		k = (int) (invln2_32 * (x - ln2_64));
+	}
+	j = k & 0x1f;	/* low five bits of k: index into exp2_32_hi/lo */
+	*n = k >> 5;	/* remaining bits of k: power of two */
+	t = (long double) k;
+	x = (x - t * ln2_32hi) - t * ln2_32lo;	/* reduced argument r */
+	t = x * x;
+	r = (x - t * (t1 + t * (t2 + t * (t3 + t * (t4 + t * t5))))) - two;
+	x = exp2_32_hi[j] - ((exp2_32_hi[j] * (x + x)) / r - exp2_32_lo[j]);
+	k >>= 5;
+	if (k > 240) {	/* fold 2**240 into the result, leave the rest in *n */
+		XFSCALE(x, 240);
+		*n -= 240;
+	} else if (k > 0) {	/* fold the whole scale into the result */
+		XFSCALE(x, k);
+		*n = 0;
+	}
+	return (x);
+}
diff --git a/usr/src/libm/src/complex/k_clog_r.c b/usr/src/libm/src/complex/k_clog_r.c
new file mode 100644
index 0000000..08e48ce
--- /dev/null
+++ b/usr/src/libm/src/complex/k_clog_r.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)k_clog_r.c 1.7 06/01/23 SMI"
+
+#include "libm.h"	/* __k_clog_r */
+#include "complex_wrapper.h"
+
+/* INDENT OFF */
+/*
+ * double __k_clog_r(double x, double y, double *e);
+ *
+ * Compute real part of complex natural logarithm of x+iy in extra precision
+ *
+ * __k_clog_r returns log(hypot(x, y)) with a correction term e.
+ *
+ * Accuracy: 70 bits
+ *
+ * Method.
+ * Let Z = x*x + y*y. Z can be normalized as Z = 2^N * z, 1 <= z < 2.
+ * We further break down z into 1 + zk + zh + zt, where
+ *      zk = K*(2^-7) matches z to 7.5 significant bits, 0 <= K <= 2^7-1
+ *      zh = (z-zk) rounded to 24 bits
+ *      zt = (z-zk-zh) rounded.
+ *
+ *          z - (1+zk)        (zh+zt)
+ * Let s = ------------ = ---------------, then
+ *          z + (1+zk)     2(1+zk)+zh+zt
+ *                                                       z
+ * log(Z) = N*log2 + log(z) = N*log2 + log(1+zk) + log(------)
+ *                                                      1+zk
+ *                                   1+s
+ *        = N*log2 + log(1+zk) + log(---)
+ *                                   1-s
+ *
+ *                                    1      3    1      5
+ *        = N*log2 + log(1+zk) + 2s + -- (2s)  + -- (2s)  + ...
+ *                                    12          80
+ *
+ * Note 1. For IEEE double precision, a seven degree odd polynomial
+ *              2s + P1*(2s)^3 + P2*(2s)^5 + P3*(2s)^7
+ *         is generated by a special remez algorithm to
+ *         approx log((1+s)/(1-s)) accurate to 72 bits.
+ * Note 2. 2s can be computed accurately as s2h+s2t by
+ *              r = 2/((zh+zt)+2(1+zk))
+ *              s2 = r*(zh+zt)
+ *              s2h = s2 rounded to float; v = 0.5*s2h;
+ *              s2t = r*((((zh-s2h*(1+zk))-v*zh)+zt)-v*zt)
+ */
+/* INDENT ON */
+
+static const double
+zero = 0.0,
+half = 0.5,
+two = 2.0,
+two120 = 1.32922799578491587290e+36,	/* 2^120 */
+ln2_h = 6.93147180369123816490e-01,	/* 3fe62e42 fee00000 */
+ln2_t = 1.90821492927058770002e-10,	/* 3dea39ef 35793c76 */
+P1 = .083333333333333351554108717377986202224765262191125,
+P2 = .01249999999819227552330700574633767185896464873834375,
+P3 = .0022321938458645656605471559987512516234702284287265625;
+
+/*
+* T[2k, 2k+1] = log(1+k*2^-7) for k = 0, ..., 2^7 - 1,
+* with T[2k] * 2^40 is an int
+*/
+
+static const double TBL_log1k[] = {
+0.00000000000000000000e+00, 0.00000000000000000000e+00,
+7.78214044203195953742e-03, 2.29894100462035112076e-14,
+1.55041865355087793432e-02, 4.56474807636434698847e-13,
+2.31670592811497044750e-02, 3.84673753843363762372e-13,
+3.07716586667083902285e-02, 4.52981425779092882775e-14,
+3.83188643018002039753e-02, 3.36395218465265063278e-13,
+4.58095360309016541578e-02, 3.92549008891706208826e-13,
+5.32445145181554835290e-02, 6.56799336898521766515e-13,
+6.06246218158048577607e-02, 6.29984819938331143924e-13,
+6.79506619080711971037e-02, 4.36552290856295281946e-13,
+7.52234212368421140127e-02, 7.45411685916941618656e-13,
+8.24436692109884461388e-02, 8.61451293608781447223e-14,
+8.96121586893059429713e-02, 3.81189648692113819551e-13, +9.67296264579999842681e-02, 5.51128027471986918274e-13, +1.03796793680885457434e-01, 7.58107392301637643358e-13, +1.10814366339582193177e-01, 7.07921017612766061755e-13, +1.17783035655520507134e-01, 8.62947404296943765415e-13, +1.24703478500123310369e-01, 8.33925494898414856118e-13, +1.31576357788617315236e-01, 1.01957352237084734958e-13, +1.38402322858382831328e-01, 7.36304357708705134617e-13, +1.45182009843665582594e-01, 8.32314688404647202319e-13, +1.51916042025732167531e-01, 1.09807540998552379211e-13, +1.58605030175749561749e-01, 8.89022343972466269900e-13, +1.65249572894936136436e-01, 3.71026439894104998399e-13, +1.71850256926518341061e-01, 1.40881279371111350341e-13, +1.78407657472234859597e-01, 5.83437522462346671423e-13, +1.84922338493379356805e-01, 6.32635858668445232946e-13, +1.91394852999110298697e-01, 5.19155912393432989209e-13, +1.97825743329303804785e-01, 6.16075577558872326221e-13, +2.04215541428311553318e-01, 3.79338185766902218086e-13, +2.10564769106895255391e-01, 4.54382278998146218219e-13, +2.16873938300523150247e-01, 9.12093724991498410553e-14, +2.23143551314024080057e-01, 1.85675709597960106615e-13, +2.29374101064422575291e-01, 4.23254700234549300166e-13, +2.35566071311950508971e-01, 8.16400106820959292914e-13, +2.41719936886511277407e-01, 6.33890736899755317832e-13, +2.47836163904139539227e-01, 4.41717553713155466566e-13, +2.53915209980732470285e-01, 2.30973852175869394892e-13, +2.59957524436686071567e-01, 2.39995404842117353465e-13, +2.65963548496984003577e-01, 1.53937761744554075681e-13, +2.71933715483100968413e-01, 5.40790418614551497411e-13, +2.77868451003087102436e-01, 3.69203750820800887027e-13, +2.83768173129828937817e-01, 8.15660529536291275782e-13, +2.89633292582948342897e-01, 9.43339818951269030846e-14, +2.95464212893421063200e-01, 4.14813187042585679830e-13, +3.01261330577290209476e-01, 8.71571536970835103739e-13, +3.07025035294827830512e-01, 8.40315630479242455758e-14, 
+3.12755710003330023028e-01, 5.66865358290073900922e-13, +3.18453731118097493891e-01, 4.37121919574291444278e-13, +3.24119468653407238889e-01, 8.04737201185162774515e-13, +3.29753286371669673827e-01, 7.98307987877335024112e-13, +3.35355541920762334485e-01, 3.75495772572598557174e-13, +3.40926586970454081893e-01, 1.39128412121975659358e-13, +3.46466767346100823488e-01, 1.07757430375726404546e-13, +3.51976423156884266064e-01, 2.93918591876480007730e-13, +3.57455888921322184615e-01, 4.81589611172320539489e-13, +3.62905493689140712377e-01, 2.27740761140395561986e-13, +3.68325561158599157352e-01, 1.08495696229679121506e-13, +3.73716409792905324139e-01, 6.78756682315870616582e-13, +3.79078352934811846353e-01, 1.57612037739694350287e-13, +3.84411698910298582632e-01, 3.34571026954408237380e-14, +3.89716751139530970249e-01, 4.94243121138567024911e-13, +3.94993808240542421117e-01, 3.26556988969071456956e-13, +4.00243164126550254878e-01, 4.62452051668403792833e-13, +4.05465108107819105498e-01, 3.45276479520397708744e-13, +4.10659924984429380856e-01, 8.39005077851830734139e-13, +4.15827895143593195826e-01, 1.17769787513692141889e-13, +4.20969294643327884842e-01, 8.01751287156832458079e-13, +4.26084395310681429692e-01, 2.18633432932159103190e-13, +4.31173464818130014464e-01, 2.41326394913331314894e-13, +4.36236766774527495727e-01, 3.90574622098307022265e-13, +4.41274560804231441580e-01, 6.43787909737320689684e-13, +4.46287102628048160113e-01, 3.71351419195920213229e-13, +4.51274644138720759656e-01, 7.37825488412103968058e-13, +4.56237433480964682531e-01, 6.22911850193784704748e-13, +4.61175715121498797089e-01, 6.71369279138460114513e-13, +4.66089729924533457961e-01, 6.57665976858006147528e-14, +4.70979715218163619284e-01, 6.27393263311115598424e-13, +4.75845904869856894948e-01, 1.07019317621142549209e-13, +4.80688529345570714213e-01, 1.81193463664411114729e-13, +4.85507815781602403149e-01, 9.84046527823262695501e-14, +4.90303988044615834951e-01, 5.78003198945402769376e-13, 
+4.95077266797125048470e-01, 7.26466128212511528295e-13, +4.99827869555701909121e-01, 7.47420700205478712293e-13, +5.04556010751912253909e-01, 4.83033149495532022300e-13, +5.09261901789614057634e-01, 1.93889170049107088943e-13, +5.13945751101346104406e-01, 8.88212395185718544720e-13, +5.18607764207445143256e-01, 6.00488896640545761201e-13, +5.23248143764249107335e-01, 2.98729182044413286731e-13, +5.27867089620485785417e-01, 3.56599696633478298092e-13, +5.32464798869114019908e-01, 3.57823965912763837621e-13, +5.37041465896436420735e-01, 4.47233831757482468946e-13, +5.41597282432121573947e-01, 6.22797629172251525649e-13, +5.46132437597407260910e-01, 7.28389472720657362987e-13, +5.50647117952394182794e-01, 2.68096466152116723636e-13, +5.55141507539701706264e-01, 7.99886451312335479470e-13, +5.59615787935399566777e-01, 2.31194938380053776320e-14, +5.64070138284478161950e-01, 3.24804121719935740729e-13, +5.68504735351780254859e-01, 8.88457219261483317716e-13, +5.72919753561109246220e-01, 6.76262872317054154667e-13, +5.77315365034337446559e-01, 4.86157758891509033842e-13, +5.81691739634152327199e-01, 4.70155322075549811780e-13, +5.86049045003164792433e-01, 4.13416470738355643357e-13, +5.90387446602107957006e-01, 6.84176364159146659095e-14, +5.94707107746216934174e-01, 4.75855340044306376333e-13, +5.99008189645246602595e-01, 8.36796786747576938145e-13, +6.03290851438032404985e-01, 5.18573553063418286042e-14, +6.07555250224322662689e-01, 2.19132812293400917731e-13, +6.11801541105705837253e-01, 2.87066276408616768331e-13, +6.16029877214714360889e-01, 7.99658758518543977451e-13, +6.20240409751204424538e-01, 6.53104313776336534177e-13, +6.24433288011459808331e-01, 4.33692711555820529733e-13, +6.28608659421843185555e-01, 5.30952189118357790115e-13, +6.32766669570628437214e-01, 4.09392332186786656392e-13, +6.36907462236194987781e-01, 8.74243839148582888557e-13, +6.41031179420679109171e-01, 2.52181884568428814231e-13, +6.45137961372711288277e-01, 8.73413388168702670246e-13, 
+6.49227946624705509748e-01, 4.04309142530119209805e-13,
+6.53301272011958644725e-01, 7.86994033233553225797e-13,
+6.57358072708120744210e-01, 2.39285932153437645135e-13,
+6.61398482245203922503e-01, 1.61085757539324585156e-13,
+6.65422632544505177066e-01, 5.85271884362515112697e-13,
+6.69430653942072240170e-01, 5.57027128793880294600e-13,
+6.73422675211440946441e-01, 7.25773856816637653180e-13,
+6.77398823590920073912e-01, 8.86066898134949155668e-13,
+6.81359224807238206267e-01, 6.64862680714687006264e-13,
+6.85304003098281100392e-01, 6.38316151706465171657e-13,
+6.89233281238557538018e-01, 2.51442307283760746611e-13,
+};
+
+/*
+ * Compute N*log2 + log(1+zk+zh+zt) in extra precision: returns the head, stores the tail in *zt
+ */
+static double k_log_NKz(int N, int K, double zh, double *zt)
+{
+	double y, r, w, s2, s2h, s2t, t, zk, v, P;
+
+	((int *)&zk)[HIWORD] = 0x3ff00000 + (K << 13);	/* zk = 1 + K*2^-7, built from its bit pattern */
+	((int *)&zk)[LOWORD] = 0;
+	t = zh + (*zt);
+	r = two / (t + two * zk);
+	s2h = s2 = r * t;
+	((int *)&s2h)[LOWORD] &= 0xe0000000;	/* s2h = s2 chopped to its leading 24 bits */
+	v = half * s2h;
+	w = s2 * s2;
+	s2t = r * ((((zh - s2h * zk) - v * zh) + (*zt)) - v * (*zt));
+	P = s2t + (w * s2) * ((P1 + w * P2) + (w * w) * P3);
+	P += N * ln2_t + TBL_log1k[K + K + 1];	/* accumulate the low-order parts */
+	t = N*ln2_h + TBL_log1k[K+K];	/* sum of the high-order parts */
+	y = t + (P + s2h);
+	P -= ((y - t) - s2h);	/* what the rounded sum y left behind */
+	*zt = P;
+	return (y);
+}
+/* __k_clog_r(x, y, er): returns log(hypot(x, y)); *er receives the correction term. */
+double
+__k_clog_r(double x, double y, double *er)
+{
+	double t1, t2, t3, t4, tk, z, wh, w, zh, zk;
+	int n, k, ix, iy, iz, nx, ny, nz, i, j;
+	unsigned lx, ly;
+
+	ix = (((int *)&x)[HIWORD]) & ~0x80000000;
+	lx = ((unsigned *)&x)[LOWORD];
+	iy = (((int *)&y)[HIWORD]) & ~0x80000000;
+	ly = ((unsigned *)&y)[LOWORD];
+	y = fabs(y); x = fabs(x);
+	if (ix < iy || (ix == iy && lx < ly)) {	/* force x >= y */
+		tk = x; x = y; y = tk;
+		n = ix, ix = iy; iy = n;
+		n = lx, lx = ly; ly = n;
+	}
+	*er = zero;
+	nx = ix >> 20; ny = iy >> 20;	/* biased exponents */
+	if (nx >= 0x7ff) {	/* x or y is Inf or NaN */
+		if (ISINF(ix, lx))
+			return (x);
+		else if (ISINF(iy, ly))
+			return (y);
+		else
+			return (x+y);	/* NaN */
+	}
+/*
+ * for x == 1 and tiny y (double y < 2^-35, extended y < 2^-46, quad y < 2^-70):
+ * log(sqrt(1+y^2)) = (y^2)/2 - (y^4)/4 + ... ~= (y^2)/2
+ */
+	if ((((ix - 0x3ff00000) | lx) == 0) && ny < (0x3ff - 35)) {
+		t2 = y * y;
+		if (ny >= 565) {	/* compute er = tail of t2 */
+			((int *)&wh)[HIWORD] = iy;
+			((unsigned *)&wh)[LOWORD] = ly & 0xf8000000;
+			*er = half * ((y - wh) * (y + wh) - (t2 - wh * wh));
+		}
+		return (half * t2);
+	}
+/*
+ * x or y is subnormal or zero
+ */
+	if (nx == 0) {
+		if ((ix | lx) == 0)
+			return (-1.0 / x);	/* x (>= y) is zero: -1/0 */
+		else {
+			x *= two120;	/* scale both operands by 2^120 */
+			y *= two120;
+			ix = ((int *)&x)[HIWORD];
+			lx = ((unsigned *)&x)[LOWORD];
+			iy = ((int *)&y)[HIWORD];
+			ly = ((unsigned *)&y)[LOWORD];
+			nx = (ix >> 20) - 120;
+			ny = (iy >> 20) - 120;
+			/* guard subnormal flush to 0 */
+			if ((ix | lx) == 0)
+				return (-1.0 / x);
+		}
+	} else if (ny == 0) {	/* y subnormal, scale it */
+		y *= two120;
+		iy = ((int *)&y)[HIWORD];
+		ly = ((unsigned *)&y)[LOWORD];
+		ny = (iy >> 20) - 120;
+	}
+	n = nx - ny;	/* exponent gap between x and y */
+/*
+ * return log(x) when y is zero or x >> y so that
+ * log(x) ~ log(sqrt(x*x+y*y)) to 27 extra bits
+ * (n > 62 for double, 78 for i386 extended, 122 for quad)
+ */
+	if (n > 62 || (iy | ly) == 0) {
+		i = (0x000fffff & ix) | 0x3ff00000;	/* normalize x */
+		((int *)&x)[HIWORD] = i;
+		i += 0x1000;
+		((int *)&zk)[HIWORD] = i & 0xffffe000;
+		((int *)&zk)[LOWORD] = 0;	/* zk matches 7.5 bits of x */
+		z = x - zk;
+		zh = (double)((float)z);
+		i >>= 13;
+		k = i & 0x7f;	/* index of zk */
+		n = nx - 0x3ff;
+		*er = z - zh;
+		if (i >> 17) {	/* if zk = 2.0, adjust scaling */
+			n += 1;
+			zh *= 0.5; *er *= 0.5;
+		}
+		w = k_log_NKz(n, k, zh, er);
+	} else {
+/*
+ * compute z = x*x + y*y
+ */
+		ix = (ix & 0xfffff) | 0x3ff00000;
+		iy = (iy & 0xfffff) | (0x3ff00000 - (n << 20));
+		((int *)&x)[HIWORD] = ix; ((int *)&y)[HIWORD] = iy;
+		t1 = x * x; t2 = y * y;
+		j = ((lx >> 26) + 1) >> 1;
+		((int *)&wh)[HIWORD] = ix + (j >> 5);
+		((unsigned *)&wh)[LOWORD] = (j << 27);
+		z = t1+t2;
+/*
+ * higher precision simulation x*x = t1 + t3, y*y = t2 + t4
+ */
+		tk = wh - x;
+		t3 = tk * tk - (two * wh * tk - (wh * wh - t1));
+		j = ((ly >> 26) + 1) >> 1;
+		((int *)&wh)[HIWORD] = iy + (j >> 5);
+		((unsigned *)&wh)[LOWORD] = (j << 27);
+		tk = wh - y;
+		t4 = tk * tk - (two * wh * tk - (wh * wh - t2));
+/*
+ * find zk matches z to 7.5 bits
+ */
+		nx -= 0x3ff;
+		iz = ((int *)&z)[HIWORD] + 0x1000;
+		k = (iz >> 13) & 0x7f;
+		nz = (iz >> 20) - 0x3ff;
+		((int *)&zk)[HIWORD] = iz & 0xffffe000;
+		((int *)&zk)[LOWORD] = 0;
+/*
+ * order t1,t2,t3,t4 according to their size
+ */
+		if (t2 >= fabs(t3)) {
+			if (fabs(t3) < fabs(t4)) {
+				wh = t3; t3 = t4; t4 = wh;
+			}
+		} else {
+			wh = t2; t2 = t3; t3 = wh;
+		}
+/*
+ * higher precision simulation: x * x + y * y = t1 + t2 + t3 + t4
+ *	= zk (7 bits) + zh (24 bits) + *er (tail) and call k_log_NKz
+ */
+		tk = t1 - zk;
+		zh = ((tk + t2) + t3) + t4;
+		((int *)&zh)[LOWORD] &= 0xe0000000;	/* zh chopped to its leading 24 bits */
+		w = fabs(zh);
+		if (w >= fabs(t2))
+			*er = (((tk - zh) + t2) + t3) + t4;
+		else {
+			if (n == 0) {
+				wh = half * zk;
+				wh = (t1 - wh) - (wh - t2);
+			} else
+				wh = tk + t2;
+			if (w >= fabs(t3))
+				*er = ((wh - zh) + t3) + t4;
+			else {
+				z = t3;
+				t3 += t4;
+				t4 -= t3 - z;
+				if (w >= fabs(t3))
+					*er = ((wh - zh) + t3) + t4;
+				else
+					*er = ((wh + t3) - zh) + t4;
+			}
+		}
+		if (nz == 3) {zh *= 0.125; *er *= 0.125; }
+		if (nz == 2) {zh *= 0.25; *er *= 0.25; }
+		if (nz == 1) {zh *= half; *er *= half; }
+		nz += nx + nx;
+		w = half * k_log_NKz(nz, k, zh, er);	/* half of log(x*x + y*y) = log(hypot(x, y)) */
+		*er *= half;
+	}
+	return (w);
+}
diff --git a/usr/src/libm/src/complex/k_clog_rl.c b/usr/src/libm/src/complex/k_clog_rl.c
new file mode 100644
index 0000000..e02f6f2
--- /dev/null
+++ b/usr/src/libm/src/complex/k_clog_rl.c
@@ -0,0 +1,620 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)k_clog_rl.c 1.4 06/01/23 SMI"
+
+#include "libm.h" /* __k_clog_rl */
+#include "complex_wrapper.h"
+
+/* INDENT OFF */
+/*
+ * long double __k_clog_rl(long double x, long double y, long double *e);
+ *
+ * Compute real part of complex natural logarithm of x+iy in extra precision
+ *
+ * __k_clog_rl returns log(hypot(x, y)) with a correction term e.
+ *
+ * Accuracy: quad 140 bits, intel extended 91 bits.
+ *
+ * Method.
+ * Assume X > Y >= 0 . Let X = 2**nx * x, Y = 2**nx * y, where 1 <= x < 2.
+ * Let Z = X*X + Y*Y. Then Z = 2**(nx+nx) * z, where z = x*x + y*y.
+ * Note that z < 8.
+ * Let Z = x*x + y*y. Z can be normalized as Z = 2**N * z, 1 <= z < 2.
+ * We further break down z into 1 + zk + zh + zt, where
+ *      zk = K*(2**-7) matches z to 7.5 significant bits, 0 <= K <= 2**7-1
+ *      zh = (z-zk) rounded to half of the current significant bits
+ *      zt = (z-zk-zh) rounded.
+ *
+ *               z - (1+zk)        (zh+zt)
+ *      Let s = ------------ = ---------------, then
+ *               z + (1+zk)     2(1+zk)+zh+zt
+ *                                                            z
+ *      log(Z) = N*log2 + log(z) = N*log2 + log(1+zk) + log(------)
+ *                                                           1+zk
+ *                                           1+s
+ *             = N * log2 + log(1 +zk) + log(---)
+ *                                           1-s
+ *
+ *                                                 3          5
+ *             = N*log2 + log(1+zk) + 2s + 1/12(2s) + 1/80(2s) + ...
+ *
+ *
+ *      Note 1. 
For IEEE double precision, a fifteen degree odd polynomial
+ *              2s + P1*(2s)^3 + P2*(2s)^5 + P3*(2s)^7 + ... + P7*(2s)^15
+ *          is generated by a special remez algorithm to
+ *          approx log((1+s)/(1-s)) accurate to 145 bits.
+ *      Note 2. 2s can be computed accurately as s2h+s2t by
+ *              r = 2/((zh+zt)+2(1+zk))
+ *              s2 = r*(zh+zt)
+ *              s2h = s2 rounded to double; v = 0.5*s2h;
+ *              s2t = r*((((zh-s2h*(1+zk))-v*zh)+zt)-v*zt)
+ */
+/* INDENT ON */
+
+static const long double
+zero = 0.0L,
+half = 0.5L,
+two = 2.0L,
+two240 = 1.7668470647783843295832975007429185158274839e+72L, /* 2^240 */
+
+/* first 48 bits of ln2 (ln2_h) and the remainder (ln2_t); P1..P7 are the polynomial coefficients of Note 1 */
+ln2_h = 0.693147180559943620892227045260369777679443359375L,
+ln2_t = 1.68852500507619780679039605677498525525412068e-15L,
+P1 = .083333333333333333333333333333333333341023785768375L,
+P2 = .01249999999999999999999999999999679085402075766159375L,
+P3 = .002232142857142857142857143310092047621284490564671875L,
+P4 = .00043402777777777777774746781319264872413156956512109375L,
+P5 = .0000887784090909101756336594019277185263940665468935546875L,
+P6 = .000018780048055589639895360927834628371268354778446533203125L,
+P7 = .000004069227854328982921366736003458838031087153635406494140625L;
+
+/*
+ * T[2k, 2k+1] = log(1+k*2**-7) for k = 0, ..., 2**7 - 1 (T is TBL_log1k),
+ * stored as a leading part T[2k] and a tail T[2k+1], where T[2k] * 2^48 is an integer
+ */
+
+static const long double TBL_log1k[] = {
+0.0000000000000000000000000000000000000000e+00L,
+0.0000000000000000000000000000000000000000e+00L,
+7.7821404420532758194894995540380477905273e-03L,
+1.6731279734005070987158875984584325351222e-15L,
+1.5504186535963526694104075431823730468750e-02L,
+1.7274567499706106231054091184928671990316e-15L,
+2.3167059281533397552266251295804977416992e-02L,
+9.8067653290966648493916241687661877474892e-16L,
+3.0771658666751022792595904320478439331055e-02L,
+2.6655784323032762937247606420524589813624e-15L,
+3.8318864302134159061097307130694389343262e-02L,
+2.4401326580179931029010027013316092332340e-15L,
+4.5809536031292452662455616518855094909668e-02L,
+1.7505042236510958082472042641283104263139e-15L, +5.3244514518809182845870964229106903076172e-02L, +3.1000199992295574218738634002122149891138e-15L, +6.0624621816433688081815489567816257476807e-02L, +1.1544987906424726040058093958345197512800e-15L, +6.7950661908504628172522643581032752990723e-02L, +3.1212220426341915966610439115772728417386e-15L, +7.5223421237584631171557703055441379547119e-02L, +2.8945270476369282210350897509258766743153e-15L, +8.2443669211073711267090402543544769287109e-02L, +8.8000106966612476303662698634483335676886e-16L, +8.9612158689686083334891009144484996795654e-02L, +1.0492850604602339995319895311151740799226e-15L, +9.6729626458550654888313147239387035369873e-02L, +4.5740725790924807640164516707244620870662e-16L, +1.0379679368164218544734467286616563796997e-01L, +1.3793787171308978090503366050174239822054e-15L, +1.1081436634028918319927470292896032333374e-01L, +9.3099553146639425160476473362380086036919e-16L, +1.1778303565638026384476688690483570098877e-01L, +3.1906940272225656860040797111813146690890e-15L, +1.2470347850095464536934741772711277008057e-01L, +2.5904940590976537504984110469214193890052e-15L, +1.3157635778871679121948545798659324645996e-01L, +2.4813692306707028899159917911012100567219e-15L, +1.3840232285911824305912887211889028549194e-01L, +8.9262619700148275890190121571708972000380e-16L, +1.4518200984449691759436973370611667633057e-01L, +9.7968756533003444764719201050911636480025e-16L, +1.5191604202583874894116888754069805145264e-01L, +3.2261306345373561864598749471119213018106e-15L, +1.5860503017663774016909883357584476470947e-01L, +8.4392427234104999681053621980394827998735e-16L, +1.6524957289530561865831259638071060180664e-01L, +1.5442172988528965297119225948270579746101e-15L, +1.7185025692665689689420105423778295516968e-01L, +2.3254458978918173643097657009894831132739e-15L, +1.7840765747281750464026117697358131408691e-01L, +7.9247913906453736065426776912520942036896e-16L, +1.8492233849401173984006163664162158966064e-01L, 
+2.5282384195601762803134514624610774126020e-16L, +1.9139485299962899489401024766266345977783e-01L, +4.5971528855989864541366920731297729269228e-16L, +1.9782574332991842425144568551331758499146e-01L, +1.4561111263856836438840838027526567191527e-15L, +2.0421554142868814096800633706152439117432e-01L, +2.7505358140491347148810394262840919337709e-15L, +2.1056476910734645002776233013719320297241e-01L, +3.1876417904825951583107481283088861928977e-15L, +2.1687393830061196808856038842350244522095e-01L, +2.3915305291373208450532580201045871599499e-15L, +2.2314355131420882116799475625157356262207e-01L, +9.3459830033405826094075253077304795996257e-16L, +2.2937410106484534821902343537658452987671e-01L, +4.8177245728966955534167425511952551974164e-16L, +2.3556607131276408040321257431060075759888e-01L, +2.8286743756446304426525380844720043381780e-15L, +2.4171993688714366044223424978554248809814e-01L, +1.5077020732661279714120052415509585052975e-15L, +2.4783616390458007572306087240576744079590e-01L, +1.1810575418933407573072030113600980623171e-15L, +2.5391520998096339667426946107298135757446e-01L, +4.7463053836833625309891834934881898560705e-17L, +2.5995752443692410338371701072901487350464e-01L, +1.9635883624838132961710716735786266795913e-15L, +2.6596354849713677026556979399174451828003e-01L, +1.1710735561325457988709887923652142233351e-15L, +2.7193371548364098089223261922597885131836e-01L, +7.7793943687530702031066421537496360004376e-16L, +2.7786845100345303194444568362087011337280e-01L, +3.2742419043493025311197092322146237692165e-15L, +2.8376817313064250924981024581938982009888e-01L, +2.0890970909765308649465619266075677112425e-15L, +2.8963329258304071345264674164354801177979e-01L, +1.9634262463138821209582240742801727823629e-15L, +2.9546421289383317798638017848134040832520e-01L, +2.6984003017275736237868564402005801750600e-15L, +3.0126133057816062432721082586795091629028e-01L, +1.1566856647123658045763670687640673680383e-15L, +3.0702503529490954292668902780860662460327e-01L, 
+2.3191484355127267712770857311812090801833e-15L, +3.1275571000389490450288576539605855941772e-01L, +1.9838833607942922604727420618882220398852e-15L, +3.1845373111853447767316538374871015548706e-01L, +1.3813708182984188944010814590398164268227e-16L, +3.2411946865421015218089451082050800323486e-01L, +1.8239097762496144793489474731253815376404e-15L, +3.2975328637246548169059678912162780761719e-01L, +2.5001238260227991620033344720809714552230e-15L, +3.3535554192113536942088103387504816055298e-01L, +2.4608362985459391180385214539620341910962e-15L, +3.4092658697059263772644044365733861923218e-01L, +5.7257864875612301758921090406373771458003e-16L, +3.4646676734620740489845047704875469207764e-01L, +1.1760200117113770182586341947822306069951e-15L, +3.5197642315717558858523261733353137969971e-01L, +2.5960702148389259075462896448369304790506e-15L, +3.5745588892180180096147523727267980575562e-01L, +1.9732645342528682246686790561260072184839e-15L, +3.6290549368936808605212718248367309570312e-01L, +3.6708569716349381675043725477739939978160e-16L, +3.6832556115870573876236448995769023895264e-01L, +1.9142858656640927085879445412821643247628e-15L, +3.7371640979358389245135185774415731430054e-01L, +1.8836966497497166619234389157276681281343e-16L, +3.7907835293496816575498087331652641296387e-01L, +1.2926358724723144934459175417385013725801e-15L, +3.8441169891033055705520382616668939590454e-01L, +1.4826795862363146014726140088145939341729e-15L, +3.8971675114002479745067830663174390792847e-01L, +4.1591978529737177695912258866565331189698e-16L, +3.9499380824086571806219581048935651779175e-01L, +3.2600441982258756252505182317625310732365e-15L, +4.0024316412701210765590076334774494171143e-01L, +5.9927342433864738622836851475469574662703e-16L, +4.0546510810816371872533636633306741714478e-01L, +6.6325267674913128171942721503283748008372e-16L, +4.1065992498526782128465129062533378601074e-01L, +5.6464965491255048900165082436455718077885e-16L, +4.1582789514371043537721561733633279800415e-01L, 
+5.3023611327561856950735176370587227509442e-16L, +4.2096929464412724541944044176489114761353e-01L, +2.3907094267197419048248363335257046791153e-15L, +4.2608439531089814522601955104619264602661e-01L, +1.9178985253285492839728700574592375309985e-15L, +4.3117346481836804628073878120630979537964e-01L, +3.2945784336977492852031005044499611665595e-15L, +4.3623676677491474151793227065354585647583e-01L, +3.3288311090524075754441878570852962903891e-15L, +4.4127456080487448275562201160937547683716e-01L, +7.4673387443005192574852544613692268411229e-16L, +4.4628710262841764233598951250314712524414e-01L, +1.8691966006681165218815050615460959199251e-15L, +4.5127464413945617138779198285192251205444e-01L, +2.4137569004002270899666314791611479063976e-15L, +4.5623743348158640742440184112638235092163e-01L, +1.1869564036970375473975162509216610120281e-15L, +4.6117571512216670726047595962882041931152e-01L, +3.4591075239659690349392915732654828400811e-15L, +4.6608972992459740680715185590088367462158e-01L, +1.8177514673916038857252366108673570603067e-15L, +4.7097971521878889689105562865734100341797e-01L, +2.1156558422273990182479555421331461933366e-15L, +4.7584590486996347635795245878398418426514e-01L, +4.3790725712752039722791012358345927696967e-16L, +4.8068852934575190261057286988943815231323e-01L, +5.0660455855585733988956280680891477171499e-18L, +4.8550781578169832641833636444061994552612e-01L, +2.4813834547127501689550526444948043590905e-15L, +4.9030398804519137456736643798649311065674e-01L, +2.4635829797216592537498738468934647345741e-15L, +4.9507726679784980206022737547755241394043e-01L, +1.7125377372093652812514167461480115600063e-15L, +4.9982786955644797899367404170334339141846e-01L, +1.3508276573735437007500942002018098437396e-15L, +5.0455601075239187025545106735080480575562e-01L, +3.4168028574643873701242268618467347998876e-15L, +5.0926190178980590417268103919923305511475e-01L, +2.0426313938800290907697638200502614622891e-15L, +5.1394575110223428282552049495279788970947e-01L, 
+3.3975485593321419703400672813719873194659e-17L, +5.1860776420804555186805373523384332656860e-01L, +8.0284923261130955371987633083003284697416e-17L, +5.2324814376454753528378205373883247375488e-01L, +3.0123302517119603836788558832352723470118e-16L, +5.2786708962084105678513878956437110900879e-01L, +1.3283287534282139298545497336570406582397e-15L, +5.3246479886946929127589100971817970275879e-01L, +2.5525980327137419625398485590148417041921e-15L, +5.3704146589688050994482182431966066360474e-01L, +3.1446219074198341716354190061340477078626e-15L, +5.4159728243274329884116014000028371810913e-01L, +1.0727353821639001503808606766770295812627e-15L, +5.4613243759813556721383065450936555862427e-01L, +8.3168566554721843605240702438699163825794e-17L, +5.5064711795266063631970610003918409347534e-01L, +1.6429402420791657293666192255419538448840e-15L, +5.5514150754050106684189813677221536636353e-01L, +5.2587358222274368868380660194332415847228e-16L, +5.5961578793542088305912329815328121185303e-01L, +1.8032117652023735453816330571171114110385e-15L, +5.6407013828480145889443519990891218185425e-01L, +1.5071769490901812785299634348367857600711e-15L, +5.6850473535266843327917740680277347564697e-01L, +2.7879956135806418878792935692629147550413e-16L, +5.7291975356178426181941176764667034149170e-01L, +1.2472733449589795907271346997596471822345e-15L, +5.7731536503482061561953742057085037231445e-01L, +2.9886985746409486460291929160223207644146e-15L, +5.8169173963462128540413687005639076232910e-01L, +1.1971164738836689815783808674399742176950e-15L, +5.8604904500357690722012193873524665832520e-01L, +1.3016839974975520776911897855504474452726e-15L, +5.9038744660217545856539800297468900680542e-01L, +9.1607651870514890975077236127894522134392e-16L, +5.9470710774668944509357970673590898513794e-01L, +3.3444207638397932963480545729233567201211e-15L, +5.9900818964608149030937056522816419601440e-01L, +1.9090722294592334873060460706130642200729e-15L, +6.0329085143808214297678205184638500213623e-01L, 
+2.1193638031348149256035110177854940281795e-15L, +6.0755525022453937822319858241826295852661e-01L, +2.4172778865703728624133665395876418941354e-15L, +6.1180154110599005434778518974781036376953e-01L, +2.8491821045766810044199163148675291775782e-15L, +6.1602987721551372146677749697118997573853e-01L, +2.9818078843122551067455400545109858745295e-16L, +6.2024040975185457114093878772109746932983e-01L, +2.9577105558448461493874424529516311623184e-15L, +6.2443328801189323939979658462107181549072e-01L, +2.6164274215943360130441858075903119505815e-16L, +6.2860865942237253989333112258464097976685e-01L, +1.5978509770831895426601797458058854400463e-15L, +6.3276666957103699928666173946112394332886e-01L, +8.3025912472904245581515990140161946934461e-16L, +6.3690746223706895534633076749742031097412e-01L, +2.7627416365968377888021629180796328536455e-16L, +6.4103117942092779912854894064366817474365e-01L, +3.4919270523937617243719652995048419893186e-15L, +6.4513796137358170312836591619998216629028e-01L, +2.9985368625799347497396478978681548584217e-15L, +6.4922794662510696639401430729776620864868e-01L, +2.8524968256626075449136225882322854909611e-15L, +6.5330127201274379444839723873883485794067e-01L, +1.8443102186424720390266302263929355424008e-15L, +6.5735807270835877602621621917933225631714e-01L, +1.2541156738040666039091970075936624723645e-15L, +6.6139848224536379461824253667145967483521e-01L, +1.2136419933020381912633127333149145382797e-15L, +6.6542263254508782210905337706208229064941e-01L, +2.6268410392329445778904988886114643307320e-15L, +6.6943065394262646350398426875472068786621e-01L, +2.8037949010021747828222575923191438798877e-15L, +6.7342267521216570003161905333399772644043e-01L, +1.0202663413354670195383104149875619397268e-15L, +6.7739882359180469961756898555904626846313e-01L, +1.4411921136244383020300914304078010801275e-15L, +6.8135922480790256372529256623238325119019e-01L, +5.0522277899333570619054540068138110661023e-16L, +6.8530400309891703614084690343588590621948e-01L, 
+2.3804032011755313470802014258958896193599e-15L,
+6.8923328123880622797514661215245723724365e-01L,
+2.7523497677256621466659891416404053623832e-15L,
+};
+
+/*
+ * Compute N*log2 + log(1+zk+zh+zt) in extra precision, with 1+zk = 1 + K*2**-7
+ */
+static long double k_log_NKzl(int N, int K, long double zh, long double *zt)
+{
+	long double y, r, w, s2, s2h, s2t, t, zk, v, P;
+	double dzk;
+	unsigned lx, ly;	/* lx, ly, j are only used in the non-i386 branch below */
+	int j;
+
+	((int *)&dzk)[HIWORD] = 0x3ff00000 + (K << 13);	/* dzk = 1 + K*2**-7, assuming HIWORD selects the high 32 bits of an IEEE double */
+	((int *)&dzk)[LOWORD] = 0;
+	t = zh + (*zt);
+	zk = (long double) dzk;	/* note: this local zk already includes the leading 1 */
+	r = two / (t + two * zk);	/* r = 2/((zh+zt)+2(1+zk)) */
+	s2h = s2 = r * t;	/* s2 = 2s */
+/* split s2 into correctly rounded half */
+#if defined(__i386)
+	((unsigned *)&s2h)[0] = 0;	/* 32 bits chopped */
+#else
+	lx = ((unsigned *)&s2h)[2];	/* 56 bits rounded */
+	j = ((lx >> 24) + 1) >> 1;
+	((unsigned *)&s2h)[2] = (j << 25);
+	lx = ((unsigned *)&s2h)[1];
+	ly = lx + (j >> 7);	/* propagate the rounding carry into word 1 */
+	((unsigned *)&s2h)[1] = ly;
+	((unsigned *)&s2h)[0] += (ly == 0 && lx != 0);	/* word 1 wrapped to 0: carry into word 0 */
+	((unsigned *)&s2h)[3] = 0;
+#endif
+	v = half * s2h;
+	w = s2 * s2;
+	s2t = r * ((((zh - s2h * zk) - v * zh) + (*zt)) - v * (*zt));	/* tail of 2s */
+	P = s2t + (w * s2) * ((P1 + w * P2) + (w * w) * ((P3 + w * P4)
+		+ (w * w) * (P5 + w * P6 + (w * w) * P7)));
+	P += N * ln2_t + TBL_log1k[K + K + 1];	/* add the tail parts of N*ln2 and of the table entry */
+	t = N*ln2_h + TBL_log1k[K+K];	/* leading parts of N*ln2 and of the table entry */
+	y = t + (P + s2h);
+	P -= ((y - t) - s2h);	/* what the rounded sum y left out */
+	*zt = P;	/* correction term handed back through zt */
+	return (y);
+}
+
+long double	/* log(hypot(x, y)); the correction term is stored in *er */
+__k_clog_rl(long double x, long double y, long double *er)
+{
+	long double t1, t2, t3, t4, tk, z, wh, w, zh, zk;
+	int n, k, ix, iy, iz, nx, ny, nz, i, j;
+	double dk;
+	unsigned lx, ly;
+
+	ix = HI_XWORD(x) & ~0x80000000;	/* drop bit 31; HI_XWORD is defined elsewhere - presumably sign/exponent in the top 16 bits */
+	iy = HI_XWORD(y) & ~0x80000000;
+	y = fabsl(y); x = fabsl(x);
+	if (ix < iy || (ix < 0x7fff0000 && ix == iy && x < y)) {
+		/* force x >= y */
+		tk = x; x = y; y = tk;
+		n = ix, ix = iy; iy = n;
+	}
+	*er = zero;
+	nx = ix >> 16; ny = iy >> 16;
+	if (nx >= 0x7fff) {	/* x or y is Inf or NaN */
+		if (isinfl(x))
+			return (x);
+		else if (isinfl(y))
+			return (y);
+		else
+			return (x+y);
+	}
+/*
+ * for tiny y:(double y < 2^-35, extended y < 2^-46, quad y < 2^-70)
+ *
+ *	log(sqrt(1 + y**2)) = y**2 / 2 - y**4 / 8 + ... = y**2 / 2
+ */
+#if defined(__i386)
+	if (x == 1.0L && ny < (0x3fff - 46)) {
+#else
+	if (x == 1.0L && ny < (0x3fff - 70)) {
+#endif
+		t2 = y * y;
+		if (ny >= 8305) {	/* compute er = tail of t2 */
+			dk = (double) y;
+#if defined(__i386)
+			((unsigned *)&dk)[LOWORD] &= 0xfffe0000;
+#endif
+			wh = (long double) dk;
+			*er = half * ((y - wh) * (y + wh) - (t2 - wh * wh));
+		}
+		return (half * t2);
+	}
+/*
+ * x or y is subnormal or zero
+ */
+	if (nx == 0) {
+		if (x == 0.0L)
+			return (-1.0L / x);	/* x is +0 here (fabsl above), so this divides -1 by +0 */
+		else {
+			x *= two240;
+			y *= two240;
+			ix = HI_XWORD(x);
+			iy = HI_XWORD(y);
+			nx = (ix >> 16) - 240;	/* undo the 2^240 scaling in the exponent */
+			ny = (iy >> 16) - 240;
+			/* guard subnormal flush to 0 */
+			if (x == 0.0L)
+				return (-1.0L / x);
+		}
+	} else if (ny == 0) {	/* y subnormal, scale it */
+		y *= two240;
+		iy = HI_XWORD(y);
+		ny = (iy >> 16) - 240;
+	}
+	n = nx - ny;
+/*
+ * When y is zero or when x >> y, i.e., n > 62, 78, 122 for DBLE,
+ * EXTENDED, QUAD respectively,
+ * log(x) = log(sqrt(x * x + y * y)) to 27 extra bits.
+ */
+#if defined(__i386)
+	if (n > 78 || y == 0.0L) {
+#else
+	if (n > 122 || y == 0.0L) {
+#endif
+		XFSCALE(x, 0x3fff - (ix >> 16));	/* XFSCALE is defined elsewhere; presumably rescales x into [1, 2) - TODO confirm */
+		i = ((ix & 0xffff) + 0x100) >> 9;	/* 7.5 bits of x */
+		zk = 1.0L + ((long double) i) * 0.0078125L;	/* 0.0078125 = 2**-7 */
+		z = x - zk;
+		dk = (double)z;
+#if defined(__i386)
+		((unsigned *)&dk)[LOWORD] &= 0xfffe0000;
+#endif
+		zh = (long double)dk;
+		k = i & 0x7f;	/* index of zk */
+		n = nx - 0x3fff;
+		*er = z - zh;
+		if (i == 0x80) {	/* if zk = 2.0, adjust scaling */
+			n += 1;
+			zh *= 0.5L; *er *= 0.5L;
+		}
+		w = k_log_NKzl(n, k, zh, er);
+	} else {
+/*
+ * compute z = x*x + y*y
+ */
+		XFSCALE(x, 0x3fff - (ix >> 16));
+		XFSCALE(y, 0x3fff - n - (iy >> 16));
+		ix = (ix & 0xffff) | 0x3fff0000;
+		iy = (iy & 0xffff) | (0x3fff0000 - (n << 16));
+		nx -= 0x3fff;
+		t1 = x * x; t2 = y * y;
+		wh = x;
+/* split x into correctly rounded half */
+#if defined(__i386)
+		((unsigned *)&wh)[0] = 0;	/* 32 bits chopped */
+#else
+		lx = ((unsigned *)&wh)[2];	/* 56 rounded */
+		j = ((lx >> 24) + 1) >> 1;
+		((unsigned *)&wh)[2] = (j << 25);
+		lx = ((unsigned *)&wh)[1];
+		ly = lx + (j >> 7);
+		((unsigned *)&wh)[1] = ly;
+		((unsigned *)&wh)[0] += (ly == 0 && lx != 0);
+		((unsigned *)&wh)[3] = 0;
+#endif
+		z = t1+t2;
+/*
+ * higher precision simulation x*x = t1 + t3, y*y = t2 + t4
+ */
+		tk = wh - x;
+		t3 = tk * tk - (two * wh * tk - (wh * wh - t1));
+		wh = y;
+/* split y into correctly rounded half */
+#if defined(__i386)
+		((unsigned *)&wh)[0] = 0;	/* 32 bits chopped */
+#else
+		ly = ((unsigned *)&wh)[2];	/* 56 bits rounded */
+		j = ((ly >> 24) + 1) >> 1;
+		((unsigned *)&wh)[2] = (j << 25);
+		lx = ((unsigned *)&wh)[1];
+		ly = lx + (j >> 7);
+		((unsigned *)&wh)[1] = ly;
+		((unsigned *)&wh)[0] += (ly == 0 && lx != 0);
+		((unsigned *)&wh)[3] = 0;
+#endif
+		tk = wh - y;
+		t4 = tk * tk - (two * wh * tk - (wh * wh - t2));
+/*
+ * find zk matches z to 7.5 bits
+ */
+		iz = HI_XWORD(z);
+		k = ((iz & 0xffff) + 0x100) >> 9;	/* 7.5 bits of z */
+		nz = (iz >> 16) - 0x3fff + (k >> 7);	/* k == 0x80 means rounding carried into the next binade */
+		k &= 0x7f;
+		zk = 1.0L + ((long double) k) * 0.0078125L;
+		if (nz == 1) zk += zk;	/* bring zk to the scale of z: multiply by 2**nz */
+		else if (nz == 2) zk *= 4.0L;
+		else if (nz == 3) zk *= 8.0L;
+/*
+ * order t1, t2, t3, t4 according to their size
+ */
+		if (t2 >= fabsl(t3)) {
+			if (fabsl(t3) < fabsl(t4)) {
+				wh = t3; t3 = t4; t4 = wh;
+			}
+		} else {
+			wh = t2; t2 = t3; t3 = wh;
+		}
+/*
+ * higher precision simulation: x * x + y * y = t1 + t2 + t3 + t4
+ *	= zk(7 bits) + zh(rounded half) + *er(tail) and call k_log_NKzl
+ */
+		tk = t1 - zk;
+		zh = ((tk + t2) + t3) + t4;
+/* split zh into correctly rounded half */
+#if defined(__i386)
+		((unsigned *)&zh)[0] = 0;
+#else
+		ly = ((unsigned *)&zh)[2];
+		j = ((ly >> 24) + 1) >> 1;
+		((unsigned *)&zh)[2] = (j << 25);
+		lx = ((unsigned *)&zh)[1];
+		ly = lx + (j >> 7);
+		((unsigned *)&zh)[1] = ly;
+		((unsigned *)&zh)[0] += (ly == 0 && lx != 0);
+		((unsigned *)&zh)[3] = 0;
+#endif
+		w = fabsl(zh);
+		if (w >= fabsl(t2))
+		{
+			*er = (((tk - zh) + t2) + t3) + t4;
+		}
+
+		else {
+
+			if (n == 0) {
+				wh = half * zk;
+				wh = (t1 - wh) - (wh - t2);
+			} else
+				wh = tk + t2;
+			if (w >= fabsl(t3))
+				*er = ((wh - zh) + t3) + t4;
+			else {
+				z = t3;
+				t3 += t4;
+				t4 -= t3 - z;
+				if (w >= fabsl(t3))
+					*er = ((wh - zh) + t3) + t4;
+				else
+					*er = ((wh + t3) - zh) + t4;
+			}
+		}
+		if (nz == 3) {	/* scale zh and *er down by 2**nz, matching the zk scaling above */
+			zh *= 0.125L; *er *= 0.125L;
+		} else if (nz == 2) {
+			zh *= 0.25L; *er *= 0.25L;
+		} else if (nz == 1) {
+			zh *= half; *er *= half;
+		}
+		nz += nx + nx;	/* exponent of x*x + y*y: nz plus twice the exponent of x */
+		w = half * k_log_NKzl(nz, k, zh, er);	/* log(sqrt(Z)) = log(Z)/2 */
+		*er *= half;
+	}
+	return (w);
+}
diff --git a/usr/src/libm/src/i386/amd64/__swapFLAGS.s b/usr/src/libm/src/i386/amd64/__swapFLAGS.s
new file mode 100644
index 0000000..1f76767
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/__swapFLAGS.s
@@ -0,0 +1,161 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident "@(#)__swapFLAGS.s 1.3 06/01/23 SMI"
+
+	.file "__swapFLAGS.s"
+
+#include "libm.h"
+#include "libm_synonyms.h"
+
+/*
+ * swap exception masks
+ *
+ * Put the complement of bits 5-0 of the argument into FPCW bits 5-0
+ * and MXCSR bits 12-7, return the complement of the previous FPCW
+ * bits 5-0.
+ */
+	ENTRY(__swapTE)			/ di <-- NOT(desired xcptn_masks)
+	subq	$8,%rsp			/ 8-byte scratch slot on the stack
+	fstcw	(%rsp)			/ push current_cw on '86 stack
+	movq	(%rsp),%rcx		/ cx <-- current_cw
+	movw	%cx,%ax			/ ax <-- current_cw
+	orw	$0x3f,%cx		/ cx <-- current_cw, but masking all xcptns
+	andw	$0x3f,%di		/ make sure bits > B5 are all zero
+	xorw	%di,%cx			/ cx <-- present_cw, with new xcptn_masks
+	movw	%cx,(%rsp)		/ store new cw in the scratch slot
+	fldcw	(%rsp)			/ load new cw
+	stmxcsr	(%rsp)			/ store current mxcsr in the scratch slot
+	movq	(%rsp),%rcx		/ cx <-- current mxcsr (low 16 bits)
+	orw	$0x1f80,%cx		/ cx <-- current mxcsr, but masking all xcptns
+	shlw	$7,%di			/ move argument bits 5-0 up to bits 12-7
+	xorw	%di,%cx			/ cx <-- present mxcsr, with new xcptn_masks
+	movq	%rcx,(%rsp)		/ store new mxcsr in the scratch slot
+	ldmxcsr	(%rsp)			/ load new mxcsr
+	andq	$0x3f,%rax		/ al[5..0] <-- former xcptn_masks
+	xorq	$0x3f,%rax		/ al[5..0] <-- NOT(former xcptn_masks)
+	addq	$8,%rsp			/ release the scratch slot
+	ret
+	.align	16
+	SET_SIZE(__swapTE)
+
+/*
+ * swap exception flags
+ *
+ * Put bits 5-0 of the argument into FPSW bits 5-0 and MXCSR bits 5-0,
+ * return the "or" of the previous FPSW bits 5-0 and MXCSR bits 5-0.
+ */
+	ENTRY(__swapEX)			/ di = requested exception flags
+	fstsw	%ax			/ ax = sw
+	andq	$0x3f,%rdi		/ keep argument bits 5-0 only; sets ZF for jnz
+	jnz	.L1
+					/ input ex=0, clear all exception
+	fnclex				/ clears the x87 exception flags
+	subq	$8,%rsp			/ 8-byte scratch slot on the stack
+	stmxcsr	(%rsp)
+	movq	(%rsp),%rcx		/ cx = old mxcsr (low 16 bits)
+	orw	%cx,%ax			/ ax = old sw | old mxcsr
+	andw	$0xffc0,%cx		/ clear mxcsr flag bits 5-0
+	movq	%rcx,(%rsp)
+	ldmxcsr	(%rsp)
+	andq	$0x3f,%rax		/ return only flag bits 5-0
+	addq	$8,%rsp
+	ret
+.L1:
+					/ input ex !=0, use fnstenv and fldenv
+	subq	$32,%rsp		/ only needed 28
+	fnstenv	(%rsp)
+	movw	%ax,%dx			/ dx = old sw
+	andw	$0xffc0,%dx		/ drop old flag bits 5-0
+	orw	%di,%dx			/ new flags are in di; cx is never loaded on this path
+	movw	%dx,4(%rsp)		/ replace old sw by new one
+	fldenv	(%rsp)
+	stmxcsr	(%rsp)
+	movq	(%rsp),%rdx		/ dx = old mxcsr (low 16 bits)
+	orw	%dx,%ax			/ ax = old sw | old mxcsr
+	andw	$0xffc0,%dx		/ drop old mxcsr flag bits 5-0
+	orw	%di,%dx			/ new flags are in di; cx is never loaded on this path
+	movq	%rdx,(%rsp)
+	ldmxcsr	(%rsp)
+	andq	$0x3f,%rax		/ return only flag bits 5-0
+	addq	$32,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapEX)
+
+/*
+ * swap rounding precision
+ *
+ * Put bits 1-0 of the argument into FPCW bits 9-8, return the
+ * previous FPCW bits 9-8.
+ */
+	ENTRY(__swapRP)
+	subq	$8,%rsp			/ 8-byte scratch slot on the stack
+	fstcw	(%rsp)
+	movw	(%rsp),%ax		/ ax = old cw (kept for the return value)
+	movw	%ax,%cx
+	andw	$0xfcff,%cx		/ clear cw bits 9-8
+	andq	$0x3,%rdi		/ keep argument bits 1-0 only
+	shlw	$8,%di			/ move them up to bits 9-8
+	orw	%di,%cx
+	movq	%rcx,(%rsp)
+	fldcw	(%rsp)			/ load new cw
+	shrw	$8,%ax
+	andq	$0x3,%rax		/ return old cw bits 9-8
+	addq	$8,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapRP)
+
+/*
+ * swap rounding direction
+ *
+ * Put bits 1-0 of the argument into FPCW bits 11-10 and MXCSR
+ * bits 14-13, return the previous FPCW bits 11-10.
+ */
+	ENTRY(__swapRD)
+	subq	$8,%rsp			/ 8-byte scratch slot on the stack
+	fstcw	(%rsp)
+	movw	(%rsp),%ax		/ ax = old cw (kept for the return value)
+	movw	%ax,%cx
+	andw	$0xf3ff,%cx		/ clear cw bits 11-10
+	andq	$0x3,%rdi		/ keep argument bits 1-0 only
+	shlw	$10,%di			/ move them up to bits 11-10
+	orw	%di,%cx
+	movq	%rcx,(%rsp)
+	fldcw	(%rsp)			/ load new cw
+	stmxcsr	(%rsp)
+	movq	(%rsp),%rcx		/ cx = old mxcsr (low 16 bits)
+	andw	$0x9fff,%cx		/ clear mxcsr bits 14-13
+	shlw	$3,%di			/ bits 11-10 --> bits 14-13
+	orw	%di,%cx
+	movq	%rcx,(%rsp)
+	ldmxcsr	(%rsp)			/ load new mxcsr
+	shrw	$10,%ax
+	andq	$0x3,%rax		/ return old cw bits 11-10
+	addq	$8,%rsp
+	ret
+	.align	16
+	SET_SIZE(__swapRD)
diff --git a/usr/src/libm/src/i386/amd64/acosl.s b/usr/src/libm/src/i386/amd64/acosl.s
new file mode 100644
index 0000000..b552fdf
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/acosl.s
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident "@(#)acosl.s 1.3 06/01/23 SMI"
+
+	.file "acosl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(acosl,function)
+#include "libm_synonyms.h"
+
+#undef fabs
+
+	ENTRY(acosl)			/ acosl(x) = 2*atan(sqrt((1-x)/(1+x)))
+	fldt	8(%rsp)			/ push x
+	fld1				/ push 1
+	fld	%st(1)			/ x , 1 , x
+	fabs				/ |x| , 1 , x
+	fucomip	%st(1),%st		/ compare |x| with 1 and pop: 1 , x
+	ja	9f			/ |x| > 1
+	fadd	%st(1),%st		/ 1+x,x
+	fldz				/ 0,1+x,x
+	fucomip	%st(1),%st		/ compare 0 with 1+x and pop: 1+x,x
+	jp	.L1			/ unordered (x is NaN)
+	jne	.L1			/ 1+x != 0
+					/ x is -1
+	fstp	%st(0)			/ -1
+	fstp	%st(0)			/ empty NPX stack
+	fldpi				/ acosl(-1) = pi
+	ret
+.L1:
+	fxch	%st(1)			/ x,1+x
+	fld1				/ 1,x,1+x
+	fsubp	%st,%st(1)		/ 1-x,1+x
+	fdivp	%st,%st(1)		/ (1-x)/(1+x)
+	fsqrt				/ sqrt((1-x)/(1+x))
+	fld1				/ 1,sqrt((1-x)/(1+x))
+	fpatan				/ atan(sqrt((1-x)/(1+x)))
+	fadd	%st(0),%st		/ double it: 2*atan(sqrt((1-x)/(1+x)))
+	ret
+9:
+					/ |x| > 1
+	fstp	%st(0)			/ x
+	fsub	%st,%st(0)		/ +/-0 or NaN+invalid
+	fdiv	%st,%st(0)		/ NaN+invalid or NaN
+	ret
+	.align	16
+	SET_SIZE(acosl)
diff --git a/usr/src/libm/src/i386/amd64/asinl.s b/usr/src/libm/src/i386/amd64/asinl.s
new file mode 100644
index 0000000..7568647
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/asinl.s
@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident "@(#)asinl.s 1.3 06/01/23 SMI"
+
+	.file "asinl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(asinl,function)
+#include "libm_synonyms.h"
+
+#undef fabs
+
+	ENTRY(asinl)			/ asinl(x) = atan(x/sqrt((1-x)*(1+x)))
+	fldt	8(%rsp)			/ push x
+	fld1				/ push 1
+	fld	%st(1)			/ x , 1 , x
+	fabs				/ |x| , 1 , x
+	fucomip	%st(1),%st		/ compare |x| with 1 and pop: 1 , x
+	ja	9f			/ |x| > 1
+	fadd	%st(1),%st		/ 1+x,x
+	fld1				/ 1,1+x,x
+	fsub	%st(2),%st		/ 1-x,1+x,x
+	fmulp	%st,%st(1)		/ (1-x)*(1+x),x
+	fsqrt				/ sqrt((1-x)*(1+x)),x
+	fpatan				/ atan(x/sqrt((1-x)*(1+x)))
+	ret
+9:
+					/ |x| > 1
+	fstp	%st(0)			/ x
+	fsub	%st,%st(0)		/ +/-0 or NaN+invalid
+	fdiv	%st,%st(0)		/ NaN+invalid or NaN
+	ret
+	.align	16
+	SET_SIZE(asinl)
diff --git a/usr/src/libm/src/i386/amd64/atan2l.s b/usr/src/libm/src/i386/amd64/atan2l.s
new file mode 100644
index 0000000..0af2d54
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/atan2l.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)atan2l.s 1.3 06/01/23 SMI" + + .file "atan2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2l,function) +#include "libm_synonyms.h" + + ENTRY(atan2l) + fldt 8(%rsp) / push y + fldt 24(%rsp) / push x + fpatan / return atan2(y,x) + ret + .align 16 + SET_SIZE(atan2l) diff --git a/usr/src/libm/src/i386/amd64/atanl.s b/usr/src/libm/src/i386/amd64/atanl.s new file mode 100644 index 0000000..924ec67 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/atanl.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + + .ident "@(#)atanl.s 1.3 06/01/23 SMI" + + .file "atanl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atanl,function) +#include "libm_synonyms.h" + + ENTRY(atanl) + fldt 8(%rsp) / push arg + fld1 / push 1.0 + fpatan / atan(arg/1.0) + ret + .align 16 + SET_SIZE(atanl) diff --git a/usr/src/libm/src/i386/amd64/copysignl.s b/usr/src/libm/src/i386/amd64/copysignl.s new file mode 100644 index 0000000..31fa92e --- /dev/null +++ b/usr/src/libm/src/i386/amd64/copysignl.s @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)copysignl.s 1.3 06/01/23 SMI" + + .file "copysignl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysignl,function) +#include "libm_synonyms.h" + + ENTRY(copysignl) + movl 16(%rsp),%eax + movl 32(%rsp),%ecx + andl $0x7fff,%eax + andl $0x8000,%ecx + orl %ecx,%eax + movl %eax,16(%rsp) + fldt 8(%rsp) + ret + .align 16 + SET_SIZE(copysignl) diff --git a/usr/src/libm/src/i386/amd64/exp10l.s b/usr/src/libm/src/i386/amd64/exp10l.s new file mode 100644 index 0000000..6d1f724 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/exp10l.s @@ -0,0 +1,115 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp10l.s 1.3 06/01/23 SMI" + + .file "exp10l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10l,function) +#include "libm_synonyms.h" + + .data + .align 16 +lt2_hi: .4byte 0xfbd00000, 0x9a209a84, 0x3ffd, 0x0 +lt2_lo: .4byte 0x653f4837, 0x8677076a, 0xbfc9, 0x0 + + ENTRY(exp10l) + movl 16(%rsp),%ecx / cx <--sign&bexp(x) + andl $0x7fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x3ffd,%ecx / Is |x| < log10(2)? + jb .shortcut / If so, take a shortcut. 
+ je .check_tail / maybe |x| only slightly < log10(2) +.general_case: / Here, |x| > log10(2) or x is NaN + cmpl $0x7fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + cmpl $0x400e,%ecx / |x| < 32768 = 2^15? + jb .finite_non_special / if so, proceed with argument reduction + fldt 8(%rsp) / x + fld1 / 1, x + jmp 1f +.finite_non_special: / Here, log10(2) < |x| < 2^15 + fldt 8(%rsp) / x + fld %st(0) / x, x + fldl2t / log2(10), x, x + fmul / z := x*log2(10), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(lt2_hi) / lt2_hi, [z], x, [z] + fmul / [z]*lt2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*lt2_hi, [z] + fldt PIC_L(lt2_lo) / lt2_lo, x-[z]*lt2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*lt2_lo, x-[z]*lt2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*log10(2), [z] + fldl2t / log2(10), r, [z] + fmul / f := r*log2(10), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] +1: + fscale / 10^x, [z] + fstp %st(1) + ret + +.check_tail: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x9a209a84,%ecx / Is |x| < log10(2)? + ja .finite_non_special + jb .shortcut + movl 8(%rsp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0xfbcff798,%edx / Is |x| slightly > log10(2)? + ja .finite_non_special / branch if |x| slightly > log10(2) +.shortcut: + / Here, |x| < log10(2), so |z| = |x/log10(2)| < 1 + / whence z is in f2xm1's domain. + fldt 8(%rsp) / x + fldl2t / log2(10), x + fmul / z := x*log2(10) + f2xm1 / 2^z-1 + fld1 / 1, 2^z-1 + faddp %st,%st(1) / 10^x + ret + +.not_finite: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(sgnfcnd(x)) = hi_32(sgnfcnd(INF))? + jne .NaN_or_pinf / if not, x is NaN or unsupp. + movl 8(%rsp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0,%edx / lo_32(sgnfcnd(x)) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 16(%rsp),%eax / ax <-- sign&bexp((x)) + andl $0x8000,%eax / here, x is infinite, but +/-? 
+ jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 8(%rsp) + ret + .align 16 + SET_SIZE(exp10l) diff --git a/usr/src/libm/src/i386/amd64/exp2l.s b/usr/src/libm/src/i386/amd64/exp2l.s new file mode 100644 index 0000000..330cb8f --- /dev/null +++ b/usr/src/libm/src/i386/amd64/exp2l.s @@ -0,0 +1,99 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp2l.s 1.3 06/01/23 SMI" + + .file "exp2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2l,function) +#include "libm_synonyms.h" + + ENTRY(exp2l) + movl 16(%rsp),%ecx / cx <--sign&bexp(x) + andl $0x7fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x3fff,%ecx / Is |x| <= 1? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be slightly > 1 +.general_case: / Here, |x| > 1 or x is NaN + cmpl $0x7fff,%ecx / bexp(|x|) = bexp(INF)? 
+ je .not_finite / if so, x is not finite +.finite_non_special: / Here, 1 < |x| < INF + fldt 8(%rsp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucomi %st(1),%st / x integral? + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) + ret + +.x_integral: + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.check_tail: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / Is |x| <= 1? + ja .finite_non_special + movl 8(%rsp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0x0,%edx / Is |x| slightly > 1? + ja .finite_non_special / branch if |x| slightly > 1 +.shortcut: + / Here, |x| <= 1 (|x| = 1 falls through from .check_tail), + / whence x is in f2xm1's domain. + fldt 8(%rsp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%rsp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 16(%rsp),%eax / ax <-- sign&bexp((x)) + andl $0x8000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 8(%rsp) + ret + .align 16 + SET_SIZE(exp2l) diff --git a/usr/src/libm/src/i386/amd64/expl.s b/usr/src/libm/src/i386/amd64/expl.s new file mode 100644 index 0000000..4deea86 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/expl.s @@ -0,0 +1,124 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)expl.s 1.3 06/01/23 SMI" + + .file "expl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expl,function) +#include "libm_synonyms.h" + + .data + .align 16 +ln2_hi: .4byte 0xd1d00000, 0xb17217f7, 0x3ffe, 0x0 +ln2_lo: .4byte 0x4c67fc0d, 0x8654361c, 0xbfce, 0x0 + + ENTRY(expl) + movl 16(%rsp),%ecx / cx <--sign&bexp(x) + andl $0x7fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x3ffe,%ecx / Is |x| < 0.5? + jb 2f / If so, see which shortcut to take + je .check_tail / More checking if 0.5 <= |x| < 1 +.general_case: / Here, |x| >= 1 or x is NaN + cmpl $0x7fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + cmpl $0x400e,%ecx / |x| < 32768 = 2^15? 
+ jb .finite_non_special / if so, proceed with argument reduction + fldt 8(%rsp) / x + fld1 / 1, x + jmp 1f +.finite_non_special: / Here, ln(2) < |x| < 2^15 + fldt 8(%rsp) / x + fld %st(0) / x, x + fldl2e / log2(e), x, x + fmul / z := x*log2(e), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(ln2_hi) / ln2_hi, [z], x, [z] + fmul / [z]*ln2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*ln2_hi, [z] + fldt PIC_L(ln2_lo) / ln2_lo, x-[z]*ln2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*ln2_lo, x-[z]*ln2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*ln(2), [z] + fldl2e / log2(e), r, [z] + fmul / f := r*log2(e), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] +1: + fscale / e^x, [z] + fstp %st(1) + ret + +2: / Here, |x| < 0.5 + cmpl $0x3fbe,%ecx / Is |x| >= 2^-65? + jae .shortcut / If so, take a shortcut + fldt 8(%rsp) / x + fld1 / 1, x + faddp %st,%st(1) / 1+x (for inexact & directed rounding) + ret + +.check_tail: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0xb17217f7,%ecx / Is |x| < ln(2)? + ja .finite_non_special + jb .shortcut + movl 8(%rsp),%edx / edx <-- lo_32(x) + cmpl $0xd1cf79ab,%edx / Is |x| slightly < ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x/ln(2)| < 1, + / whence z is in f2xm1's domain. + fldt 8(%rsp) / x + fldl2e / log2(e), x + fmul / x*log2(e) + f2xm1 / 2^(x*log2(e))-1 = e^x-1 + fld1 / 1, e^x-1 + faddp %st,%st(1) / e^x + ret + +.not_finite: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%rsp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 16(%rsp),%eax / ax <-- sign&bexp((x)) + andl $0x8000,%eax / here, x is infinite, but +/-? 
+ jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 8(%rsp) + fadd %st(0),%st / quiet SNaN + ret + .align 16 + SET_SIZE(expl) diff --git a/usr/src/libm/src/i386/amd64/expm1l.s b/usr/src/libm/src/i386/amd64/expm1l.s new file mode 100644 index 0000000..3a33920 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/expm1l.s @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)expm1l.s 1.3 06/01/23 SMI" + + .file "expm1l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expm1l,function) +#include "libm_synonyms.h" + + .data + .align 16 +ln2_hi: .4byte 0xd1d00000, 0xb17217f7, 0x3ffe, 0x0 +ln2_lo: .4byte 0x4c67fc0d, 0x8654361c, 0xbfce, 0x0 + + ENTRY(expm1l) + movl 16(%rsp),%ecx / cx <--sign&bexp(x) + movl %ecx,%eax / ax <--sign&bexp(x) + andl $0x7fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x3ffe,%ecx / Is |x| < ln(2)? + jb .shortcut / If so, take a shortcut. 
+ je .check_tail / |x| may be only slightly < ln(2) +.general_case: / Here, |x| > ln(2) or x is NaN + cmpl $0x7fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + andl $0xffff,%eax / eax <-- sign&bexp(x) + cmpl $0xc006,%eax / x <= -128? + jae 1f / if so, simply return -1 + cmpl $0x400d,%ecx / |x| < 16384 = 2^14? + jb .finite_non_special / if so, proceed with argument reduction + fldt 8(%rsp) / x >= 16384; x + fld1 / 1, x + fscale / +Inf, x + fstp %st(1) / +Inf + ret + +.finite_non_special: / -128 < x < -ln(2) || ln(2) < x < 2^14 + fldt 8(%rsp) / x + fld %st(0) / x, x + fldl2e / log2(e), x, x + fmul / z := x*log2(e), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(ln2_hi) / ln2_hi, [z], x, [z] + fmul / [z]*ln2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*ln2_hi, [z] + fldt PIC_L(ln2_lo) / ln2_lo, x-[z]*ln2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*ln2_lo, x-[z]*ln2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*ln(2), [z] + fldl2e / log2(e), r, [z] + fmul / f := r*log2(e), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] + fscale / e^x, [z] + fstp %st(1) / e^x + fld1 / 1, e^x + fsubrp %st,%st(1) / e^x-1 + ret + +.check_tail: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0xb17217f7,%ecx / Is |x| < ln(2)? + ja .finite_non_special + jb .shortcut + movl 8(%rsp),%edx / edx <-- lo_32(x) + cmpl $0xd1cf79ab,%edx / Is |x| slightly < ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x/ln(2)| < 1, + / whence z is in f2xm1's domain. + fldt 8(%rsp) / x + fldl2e / log2(e), x + fmul / z := x*log2(e) + f2xm1 / 2^(x*log2(e))-1 = e^x-1 + ret + +.not_finite: + movl 12(%rsp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%rsp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? 
+ jne .NaN_or_pinf / if not, x is NaN + movl 16(%rsp),%eax / ax <-- sign&bexp((x)) + andl $0x8000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF +1: + fld1 / Here, x = -inf or x <= -128, so return -1 + fchs / -1 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 8(%rsp) + ret + .align 16 + SET_SIZE(expm1l) diff --git a/usr/src/libm/src/i386/amd64/fabsl.s b/usr/src/libm/src/i386/amd64/fabsl.s new file mode 100644 index 0000000..b5268d7 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/fabsl.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)fabsl.s 1.3 06/01/23 SMI" + + .file "fabsl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fabsl,function) +#include "libm_synonyms.h" + + ENTRY(fabsl) + fldt 8(%rsp) +#undef fabs + fabs + ret + .align 16 + SET_SIZE(fabsl) diff --git a/usr/src/libm/src/i386/amd64/floorl.s b/usr/src/libm/src/i386/amd64/floorl.s new file mode 100644 index 0000000..7e48df1 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/floorl.s @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)floorl.s 1.3 06/01/23 SMI" + + .file "floorl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ceill,function) +LIBM_ANSI_PRAGMA_WEAK(floorl,function) +#include "libm_synonyms.h" + + ENTRY(ceill) + subq $16,%rsp / scratch: (%rsp) = caller's CW, 4(%rsp) = working CW + fstcw (%rsp) / save caller's CW + fldt 24(%rsp) / push x + movw (%rsp),%cx + orw $0x0c00,%cx / set both RC bits (CW bits 10-11) + xorw $0x0400,%cx / clear bit 10, leaving RC = 10 + movw %cx,4(%rsp) + fldcw 4(%rsp) / set RD = up + frndint / round x to integral under RD = up + fstcw 4(%rsp) / re-read the CW currently in effect + movw 4(%rsp),%dx + andw $0xf3ff,%dx / dx <-- current CW with RC bits cleared + movw (%rsp),%cx + andw $0x0c00,%cx / cx <-- caller's RC bits only + orw %dx,%cx / current CW with caller's RC bits + movw %cx,(%rsp) + fldcw (%rsp) / restore RD + addq $16,%rsp + ret + .align 16 + SET_SIZE(ceill) + + + ENTRY(floorl) + subq $16,%rsp / scratch: (%rsp) = caller's CW, 4(%rsp) = working CW + fstcw (%rsp) / save caller's CW + fldt 24(%rsp) / push x + movw (%rsp),%cx + orw $0x0c00,%cx / set both RC bits (CW bits 10-11) + xorw $0x0800,%cx / clear bit 11, leaving RC = 01 + movw %cx,4(%rsp) + fldcw 4(%rsp) / set RD = down + frndint / round x to integral under RD = down + fstcw 4(%rsp) / re-read the CW currently in effect + movw 4(%rsp),%dx + andw $0xf3ff,%dx / dx <-- current CW with RC bits cleared + movw (%rsp),%cx + andw $0x0c00,%cx / cx <-- caller's RC bits only + orw %dx,%cx / current CW with caller's RC bits + movw %cx,(%rsp) + fldcw (%rsp) / restore RD + addq $16,%rsp + ret + .align 16 + SET_SIZE(floorl) diff --git a/usr/src/libm/src/i386/amd64/fmod.s b/usr/src/libm/src/i386/amd64/fmod.s new file mode 100644 index 0000000..fd02c30 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/fmod.s @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)fmod.s 1.2 06/01/23 SMI" + + .file "fmod.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmod,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(fmod) + push %rbp + movq %rsp,%rbp + subq $16,%rsp + movlpd %xmm1,-16(%rbp) + movlpd %xmm0,-8(%rbp) + + movl -12(%rbp),%eax / eax <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(|y|) + orl -16(%rbp),%eax / eax <-- lo_32(y)|hi_32(|y|) + je .yzero + + fldl -16(%rbp) / y + fldl -8(%rbp) / x +.loop: + fprem / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check for incomplete reduction + jne .loop / loop while reduction incomplete + fstpl -8(%rbp) + movsd -8(%rbp),%xmm0 + fstp %st(0) + leave + ret + +.yzero: + PIC_SETUP(1) + movl $27,%edi + movl $2,%eax + call PIC_F(_SVID_libm_err) + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(fmod) diff --git a/usr/src/libm/src/i386/amd64/fmodf.s b/usr/src/libm/src/i386/amd64/fmodf.s new file mode 100644 index 0000000..3e3e143 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/fmodf.s @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)fmodf.s 1.2 06/01/23 SMI" + + .file "fmodf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmodf,function) +#include "libm_synonyms.h" + + ENTRY(fmodf) + push %rbp + movq %rsp,%rbp + subq $16,%rsp + movss %xmm1,-8(%rbp) + movss %xmm0,-4(%rbp) + flds -8(%rbp) / load arg y + flds -4(%rbp) / load arg x +.loop: + fprem / partial remainder + fstsw %ax / store status word + andw $0x400,%ax / check whether reduction complete + jne .loop / loop while reduction incomplete + fstps -4(%rbp) + movss -4(%rbp),%xmm0 + fstp %st(0) + leave + ret + .align 4 + SET_SIZE(fmodf) diff --git a/usr/src/libm/src/i386/amd64/fmodl.s b/usr/src/libm/src/i386/amd64/fmodl.s new file mode 100644 index 0000000..671ea20 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/fmodl.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)fmodl.s 1.3 06/01/23 SMI" + + .file "fmodl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fmodl,function) +#include "libm_synonyms.h" + + ENTRY(fmodl) + fldt 24(%rsp) / load arg y + fldt 8(%rsp) / load arg x +.mod_loop: + fprem / partial fmod + fstsw %ax / store status word + andw $0x400,%ax / check for incomplete reduction + jne .mod_loop / while incomplete, do fprem again + fstp %st(1) + ret + .align 16 + SET_SIZE(fmodl) diff --git a/usr/src/libm/src/i386/amd64/ieee_funcl.s b/usr/src/libm/src/i386/amd64/ieee_funcl.s new file mode 100644 index 0000000..baa6c64 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/ieee_funcl.s @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)ieee_funcl.s 1.3 06/01/23 SMI" + + .file "ieee_funcl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(isinfl,function) +LIBM_ANSI_PRAGMA_WEAK(isnormall,function) +LIBM_ANSI_PRAGMA_WEAK(issubnormall,function) +LIBM_ANSI_PRAGMA_WEAK(iszerol,function) +LIBM_ANSI_PRAGMA_WEAK(signbitl,function) +#include "libm_synonyms.h" + + ENTRY(isinfl) + movl 16(%rsp),%eax / ax <-- sign and bexp of x + notl %eax / ax[0..14] <-- not(bexp(x)) + andq $0x7fff,%rax / rax <-- zero_xtnd(not(bexp(x))) + jz .L6 / jump iff bexp(x) = 0x7fff + movq $0,%rax / bexp(x) /= 0x7fff, so return 0 +.not_inf: + ret + +.L6: / here, bexp(x) = 0x7fff and (rax) = 0 + movl 12(%rsp),%ecx + xorl $0x80000000,%ecx / handle unsupported implicitly + orl 8(%rsp), %ecx / (ecx) = 0 iff sgnfcnd(x) = 0x80000000:00000000 + jnz .not_inf / otherwise return the 0 already in rax + movq $1,%rax + ret + .align 16 + SET_SIZE(isinfl) + + ENTRY(isnormall) + / TRUE iff (x is finite, but + / neither subnormal nor zero) + / iff (msb(sgnfcnd(x) /= 0 + / & 0 < bexp(x) < 0x7fff) + movl 12(%rsp),%eax / eax <-- hi_32(sgnfcnd(x)) + andq $0x80000000,%rax / eax[31] <-- msb(sgnfcnd(x)), + / rest_of(eax) <-- 0 + jz .L8 / jump iff msb(sgnfcnd(x)) = 0 + movl 16(%rsp),%eax / ax <-- sign and bexp of x + notl %eax / ax[0..14] <-- not(bexp(x)) + andq $0x7fff,%rax / eax <-- zero_xtnd(not(bexp(x))) + jz .L8 / jump iff bexp(x) = 0x7fff + xorq $0x7fff,%rax / rax <-- bexp(x); treat pseudo-denormal as subnormal + jz .L8 / jump iff bexp(x) = 0 + movq $1,%rax +.L8: + ret + .align 16 + SET_SIZE(isnormall) + + ENTRY(issubnormall) + / TRUE iff (bexp(x) = 0 & + / msb(sgnfcnd(x)) = 0 & frac(x) /= 0) + movl 12(%rsp),%eax / eax <-- hi_32(sgnfcnd(x)) + testl $0x80000000,%eax / eax[31] = msb(sgnfcnd(x)); + / set ZF if it's 0.
+ jz .may_be_subnorm / jump iff msb(sgnfcnd(x)) = 0 +.not_subnorm: + movq $0,%rax + ret +.may_be_subnorm: + testl $0x7fff,16(%rsp) / set ZF iff bexp(x) = 0 + jnz .not_subnorm / jump iff bexp(x) /= 0 + orl 8(%rsp),%eax / (eax) = 0 iff sgnfcnd(x) = 0 + jz .not_subnorm + movq $1,%rax + ret + .align 16 + SET_SIZE(issubnormall) + + ENTRY(iszerol) + movl 16(%rsp),%eax / ax <-- sign and bexp of x + andl $0x7fff,%eax / eax <-- zero_xtnd(bexp(x)) + jz .may_be_zero / jump iff bexp(x) = 0 +.not_zero: + movq $0,%rax + ret +.may_be_zero: / here, (eax) = 0 + orl 12(%rsp),%eax / is hi_32(sgnfcnd(x)) = 0? + jnz .not_zero / jump iff hi_32(sgnfcnd(x)) /= 0 + orl 8(%rsp),%eax / is lo_32(sgnfcnd(x)) = 0? + jnz .not_zero / jump iff lo_32(sgnfcnd(x)) /= 0 + movq $1,%rax + ret + .align 16 + SET_SIZE(iszerol) + + ENTRY(signbitl) + movl 16(%rsp),%eax / eax[15] <-- sign_bit(x) + shrl $15,%eax / eax <-- zero_xtnd(sign_bit(x)) + andq $1,%rax + ret + .align 16 + SET_SIZE(signbitl) diff --git a/usr/src/libm/src/i386/amd64/ilogbl.s b/usr/src/libm/src/i386/amd64/ilogbl.s new file mode 100644 index 0000000..30537af --- /dev/null +++ b/usr/src/libm/src/i386/amd64/ilogbl.s @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)ilogbl.s 1.4 06/01/23 SMI" + + .file "ilogbl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ilogbl,function) +#include "libm_synonyms.h" +#include "xpg6.h" + + .data + .align 16 +two63: .4byte 0x0,0x43d00000 / double 2**62 (bexp 0x43d = 1023+62), despite the label + + ENTRY(ilogbl) + movq 16(%rsp),%rax / eax <-- sign and bexp of x + andq $0x7fff,%rax / eax <-- bexp(x) + jz .bexp_0 / jump iff x is 0 or subnormal + / here, biased exponent is non-zero + testl $0x80000000,12(%rsp) / test msb of hi_32(sgnfcnd(x)) + jz .ilogbl_not_finite / jump if unsupported format + cmpq $0x7fff,%rax + je .ilogbl_not_finite + subq $16383,%rax / unbias exponent by 16383 = 0x3fff + ret + +.ilogbl_not_finite: + movq $0x7fffffff,%rax / x is NaN/inf/unsup + jmp 0f + +.bexp_0: + movq 8(%rsp),%rax / rax <-- sgnfcnd(x) + orq %rax,%rax + jnz .ilogbl_subnorm / jump iff x is subnormal + movq $-2147483647,%rax / x is +/-0, so return 1-2^31 +0: + PIC_SETUP(0) + PIC_G_LOAD(movzwq,__xpg6,rcx) + PIC_WRAPUP + andl $_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx + cmpl $0,%ecx + je 1f + fldz + fdivp %st,%st(0) / raise invalid as per SUSv3 +1: + ret + + +.ilogbl_subnorm: / subnormal or pseudo-denormal input + fldt 8(%rsp) / push x, setting D-flag + PIC_SETUP(1) + fmull PIC_L(two63) / x*2**62 + PIC_WRAPUP + subq $16,%rsp + fstpt (%rsp) + movq $0x7fff,%rax + andq 8(%rsp),%rax / rax <-- bexp of x*2**62 + subq $16445,%rax / unbias it by (16,383 + 62) + addq $16,%rsp + ret + .align 16 + SET_SIZE(ilogbl) diff --git a/usr/src/libm/src/i386/amd64/libm.m4 b/usr/src/libm/src/i386/amd64/libm.m4 new file mode 100644 index 0000000..42f5187 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/libm.m4 @@
-0,0 +1,290 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, include this CDDL HEADER in each
+/ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+/ Use is subject to license terms.
+/
+/ @(#)libm.m4 1.8 06/01/31 SMI
+/
+define(NAME,$1)dnl
+dnl
+ifdef(`LOCALLIBM',`dnl
+	.inline	NAME(__ieee754_sqrt),0
+	sqrtsd	%xmm0,%xmm0
+	.end
+/
+	.inline	NAME(__inline_sqrtf),0
+	sqrtss	%xmm0,%xmm0
+	.end
+/
+	.inline	NAME(__inline_sqrt),0
+	sqrtsd	%xmm0,%xmm0
+	.end
+/
+	.inline	NAME(__inline_fstsw),0
+	fstsw	%ax			/ ax <-- x87 status word
+	.end
+/
+/	00 - 24 bits
+/	01 - reserved
+/	10 - 53 bits
+/	11 - 64 bits
+/
+	.inline	NAME(__swapRP),0
+	subq	$16,%rsp		/ scratch slot for the control word
+	fstcw	(%rsp)			/ store the current x87 control word
+	movw	(%rsp),%ax		/ ax <-- old control word
+	movw	%ax,%cx			/ cx <-- working copy
+	andw	$0xfcff,%cx		/ clear bits 8-9 in the copy
+	andl	$0x3,%edi		/ keep the low two bits of the argument
+	shlw	$8,%di			/ move them up to bits 8-9
+	orw	%di,%cx			/ merge them into the copy
+	movl	%ecx,(%rsp)
+	fldcw	(%rsp)			/ install the new control word
+	shrw	$8,%ax
+	andq	$0x3,%rax		/ rax <-- bits 8-9 of the old control word
+	addq	$16,%rsp
+	.end
+/
+/	00 - Round to nearest, with even preferred
+/	01 - Round down
+/	10 - Round up
+/	11 - Chop
+/
+	.inline	NAME(__swap87RD),0
+	subq	$16,%rsp		/ scratch slot for the control word
+	fstcw	(%rsp)			/ store the current x87 control word
+	movw	(%rsp),%ax		/ ax <-- old control word
+	movw	%ax,%cx			/ cx <-- working copy
+	andw	$0xf3ff,%cx		/ clear bits 10-11 in the copy
+	andl	$0x3,%edi		/ keep the low two bits of the argument
+	shlw	$10,%di			/ move them up to bits 10-11
+	orw	%di,%cx			/ merge them into the copy
+	movl	%ecx,(%rsp)
+	fldcw	(%rsp)			/ install the new control word
+	shrw	$10,%ax
+	andq	$0x3,%rax		/ rax <-- bits 10-11 of the old control word
+	addq	$16,%rsp
+	.end
+/
+	.inline	NAME(abs),0
+	cmpl	$0,%edi
+	jge	1f			/ argument already non-negative
+	negl	%edi
+1:	movl	%edi,%eax
+	.end
+/
+ifdef(`XXX64',`dnl
+dnl/ Vulcan Build11.0 chokes on the following template (BugId 5069852)
+	.inline	NAME(abs),0
+	movl	%edi,%eax
+	negl	%edi
+	cmovnsl	%edi,%eax
+	.end
+')dnl
+')dnl
+ifdef(`XXX64',`dnl
+/
+/	Convert Top-of-Stack to long
+/
+	.inline	NAME(__xtol),0
+	.end
+/
+	.inline	NAME(ceil),0
+	.end
+/
+')dnl
+	.inline	NAME(copysign),0
+	movq	$0x7fffffffffffffff,%rax	/ mask of every bit except bit 63
+	movdq	%rax,%xmm2
+	andpd	%xmm2,%xmm0		/ low quadword of xmm0 <-- xmm0 with bit 63 cleared
+	andnpd	%xmm1,%xmm2		/ low quadword of xmm2 <-- bit 63 of xmm1
+	orpd	%xmm2,%xmm0		/ combine magnitude and sign
+	.end
+/
+	.inline	NAME(d_sqrt_),0
+	movlpd	(%rdi),%xmm0		/ operand is passed by reference
+	sqrtsd	%xmm0,%xmm0
+	.end
+/
+	.inline	NAME(fabs),0
+	movq	$0x7fffffffffffffff,%rax	/ mask of every bit except bit 63
+	movdq	%rax,%xmm1
+	andpd	%xmm1,%xmm0		/ clear bit 63 of the low quadword
+	.end
+/
+	.inline	NAME(fabsf),0
+	movl	$0x7fffffff,%eax	/ mask of every bit except bit 31
+	movdl	%eax,%xmm1
+	andps	%xmm1,%xmm0		/ clear bit 31 of the low doubleword
+	.end
+/
+ifdef(`XXX64',`dnl
+dnl/ Vulcan Build12.0 corrupts callee-saved registers (BugId 5083361)
+	.inline	NAME(fabsl),0
+	fldt	(%rsp)
+ifdef(`LOCALLIBM',`dnl
+#undef	fabs
+')dnl
+	fabs
+	.end
+/
+')dnl
+	.inline	NAME(finite),0
+	subq	$16,%rsp
+	movlpd	%xmm0,(%rsp)		/ spill x to read its bit pattern
+	movq	(%rsp),%rcx
+	movq	$0x7fffffffffffffff,%rax
+	andq	%rcx,%rax		/ rax <-- bits of x with bit 63 cleared
+	movq	$0x7ff0000000000000,%rcx
+	subq	%rcx,%rax		/ negative iff the masked bits are below 0x7ff0000000000000
+	shrq	$63,%rax		/ rax <-- 1 in that case, else 0
+	addq	$16,%rsp
+	.end
+/
+ifdef(`XXX64',`dnl
+	.inline	NAME(floor),0
+	.end
+/
+dnl/	branchless isnan
+dnl/	((0x7ff00000-[((lx|-lx)>>31)&1]|ahx)>>31)&1 = 1 iff x is NaN
+dnl/
+	.inline	NAME(isnan),0
+	.end
+/
+	.inline	NAME(isnanf),0
+	.end
+/
+	.inline	NAME(isinf),0
+	.end
+/
+	.inline	NAME(isnormal),0
+	.end
+/
+	.inline	NAME(issubnormal),0
+	.end
+/
+	.inline	NAME(iszero),0
+	.end
+/
+')dnl
+	.inline	NAME(r_sqrt_),0
+	movss	(%rdi),%xmm0		/ operand is passed by reference
+	sqrtss	%xmm0,%xmm0
+	.end
+/
+ifdef(`XXX64',`dnl
+	.inline	NAME(rint),0
+	.end
+/
+	.inline	NAME(scalbn),0
+	.end
+/
+')dnl
+	.inline	NAME(signbit),0
+	movmskpd %xmm0,%eax		/ eax bit 0 <-- bit 63 of the low quadword
+	andq	$1,%rax
+	.end
+/
+	.inline	NAME(signbitf),0
+	movmskps %xmm0,%eax		/ eax bit 0 <-- bit 31 of the low doubleword
+	andq	$1,%rax
+	.end
+/
+	.inline	NAME(sqrt),0
+	sqrtsd	%xmm0,%xmm0
+	.end
+/
+	.inline	NAME(sqrtf),0
+	sqrtss	%xmm0,%xmm0
+	.end
+/
+ifdef(`XXX64',`dnl
+dnl/ Vulcan Build12.0 corrupts callee-saved registers (BugId 5083361)
+	.inline	NAME(sqrtl),0
+	fldt	(%rsp)
+	fsqrt
+	.end
+/
+	.inline	NAME(isnanl),0
+	movl	8(%rsp),%eax		/ ax <-- sign bit and exp
+	andq	$0x7fff,%rax
+	jz	1f			/ jump if exp is all 0
+	xorq	$0x7fff,%rax
+	jz	2f			/ jump if exp is all 1
+	testl	$0x80000000,4(%rsp)
+	jz	3f			/ jump if leading bit is 0
+	movq	$0,%rax			/ ordinary finite value: result 0
+	jmp	1f
+2:					/ note that %eax = 0 from before
+	cmpl	$0x80000000,4(%rsp)	/ what is first half of significand?
+	jnz	3f			/ jump if not equal to 0x80000000
+	testl	$0xffffffff,(%rsp)	/ is second half of significand 0?
+	jnz	3f			/ jump if not equal to 0
+	jmp	1f
+3:
+	movq	$1,%rax			/ result 1
+1:
+	.end
+/
+	.inline	NAME(__anint),0
+	.end
+/
+')dnl
+	.inline	NAME(__f95_signf),0
+	movl	(%rdi),%eax		/ eax <-- bits of the first operand
+	movl	(%rsi),%ecx		/ ecx <-- bits of the second operand
+	andl	$0x7fffffff,%eax	/ keep the magnitude of the first
+	andl	$0x80000000,%ecx	/ keep the sign bit of the second
+	orl	%ecx,%eax
+	movdl	%eax,%xmm0
+	.end
+/
+	.inline	NAME(__f95_sign),0
+	movq	(%rsi),%rax		/ rax <-- bits of the second operand
+	movq	$0x7fffffffffffffff,%rdx
+	shrq	$63,%rax
+	shlq	$63,%rax		/ rax <-- only its bit 63
+	andq	(%rdi),%rdx		/ rdx <-- magnitude of the first operand
+	orq	%rdx,%rax
+	movdq	%rax,%xmm0
+	.end
+/
+	.inline	NAME(__r_sign),0
+	movl	$0x7fffffff,%eax
+	movl	$0x80000000,%edx
+	andl	(%rdi),%eax		/ eax <-- magnitude of the first operand
+	cmpl	(%rsi),%edx		/ are the second operand bits exactly 0x80000000?
+	cmovel	%eax,%edx		/ if so edx <-- eax, whose bit 31 is clear
+	andl	(%rsi),%edx		/ edx <-- bit 31 to apply, 0 in that special case
+	orl	%edx,%eax
+	movdl	%eax,%xmm0
+	.end
+/
+	.inline	NAME(__d_sign),0
+	movq	$0x7fffffffffffffff,%rax
+	movq	$0x8000000000000000,%rdx
+	andq	(%rdi),%rax		/ rax <-- magnitude of the first operand
+	cmpq	(%rsi),%rdx		/ are the second operand bits exactly 0x8000000000000000?
+	cmoveq	%rax,%rdx		/ if so rdx <-- rax, whose bit 63 is clear
+	andq	(%rsi),%rdx		/ rdx <-- bit 63 to apply, 0 in that special case
+	orq	%rdx,%rax
+	movdq	%rax,%xmm0
+	.end
diff --git a/usr/src/libm/src/i386/amd64/log10l.s b/usr/src/libm/src/i386/amd64/log10l.s
new file mode 100644
index 0000000..e713520
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/log10l.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)log10l.s 1.3 06/01/23 SMI"
+
+	.file	"log10l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(log10l,function)
+#include "libm_synonyms.h"
+
+	ENTRY(log10l)			/ base-10 log of the long double at 8(%rsp), result left in st
+	fldlg2				/ push log10(2)
+	fldt	8(%rsp)			/ st = arg, st(1) = log10(2)
+	fyl2x				/ st = log10(arg) = log10(2)*log2(arg)
+	ret
+	.align	16
+	SET_SIZE(log10l)
diff --git a/usr/src/libm/src/i386/amd64/log2l.s b/usr/src/libm/src/i386/amd64/log2l.s
new file mode 100644
index 0000000..48dcd6c
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/log2l.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)log2l.s 1.3 06/01/23 SMI"
+
+	.file	"log2l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(log2l,function)
+#include "libm_synonyms.h"
+
+	ENTRY(log2l)			/ base-2 log of the long double at 8(%rsp), result left in st
+	fld1				/ push 1.0
+	fldt	8(%rsp)			/ push x
+	fyl2x				/ st = 1.0*log2(arg)
+	ret
+	.align	16
+	SET_SIZE(log2l)
diff --git a/usr/src/libm/src/i386/amd64/logl.s b/usr/src/libm/src/i386/amd64/logl.s
new file mode 100644
index 0000000..a9104e0
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/logl.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)logl.s 1.3 06/01/23 SMI"
+
+	.file	"logl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(logl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(logl)			/ natural log of the long double at 8(%rsp), result left in st
+	fldln2				/ push loge(2)
+	fldt	8(%rsp)			/ st = arg, st(1) = loge(2)
+	fyl2x				/ st = ln(arg) = loge(2)*log2(arg)
+	ret
+	.align	16
+	SET_SIZE(logl)
diff --git a/usr/src/libm/src/i386/amd64/powl.s b/usr/src/libm/src/i386/amd64/powl.s
new file mode 100644
index 0000000..820c836
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/powl.s
@@ -0,0 +1,419 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)powl.s 1.4 06/01/23 SMI"
+
+	.file	"powl.s"
+
+/ Special cases:
+/
+/ x ** 0 is 1
+/ 1 ** y is 1				(C99)
+/ x ** NaN is NaN
+/ NaN ** y (except 0) is NaN
+/ x ** 1 is x
+/ +-(|x| > 1) ** +inf is +inf
+/ +-(|x| > 1) ** -inf is +0
+/ +-(|x| < 1) ** +inf is +0
+/ +-(|x| < 1) ** -inf is +inf
+/ (-1) ** +-inf is +1			(C99)
+/ +0 ** +y (except 0, NaN) is +0
+/ -0 ** +y (except 0, NaN, odd int) is +0
+/ +0 ** -y (except 0, NaN) is +inf (z flag)
+/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag)
+/ -0 ** y (odd int) is - (+0 ** y)
+/ +inf ** +y (except 0, NaN) is +inf
+/ +inf ** -y (except 0, NaN) is +0
+/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag)
+/ x ** -1 is 1/x
+/ x ** 2 is x*x
+/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
+/ x ** y (x negative & y not integer) is NaN (i flag)
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(powl,function)
+#include "libm_synonyms.h"
+#include "xpg6.h"
+
+#undef	fabs
+
+	.data
+	.align	16
+negzero:
+	.float	-0.0
+half:
+	.float	0.5
+one:
+	.float	1.0
+negone:
+	.float	-1.0
+two:
+	.float	2.0
+Snan:
+	.4byte	0x7f800001
+pinfinity:
+	.4byte	0x7f800000
+ninfinity:
+	.4byte	0xff800000
+
+
+	ENTRY(powl)			/ x ** y for long doubles x at 16(%rbp) and y at 32(%rbp), result in st
+	pushq	%rbp
+	movq	%rsp,%rbp
+	PIC_SETUP(1)
+
+	fldt	16(%rbp)		/ x
+	fxam				/ determine class of x
+	fnstsw	%ax			/ store status in %ax
+	movb	%ah,%dh			/ %dh <- condition code of x
+
+	fldt	32(%rbp)		/ y , x
+	fxam				/ determine class of y
+	fnstsw	%ax			/ store status in %ax
+	movb	%ah,%dl			/ %dl <- condition code of y
+
+	call	.pow_main		/// LOCAL
+	PIC_WRAPUP
+	leave
+	ret
+
+.pow_main:				/ entered with stack y , x ; every path returns with the result alone in st
+	/ x ** 0 is 1
+	movb	%dl,%cl			/ cl <- condition code of y
+	andb	$0x45,%cl		/ keep C3, C2, C0
+	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=? C0=0 when +-0
+	jne	1f
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	fld1				/ 1
+	ret
+
+1:	/ y is not zero
+	PIC_G_LOAD(movzwq,__xpg6,rax)
+	andl	$_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
+	cmpl	$0,%eax
+	je	1f			/ flag clear: skip the C99 case below
+
+	/ C99: 1 ** anything is 1
+	fld1				/ 1, y, x
+	fucomip	%st(2),%st		/ y, x
+	jp	1f			/ so that pow(NaN1,NaN2) returns NaN2
+	jne	1f			/ x is not 1
+	fstp	%st(0)			/ x
+	ret
+
+1:
+	/ x ** NaN is NaN
+	movb	%dl,%cl			/ cl <- condition code of y
+	andb	$0x45,%cl		/ keep C3, C2, C0
+	cmpb	$0x01,%cl		/ C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne	1f
+	fstp	%st(1)			/ y
+	ret
+
+1:	/ y is not NaN
+	/ NaN ** y (except 0) is NaN
+	movb	%dh,%cl			/ cl <- condition code of x
+	andb	$0x45,%cl		/ keep C3, C2, C0
+	cmpb	$0x01,%cl		/ C3=0 C2=0 C1=? C0=1 when +-NaN
+	jne	1f
+	fstp	%st(0)			/ x
+	ret
+
+1:	/ x is not NaN
+	/ x ** 1 is x
+	fld1				/ 1, y, x
+	fcomip	%st(1),%st		/ y, x
+	jne	1f
+	fstp	%st(0)			/ x
+	ret
+
+1:	/ y is not 1
+	/ +-(|x| > 1) ** +inf is +inf
+	/ +-(|x| > 1) ** -inf is +0
+	/ +-(|x| < 1) ** +inf is +0
+	/ +-(|x| < 1) ** -inf is +inf
+	/ +-(|x| = 1) ** +-inf is NaN
+	movb	%dl,%cl			/ cl <- condition code of y
+	andb	$0x47,%cl		/ keep C3, C2, C1, C0
+	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=0 C0=1 when +inf
+	je	.yispinf
+	cmpb	$0x07,%cl		/ C3=0 C2=1 C1=1 C0=1 when -inf
+	je	.yisninf
+
+	/ +0 ** +y (except 0, NaN) is +0
+	/ -0 ** +y (except 0, NaN, odd int) is +0
+	/ +0 ** -y (except 0, NaN) is +inf (z flag)
+	/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag)
+	/ -0 ** y (odd int) is - (+0 ** y)
+	movb	%dh,%cl			/ cl <- condition code of x
+	andb	$0x47,%cl		/ keep C3, C2, C1, C0
+	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=0 C0=0 when +0
+	je	.xispzero
+	cmpb	$0x42,%cl		/ C3=1 C2=0 C1=1 C0=0 when -0
+	je	.xisnzero
+
+	/ +inf ** +y (except 0, NaN) is +inf
+	/ +inf ** -y (except 0, NaN) is +0
+	/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag)
+	movb	%dh,%cl			/ cl <- condition code of x
+	andb	$0x47,%cl		/ keep C3, C2, C1, C0
+	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=0 C0=1 when +inf
+	je	.xispinf
+	cmpb	$0x07,%cl		/ C3=0 C2=1 C1=1 C0=1 when -inf
+	je	.xisninf
+
+	/ x ** -1 is 1/x
+	flds	PIC_L(negone)		/ -1, y, x
+	fcomip	%st(1),%st		/ y, x
+	jne	1f
+	fld	%st(1)			/ x , y , x
+	fdivrs	PIC_L(one)		/ 1/x , y , x
+	jmp	.signok			/ check for over/underflow
+
+1:	/ y is not -1
+	/ x ** 2 is x*x
+	flds	PIC_L(two)		/ 2, y , x
+	fcomip	%st(1),%st		/ y, x
+	jne	1f
+	fld	%st(1)			/ x , y , x
+	fld	%st(0)			/ x , x , y , x
+	fmul				/ x^2 , y , x
+	jmp	.signok			/ check for over/underflow
+
+1:	/ y is not 2
+	/ x ** 1/2 is sqrt(x)
+	flds	PIC_L(half)		/ 1/2, y , x
+	fcomip	%st(1),%st		/ y, x
+	jne	1f
+	fld	%st(1)			/ x , y , x
+	fsqrt				/ sqrt(x) , y , x
+	jmp	.signok			/ check for over/underflow
+
+1:	/ y is not 1/2
+	/ make copies of x & y
+	fld	%st(1)			/ x , y , x
+	fld	%st(1)			/ y , x , y , x
+
+	/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
+	/ x ** y (x negative & y not integer) is NaN
+	movl	$0,%ecx			/ track whether to flip sign of result
+	fldz				/ 0 , y , x , y , x
+	fcomip	%st(2),%st		/ compare 0 with %st(2)
+	jb	.merge			/ 0 < x
+	/ x < 0
+	call	.y_is_int		/ ecx <- 2, 1 or 0 for even int, odd int, non-int y
+	cmpl	$0,%ecx
+	jne	1f			/ y is an integer
+	/ x < 0 & y != int so x**y = NaN (i flag)
+	fstp	%st(0)			/ x , y , x
+	fstp	%st(0)			/ y , x
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	fldz
+	fdiv	%st,%st(0)		/ 0/0
+	ret
+
+1:	/ x < 0 & y = int
+	fxch				/ x , y , y , x
+	fchs				/ px = -x , y , y , x
+	fxch				/ y , px , y , x
+.merge:
+	/ px > 0
+	fxch				/ px , y , y , x
+
+	/ x**y = exp(y*ln(x))
+	fyl2x				/ t=y*log2(px) , y , x
+	fld	%st(0)			/ t , t , y , x
+	frndint				/ [t] , t , y , x
+	fxch				/ t , [t] , y , x
+	fucomi	%st(1),%st		/ compare t with [t]
+	je	1f			/ t is integral
+	fsub	%st(1),%st		/ t-[t] , [t] , y , x
+	f2xm1				/ 2**(t-[t])-1 , [t] , y , x
+	fadds	PIC_L(one)		/ 2**(t-[t]) , [t] , y , x
+	fscale				/ 2**t = px**y , [t] , y , x
+	jmp	2f
+1:
+	fstp	%st(0)			/ t=[t] , y , x
+	fld1				/ 1 , t , y , x
+	fscale				/ 1*2**t = x**y , t , y , x
+2:
+	fstp	%st(1)			/ x**y , y , x
+	cmpl	$1,%ecx			/ ecx = 1 only when x < 0 and y is an odd int
+	jne	.signok
+	fchs				/ change sign since x<0 & y is an odd int
+.signok:
+	fstp	%st(2)			/ y , x**y
+	fstp	%st(0)			/ x**y
+	ret
+
+/ ------------------------------------------------------------------------
+
+.xispinf:
+	fldz				/ 0 , y , x
+	fcomip	%st(1),%st		/ compare 0 with %st(1)
+	jb	.retpinf		/ 0 < y
+	jmp	.retpzero		/ y < 0
+
+.xisninf:
+	/ -inf ** +-y is -0 ** -+y
+	fchs				/ -y , x
+	flds	PIC_L(negzero)		/ -0 , -y , x
+	fstp	%st(2)			/ -y , -0
+	jmp	.xisnzero
+
+.yispinf:
+	fld	%st(1)			/ x , y , x
+	fabs				/ |x| , y , x
+	flds	PIC_L(one)		/ 1 , |x| , y , x
+	fcomip	%st(1),%st		/ |x| , y , x
+	fstp	%st(0)			/ y , x
+	je	.retponeorinvalid	/ |x| == 1	C99
+	jb	.retpinf		/ 1 < |x|
+	jmp	.retpzero		/ |x| < 1
+
+.yisninf:
+	fld	%st(1)			/ x , y , x
+	fabs				/ |x| , y , x
+	flds	PIC_L(one)		/ 1 , |x| , y , x
+	fcomip	%st(1),%st		/ |x| , y , x
+	fstp	%st(0)			/ y , x
+	je	.retponeorinvalid	/ |x| == 1	C99
+	jb	.retpzero		/ 1 < |x|
+	jmp	.retpinf		/ |x| < 1
+
+.xispzero:
+	/ y cannot be 0 or NaN ; stack has y , x
+	fldz				/ 0 , y , x
+	fcomip	%st(1),%st		/ compare 0 with %st(1)
+	jb	.retpzero		/ 0 < y
+	/ x = +0 & y < 0 so x**y = +inf
+	jmp	.retpinfzflag		/ ret +inf & z flag
+
+.xisnzero:
+	/ y cannot be 0 or NaN ; stack has y , x
+	call	.y_is_int		/ ecx <- 2, 1 or 0 for even int, odd int, non-int y
+	cmpl	$1,%ecx
+	jne	1f			/ y is not an odd integer
+	/ y is an odd integer
+	fldz
+	fcomip	%st(1),%st		/ compare 0 with %st(1)
+	jb	.retnzero		/ 0 < y
+	/ x = -0 & y < 0 (odd int)	return -inf (z flag)
+	/ x = -inf & y != 0 or NaN	return -inf (NO z flag)
+	movb	%dh,%cl			/ cl <- condition code of the original x
+	andb	$0x45,%cl		/ keep C3, C2, C0
+	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=? C0=1 when +-inf
+	je	2f			/ came from .xisninf: skip raising the z flag
+	fdiv	%st,%st(1)		/ y / x, x (raise z flag)
+2:
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	flds	PIC_L(ninfinity)	/ -inf
+	ret
+
+1:	/ y is not an odd integer
+	fldz
+	fcomip	%st(1),%st		/ compare 0 with %st(1)
+	jb	.retpzero		/ 0 < y
+	/ x = -0 & y < 0 (not odd int)	return +inf (z flag)
+	/ x = -inf & y not 0 or NaN	return +inf (NO z flag)
+	movb	%dh,%cl			/ cl <- condition code of the original x
+	andb	$0x45,%cl		/ keep C3, C2, C0
+	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=? C0=1 when +-inf
+	jne	.retpinfzflag		/ ret +inf & divide-by-0 flag
+	jmp	.retpinf		/ return +inf (NO z flag)
+
+.retpzero:
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	fldz				/ +0
+	ret
+
+.retnzero:
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	flds	PIC_L(negzero)		/ -0
+	ret
+
+.retponeorinvalid:
+	PIC_G_LOAD(movzwq,__xpg6,rax)
+	andl	$_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
+	cmpl	$0,%eax
+	je	1f			/ flag clear: return NaN instead of 1
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	fld1				/ 1
+	ret
+
+1:
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	flds	PIC_L(Snan)		/ Q NaN (i flag)
+	fwait
+	ret
+
+.retpinf:
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	flds	PIC_L(pinfinity)	/ +inf
+	ret
+
+.retpinfzflag:
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ stack empty
+	fldz
+	fdivrs	PIC_L(one)		/ 1/0
+	ret
+
+/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
+/ 0 otherwise.  Assume y is not zero.  Do not raise inexact or modify
+/ %edx.
+.y_is_int:
+	movl	40(%rbp),%eax		/ sign and biased exponent of y, read from the argument in memory
+	andl	$0x7fff,%eax		/ exponent of y
+	cmpl	$0x403f,%eax
+	jae	1f			/ |y| >= 2^64, an even int
+	cmpl	$0x3fff,%eax
+	jb	2f			/ |y| < 1, can't be an int
+	movl	%eax,%ecx
+	subl	$0x403e,%ecx
+	negl	%ecx			/ 63 - unbiased exponent of y
+	movq	32(%rbp),%rax		/ rax <- significand of y
+	bsfq	%rax,%rax		/ index of least sig. 1 bit
+	cmpl	%ecx,%eax
+	jb	2f			/ a 1 bit lies below the units place: not an int
+	ja	1f			/ units bit and everything below it are 0: even int
+	movl	$1,%ecx			/ lowest 1 bit is the units bit: odd int
+	ret
+1:
+	movl	$2,%ecx
+	ret
+2:
+	xorl	%ecx,%ecx
+	ret
+	.align	16
+	SET_SIZE(powl)
diff --git a/usr/src/libm/src/i386/amd64/remainder.s b/usr/src/libm/src/i386/amd64/remainder.s
new file mode 100644
index 0000000..984b7ec
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/remainder.s
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remainder.s 1.2 06/01/23 SMI"
+
+	.file	"remainder.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainder,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+	ENTRY(remainder)		/ fprem1 remainder of x (xmm0) by y (xmm1), returned in xmm0
+	push	%rbp
+	movq	%rsp,%rbp
+	subq	$16,%rsp		/ scratch for the two operands
+	movlpd	%xmm1,-16(%rbp)		/ spill y
+	movlpd	%xmm0,-8(%rbp)		/ spill x
+
+	ucomisd	%xmm0,%xmm1		/ if x or y is NaN, use fprem1
+	jp	1f			/ unordered: at least one NaN
+
+	movl	-12(%rbp),%eax		/ eax <-- hi_32(y)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(|y|)
+	orl	-16(%rbp),%eax		/ eax <-- lo_32(y)|hi_32(|y|)
+	je	.yzero_or_xinf		/ jump iff y is +-0
+
+	movl	-4(%rbp),%eax		/ eax <-- hi_32(x)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(|x|)
+	cmpl	$0x7ff00000,%eax	/ exponent all ones and high fraction bits 0?
+	jne	1f
+	cmpl	$0,-8(%rbp)		/ is lo_32(x) = 0?
+	je	.yzero_or_xinf		/ jump iff x is +-inf
+1:
+	fldl	-16(%rbp)		/ y
+	fldl	-8(%rbp)		/ x
+.rem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check for incomplete reduction
+	jne	.rem_loop		/ while incomplete, do fprem1 again
+	fstpl	-8(%rbp)		/ store the remainder as a double and pop
+	movsd	-8(%rbp),%xmm0		/ return value goes in xmm0
+	fstp	%st(0)			/ pop y, leaving the x87 stack as on entry
+	leave
+	ret
+
+.yzero_or_xinf:				/ xmm0 and xmm1 still hold x and y here
+	PIC_SETUP(1)
+	movl	$28,%edi		/ integer argument handed to _SVID_libm_err
+	movl	$2,%eax			/ NOTE(review): presumably the count of vector registers used by the call -- confirm
+	call	PIC_F(_SVID_libm_err)
+	PIC_WRAPUP
+	leave
+	ret
+	.align	4
+	SET_SIZE(remainder)
diff --git a/usr/src/libm/src/i386/amd64/remainderf.s b/usr/src/libm/src/i386/amd64/remainderf.s
new file mode 100644
index 0000000..8a3b350
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/remainderf.s
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this
file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remainderf.s 1.2 06/01/23 SMI"
+
+	.file	"remainderf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainderf,function)
+#include "libm_synonyms.h"
+
+	ENTRY(remainderf)		/ fprem1 remainder of x (xmm0) by y (xmm1) in single precision, returned in xmm0
+	push	%rbp
+	movq	%rsp,%rbp
+	subq	$16,%rsp		/ scratch for the two operands
+	movss	%xmm1,-8(%rbp)		/ spill y
+	movss	%xmm0,-4(%rbp)		/ spill x
+	flds	-8(%rbp)		/ load arg y
+	flds	-4(%rbp)		/ load arg x
+.rem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.rem_loop		/ while reduction incomplete, do fprem1
+	fstps	-4(%rbp)		/ store the remainder as a float and pop
+	movss	-4(%rbp),%xmm0		/ return value goes in xmm0
+	fstp	%st(0)			/ pop y, leaving the x87 stack as on entry
+	leave
+	ret
+	.align	4
+	SET_SIZE(remainderf)
diff --git a/usr/src/libm/src/i386/amd64/remainderl.s b/usr/src/libm/src/i386/amd64/remainderl.s
new file mode 100644
index 0000000..2893872
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/remainderl.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remainderl.s 1.3 06/01/23 SMI"
+
+	.file	"remainderl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainderl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(remainderl)		/ fprem1 remainder of x at 8(%rsp) by y at 24(%rsp), result left in st
+	fldt	24(%rsp)		/ load arg y
+	fldt	8(%rsp)			/ load arg x
+.rem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.rem_loop		/ while reduction incomplete, do fprem1
+	fstp	%st(1)			/ overwrite y with the remainder and pop
+	ret
+	.align	16
+	SET_SIZE(remainderl)
diff --git a/usr/src/libm/src/i386/amd64/remquol.s b/usr/src/libm/src/i386/amd64/remquol.s
new file mode 100644
index 0000000..d361092
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/remquol.s
@@ -0,0 +1,67 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remquol.s 1.3 06/01/23 SMI"
+
+	.file	"remquol.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remquol,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+	ENTRY(remquol)			/ remainder of x at 8(%rsp) by y at 24(%rsp) in st; signed low quotient bits stored via %rdi
+	fldt	24(%rsp)		/ load arg y
+	fldt	8(%rsp)			/ load arg x
+.Lreml_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.Lreml_loop		/ while reduction incomplete, do fprem1
+	fstsw	%ax			/ reread status word; ax was masked above
+	fwait
+	fstp	%st(1)			/ overwrite y with the remainder and pop
+	movw	%ax,%dx
+	andw	$0x4000,%dx		/ get C3
+	sarw	$13,%dx			/ C3 (bit 14) --> bit 1
+	movw	%ax,%cx
+	andw	$0x100,%cx		/ get C0
+	sarw	$6,%cx			/ C0 (bit 8) --> bit 2
+	addw	%cx,%dx
+	andw	$0x200,%ax		/ get C1
+	sarw	$9,%ax			/ C1 (bit 9) --> bit 0
+	addw	%dx,%ax			/ ax <-- three-bit value C0:C3:C1
+	cwtl				/ sign-extend ax into eax
+	movl	16(%rsp),%edx		/ sign and bexp of x
+	movl	32(%rsp),%ecx		/ sign and bexp of y
+	andl	$0x8000,%edx		/ edx <- sign(x)
+	andl	$0x8000,%ecx		/ ecx <- sign(y)
+	cmpl	%edx,%ecx
+	je	1f			/ same sign: keep n non-negative
+	negl	%eax			/ negative n
+1:
+	movl	%eax,(%rdi)		/ last 3 significant bits of quotient
+	ret
+	.align	16
+	SET_SIZE(remquol)
diff --git a/usr/src/libm/src/i386/amd64/rintl.s b/usr/src/libm/src/i386/amd64/rintl.s
new file mode 100644
index 0000000..caab55c
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/rintl.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)rintl.s 1.3 06/01/23 SMI"
+
+	.file	"rintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(rintl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(rintl)			/ round the long double at 8(%rsp) to an integral value, result left in st
+	fldt	8(%rsp)			/ load x
+	frndint				/ [x], per rounding mode
+	fwait
+	ret
+	.align	16
+	SET_SIZE(rintl)
diff --git a/usr/src/libm/src/i386/amd64/rndintl.s b/usr/src/libm/src/i386/amd64/rndintl.s
new file mode 100644
index 0000000..2b94e0e
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/rndintl.s
@@ -0,0 +1,146 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)rndintl.s 1.4 06/01/23 SMI"
+
+	.file	"rndintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(aintl,function)
+LIBM_ANSI_PRAGMA_WEAK(irintl,function)
+LIBM_ANSI_PRAGMA_WEAK(anintl,function)
+LIBM_ANSI_PRAGMA_WEAK(nintl,function)
+#include "libm_synonyms.h"
+
+#undef fabs
+
+	ENTRY(aintl)			/ round the long double at 8(%rsp) toward zero, result left in st
+	movq	%rsp,%rax		/ rax <-- entry rsp; x is at 8(%rax)
+	subq	$16,%rsp		/ scratch below the return address
+	fstcw	-8(%rax)		/ save the control word found on entry
+	fldt	8(%rax)			/ push x
+	movw	-8(%rax),%cx
+	orw	$0x0c00,%cx		/ set both bits of the rounding field (bits 10-11)
+	movw	%cx,-4(%rax)
+	fldcw	-4(%rax)		/ set RD = to_zero
+	frndint				/ st <-- x with the fraction chopped
+	fstcw	-4(%rax)		/ reread the control word now in effect
+	movw	-4(%rax),%dx
+	andw	$0xf3ff,%dx		/ drop its rounding field
+	movw	-8(%rax),%cx
+	andw	$0x0c00,%cx		/ rounding field saved on entry
+	orw	%dx,%cx			/ current word with the entry rounding field
+	movw	%cx,-8(%rax)
+	fldcw	-8(%rax)		/ restore RD
+	addq	$16,%rsp
+	ret
+	.align	16
+	SET_SIZE(aintl)
+
+	ENTRY(irintl)			/ convert the long double at 8(%rsp) to a 32-bit integer with fistpl
+	movq	%rsp,%rcx		/ rcx <-- entry rsp; x is at 8(%rcx)
+	subq	$16,%rsp		/ scratch below the return address
+	fldt	8(%rcx)			/ load x
+	fistpl	-8(%rcx)		/ [x]
+	fwait
+	movslq	-8(%rcx),%rax		/ sign-extend the 32-bit result into rax
+	addq	$16,%rsp
+	ret
+	.align	16
+	SET_SIZE(irintl)
+
+	.data
+	.align	16
+half:	.float	0.5
+
+	ENTRY(anintl)			/ round the long double at 8(%rsp) to nearest, halfway cases away from zero
+.Lanintl:				/ local entry point, also called by nintl
+	movq	%rsp,%rcx		/ rcx <-- entry rsp; x is at 8(%rcx)
+	subq	$16,%rsp		/ scratch below the return address
+	fstcw	-8(%rcx)		/ save the control word found on entry
+	fldt	8(%rcx)			/ push x
+	movw	-8(%rcx),%dx
+	andw	$0xf3ff,%dx		/ clear the rounding field (bits 10-11)
+	movw	%dx,-4(%rcx)
+	fldcw	-4(%rcx)		/ set RD = to_nearest
+	fld	%st(0)			/ x,x
+	frndint				/ [x],x
+	fstcw	-4(%rcx)		/ reread the control word now in effect
+	movw	-4(%rcx),%dx
+	andw	$0xf3ff,%dx		/ drop its rounding field
+	movw	-8(%rcx),%ax
+	andw	$0x0c00,%ax		/ rounding field saved on entry
+	orw	%dx,%ax			/ current word with the entry rounding field
+	movw	%ax,-8(%rcx)
+	fldcw	-8(%rcx)		/ restore RD
+	fucomi	%st(1),%st		/ check if x is already an integer
+	jp	.L0			/ unordered (NaN): return x
+	je	.L0			/ [x] = x: return x
+	fxch				/ x,[x]
+	fsub	%st(1),%st		/ x-[x],[x]
+	fabs				/ |x-[x]|,[x]
+	PIC_SETUP(1)
+	flds	PIC_L(half)		/ 0.5,|x-[x]|,[x]
+	fcomip	%st(1),%st		/ compare 0.5 with |x-[x]|
+	PIC_WRAPUP
+	je	.halfway		/ if 0.5 = |x-[x]| goto halfway,
+					/ most cases will not take branch.
+.L0:
+	addq	$16,%rsp
+	fstp	%st(0)			/ pop the top entry, leaving the result in st
+	ret
+.halfway:
+	/ x = n+0.5, recompute anint(x) as x+sign(x)*0.5
+	fldt	8(%rcx)			/ x, 0.5, [x]
+	movw	16(%rcx),%ax		/ sign+exp part of x
+	andw	$0x8000,%ax		/ look at sign bit
+	jnz	.x_neg
+	fadd				/ x+0.5,[x]
+	addq	$16,%rsp
+	fstp	%st(1)			/ x+0.5
+	ret
+.x_neg:
+	/ here, x is negative, so return x-0.5
+	fsubp	%st,%st(1)		/ x-0.5,[x]
+	addq	$16,%rsp
+	fstp	%st(1)			/ x-0.5
+	ret
+	.align	16
+	SET_SIZE(anintl)
+
+	ENTRY(nintl)			/ anintl of the long double at 16(%rbp), converted to a 32-bit integer
+	pushq	%rbp
+	movq	%rsp,%rbp
+	subq	$16,%rsp		/ scratch; -8(%rbp) receives the integer
+	pushq	24(%rbp)		/ copy the high 8 bytes of x
+	pushq	16(%rbp)		/ copy the low 8 bytes, so x sits at 8(%rsp) after the call
+	call	.Lanintl		/// LOCAL
+	fistpl	-8(%rbp)		/ store the rounded value as a 32-bit integer and pop
+	fwait
+	movslq	-8(%rbp),%rax		/ sign-extend the 32-bit result into rax
+	leave				/ also discards the pushed copy of x
+	ret
+	.align	16
+	SET_SIZE(nintl)
diff --git a/usr/src/libm/src/i386/amd64/scalbnl.s b/usr/src/libm/src/i386/amd64/scalbnl.s
new file mode 100644
index 0000000..e960cdc
--- /dev/null
+++ b/usr/src/libm/src/i386/amd64/scalbnl.s
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + + .ident "@(#)scalbnl.s 1.3 06/01/23 SMI" + + .file "scalbnl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(scalbnl,function) +#include "libm_synonyms.h" + + ENTRY(scalbnl) + subq $16,%rsp / scratch slot for n + movl %edi,(%rsp) / spill n to memory so the FPU can load it + fildl (%rsp) / n + fldt 24(%rsp) / x,n (x sits at 8 + 16 bytes of scratch) + addq $16,%rsp + fscale / x*2**n,n + fstp %st(1) / x*2**n + ret + .align 16 + SET_SIZE(scalbnl) diff --git a/usr/src/libm/src/i386/amd64/sqrtl.s b/usr/src/libm/src/i386/amd64/sqrtl.s new file mode 100644 index 0000000..8709df4 --- /dev/null +++ b/usr/src/libm/src/i386/amd64/sqrtl.s @@ -0,0 +1,39 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)sqrtl.s 1.3 06/01/23 SMI" + + .file "sqrtl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(sqrtl,function) +#include "libm_synonyms.h" + + ENTRY(sqrtl) + fldt 8(%rsp) / push x + fsqrt / sqrt(x), returned in %st(0) + ret + .align 16 + SET_SIZE(sqrtl) diff --git a/usr/src/libm/src/i386/common/__reduction.s b/usr/src/libm/src/i386/common/__reduction.s new file mode 100644 index 0000000..a3fefbb --- /dev/null +++ b/usr/src/libm/src/i386/common/__reduction.s @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)__reduction.s 1.16 06/01/23 SMI" + + .file "__reduction.s" + +/ +/ After argument reduction which returns n: +/ n mod 4 sin(x) cos(x) tan(x) +/ ---------------------------------------------------------- +/ 0 S C S/C +/ 1 C -S -C/S +/ 2 -S -C S/C +/ 3 -C S -C/S +/ ---------------------------------------------------------- + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#undef fabs + + ENTRY(__reduction) +#ifndef PIC + movl 12(%esp),%eax / load the high part of arg (read from the caller frame) +#else + movl 16(%esp),%eax / load the high part of arg (PIC caller pushed one extra word) +#endif + andl $0x7fffffff,%eax / clear sign + cmpl $0x3fe921fb,%eax / Is |x| < pi/4 (= 0x3fe921fb54...) ? + jbe .L0 / yes: no reduction needed + cmpl $0x7ff00000,%eax / Is arg a NaN or an Inf ? + jb .L1 / finite: reduce; Inf/NaN fall into .L0 +.L0: +#ifndef PIC + fldl 8(%esp) / push arg +#else + fldl 12(%esp) / push arg +#endif + fwait + movl $0,%eax / set n = 0 + ret +.L1: + pushl %ebp + movl %esp,%ebp + subl $16,%esp / scratch for y[0], y[1] (two doubles) + PIC_SETUP(1) + leal -16(%ebp),%eax / address of y[0] + pushl %eax / second argument: &y[0] +#ifndef PIC + pushl 16(%ebp) + pushl 12(%ebp) +#else + pushl 20(%ebp) + pushl 16(%ebp) +#endif + call PIC_F(__rem_pio2) / call __rem_pio2(x,&y) + fldl -8(%ebp) / y[1] + fldl -16(%ebp) / y[0], y[1] + faddp %st,%st(1) / y[0]+y[1] round-to-extended + addl $12,%esp / pop only the 4*3 argument bytes so PIC_WRAPUP sees what PIC_SETUP left on the stack; leave frees the 16 scratch bytes + andl $3,%eax / n mod 4 + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(__reduction) diff --git a/usr/src/libm/src/i386/common/acos.s b/usr/src/libm/src/i386/common/acos.s new file mode 100644 index 0000000..6b55abe --- /dev/null +++ b/usr/src/libm/src/i386/common/acos.s @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)acos.s 1.8 06/01/23 SMI" + + .file "acos.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(acos,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(acos) + fldl 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp / compare |x| with 1, pop: 1 , x + fstsw %ax + sahf + ja .ERR / |x| > 1 + fadd %st(1),%st / 1+x,x + fldz / 0,1+x,x + fucomp / compare 0 with 1+x, pop + fstsw %ax + sahf + jp .L1 / unordered (x is NaN) + jne .L1 / 1+x != 0 + / x is -1 + fstp %st(0) / -1 + fstp %st(0) / empty NPX stack + fldpi / acos(-1) = pi + ret +.L1: + fxch %st(1) / x,1+x + fld1 / 1,x,1+x + fsubp %st,%st(1) / 1-x,1+x + fdivp %st,%st(1) / (1-x)/(1+x) + fsqrt / sqrt((1-x)/(1+x)) + fld1 / 1,sqrt((1-x)/(1+x)) + fpatan / atan(sqrt((1-x)/(1+x))) + fadd %st(0),%st / acos(x) = 2*atan(sqrt((1-x)/(1+x))) + ret + +.ERR: + / |x| > 1 + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + fstp %st(0) / x + fstp %st(0) / empty NPX stack + pushl $1 / third argument to _SVID_libm_err + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) / report SVID result/error + addl $20,%esp / pop the five argument words + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(acos) diff --git a/usr/src/libm/src/i386/common/acosf.s b/usr/src/libm/src/i386/common/acosf.s new file mode 100644 index 0000000..897790e --- /dev/null +++ b/usr/src/libm/src/i386/common/acosf.s @@ -0,0 +1,77 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)acosf.s 1.9 06/01/23 SMI" + + .file "acosf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(acosf,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(acosf) + flds 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp / compare |x| with 1, pop: 1 , x + fstsw %ax + sahf + ja .ERR / |x| > 1 + fadd %st(1),%st / 1+x,x + fldz / 0,1+x,x + fucomp / compare 0 with 1+x, pop + fstsw %ax + sahf + jp .L1 / unordered (x is NaN) + jne .L1 / 1+x != 0 + / x is -1 + fstp %st(0) / x + fstp %st(0) / empty NPX stack + fldpi / acos(-1) = pi + ret +.L1: + fxch %st(1) / x,1+x + fld1 / 1,x,1+x + fsubp %st,%st(1) / 1-x,1+x + fdivp %st,%st(1) / (1-x)/(1+x) + fsqrt / sqrt((1-x)/(1+x)) + fld1 / 1,sqrt((1-x)/(1+x)) + fpatan / atan(sqrt((1-x)/(1+x))) + fadd %st(0),%st / acos(x) = 2*atan(sqrt((1-x)/(1+x))) + ret + +.ERR: + / |x| > 1 + fstp %st(0) / x + fstp %st(0) / empty NPX stack + fldz + fdiv %st(0),%st / 0/0 + ret + .align 4 + SET_SIZE(acosf) diff --git a/usr/src/libm/src/i386/common/acosl.s b/usr/src/libm/src/i386/common/acosl.s new file mode 100644 index 0000000..17a95d4 --- /dev/null +++ b/usr/src/libm/src/i386/common/acosl.s @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)acosl.s 1.8 06/01/23 SMI" + + .file "acosl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(acosl,function) +#include "libm_synonyms.h" + +#undef fabs + + ENTRY(acosl) + fldt 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp / compare |x| with 1, pop: 1 , x + fstsw %ax + sahf + ja 9f / |x| > 1 + fadd %st(1),%st / 1+x,x + fldz / 0,1+x,x + fucomp / compare 0 with 1+x, pop + fstsw %ax + sahf + jp .L1 / unordered (x is NaN) + jne .L1 / 1+x != 0 + / x is -1 + fstp %st(0) / -1 + fstp %st(0) / empty NPX stack + fldpi / acos(-1) = pi + ret +.L1: + fxch %st(1) / x,1+x + fld1 / 1,x,1+x + fsubp %st,%st(1) / 1-x,1+x + fdivp %st,%st(1) / (1-x)/(1+x) + fsqrt / sqrt((1-x)/(1+x)) + fld1 / 1,sqrt((1-x)/(1+x)) + fpatan / atan(sqrt((1-x)/(1+x))) + fadd %st(0),%st / acos(x) = 2*atan(sqrt((1-x)/(1+x))) + ret +9: + / |x| > 1 + fstp %st(0) / x + fsub %st,%st(0) / +/-0 or NaN+invalid + fdiv %st,%st(0) / NaN+invalid or NaN + ret + .align 4 + SET_SIZE(acosl) diff --git a/usr/src/libm/src/i386/common/asin.s b/usr/src/libm/src/i386/common/asin.s new file mode 100644 index 0000000..bb23ccd --- /dev/null +++ b/usr/src/libm/src/i386/common/asin.s @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)asin.s 1.9 06/01/23 SMI" + + .file "asin.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(asin,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(asin) + fldl 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp / compare |x| with 1, pop: 1 , x + fstsw %ax + sahf + ja .ERR / |x| > 1 + fadd %st(1),%st / 1+x,x + fld1 / 1,1+x,x + fsub %st(2),%st / 1-x,1+x,x + fmulp %st,%st(1) / (1-x)*(1+x),x + fsqrt / sqrt((1-x)*(1+x)),x + fpatan / atan(x/sqrt((1-x)*(1+x))) + ret + +.ERR: + / |x| > 1 + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + fstp %st(0) / x + fstp %st(0) / empty NPX stack + pushl $2 / third argument to _SVID_libm_err + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) / report SVID result/error + addl $20,%esp / pop the five argument words + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(asin) diff --git a/usr/src/libm/src/i386/common/asinf.s b/usr/src/libm/src/i386/common/asinf.s new file mode 100644 index 0000000..b05ea27 --- /dev/null +++ b/usr/src/libm/src/i386/common/asinf.s @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)asinf.s 1.9 06/01/23 SMI" + + .file "asinf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(asinf,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + +#undef fabs + + ENTRY(asinf) + flds 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp / compare |x| with 1, pop: 1 , x + fstsw %ax + sahf + ja .ERR / |x| > 1 + fadd %st(1),%st / 1+x,x + fld1 / 1,1+x,x + fsub %st(2),%st / 1-x,1+x,x + fmulp %st,%st(1) / (1-x)*(1+x),x + fsqrt / sqrt((1-x)*(1+x)),x + fpatan / atan(x/sqrt((1-x)*(1+x))) + ret + +.ERR: + / |x| > 1 + fstp %st(0) / x + fstp %st(0) / empty NPX stack + fldz / 0 + fdiv %st(0),%st / 0/0 + ret + .align 4 + SET_SIZE(asinf) diff --git a/usr/src/libm/src/i386/common/asinl.s b/usr/src/libm/src/i386/common/asinl.s new file mode 100644 index 0000000..4fef6f5 --- /dev/null +++ b/usr/src/libm/src/i386/common/asinl.s @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)asinl.s 1.8 06/01/23 SMI" + + .file "asinl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(asinl,function) +#include "libm_synonyms.h" + +#undef fabs + + ENTRY(asinl) + fldt 4(%esp) / push x + fld1 / push 1 + fld %st(1) / x , 1 , x + fabs / |x| , 1 , x + fucomp / compare |x| with 1, pop: 1 , x + fstsw %ax + sahf + ja 9f / |x| > 1 + fadd %st(1),%st / 1+x,x + fld1 / 1,1+x,x + fsub %st(2),%st / 1-x,1+x,x + fmulp %st,%st(1) / (1-x)*(1+x),x + fsqrt / sqrt((1-x)*(1+x)),x + fpatan / atan(x/sqrt((1-x)*(1+x))) + ret +9: + / |x| > 1 + fstp %st(0) / x + fsub %st,%st(0) / +/-0 or NaN+invalid + fdiv %st,%st(0) / NaN+invalid or NaN + ret + .align 4 + SET_SIZE(asinl) diff --git a/usr/src/libm/src/i386/common/atan.s b/usr/src/libm/src/i386/common/atan.s new file mode 100644 index 0000000..f279ad7 --- /dev/null +++ b/usr/src/libm/src/i386/common/atan.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)atan.s 1.6 06/01/23 SMI" + + .file "atan.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan,function) +#include "libm_synonyms.h" + + ENTRY(atan) + fldl 4(%esp) / push arg + fld1 / push 1.0 + fpatan / atan(arg/1.0); pops, leaving the result in %st(0) + ret + .align 4 + SET_SIZE(atan) diff --git a/usr/src/libm/src/i386/common/atan2.s b/usr/src/libm/src/i386/common/atan2.s new file mode 100644 index 0000000..d98a75a --- /dev/null +++ b/usr/src/libm/src/i386/common/atan2.s @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. + */ + + .ident "@(#)atan2.s 1.8 06/01/23 SMI" + + .file "atan2.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(atan2) + movl 4(%esp),%eax / low part of y + movl 12(%esp),%ecx / low part of x + orl %eax,%ecx / any low-word bit set in x or y? + jz .maybe_0s / no: the high words decide + + / not both x and y are 0's +1: + fldl 4(%esp) / push y + fldl 12(%esp) / push x + fpatan / return atan2(y,x) + ret + +.maybe_0s: + movl 8(%esp),%eax / high part of y + movl 16(%esp),%ecx / high part of x + orl %eax,%ecx + andl $0x7fffffff,%ecx / clear sign + jnz 1b / a non-sign bit is set: not both zero + / both x and y are 0's + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + pushl $3 / third argument to _SVID_libm_err + pushl 12(%ebp) / high y + pushl 8(%ebp) / low y + pushl 20(%ebp) / high x + pushl 16(%ebp) / low x + call PIC_F(_SVID_libm_err) / report SVID result/error + addl $20,%esp / pop the five argument words + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(atan2) diff --git a/usr/src/libm/src/i386/common/atan2f.s b/usr/src/libm/src/i386/common/atan2f.s new file mode 100644 index 0000000..668a4ac --- /dev/null +++ b/usr/src/libm/src/i386/common/atan2f.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)atan2f.s 1.9 06/01/23 SMI" + + .file "atan2f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2f,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(atan2f) + flds 4(%esp) / push y + flds 8(%esp) / push x + fpatan / return atan2(y,x) in %st(0) + ret + .align 4 + SET_SIZE(atan2f) diff --git a/usr/src/libm/src/i386/common/atan2l.s b/usr/src/libm/src/i386/common/atan2l.s new file mode 100644 index 0000000..081aa9a --- /dev/null +++ b/usr/src/libm/src/i386/common/atan2l.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)atan2l.s 1.6 06/01/23 SMI" + + .file "atan2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atan2l,function) +#include "libm_synonyms.h" + + ENTRY(atan2l) + fldt 4(%esp) / push y + fldt 16(%esp) / push x (12 bytes above y) + fpatan / return atan2(y,x) in %st(0) + ret + .align 4 + SET_SIZE(atan2l) diff --git a/usr/src/libm/src/i386/common/atanl.s b/usr/src/libm/src/i386/common/atanl.s new file mode 100644 index 0000000..a0e6074 --- /dev/null +++ b/usr/src/libm/src/i386/common/atanl.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)atanl.s 1.6 06/01/23 SMI" + + .file "atanl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(atanl,function) +#include "libm_synonyms.h" + + ENTRY(atanl) + fldt 4(%esp) / push arg + fld1 / push 1.0 + fpatan / atan(arg/1.0); pops, leaving the result in %st(0) + ret + .align 4 + SET_SIZE(atanl) diff --git a/usr/src/libm/src/i386/common/ceil.s b/usr/src/libm/src/i386/common/ceil.s new file mode 100644 index 0000000..a42a4a2 --- /dev/null +++ b/usr/src/libm/src/i386/common/ceil.s @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)ceil.s 1.6 06/01/23 SMI" + + .file "ceil.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ceil,function) +#include "libm_synonyms.h" + + ENTRY(ceil) + subl $8,%esp / scratch: (%esp) saved CW, 4(%esp) working CW + fstcw (%esp) / save caller CW + fldl 12(%esp) / push x (4 + 8 bytes of scratch) + movw (%esp),%cx + orw $0x0c00,%cx / RC field = 11b + xorw $0x0400,%cx / RC field = 10b + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = up + frndint / ceil(x) + fstcw 4(%esp) / re-read CW (RD is restored by the fldcw below) + movw 4(%esp),%dx + andw $0xf3ff,%dx / current CW minus RC field + movw (%esp),%cx + andw $0x0c00,%cx / RC field of caller CW + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp + ret + .align 4 + SET_SIZE(ceil) diff --git a/usr/src/libm/src/i386/common/copysign.s b/usr/src/libm/src/i386/common/copysign.s new file mode 100644 index 0000000..9234c55 --- /dev/null +++ b/usr/src/libm/src/i386/common/copysign.s @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)copysign.s 1.6 06/01/23 SMI" + + .file "copysign.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysign,function) +#include "libm_synonyms.h" + + ENTRY(copysign) + movl 8(%esp),%eax / eax <-- hi_32(x) + movl 16(%esp),%ecx / ecx <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + andl $0x80000000,%ecx / ecx[31] <-- sign_bit(y) + orl %ecx,%eax / eax <-- hi_32(copysign(x,y)) + movl 4(%esp),%ecx / ecx <-- lo_32(x) + / = lo_32(copysign(x,y)) + subl $8,%esp / set up loading dock for result + movl %ecx,(%esp) / copy lo_32(result) to loading dock + movl %eax,4(%esp) / copy hi_32(result) to loading dock + fldl (%esp) / load copysign(x,y) into %st(0) + fwait / in case fldl causes exception + addl $8,%esp / restore stack-pointer for return + ret + .align 4 + SET_SIZE(copysign) diff --git a/usr/src/libm/src/i386/common/copysignf.s b/usr/src/libm/src/i386/common/copysignf.s new file mode 100644 index 0000000..63527ef --- /dev/null +++ b/usr/src/libm/src/i386/common/copysignf.s @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)copysignf.s 1.3 06/01/23 SMI" + + .file "copysignf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysignf,function) +#include "libm_synonyms.h" + + ENTRY(copysignf) + movl 4(%esp),%eax / eax <-- x + movl 8(%esp),%ecx / ecx <-- y + andl $0x7fffffff,%eax / eax <-- abs(x) + andl $0x80000000,%ecx / ecx[31] <-- sign_bit(y) + orl %ecx,%eax / eax <-- copysign(x,y) + subl $4,%esp / set up loading dock for result + movl %eax,(%esp) / copy result to loading dock + flds (%esp) / load copysign(x,y) into %st(0) + fwait / in case flds causes exception + addl $4,%esp / restore stack-pointer for return + ret + .align 4 + SET_SIZE(copysignf) diff --git a/usr/src/libm/src/i386/common/copysignl.s b/usr/src/libm/src/i386/common/copysignl.s new file mode 100644 index 0000000..919f7d8 --- /dev/null +++ b/usr/src/libm/src/i386/common/copysignl.s @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)copysignl.s 1.3 06/01/23 SMI" + + .file "copysignl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysignl,function) +#include "libm_synonyms.h" + + ENTRY(copysignl) + movl 12(%esp),%eax / sign and bexp of x + movl 24(%esp),%ecx / sign and bexp of y + andl $0x00007fff,%eax / eax <-- bexp(x) + andl $0x00008000,%ecx / ecx <-- sign(y) + orl %ecx,%eax / eax <-- bexp(x) with sign(y) + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + subl $12,%esp / set up loading dock for result + movl %edx,(%esp) / copy lo_32(result's sgnfcnd) + / to loading dock + movl %ecx,4(%esp) / copy hi_32(result's sgnfcnd) + / to loading dock + movl %eax,8(%esp) / copy sign&bexp(result) + / to loading dock + fldt (%esp) / load copysign(x,y) into %st(0) + addl $12,%esp / restore stack-pointer for return + ret + .align 4 + SET_SIZE(copysignl) diff --git a/usr/src/libm/src/i386/common/cos.s b/usr/src/libm/src/i386/common/cos.s new file mode 100644 index 0000000..d6f2df8 --- /dev/null +++ b/usr/src/libm/src/i386/common/cos.s @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. + */ + + .ident "@(#)cos.s 1.10 06/01/23 SMI" + + .file "cos.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(cos,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(cos) + PIC_SETUP(1) + call PIC_F(__reduction) / NOTE(review): apparently returns quadrant n in eax with reduced arg in st(0) - confirm + PIC_WRAPUP + cmpl $1,%eax / dispatch on n + jl .cos0 / n < 1 + je .cos1 / n = 1 + cmpl $2,%eax + je .cos2 / n = 2 + fsin / any other n: return fsin result + ret +.cos2: + fcos + fchs / n = 2: return negated fcos result + ret +.cos1: + fsin + fchs / n = 1: return negated fsin result + ret +.cos0: + fcos / n < 1: return fcos result + ret + .align 4 + SET_SIZE(cos) diff --git a/usr/src/libm/src/i386/common/exp.s b/usr/src/libm/src/i386/common/exp.s new file mode 100644 index 0000000..7f5bab6 --- /dev/null +++ b/usr/src/libm/src/i386/common/exp.s @@ -0,0 +1,155 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp.s 1.10 06/01/23 SMI" + + .file "exp.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(exp) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3fe62e42,%ecx / Is |x| < ln(2)? + jb .shortcut / If so, take a shortcut.
+ je .check_tail / |x| may be only slightly < ln(2) + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? + jae .not_finite / if so, x is not finite +.finite_non_special: / Here, ln(2) < |x| < INF + fldl 4(%esp) / push x + subl $8,%esp + /// overhead of RP save/restore; 63/15 + fstcw (%esp) /// ; 15/3 + movw (%esp),%ax /// ; 4/1 + movw %ax,4(%esp) /// save old RP; 2/1 + orw $0x0300,%ax /// force 64-bit RP; 2/1 + movw %ax,(%esp) /// ; 2/1 + fldcw (%esp) /// ; 19/4 + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / This and the next 3 instructions + fstsw %ax / add 10 clocks to runtime of the + sahf / main branch, but save about 265 + je .z_integral / upon detection of integral z. + / [z] != z, compute exp(x) + fxch / z,[z] + fsub %st(1),%st / z-[z],[z] + f2xm1 / 2**(z-[z])-1,[z] + fld1 / 1,2**(z-[z])-1,[z] + faddp %st,%st(1) / 2**(z-[z]) ,[z] +.merge: + fscale / exp(x) ,[z] + fstp %st(1) + fstcw (%esp) / merge saved RP bits into current CW + movw (%esp),%dx + andw $0xfcff,%dx / dx <-- current CW without RP bits + movw 4(%esp),%cx + andw $0x0300,%cx / cx <-- RP bits saved on entry + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) /// restore old RP; 19/4 + fstpl (%esp) / round to double + fldl (%esp) / exp(x) rounded to double + fxam / determine class of exp(x) + add $8,%esp + fstsw %ax / store status in ax + andw $0x4500,%ax / keep C3,C2,C0 + cmpw $0x0500,%ax / C2 and C0 set: result is infinite + je .overflow + cmpw $0x4000,%ax / only C3 set: result is zero + je .underflow + ret + +.overflow: + fstp %st(0) / stack empty + push %ebp + mov %esp,%ebp + PIC_SETUP(1) + pushl $6 + jmp .error + +.underflow: + fstp %st(0) / stack empty + push %ebp + mov %esp,%ebp + PIC_SETUP(2) + pushl $7 + +.error: + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + pushl 12(%ebp) / high x + pushl 8(%ebp) / low x + call PIC_F(_SVID_libm_err) + addl $20,%esp / pop the five 4-byte words pushed above + PIC_WRAPUP + leave + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + fld1 / 1,z + jmp .merge + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xfefa39ef,%edx / Is |x| slightly <
ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, + / whence z is in f2xm1's domain. + fldl 4(%esp) / push x + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + f2xm1 / 2**(x*log2(e))-1 = e**x - 1 + fld1 / 1,2**(z)-1 + faddp %st,%st(1) / 2**(z) = e**x + ret + +.not_finite: + / Here, flags still have settings from execution of + / cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if so, x is NaN; if not, x may be +/- INF + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp) diff --git a/usr/src/libm/src/i386/common/exp10.s b/usr/src/libm/src/i386/common/exp10.s new file mode 100644 index 0000000..1326fc5 --- /dev/null +++ b/usr/src/libm/src/i386/common/exp10.s @@ -0,0 +1,132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp10.s 1.7 06/01/23 SMI" + + .file "exp10.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10,function) +#include "libm_synonyms.h" + + ENTRY(exp10) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3fd34413,%ecx / Is |x| < log10(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / maybe |x| only slightly < log10(2) + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? + jae .not_finite / if so, x is not finite +.finite_non_special: / Here, log10(2) < |x| < INF + fldl 4(%esp) / push x (=arg) + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / z integral? 
+ fstsw %ax + sahf + je .z_integral / branch if z integral + fxch / z, [z] + fsub %st(1),%st / z-[z], [z] + f2xm1 / 2**(z-[z])-1, [z] + fld1 / 1,2**(z-[z])-1, [z] + faddp %st,%st(1) / 2**(z-[z]), [z] + fscale / 2**z = 10**(arg), [z] + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + fld1 / 1 = 2**0, z + fscale / 2**(0 + z) = 2**z = 10**(arg), z + fstp %st(1) / 10**(arg) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0x509f79fe,%edx / Is |x| slightly > log10(2)? + ja .finite_non_special / branch if |x| slightly > log10(2) +.shortcut: + / Here, |x| < log10(2), so |z| = |x*log2(10)| < 1 + / whence z is in f2xm1's domain. + fldl 4(%esp) / push x (=arg) + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + f2xm1 / 2**z - 1 + fld1 / 1,2**z - 1 + faddp %st,%st(1) / 2**z = 10**x + ret + +.not_finite: + cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if so, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp10) diff --git a/usr/src/libm/src/i386/common/exp10f.s b/usr/src/libm/src/i386/common/exp10f.s new file mode 100644 index 0000000..b949f89 --- /dev/null +++ b/usr/src/libm/src/i386/common/exp10f.s @@ -0,0 +1,122 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp10f.s 1.7 06/01/23 SMI" + + .file "exp10f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10f,function) +#include "libm_synonyms.h" + + ENTRY(exp10f) + movl 4(%esp),%ecx / ecx <-- x + andl $0x7fffffff,%ecx / ecx <-- |x| + cmpl $0x3e9a209a,%ecx / Is |x| < log10(2)? + jbe .shortcut / If so, take a shortcut. + cmpl $0x7f800000,%ecx / |x| >= INF? + jae .not_finite / if so, x is not finite + flds 4(%esp) / push x (=arg) + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / z integral? 
+ fstsw %ax + sahf + je .z_integral / branch if z integral + fxch / z, [z] + fsub %st(1),%st / z-[z], [z] + f2xm1 / 2**(z-[z])-1, [z] + fld1 / 1,2**(z-[z])-1, [z] + faddp %st,%st(1) / 2**(z-[z]), [z] + fscale / 2**z = 10**(arg), [z] + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + fld1 / 1 = 2**0, z + fscale / 2**(0 + z) = 2**z = 10**(arg), z + fstp %st(1) / 10**(arg) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.shortcut: + / Here, |x| < log10(2), so |z| = |x*log2(10)| < 1 + / whence z is in f2xm1's domain. + flds 4(%esp) / push x (=arg) + fldl2t / push log2(10) }NOT for xtndd_dbl + fmulp %st,%st(1) / z = x*log2(10) }NOT for xtndd_dbl + f2xm1 / 2**z - 1 + fld1 / 1,2**z - 1 + faddp %st,%st(1) / 2**z = 10**x + ret + +.not_finite: + ja .NaN_or_pinf / branch if x is NaN + movl 4(%esp),%eax / eax <-- x + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + flds 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp10f) diff --git a/usr/src/libm/src/i386/common/exp10l.s b/usr/src/libm/src/i386/common/exp10l.s new file mode 100644 index 0000000..aab724e --- /dev/null +++ b/usr/src/libm/src/i386/common/exp10l.s @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp10l.s 1.8 06/01/23 SMI" + + .file "exp10l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp10l,function) +#include "libm_synonyms.h" + + .data + .align 4 +lt2_hi: .long 0xfbd00000, 0x9a209a84, 0x00003ffd +lt2_lo: .long 0x653f4837, 0x8677076a, 0x0000bfc9 + + ENTRY(exp10l) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + andl $0x00007fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x00003ffd,%ecx / Is |x| < log10(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / maybe |x| only slightly < log10(2) + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + cmpl $0x0000400e,%ecx / |x| < 32768 = 2^15? 
+ jb .finite_non_special / if so, proceed with argument reduction + fldt 4(%esp) / x + fld1 / 1, x + jmp 1f +.finite_non_special: / Here, log10(2) < |x| < 2^15 + fldt 4(%esp) / x + fld %st(0) / x, x + fldl2t / log2(10), x, x + fmul / z := x*log2(10), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(lt2_hi) / lt2_hi, [z], x, [z] + fmul / [z]*lt2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*lt2_hi, [z] + fldt PIC_L(lt2_lo) / lt2_lo, x-[z]*lt2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*lt2_lo, x-[z]*lt2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*log10(2), [z] + fldl2t / log2(10), r, [z] + fmul / f := r*log2(10), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] +1: + fscale / 10^x, [z] + fstp %st(1) + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x9a209a84,%ecx / Is |x| < log10(2)? + ja .finite_non_special + jb .shortcut + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0xfbcff798,%edx / Is |x| slightly > log10(2)? + ja .finite_non_special / branch if |x| slightly > log10(2) +.shortcut: + / Here, |x| < log10(2), so |z| = |x/log10(2)| < 1 + / whence z is in f2xm1's domain. + fldt 4(%esp) / x + fldl2t / log2(10), x + fmul / z := x*log2(10) + f2xm1 / 2^z-1 + fld1 / 1, 2^z-1 + faddp %st,%st(1) / 10^x + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(sgnfcnd(x)) = hi_32(sgnfcnd(INF))? + jne .NaN_or_pinf / if not, x is NaN or unsupp. + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0,%edx / lo_32(sgnfcnd(x)) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ fldt 4(%esp) + ret + .align 4 + SET_SIZE(exp10l) diff --git a/usr/src/libm/src/i386/common/exp2.s b/usr/src/libm/src/i386/common/exp2.s new file mode 100644 index 0000000..cd29369 --- /dev/null +++ b/usr/src/libm/src/i386/common/exp2.s @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp2.s 1.7 06/01/23 SMI" + + .file "exp2.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2,function) +#include "libm_synonyms.h" + + ENTRY(exp2) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3ff00000,%ecx / Is |x| < 1? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be only slightly > 1 + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? + jae .not_finite / if so, x is not finite +.finite_non_special: / Here, 1 < |x| < INF + fldl 4(%esp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucom / x integral?
+ fstsw %ax + sahf + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) / 2**x + ret + +.x_integral: + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0x00000000,%edx / Is |x| slightly > 1? + ja .finite_non_special / branch if |x| slightly > 1 +.shortcut: + / Here, |x| <= 1, + / whence x is in f2xm1's domain. + fldl 4(%esp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if so, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp2) diff --git a/usr/src/libm/src/i386/common/exp2f.s b/usr/src/libm/src/i386/common/exp2f.s new file mode 100644 index 0000000..631c865 --- /dev/null +++ b/usr/src/libm/src/i386/common/exp2f.s @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp2f.s 1.7 06/01/23 SMI" + + .file "exp2f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2f,function) +#include "libm_synonyms.h" + + ENTRY(exp2f) + movl 4(%esp),%ecx / ecx <-- x + andl $0x7fffffff,%ecx / ecx <-- |x| + cmpl $0x3f800000,%ecx / Is |x| <= 1? + jbe .shortcut / If so, take a shortcut. + cmpl $0x7f800000,%ecx / |x| >= INF? + jae .not_finite / if so, x is not finite + flds 4(%esp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucom / x integral? + fstsw %ax + sahf + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) + ret + +.x_integral: / here, x is integral + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.shortcut: + / Here, |x| <= 1, + / whence x is in f2xm1's domain. + flds 4(%esp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + ja .NaN_or_pinf / branch if x is NaN + movl 4(%esp),%eax / eax <-- x + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. 
+ flds 4(%esp) + fwait + ret + .align 4 + SET_SIZE(exp2f) diff --git a/usr/src/libm/src/i386/common/exp2l.s b/usr/src/libm/src/i386/common/exp2l.s new file mode 100644 index 0000000..38a05b3 --- /dev/null +++ b/usr/src/libm/src/i386/common/exp2l.s @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)exp2l.s 1.6 06/01/23 SMI" + + .file "exp2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(exp2l,function) +#include "libm_synonyms.h" + + ENTRY(exp2l) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + andl $0x00007fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x00003fff,%ecx / Is bexp(x) < bexp(1), i.e. |x| < 1? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be slightly > 1 + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite +.finite_non_special: / Here, 1 < |x| < INF + fldt 4(%esp) / push arg + fld %st(0) / duplicate stack top + frndint / [x],x + fucom / x integral?
+ fnstsw %ax + sahf + je .x_integral / branch if x integral + fxch / x, [x] + fsub %st(1),%st / x-[x], [x] + f2xm1 / 2**(x-[x])-1, [x] + fld1 / 1,2**(x-[x])-1, [x] + faddp %st,%st(1) / 2**(x-[x]), [x] + fscale / 2**x = 2**(arg), [x] + fstp %st(1) / 2**x + ret + +.x_integral: + fstp %st(0) / ,x + fld1 / 1 = 2**0, x + fscale / 2**(0 + x) = 2**x, x + fstp %st(1) / 2**x + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / Is |x| <= 1? + ja .finite_non_special + movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x)) + cmpl $0x00000000,%edx / Is |x| slightly > 1? + ja .finite_non_special / branch if |x| slightly > 1 +.shortcut: + / Here, |x| <= 1, + / whence x is in f2xm1's domain. + fldt 4(%esp) / push x + f2xm1 / 2**x - 1 + fld1 / 1,2**x - 1 + faddp %st,%st(1) / 2**x + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 4(%esp) + ret + .align 4 + SET_SIZE(exp2l) diff --git a/usr/src/libm/src/i386/common/expl.s b/usr/src/libm/src/i386/common/expl.s new file mode 100644 index 0000000..b1a63ef --- /dev/null +++ b/usr/src/libm/src/i386/common/expl.s @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)expl.s 1.9 06/01/23 SMI" + + .file "expl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expl,function) +#include "libm_synonyms.h" + + .data + .align 4 +ln2_hi: .long 0xd1d00000, 0xb17217f7, 0x00003ffe +ln2_lo: .long 0x4c67fc0d, 0x8654361c, 0x0000bfce + + ENTRY(expl) + movl 12(%esp),%ecx / cx <--sign&bexp(x) + andl $0x7fff,%ecx / ecx <-- zero_xtnd(bexp(x)) + cmpl $0x3ffe,%ecx / Is |x| < 0.5? + jb 2f / If so, see which shortcut to take + je .check_tail / More checking if 0.5 <= |x| < 1 + cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)? + je .not_finite / if so, x is not finite + cmpl $0x0000400e,%ecx / |x| < 32768 = 2^15? 
+ jb .finite_non_special / if so, proceed with argument reduction + fldt 4(%esp) / x + fld1 / 1, x + jmp 1f +.finite_non_special: / Here, ln(2) < |x| < 2^15 + fldt 4(%esp) / x + fld %st(0) / x, x + fldl2e / log2(e), x, x + fmul / z := x*log2(e), x + frndint / [z], x + fst %st(2) / [z], x, [z] + PIC_SETUP(1) + fldt PIC_L(ln2_hi) / ln2_hi, [z], x, [z] + fmul / [z]*ln2_hi, x, [z] + fsubrp %st,%st(1) / x-[z]*ln2_hi, [z] + fldt PIC_L(ln2_lo) / ln2_lo, x-[z]*ln2_hi, [z] + PIC_WRAPUP + fmul %st(2),%st / [z]*ln2_lo, x-[z]*ln2_hi, [z] + fsubrp %st,%st(1) / r := x-[z]*ln(2), [z] + fldl2e / log2(e), r, [z] + fmul / f := r*log2(e), [z] + f2xm1 / 2^f-1,[z] + fld1 / 1, 2^f-1, [z] + faddp %st,%st(1) / 2^f, [z] +1: + fscale / e^x, [z] + fstp %st(1) + ret + +2: / Here, |x| < 0.5 + cmpl $0x3fbe,%ecx / Is |x| >= 2^-65? + jae .shortcut / If so, take a shortcut + fldt 4(%esp) / x + fld1 / 1, x + faddp %st,%st(1) / 1+x (for inexact & directed rounding) + ret + +.check_tail: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0xb17217f7,%ecx / Is |x| < ln(2)? + ja .finite_non_special + jb .shortcut + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xd1cf79ab,%edx / Is |x| slightly < ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x/ln(2)| < 1, + / whence z is in f2xm1's domain. + fldt 4(%esp) / x + fldl2e / log2(e), x + fmul / x*log2(e) + f2xm1 / 2^(x*log2(e))-1 = e^x-1 + fld1 / 1, e^x-1 + faddp %st,%st(1) / e^x + ret + +.not_finite: + movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x)) + cmpl $0x80000000,%ecx / hi_32(|x|) = hi_32(INF)? + jne .NaN_or_pinf / if not, x is NaN + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? + jne .NaN_or_pinf / if not, x is NaN + movl 12(%esp),%eax / ax <-- sign&bexp((x)) + andl $0x00008000,%eax / here, x is infinite, but +/-? 
+ jz .NaN_or_pinf / branch if x = +INF + fldz / Here, x = -inf, so return 0 + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldt 4(%esp) + fadd %st(0),%st / quiet SNaN + ret + .align 4 + SET_SIZE(expl) diff --git a/usr/src/libm/src/i386/common/expm1.s b/usr/src/libm/src/i386/common/expm1.s new file mode 100644 index 0000000..42b1256 --- /dev/null +++ b/usr/src/libm/src/i386/common/expm1.s @@ -0,0 +1,129 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)expm1.s 1.8 06/01/23 SMI" + + .file "expm1.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expm1,function) +#include "libm_synonyms.h" + + .data + .align 4 +.mhundred: .float -100.0 + + ENTRY(expm1) + movl 8(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(|x|) + cmpl $0x3fe62e42,%ecx / Is |x| < ln(2)? + jb .shortcut / If so, take a shortcut. + je .check_tail / |x| may be only slightly < ln(2) + cmpl $0x7ff00000,%ecx / hi_32(|x|) >= hi_32(INF)? 
+ jae .not_finite / if so, x is not finite +.finite_non_special: / Here, ln(2) < |x| < INF + fldl 4(%esp) / push x + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + / [z] != 0, compute exp(x) and then subtract one to get expm1(x) + fxch / z,[z] + fsub %st(1),%st / z-[z],[z] + f2xm1 / 2**(z-[z])-1,[z] + / avoid spurious underflow when scaling to compute exp(x) + PIC_SETUP(1) + flds PIC_L(.mhundred) + PIC_WRAPUP + fucom %st(2) / if -100 !< [z], then use -100 + fstsw %ax + sahf + jb .got_int_part + fxch %st(2) +.got_int_part: + fstp %st(0) / 2**(z-[z])-1,max([z],-100) + fld1 / 1,2**(z-[z])-1,max([z],-100) + faddp %st,%st(1) / 2**(z-[z]) ,max([z],-100) + fscale / exp(x) ,max([z],-100) + fld1 / 1,exp(x) ,max([z],-100) + fxch / exp(x),1 ,max([z],-100) + fsubp %st,%st(1) / exp(x)-1 ,max([z],-100) + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.check_tail: + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0xfefa39ef,%edx / Is |x| slightly < ln(2)? + ja .finite_non_special / branch if |x| slightly > ln(2) +.shortcut: + / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, + / whence z is in f2xm1's domain. + fldl 4(%esp) / push x + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + f2xm1 / 2**(x*log2(e))-1 = e**x - 1 + ret + +.not_finite: + / Here, flags still have settings from execution of + / cmpl $0x7ff00000,%ecx / hi_32(|x|) > hi_32(INF)? + ja .NaN_or_pinf / if not, x may be +/- INF + movl 4(%esp),%edx / edx <-- lo_32(x) + cmpl $0,%edx / lo_32(x) = 0? 
+ jne .NaN_or_pinf / if not, x is NaN + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fld1 / Here, x = -inf, so return -1 + fchs + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + fldl 4(%esp) + fwait + ret + .align 4 + SET_SIZE(expm1) diff --git a/usr/src/libm/src/i386/common/expm1f.s b/usr/src/libm/src/i386/common/expm1f.s new file mode 100644 index 0000000..3cfba7b --- /dev/null +++ b/usr/src/libm/src/i386/common/expm1f.s @@ -0,0 +1,152 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)expm1f.s 1.9 06/01/23 SMI" + + .file "expm1f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(expm1f,function) +#include "libm_synonyms.h" + + .data + .align 4 +.mhundred: .float -100.0 + + ENTRY(expm1f) + movl 4(%esp),%ecx / ecx <-- x + andl $0x7fffffff,%ecx / ecx <-- |x| + cmpl $0x3f317217,%ecx / Is |x| < ln(2)? + jbe .shortcut / If so, take a shortcut. + cmpl $0x7f800000,%ecx / |x| >= INF? 
+ jae .not_finite / if so, x is not finite + flds 4(%esp) / push x + + subl $8,%esp / save RP and set round-to-64-bits + fstcw (%esp) + movw (%esp),%ax + movw %ax,4(%esp) + orw $0x0300,%ax + movw %ax,(%esp) + fldcw (%esp) + + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + fld %st(0) / duplicate stack top + frndint / [z],z + fucom / This and the next 3 instructions + fstsw %ax / add 10 clocks to runtime of the + sahf / main branch, but save about 265 + je .z_integral / upon detection of integral z. + / [z] != 0, compute exp(x) and then subtract one to get expm1(x) + fxch / z,[z] + fsub %st(1),%st / z-[z],[z] + f2xm1 / 2**(z-[z])-1,[z] + / avoid spurious underflow when scaling to compute exp(x) + PIC_SETUP(1) + flds PIC_L(.mhundred) + PIC_WRAPUP + fucom %st(2) / if -100 !< [z], then use -100 + fstsw %ax + sahf + jb .got_int_part + fxch %st(2) +.got_int_part: + fstp %st(0) / 2**(z-[z])-1,max([z],-100) + fld1 / 1,2**(z-[z])-1,max([z],-100) + faddp %st,%st(1) / 2**(z-[z]) ,max([z],-100) + fscale / exp(x) ,max([z],-100) + fld1 / 1,exp(x) ,max([z],-100) + fsubrp %st,%st(1) / exp(x)-1 ,max([z],-100) + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + +.z_integral: / here, z is integral + fstp %st(0) / ,z + / avoid spurious underflow when scaling to compute exp(x) + PIC_SETUP(2) + flds PIC_L(.mhundred) + PIC_WRAPUP + fucom %st(1) / if -100 !< [z], then use -100 + fstsw %ax + sahf + jb .scale_wont_ovfl + fxch %st(1) +.scale_wont_ovfl: + fstp %st(0) / max([z],-100) + fld1 / 1,max([z],-100) + fscale / exp(x) ,max([z],-100) + fld1 / 1,exp(x) ,max([z],-100) + fsubrp %st,%st(1) / exp(x)-1 ,max([z],-100) + fstp %st(1) + + fstcw (%esp) / restore old RP + movw (%esp),%dx + andw $0xfcff,%dx + movw 4(%esp),%cx + andw $0x0300,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + add $8,%esp + + ret + 
+.shortcut: + / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, + / whence z is in f2xm1's domain. + flds 4(%esp) / push x + fldl2e / push log2e }not for xtndd_dbl + fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl + f2xm1 / 2**(x*log2(e))-1 = e**x - 1 + ret + +.not_finite: + ja .NaN_or_pinf / branch if x is NaN + movl 4(%esp),%eax / eax <-- x + andl $0x80000000,%eax / here, x is infinite, but +/-? + jz .NaN_or_pinf / branch if x = +INF + fld1 / Here, x = -inf, so return -1 + fchs + ret + +.NaN_or_pinf: + / Here, x = NaN or +inf, so load x and return immediately. + flds 4(%esp) + fwait + ret + .align 4 + SET_SIZE(expm1f) diff --git a/usr/src/libm/src/i386/common/expm1l.s b/usr/src/libm/src/i386/common/expm1l.s new file mode 100644 index 0000000..82b592d --- /dev/null +++ b/usr/src/libm/src/i386/common/expm1l.s @@ -0,0 +1,122 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+ .ident "@(#)expm1l.s 1.8 06/01/23 SMI"
+
+ .file "expm1l.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(expm1l,function)
+#include "libm_synonyms.h"
+
+ .data
+ .align 4
+ln2_hi: .long 0xd1d00000, 0xb17217f7, 0x00003ffe / leading part of ln(2) as an 80-bit extended
+ln2_lo: .long 0x4c67fc0d, 0x8654361c, 0x0000bfce / negative correction: ln2_hi + ln2_lo ~ ln(2)
+ / expm1l(x): return e^x - 1 in %st(0) for the 80-bit extended x at 4(%esp)
+ ENTRY(expm1l)
+ movl 12(%esp),%ecx / cx <--sign&bexp(x)
+ movl %ecx,%eax / ax <--sign&bexp(x)
+ andl $0x00007fff,%ecx / ecx <-- zero_xtnd(bexp(x))
+ cmpl $0x00003ffe,%ecx / Is |x| < ln(2)?
+ jb .shortcut / If so, take a shortcut.
+ je .check_tail / |x| may be only slightly < ln(2)
+ cmpl $0x00007fff,%ecx / bexp(|x|) = bexp(INF)?
+ je .not_finite / if so, x is not finite
+ andl $0x0000ffff,%eax / eax <-- sign&bexp(x)
+ cmpl $0x0000c006,%eax / x <= -128?
+ jae 1f / if so, simply return -1
+ cmpl $0x0000400d,%ecx / |x| < 16384 = 2^14?
+ jb .finite_non_special / if so, proceed with argument reduction
+ fldt 4(%esp) / x >= 16384; x
+ fld1 / 1, x
+ fscale / 1*2^x overflows: +Inf, x
+ fstp %st(1) / +Inf
+ ret
+
+.finite_non_special: / -128 < x < -ln(2) || ln(2) < x < 2^14
+ fldt 4(%esp) / x
+ fld %st(0) / x, x
+ fldl2e / log2(e), x, x
+ fmul / z := x*log2(e), x
+ frndint / [z], x
+ fst %st(2) / [z], x, [z]
+ PIC_SETUP(1)
+ fldt PIC_L(ln2_hi) / ln2_hi, [z], x, [z]
+ fmul / [z]*ln2_hi, x, [z]
+ fsubrp %st,%st(1) / x-[z]*ln2_hi, [z]
+ fldt PIC_L(ln2_lo) / ln2_lo, x-[z]*ln2_hi, [z]
+ PIC_WRAPUP
+ fmul %st(2),%st / [z]*ln2_lo, x-[z]*ln2_hi, [z]
+ fsubrp %st,%st(1) / r := x-[z]*ln(2), [z]
+ fldl2e / log2(e), r, [z]
+ fmul / f := r*log2(e), [z]
+ f2xm1 / 2^f-1,[z]
+ fld1 / 1, 2^f-1, [z]
+ faddp %st,%st(1) / 2^f, [z]
+ fscale / e^x, [z]
+ fstp %st(1) / e^x
+ fld1 / 1, e^x
+ fsubrp %st,%st(1) / e^x-1
+ ret
+
+.check_tail: / here, bexp(x) = 0x3ffe, so compare significands with ln(2)
+ movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x))
+ cmpl $0xb17217f7,%ecx / Is hi_32(sgnfcnd(x)) < hi_32(sgnfcnd(ln(2)))?
+ ja .finite_non_special
+ jb .shortcut
+ movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x))
+ cmpl $0xd1cf79ab,%edx / Is |x| slightly < ln(2)?
+ ja .finite_non_special / branch if |x| slightly > ln(2)
+.shortcut:
+ / Here, |x| < ln(2), so |z| = |x/ln(2)| < 1,
+ / whence z is in f2xm1's domain.
+ fldt 4(%esp) / x
+ fldl2e / log2(e), x
+ fmul / z := x*log2(e)
+ f2xm1 / 2^(x*log2(e))-1 = e^x-1
+ ret
+
+.not_finite: / here, bexp(x) = 0x7fff
+ movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x))
+ cmpl $0x80000000,%ecx / hi_32(sgnfcnd(x)) = hi_32(sgnfcnd(INF))?
+ jne .NaN_or_pinf / if not, x is NaN
+ movl 4(%esp),%edx / edx <-- lo_32(sgnfcnd(x))
+ cmpl $0,%edx / lo_32(sgnfcnd(x)) = 0?
+ jne .NaN_or_pinf / if not, x is NaN
+ movl 12(%esp),%eax / ax <-- sign&bexp((x))
+ andl $0x00008000,%eax / here, x is infinite, but +/-?
+ jz .NaN_or_pinf / branch if x = +INF
+1:
+ fld1 / Here, x = -inf or x <= -128, so return -1
+ fchs
+ ret
+
+.NaN_or_pinf:
+ / Here, x = NaN or +inf, so load x and return immediately.
+ fldt 4(%esp)
+ ret
+ .align 4
+ SET_SIZE(expm1l)
diff --git a/usr/src/libm/src/i386/common/fabs.s b/usr/src/libm/src/i386/common/fabs.s
new file mode 100644
index 0000000..8f898dc
--- /dev/null
+++ b/usr/src/libm/src/i386/common/fabs.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)fabs.s 1.6 06/01/23 SMI"
+
+ .file "fabs.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fabs,function)
+#include "libm_synonyms.h"
+ / fabs(x): return |x| in %st(0) for the double x at 4(%esp)
+ ENTRY(fabs)
+ fldl 4(%esp) / push x
+#undef fabs
+ fabs / x87 instruction; the #undef above presumably stops cpp renaming it (confirm in libm_synonyms.h)
+ ret
+ .align 4
+ SET_SIZE(fabs)
diff --git a/usr/src/libm/src/i386/common/fabsf.s b/usr/src/libm/src/i386/common/fabsf.s
new file mode 100644
index 0000000..0b0c67e
--- /dev/null
+++ b/usr/src/libm/src/i386/common/fabsf.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)fabsf.s 1.6 06/01/23 SMI"
+
+ .file "fabsf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fabsf,function)
+#include "libm_synonyms.h"
+ / fabsf(x): return |x| in %st(0) for the float x at 4(%esp)
+ ENTRY(fabsf)
+ flds 4(%esp) / push x
+#undef fabs
+ fabs / x87 instruction; the #undef above presumably stops cpp renaming it (confirm in libm_synonyms.h)
+ ret
+ .align 4
+ SET_SIZE(fabsf)
+
diff --git a/usr/src/libm/src/i386/common/fabsl.s b/usr/src/libm/src/i386/common/fabsl.s
new file mode 100644
index 0000000..dd66379
--- /dev/null
+++ b/usr/src/libm/src/i386/common/fabsl.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)fabsl.s 1.6 06/01/23 SMI"
+
+ .file "fabsl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fabsl,function)
+#include "libm_synonyms.h"
+ / fabsl(x): return |x| in %st(0) for the 80-bit extended x at 4(%esp)
+ ENTRY(fabsl)
+ fldt 4(%esp) / push x
+#undef fabs
+ fabs / x87 instruction; the #undef above presumably stops cpp renaming it (confirm in libm_synonyms.h)
+ ret
+ .align 4
+ SET_SIZE(fabsl)
+
diff --git a/usr/src/libm/src/i386/common/finitef.s b/usr/src/libm/src/i386/common/finitef.s
new file mode 100644
index 0000000..37a8c9a
--- /dev/null
+++ b/usr/src/libm/src/i386/common/finitef.s
@@ -0,0 +1,43 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)finitef.s 1.6 06/01/23 SMI"
+
+ .file "finitef.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(finitef,function)
+#include "libm_synonyms.h"
+ / finitef(x): return 1 in %eax if the float x at 4(%esp) is finite, 0 if it is Inf or NaN
+ ENTRY(finitef)
+ movl 4(%esp),%eax / eax <-- x
+ notl %eax / not(bexp) = 0 iff bexp = all 1's
+ andl $0x7f800000,%eax / ZF <-- 1 iff not(bexp) = 0
+ jz .done / x is Inf or NaN: return eax = 0; no jump if arg. is finite
+ movl $1,%eax / x is finite; eax was nonzero but ansi needs %eax = 1
+.done:
+ ret
+ .align 4
+ SET_SIZE(finitef)
diff --git a/usr/src/libm/src/i386/common/finitel.s b/usr/src/libm/src/i386/common/finitel.s
new file mode 100644
index 0000000..9c788fd
--- /dev/null
+++ b/usr/src/libm/src/i386/common/finitel.s
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)finitel.s 1.6 06/01/23 SMI"
+
+ .file "finitel.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(finitel,function)
+#include "libm_synonyms.h"
+ / finitel(x): return 1 in %eax if the 80-bit extended x at 4(%esp) is finite, else 0
+ ENTRY(finitel)
+ movl 12(%esp),%eax / %ax <-- sign&bexp(x)
+ testl $0x80000000,8(%esp) / ZF = 1 iff hi_32(sgnfcnd(x))'s msb = 0
+ jz .chk_denormal_or_0 / msb clear: finite only if bexp = 0
+ notl %eax / not(bexp) = 0 iff bexp = all 1's
+ andl $0x00007fff,%eax / ZF <-- 1 iff not(bexp) = 0
+ jz .done / x is Inf or NaN: return eax = 0; no jump if arg. is finite
+ movl $1,%eax / ansi needs %eax = 1
+.done:
+ ret
+
+.chk_denormal_or_0:
+ andl $0x00007fff,%eax / ZF <-- 1 iff bexp = 0 iff denormal or 0
+ jnz .unsupported / jump if arg has unsupported format
+ movl $1,%eax / ansi needs %eax = 1
+ ret
+
+.unsupported:
+ movl $0,%eax / unsupported format does not represent
+ ret / a finite number
+ .align 4
+ SET_SIZE(finitel)
diff --git a/usr/src/libm/src/i386/common/floor.s b/usr/src/libm/src/i386/common/floor.s
new file mode 100644
index 0000000..dfb7041
--- /dev/null
+++ b/usr/src/libm/src/i386/common/floor.s
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)floor.s 1.8 06/01/23 SMI"
+
+ .file "floor.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(floor,function)
+#include "libm_synonyms.h"
+ / floor(x): round the double x toward -inf to an integral value, returned in %st(0)
+ ENTRY(floor)
+ subl $8,%esp / scratch: (%esp) = saved cw, 4(%esp) = working cw
+ fstcw (%esp) / save caller's control word
+ fldl 12(%esp) / push x (arg offset 4 + 8 bytes of scratch)
+ movw (%esp),%cx
+ orw $0x0c00,%cx / set both rounding-control bits (cw bits 10-11)
+ xorw $0x0800,%cx / clear bit 11: RC = 01 = round down
+ movw %cx,4(%esp)
+ fldcw 4(%esp) / set RD = down
+ frndint
+ fstcw 4(%esp) / re-read current control word
+ movw 4(%esp),%dx
+ andw $0xf3ff,%dx / keep everything except the RC bits
+ movw (%esp),%cx
+ andw $0x0c00,%cx / caller's original RC bits
+ orw %dx,%cx
+ movw %cx,(%esp)
+ fldcw (%esp) / restore RD
+ addl $8,%esp
+ ret
+ .align 4
+ SET_SIZE(floor)
diff --git a/usr/src/libm/src/i386/common/floorl.s b/usr/src/libm/src/i386/common/floorl.s
new file mode 100644
index 0000000..902b7ca
--- /dev/null
+++ b/usr/src/libm/src/i386/common/floorl.s
@@ -0,0 +1,80 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)floorl.s 1.7 06/01/23 SMI"
+
+ .file "floorl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(ceill,function)
+LIBM_ANSI_PRAGMA_WEAK(floorl,function)
+#include "libm_synonyms.h"
+ / ceill(x): round the 80-bit extended x toward +inf to an integral value, returned in %st(0)
+ ENTRY(ceill)
+ subl $8,%esp / scratch: (%esp) = saved cw, 4(%esp) = working cw
+ fstcw (%esp) / save caller's control word
+ fldt 12(%esp) / push x (arg offset 4 + 8 bytes of scratch)
+ movw (%esp),%cx
+ orw $0x0c00,%cx / set both rounding-control bits (cw bits 10-11)
+ xorw $0x0400,%cx / clear bit 10: RC = 10 = round up
+ movw %cx,4(%esp)
+ fldcw 4(%esp) / set RD = up
+ frndint
+ fstcw 4(%esp) / re-read current control word
+ movw 4(%esp),%dx
+ andw $0xf3ff,%dx / keep everything except the RC bits
+ movw (%esp),%cx
+ andw $0x0c00,%cx / caller's original RC bits
+ orw %dx,%cx
+ movw %cx,(%esp)
+ fldcw (%esp) / restore RD
+ addl $8,%esp
+ ret
+ .align 4
+ SET_SIZE(ceill)
+
+ / floorl(x): round the 80-bit extended x toward -inf to an integral value, returned in %st(0)
+ ENTRY(floorl)
+ subl $8,%esp / scratch: (%esp) = saved cw, 4(%esp) = working cw
+ fstcw (%esp) / save caller's control word
+ fldt 12(%esp) / push x (arg offset 4 + 8 bytes of scratch)
+ movw (%esp),%cx
+ orw $0x0c00,%cx / set both rounding-control bits (cw bits 10-11)
+ xorw $0x0800,%cx / clear bit 11: RC = 01 = round down
+ movw %cx,4(%esp)
+ fldcw 4(%esp) / set RD = down
+ frndint
+ fstcw 4(%esp) / re-read current control word
+ movw 4(%esp),%dx
+ andw $0xf3ff,%dx / keep everything except the RC bits
+ movw (%esp),%cx
+ andw $0x0c00,%cx / caller's original RC bits
+ orw %dx,%cx
+ movw %cx,(%esp)
+ fldcw (%esp) / restore RD
+ addl $8,%esp
+ ret
+ .align 4
+ SET_SIZE(floorl)
diff --git a/usr/src/libm/src/i386/common/fmod.s b/usr/src/libm/src/i386/common/fmod.s
new file mode 100644
index 0000000..74626d4
--- /dev/null
+++ b/usr/src/libm/src/i386/common/fmod.s
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)fmod.s 1.11 06/01/23 SMI"
+
+ .file "fmod.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fmod,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+ / fmod(x,y): x at 4(%esp), y at 12(%esp), both double; y = +/-0 is handed to _SVID_libm_err
+ ENTRY(fmod)
+ movl 16(%esp),%eax / eax <-- hi_32(y)
+ andl $0x7fffffff,%eax / eax <-- hi_32(|y|)
+ orl 12(%esp),%eax / eax <-- lo_32(y)|hi_32(|y|)
+ je .zero / branch if y = +/-0
+
+ fldl 12(%esp) / load arg y
+ fldl 4(%esp) / load arg x
+.mod_loop:
+ fprem / partial fmod
+ fstsw %ax / store status word
+ andw $0x400,%ax / check for incomplete reduction
+ jne .mod_loop / while incomplete, do fprem again
+ fstp %st(1) / drop y, leaving the remainder in %st(0)
+ ret
+.zero:
+ pushl %ebp
+ movl %esp,%ebp / now x is at 8(%ebp), y at 16(%ebp)
+ PIC_SETUP(1)
+ pushl $27 / case 27 in _SVID_libm_err
+ pushl 20(%ebp) / pass y: hi_32(y)
+ pushl 16(%ebp) / lo_32(y)
+ pushl 12(%ebp) / pass x: hi_32(x)
+ pushl 8(%ebp) / lo_32(x)
+ call PIC_F(_SVID_libm_err)
+ addl $20,%esp / pop the five words pushed above
+ PIC_WRAPUP
+ leave
+ ret
+ .align 4
+ SET_SIZE(fmod)
diff --git a/usr/src/libm/src/i386/common/fmodf.s b/usr/src/libm/src/i386/common/fmodf.s
new file mode 100644
index 0000000..45a152f
--- /dev/null
+++ b/usr/src/libm/src/i386/common/fmodf.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)fmodf.s 1.7 06/01/23 SMI"
+
+ .file "fmodf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fmodf,function)
+#include "libm_synonyms.h"
+ / fmodf(x,y): x at 4(%esp), y at 8(%esp), both float; result of repeated fprem in %st(0)
+ ENTRY(fmodf)
+ flds 8(%esp) / load arg y
+ flds 4(%esp) / load arg x
+.mod_loop:
+ fprem / partial fmod
+ fstsw %ax / store status word
+ andw $0x400,%ax / check for incomplete reduction
+ jne .mod_loop / while incomplete, do fprem again
+ fstp %st(1) / drop y, leaving the remainder in %st(0)
+ ret
+ .align 4
+ SET_SIZE(fmodf)
diff --git a/usr/src/libm/src/i386/common/fmodl.s b/usr/src/libm/src/i386/common/fmodl.s
new file mode 100644
index 0000000..11d58eb
--- /dev/null
+++ b/usr/src/libm/src/i386/common/fmodl.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)fmodl.s 1.7 06/01/23 SMI"
+
+ .file "fmodl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(fmodl,function)
+#include "libm_synonyms.h"
+ / fmodl(x,y): x at 4(%esp), y at 16(%esp), both 80-bit extended; result of repeated fprem in %st(0)
+ ENTRY(fmodl)
+ fldt 16(%esp) / load arg y
+ fldt 4(%esp) / load arg x
+.mod_loop:
+ fprem / partial fmod
+ fstsw %ax / store status word
+ andw $0x400,%ax / check for incomplete reduction
+ jne .mod_loop / while incomplete, do fprem again
+ fstp %st(1) / drop y, leaving the remainder in %st(0)
+ ret
+ .align 4
+ SET_SIZE(fmodl)
diff --git a/usr/src/libm/src/i386/common/hypot.s b/usr/src/libm/src/i386/common/hypot.s
new file mode 100644
index 0000000..e305004
--- /dev/null
+++ b/usr/src/libm/src/i386/common/hypot.s
@@ -0,0 +1,137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)hypot.s 1.11 06/01/23 SMI"
+
+ .file "hypot.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(hypot,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+#undef fabs
+
+ .data
+ .align 4
+inf:
+ .long 0x7f800000 / +Inf as a single-precision bit pattern (loaded with flds)
+ / hypot(x,y): return sqrt(x*x+y*y) for doubles x at 4(%esp), y at 12(%esp)
+ ENTRY(hypot)
+ movl 8(%esp),%eax / eax <-- hi_32(x)
+ andl $0x7fffffff,%eax / eax <-- hi_32(|x|)
+ jz .x_maybe_0 / if x = +/-0, return |y|
+ subl $0x7ff00000,%eax / eax <-- hi_32(|x|) - hi_32(INF)
+ jz .x_maybe_inf
+.check_y:
+ movl 16(%esp),%eax / eax <-- hi_32(y)
+ andl $0x7fffffff,%eax / eax <-- hi_32(|y|)
+ jz .y_maybe_0 / if y = +/-0, return |x|
+ subl $0x7ff00000,%eax / eax <-- hi_32(|y|) - hi_32(INF)
+ jz .y_maybe_inf
+.do_hypot:
+ fldl 12(%esp) / ,y
+ fmul %st(0),%st / ,y*y
+ fldl 4(%esp) / x,y*y
+ fmul %st(0),%st / x*x,y*y
+ faddp %st,%st(1) / x*x+y*y
+ fsqrt / sqrt(x*x+y*y)
+ subl $8,%esp
+ fstpl (%esp) / round to double
+ fldl (%esp) / sqrt(x*x+y*y) rounded to double
+ PIC_SETUP(1)
+ flds PIC_L(inf) / inf , sqrt(x*x+y*y)
+ PIC_WRAPUP
+ addl $8,%esp
+ fucomp / compare inf with the result, then pop inf
+ fstsw %ax / store status in %ax
+ sahf / 80387 flags in %ah to 80386 flags
+ jz .maybe_ovflw / ZF (C3) set: result equals inf or is unordered
+ ret
+
+.maybe_ovflw:
+ jnp .ovflw / PF (C2) clear: ordered, so result = inf from finite args
+ ret / unordered: result is NaN, return it
+
+.ovflw:
+ / overflow occurred
+ fstp %st(0) / stack empty
+ pushl %ebp
+ movl %esp,%ebp
+ PIC_SETUP(2)
+ pushl $4 / error code 4 passed to _SVID_libm_err
+ pushl 20(%ebp) / high y
+ pushl 16(%ebp) / low y
+ pushl 12(%ebp) / high x
+ pushl 8(%ebp) / low x
+ call PIC_F(_SVID_libm_err)
+ addl $20,%esp / pop the five words pushed above
+ PIC_WRAPUP
+ leave
+ ret
+
+.x_maybe_0:
+ movl 4(%esp),%ecx / ecx <-- lo_32(x)
+ orl %ecx,%eax / is x = +/-0?
+ jnz .check_y / branch if x is denormal
+ / x = +/-0, so return |y|
+ fldl 12(%esp)
+ fabs
+ ret
+
+.x_maybe_inf:
+ movl 4(%esp),%ecx / ecx <-- lo_32(x)
+ orl %ecx,%eax / is x = +/-INF?
+ jnz .check_y / branch if x is NaN
+ / push&pop y in case y is a SNaN
+ fldl 12(%esp)
+ fstp %st(0)
+ / x = +/-INF, so return |x|
+ fldl 4(%esp)
+ fabs
+ ret
+
+.y_maybe_0:
+ movl 12(%esp),%ecx / ecx <-- lo_32(y)
+ orl %ecx,%eax / is y = +/-0?
+ jnz .do_hypot / branch if y is denormal
+ / y = +/-0, so return |x|
+ fldl 4(%esp)
+ fabs
+ ret
+
+.y_maybe_inf:
+ movl 12(%esp),%ecx / ecx <-- lo_32(y)
+ orl %ecx,%eax / is y = +/-INF?
+ jnz .do_hypot / branch if y is NaN
+ / push&pop x in case x is a SNaN
+ fldl 4(%esp)
+ fstp %st(0)
+ / y = +/-INF, so return |y|
+ fldl 12(%esp)
+ fabs
+ ret
+ .align 4
+ SET_SIZE(hypot)
diff --git a/usr/src/libm/src/i386/common/hypotf.s b/usr/src/libm/src/i386/common/hypotf.s
new file mode 100644
index 0000000..96f217e
--- /dev/null
+++ b/usr/src/libm/src/i386/common/hypotf.s
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)hypotf.s 1.9 06/01/23 SMI"
+
+ .file "hypotf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(hypotf,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+#undef fabs
+ / hypotf(x,y): return sqrt(x*x+y*y) rounded to single for floats x at 4(%esp), y at 8(%esp)
+ ENTRY(hypotf)
+ movl 4(%esp),%eax / eax <-- x
+ andl $0x7fffffff,%eax / eax <-- |x|
+ jz .return_abs_y / if x = +/-0, return |y|
+ subl $0x7f800000,%eax / eax <-- |x| - INF
+ jz .return_abs_x / if x = +/-INF, return |x|
+ movl 8(%esp),%eax / eax <-- y
+ andl $0x7fffffff,%eax / eax <-- |y|
+ jz .return_abs_x / if y = +/-0, return |x|
+ subl $0x7f800000,%eax / eax <-- |y| - INF
+.return_abs_y:
+ flds 8(%esp) / y (flds leaves ZF as set above)
+ jz .take_abs / if y = +/-INF or x = +/-0, return |y|
+ fmul %st(0),%st / y*y
+ flds 4(%esp) / x,y*y
+ fmul %st(0),%st / x*x,y*y
+ faddp %st,%st(1) / x*x+y*y
+ fsqrt / sqrt(x*x+y*y)
+ subl $4,%esp
+ fstps (%esp) / round to single
+ flds (%esp) / reload the rounded result as the return value
+ fwait
+ addl $4,%esp
+ ret
+
+.return_abs_x:
+ / returns |x|
+ flds 4(%esp)
+.take_abs:
+ fabs
+ ret
+ .align 4
+ SET_SIZE(hypotf)
diff --git a/usr/src/libm/src/i386/common/ieee_funcl.s b/usr/src/libm/src/i386/common/ieee_funcl.s
new file mode 100644
index 0000000..7528201
--- /dev/null
+++ b/usr/src/libm/src/i386/common/ieee_funcl.s
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)ieee_funcl.s 1.10 06/01/23 SMI"
+
+ .file "ieee_funcl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(isinfl,function)
+LIBM_ANSI_PRAGMA_WEAK(isnormall,function)
+LIBM_ANSI_PRAGMA_WEAK(issubnormall,function)
+LIBM_ANSI_PRAGMA_WEAK(iszerol,function)
+LIBM_ANSI_PRAGMA_WEAK(signbitl,function)
+#include "libm_synonyms.h"
+ / isinfl(x): return 1 in %eax iff the 80-bit extended x at 4(%esp) is +/-Inf, else 0
+ ENTRY(isinfl)
+ movl 12(%esp),%eax / ax <-- sign and bexp of x
+ notl %eax / not(bexp) = 0 iff bexp = all 1's
+ andl $0x00007fff,%eax / eax <-- zero_xtnd(not(bexp(x)))
+ jz .L6 / jump iff bexp(x) = 0x7fff
+ movl $0,%eax / bexp(x) /= 0x7fff: not Inf
+.not_inf:
+ ret
+
+.L6: / here, (eax) = 0 and bexp(x) = 0x7fff
+ movl 8(%esp),%ecx / ecx <-- hi_32(sgnfcnd(x))
+ xorl $0x80000000,%ecx / handle unsupported implicitly
+ orl 4(%esp), %ecx / ecx = 0 iff sgnfcnd(x) = 0x8000000000000000
+ jnz .not_inf / NaN or unsupported: return eax = 0
+ movl $1,%eax
+ ret
+ .align 4
+ SET_SIZE(isinfl)
+
+ ENTRY(isnormall)
+ / TRUE iff (x is finite, but
+ / neither subnormal nor zero)
+ / iff (msb(sgnfcnd(x)) /= 0
+ / & 0 < bexp(x) < 0x7fff)
+ movl 8(%esp),%eax / eax <-- hi_32(sgnfcnd(x))
+ andl $0x80000000,%eax / eax[31] <-- msb(sgnfcnd(x)),
+ / rest_of(eax) <-- 0
+ jz .L8 / jump iff msb(sgnfcnd(x)) = 0
+ movl 12(%esp),%eax / ax <-- sign and bexp of x
+ notl %eax / ax[0..14] <-- not(bexp(x))
+ andl $0x00007fff,%eax / eax <-- zero_xtnd(not(bexp(x)))
+ jz .L8 / jump iff bexp(x) = 0x7fff (eax = 0: return FALSE)
+ xorl $0x00007fff,%eax / eax <-- bexp(x); treat pseudo-denormal as subnormal
+ jz .L8 / jump iff bexp(x) = 0 (eax = 0: return FALSE)
+ movl $1,%eax
+.L8:
+ ret
+ .align 4
+ SET_SIZE(isnormall)
+
+ ENTRY(issubnormall)
+ / TRUE iff (bexp(x) = 0 &
+ / msb(sgnfcnd(x)) = 0 & frac(x) /= 0)
+ movl 8(%esp),%eax / eax <-- hi_32(sgnfcnd(x))
+ testl $0x80000000,%eax / eax[31] = msb(sgnfcnd(x));
+ / set ZF if it's 0.
+ jz .may_be_subnorm / jump iff msb(sgnfcnd(x)) = 0
+.not_subnorm:
+ movl $0,%eax
+.quicker_out:
+ ret
+.may_be_subnorm:
+ testl $0x00007fff,12(%esp) / set ZF iff bexp(x) = 0
+ jnz .not_subnorm / jump iff bexp(x) /= 0
+ orl 4(%esp),%eax / (eax) = 0 iff sgnfcnd(x) = 0
+ jz .quicker_out / x is zero: return eax = 0
+ movl $1,%eax
+ ret
+ .align 4
+ SET_SIZE(issubnormall)
+ / iszerol(x): return 1 in %eax iff bexp(x) = 0 and sgnfcnd(x) = 0, else 0
+ ENTRY(iszerol)
+ movl 12(%esp),%eax / ax <-- sign and bexp of x
+ andl $0x00007fff,%eax / eax <-- zero_xtnd(bexp(x))
+ jz .may_be_zero / jump iff bexp(x) = 0
+.not_zero:
+ movl $0,%eax
+ ret
+.may_be_zero: / here, (eax) = 0
+ orl 8(%esp),%eax / is hi_32(sgnfcnd(x)) = 0?
+ jnz .not_zero / jump iff hi_32(sgnfcnd(x)) /= 0
+ orl 4(%esp),%eax / is lo_32(sgnfcnd(x)) = 0?
+ jnz .not_zero / jump iff lo_32(sgnfcnd(x)) /= 0
+ movl $1,%eax
+ ret
+ .align 4
+ SET_SIZE(iszerol)
+ / signbitl(x): return the sign bit of x (0 or 1) in %eax
+ ENTRY(signbitl)
+ movl 10(%esp),%eax / eax[31] <-- sign_bit(x); load at offset 10 puts sign&bexp in eax[31:16]
+ shrl $31,%eax / eax <-- zero_xtnd(sign_bit(x))
+ ret
+ .align 4
+ SET_SIZE(signbitl)
diff --git a/usr/src/libm/src/i386/common/ilogb.s b/usr/src/libm/src/i386/common/ilogb.s
new file mode 100644
index 0000000..e77d554
--- /dev/null
+++ b/usr/src/libm/src/i386/common/ilogb.s
@@ -0,0 +1,85 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)ilogb.s 1.9 06/01/23 SMI" + + .file "ilogb.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ilogb,function) +#include "libm_synonyms.h" +#include "xpg6.h" + + .data + .align 8 +two52: .long 0x0,0x43300000 / 2**52 + + ENTRY(ilogb) + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + testl $0x7ff00000,%eax / is bexp(x) 0? + jz .bexp_0 / jump if x is 0 or subnormal + / biased exponent is non-zero + cmpl $0x7ff00000,%eax / is bexp(x) 0x7ff? + jae .bexp_all_1 / jump if x is NaN or Inf + shrl $20,%eax / eax <-- bexp(x) + subl $1023,%eax / unbias exponent by 1023 + ret + +.bexp_all_1: + movl $0x7fffffff,%eax / x is NaN or inf, so return 0x7fffffff + jmp 0f / go decide whether to raise invalid + +.bexp_0: + orl 4(%esp),%eax / test whether x is 0 + jnz .ilogb_subnorm / jump iff x is subnormal + movl $0x80000001,%eax / x is +/-0, so return 0x80000001 +0: + PIC_SETUP(0) + PIC_G_LOAD(movzwl,__xpg6,ecx) + PIC_WRAPUP + andl $_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx + cmpl $0,%ecx + je 1f / skip the 0/0 below iff that flag bit is clear + fldz + fdivp %st,%st(0) / raise invalid as per SUSv3 +1: + ret + +.ilogb_subnorm: / subnormal input + fldl 4(%esp) / push x + PIC_SETUP(1) + fmull PIC_L(two52) / x*2**52 + PIC_WRAPUP + subl $8,%esp / set up storage area + fstpl (%esp) / store x*2**52 in storage area + movl $0x7ff00000,%eax / eax <-- double_bexp_mask + andl 4(%esp),%eax / eax[20..30] <-- bexp(x*2**52) + shrl $20,%eax / extract exponent of x*2**52 + subl $1075,%eax / unbias it by 1075 (= 1023 + 52) + addl $8,%esp / restore stack for caller + ret + .align 4 + SET_SIZE(ilogb) diff --git a/usr/src/libm/src/i386/common/ilogbf.s b/usr/src/libm/src/i386/common/ilogbf.s new file mode 100644 index 0000000..43104d7 --- /dev/null +++ b/usr/src/libm/src/i386/common/ilogbf.s @@ -0,0 +1,90 @@ 
+/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)ilogbf.s 1.6 06/01/23 SMI" + + .file "ilogbf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ilogbf,function) +#include "libm_synonyms.h" +#include "xpg6.h" + + .data + .align 8 +two23: .long 0x4b000000 / 2**23 + + ENTRY(ilogbf) + movl 4(%esp),%eax / eax <-- x + testl $0x7f800000,%eax / is bexp(x) 0? + jz .bexp_0 / jump if x is 0 or subnormal + / here, biased exponent is non-zero + andl $0x7fffffff,%eax / eax <-- abs(x) + cmpl $0x7f800000,%eax / is bexp(x) 0xff? 
+ jae .bexp_all_1 / jump if x is NaN or Inf + shrl $23,%eax / eax <-- zero_xtnd(bexp(x)) + subl $127,%eax / unbias exponent by 127 + ret + +.bexp_all_1: + movl $0x7fffffff,%eax / x is NaN or inf, so return 0x7fffffff + jmp 0f / go decide whether to raise invalid + +.bexp_0: + andl $0x7fffffff,%eax / eax <-- abs(x), and + / ZF = 1 iff x = 0.0 + jnz .ilogb_subnorm / jump iff x is subnormal + movl $0x80000001,%eax / x is +/-0, so return 0x80000001 +0: + PIC_SETUP(0) + PIC_G_LOAD(movzwl,__xpg6,ecx) + PIC_WRAPUP + andl $_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx + cmpl $0,%ecx + je 1f / skip the 0/0 below iff that flag bit is clear + fldz + fdivp %st,%st(0) / raise invalid as per SUSv3 +1: + ret + +.ilogb_subnorm: / subnormal input + flds 4(%esp) / push x + PIC_SETUP(1) + fmuls PIC_L(two23) / x*2**23; rebias x by 127+23, + / instead of 127 + PIC_WRAPUP + subl $4,%esp / set up storage area + fstps (%esp) / store x*2**23 in storage area + fwait / (shouldn't raise exception, but + / just in case) + movl $0x7f800000,%eax / eax <-- single_bexp_mask + andl (%esp),%eax / eax[23..30] <-- bexp(x*2**23), + / rest_of(eax) <-- 0 + shrl $23,%eax / eax <-- zero_xtnd(bexp(x*2**23)) + subl $150,%eax / unbias rebiased x by 150 (= 127 + 23) + addl $4,%esp / restore stack for caller + ret + .align 4 + SET_SIZE(ilogbf) diff --git a/usr/src/libm/src/i386/common/ilogbl.s b/usr/src/libm/src/i386/common/ilogbl.s new file mode 100644 index 0000000..6c63009 --- /dev/null +++ b/usr/src/libm/src/i386/common/ilogbl.s @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)ilogbl.s 1.6 06/01/23 SMI" + + .file "ilogbl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(ilogbl,function) +#include "libm_synonyms.h" +#include "xpg6.h" + + .data + .align 8 +two63: .long 0x0,0x43d00000 / 2**62 as a double (bexp 0x43d = 1023 + 62), despite the label name + + ENTRY(ilogbl) + movl 12(%esp),%eax / eax <-- sign and bexp of x + andl $0x00007fff,%eax / eax <-- bexp(x) + jz .bexp_0 / jump iff x is 0 or subnormal + / here, biased exponent is non-zero + testl $0x80000000,8(%esp) / test msb of hi_32(sgnfcnd(x)) + jz .ilogbl_not_finite / jump if unsupported format + cmpl $0x00007fff,%eax / is bexp(x) 0x7fff? + je .ilogbl_not_finite / jump if x is NaN or inf + subl $16383,%eax / unbias exponent by 16383 = 0x3fff + ret + +.ilogbl_not_finite: + movl $0x7fffffff,%eax / x is NaN/inf/unsup + jmp 0f / go decide whether to raise invalid + +.bexp_0: + movl 8(%esp),%eax / eax <-- hi_32(sgnfcnd(x)) + orl 4(%esp),%eax / test whether x is 0 + jnz .ilogbl_subnorm / jump iff x is subnormal + movl $0x80000001,%eax / x is +/-0, so return 0x80000001 +0: + PIC_SETUP(0) + PIC_G_LOAD(movzwl,__xpg6,ecx) + PIC_WRAPUP + andl $_C99SUSv3_ilogb_0InfNaN_raises_invalid,%ecx + cmpl $0,%ecx + je 1f / skip the 0/0 below iff that flag bit is clear + fldz + fdivp %st,%st(0) / raise invalid as per SUSv3 +1: + ret + + +.ilogbl_subnorm: / subnormal or pseudo-denormal input + fldt 4(%esp) / push x, setting D-flag + PIC_SETUP(1) + fmull PIC_L(two63) / x*2**62 (see the note on the constant) + PIC_WRAPUP + subl $12,%esp / set up storage area + fstpt (%esp) / store x*2**62 in storage area + movl $0x00007fff,%eax / eax <-- extended_bexp_mask + andl 8(%esp),%eax / eax <-- bexp(x*2**62), sign masked off + subl $16445,%eax / unbias it by 16,445 (= 16,383 + 62) + addl $12,%esp / restore stack for caller + ret + .align 4 + SET_SIZE(ilogbl) diff 
--git a/usr/src/libm/src/i386/common/isnan.s b/usr/src/libm/src/i386/common/isnan.s new file mode 100644 index 0000000..8e5eddb --- /dev/null +++ b/usr/src/libm/src/i386/common/isnan.s @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)isnan.s 1.9 06/01/23 SMI" + + .file "isnan.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(isnan,function) + .weak _isnan + .type _isnan,@function +_isnan = __isnan + .weak _isnand + .type _isnand,@function +_isnand = __isnan + .weak isnand + .type isnand,@function +isnand = __isnan +#include "libm_synonyms.h" + + ENTRY(isnan) + movl 8(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + subl $0x7ff00000,%eax / weed out finite values + jae .nan_or_inf / no jump if arg. is finite + movl $0,%eax / ansi needs (eax) = 0 + ret +.nan_or_inf: + ja .got_nan / no jump if arg. may be infinite; + / let nan waste time + / (eax) = 0 here + testl $0xffffffff,4(%esp) / ZF <-- 1 iff lo_frac. = 0 + / iff arg. is infinite + jnz .got_nan / no jump if arg. 
is infinite; + ret / arg. is infinite, so return (eax) = 0 +.got_nan: + movl $1,%eax / %eax is 0 or some other value here; set it to 1 to + / indicate TRUE + ret + .align 4 + SET_SIZE(isnan) diff --git a/usr/src/libm/src/i386/common/isnanf.s b/usr/src/libm/src/i386/common/isnanf.s new file mode 100644 index 0000000..26aeb32 --- /dev/null +++ b/usr/src/libm/src/i386/common/isnanf.s @@ -0,0 +1,54 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)isnanf.s 1.8 06/01/23 SMI" + + .file "isnanf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(isnanf,function) + .weak _isnanf + .type _isnanf,@function +_isnanf = __isnanf +#include "libm_synonyms.h" + + ENTRY(isnanf) + movl 4(%esp),%eax / eax <-- x + andl $0x7fffffff,%eax / eax <-- abs(x) + subl $0x7f800000,%eax / ZF <-- 1 iff x is infinite + jae .nan_or_inf / no jump iff arg. is finite + movl $0,%eax + ret +.nan_or_inf: + jnz .got_nan / no jump if arg. 
infinite; + / let nan waste time + ret / %eax = 0 here +.got_nan: + movl $1,%eax / %eax > 0 here (abs(x) - 0x7f800000); set it to 1 to + / indicate TRUE + ret + .align 4 + SET_SIZE(isnanf) + diff --git a/usr/src/libm/src/i386/common/isnanl.s b/usr/src/libm/src/i386/common/isnanl.s new file mode 100644 index 0000000..c040913 --- /dev/null +++ b/usr/src/libm/src/i386/common/isnanl.s @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)isnanl.s 1.8 06/01/23 SMI" + + .file "isnanl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(isnanl,function) +#include "libm_synonyms.h" + + ENTRY(isnanl) + movl 12(%esp),%eax / ax <-- sign bit and exp + andl $0x00007fff,%eax / eax <-- exp, sign bit masked off + jz .not_nan / jump if exp is all 0 + xorl $0x00007fff,%eax / (eax) = 0 iff exp is all 1 + jz .nan_or_inf / jump if exp is all 1 + testl $0x80000000,8(%esp) / test leading bit of significand + jz .got_nan / jump if leading bit is 0 + movl $0,%eax / return FALSE +.not_nan: + ret +.nan_or_inf: / note that %eax = 0 from before + cmpl $0x80000000,8(%esp) / what is first half of significand? + jnz .got_nan / jump if not equal to 0x80000000 + testl $0xffffffff,4(%esp) / is second half of significand 0? 
+ jnz .got_nan / jump if not equal to 0 + ret / significand is 0x80000000 00000000, so return (eax) = 0 +.got_nan: + movl $1,%eax / return TRUE + ret + .align 4 + SET_SIZE(isnanl) diff --git a/usr/src/libm/src/i386/common/libm.m4 b/usr/src/libm/src/i386/common/libm.m4 new file mode 100644 index 0000000..63e7d7d --- /dev/null +++ b/usr/src/libm/src/i386/common/libm.m4 @@ -0,0 +1,445 @@ +/ +/ CDDL HEADER START +/ +/ The contents of this file are subject to the terms of the +/ Common Development and Distribution License (the "License"). +/ You may not use this file except in compliance with the License. +/ +/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +/ or http://www.opensolaris.org/os/licensing. +/ See the License for the specific language governing permissions +/ and limitations under the License. +/ +/ When distributing Covered Code, include this CDDL HEADER in each +/ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +/ If applicable, add the following below this CDDL HEADER, with the +/ fields enclosed by brackets "[]" replaced with your own identifying +/ information: Portions Copyright [yyyy] [name of copyright owner] +/ +/ CDDL HEADER END +/ +/ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +/ Use is subject to license terms. 
+/ +/ @(#)libm.m4 1.34 06/01/31 SMI +/ +undefine(`_C')dnl +define(`_C',`')dnl +define(NAME,$1)dnl +dnl +ifdef(`LOCALLIBM',` + .inline NAME(__ieee754_sqrt),0 + fldl (%esp) + fsqrt + .end +/ + .inline NAME(__inline_rint),0 + fldl (%esp) + movl 4(%esp),%eax + andl $0x7fffffff,%eax + cmpl $0x43300000,%eax + jae 1f + frndint +1: + fwait / in case we jumped around the frndint + .end +/ + .inline NAME(__inline_sqrtf),0 + flds (%esp) + fsqrt + .end +/ + .inline NAME(__inline_sqrt),0 + fldl (%esp) + fsqrt + .end +/ + .inline NAME(__inline_fstsw),0 + fstsw %ax + .end +/ +/ 00 - 24 bits +/ 01 - reserved +/ 10 - 53 bits +/ 11 - 64 bits +/ + .inline NAME(__swapRP),0 + subl $4,%esp + fstcw (%esp) + movw (%esp),%ax + movw %ax,%cx + andw $0xfcff,%cx + movl 4(%esp),%edx /// + andl $0x3,%edx + shlw $8,%dx + orw %dx,%cx + movl %ecx,(%esp) + fldcw (%esp) + shrw $8,%ax + andl $0x3,%eax + addl $4,%esp + .end +/ +/ 00 - Round to nearest, with even preferred +/ 01 - Round down +/ 10 - Round up +/ 11 - Chop +/ + .inline NAME(__swap87RD),0 + subl $4,%esp + fstcw (%esp) + movw (%esp),%ax + movw %ax,%cx + andw $0xf3ff,%cx + movl 4(%esp),%edx + andl $0x3,%edx + shlw $10,%dx + orw %dx,%cx + movl %ecx,(%esp) + fldcw (%esp) + shrw $10,%ax + andl $0x3,%eax + addl $4,%esp + .end +') +/ +/ Convert Top-of-Stack to long +/ + .inline NAME(__xtol),0 + subl $8,%esp / 8 bytes of stack space + fstcw 2(%esp) / byte[2:3] = old_cw + movw 2(%esp),%ax + andw $0xf3ff,%ax + orw $0x0c00,%ax / RD set to Chop + movw %ax,(%esp) / byte[0:1] = new_cw + fldcw (%esp) / set new_cw + fistpl 4(%esp) / byte[4:7] = converted long + fstcw (%esp) / restore old RD + movw (%esp),%ax + andw $0xf3ff,%ax + movw 2(%esp),%dx + andw $0x0c00,%dx + orw %ax,%dx + movw %dx,2(%esp) + fldcw 2(%esp) + movl 4(%esp),%eax + addl $8,%esp + .end +/ + .inline NAME(ceil),0 + subl $8,%esp + fstcw (%esp) + fldl 8(%esp) /// + movw (%esp),%cx + orw $0x0c00,%cx + xorw $0x0400,%cx + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = up + frndint + fstcw 4(%esp) 
/ restore RD + movw 4(%esp),%dx + andw $0xf3ff,%dx + movw (%esp),%cx + andw $0x0c00,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) + addl $8,%esp + .end +/ + .inline NAME(copysign),0 + movl 4(%esp),%eax /// eax <-- hi_32(x) + movl 12(%esp),%ecx /// ecx <-- hi_32(y) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + andl $0x80000000,%ecx / ecx[31] <-- sign_bit(y) + orl %ecx,%eax / eax <-- hi_32(copysign(x,y)) + movl (%esp),%ecx /// ecx <-- lo_32(x) + / = lo_32(copysign(x,y)) + subl $8,%esp / set up loading dock for result + movl %ecx,(%esp) / copy lo_32(result) to loading dock + movl %eax,4(%esp) / copy hi_32(result) to loading dock + fldl (%esp) / load copysign(x,y) + fwait / in case fldl causes exception + addl $8,%esp / restore stack-pointer + .end +/ + .inline NAME(d_sqrt_),0 + movl (%esp),%eax + fldl (%eax) + fsqrt + .end +/ + .inline NAME(fabs),0 + fldl (%esp) /// +ifdef(`LOCALLIBM',` +#undef fabs +') + fabs + .end +/ + .inline NAME(fabsf),0 + flds (%esp) + fabs + .end +/ + .inline NAME(fabsl),0 + fldt (%esp) + fabs + .end +/ +/ branchless finite +/ + .inline NAME(finite),0 + movl 4(%esp),%eax /// eax <-- hi_32(x) + notl %eax / not(bexp) = 0 iff bexp = all 1's + andl $0x7ff00000,%eax + negl %eax + shrl $31,%eax + .end +/ + .inline NAME(floor),0 + subl $8,%esp + fstcw (%esp) + fldl 8(%esp) /// + movw (%esp),%cx + orw $0x0c00,%cx + xorw $0x0800,%cx + movw %cx,4(%esp) + fldcw 4(%esp) / set RD = down + frndint + fstcw 4(%esp) / restore RD + movw 4(%esp),%dx + andw $0xf3ff,%dx + movw (%esp),%cx + andw $0x0c00,%cx + orw %dx,%cx + movw %cx,(%esp) + fldcw (%esp) / restore RD + addl $8,%esp + .end +/ +/ branchless isnan +/ ((0x7ff00000-[((lx|-lx)>>31)&1]|ahx)>>31)&1 = 1 iff x is NaN +/ + .inline NAME(isnan),0 + movl (%esp),%eax /// eax <-- lo_32(x) + movl %eax,%ecx + negl %ecx / ecx <-- -lo_32(x) + orl %ecx,%eax + shrl $31,%eax / 1 iff lx != 0 + movl 4(%esp),%ecx /// ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(abs(x)) + orl %ecx,%eax + subl 
$0x7ff00000,%eax + negl %eax + shrl $31,%eax + .end +/ + .inline NAME(isnanf),0 + movl (%esp),%eax + andl $0x7fffffff,%eax + negl %eax + addl $0x7f800000,%eax + shrl $31,%eax + .end +/ + .inline NAME(isinf),0 + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / set first bit to 0 + cmpl $0x7ff00000,%eax + pushfl + popl %eax / bit 6 (ZF) of eax <-- (hi_32(abs(x)) = 0x7ff00000) + cmpl $0,(%esp) / is lo_32(x) = 0? + pushfl + popl %ecx / bit 6 of ecx <-- lo_32(x) == 0 + andl %ecx,%eax + andl $0x40,%eax + shrl $6,%eax + .end +/ + .inline NAME(isnormal),0 + / TRUE iff (x is finite, but + / neither subnormal nor +/-0) + / iff (0 < bexp(x) < 0x7ff) + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7ff00000,%eax / eax[20..30] <-- bexp(x), + / rest_of(eax) <-- 0 + pushfl + popl %ecx / bit 6 (ZF) of ecx <-- (bexp(x) = 0) + subl $0x7ff00000,%eax + pushfl + popl %eax / bit 6 (ZF) of eax <-- (bexp(x) = 0x7ff) + orl %ecx,%eax + andl $0x40,%eax + xorl $0x40,%eax + shrl $6,%eax + .end +/ + .inline NAME(issubnormal),0 + / TRUE iff (bexp(x) = 0 and + / frac(x) /= 0) + movl $0,%eax + movl 4(%esp),%ecx / ecx <-- hi_32(x) + andl $0x7fffffff,%ecx / ecx <-- hi_32(abs(x)) + cmpl $0x00100000,%ecx / is bexp(x) = 0? 
+ adcl $0,%eax / eax <-- 1 iff bexp(x) = 0 (CF from cmpl) + orl (%esp),%ecx / = 0 iff sgnfcnd(x) = 0 + / iff x = +/- 0.0 here + pushfl + popl %ecx / bit 6 (ZF) of ecx <-- (x = +/- 0.0) + andl $0x40,%ecx + xorl $0x40,%ecx + shrl $6,%ecx + andl %ecx,%eax + .end +/ + .inline NAME(iszero),0 + movl 4(%esp),%eax / eax <-- hi_32(x) + andl $0x7fffffff,%eax / eax <-- hi_32(abs(x)) + orl (%esp),%eax / = 0 iff x = +/- 0.0 + pushfl + popl %eax / bit 6 (ZF) of eax <-- (x = +/- 0.0) + andl $0x40,%eax + shrl $6,%eax + .end +/ + .inline NAME(r_sqrt_),0 + movl (%esp),%eax + flds (%eax) + fsqrt + .end +/ + .inline NAME(rint),0 + fldl (%esp) + movl 4(%esp),%eax + andl $0x7fffffff,%eax + cmpl $0x43300000,%eax + jae 1f + frndint +1: + fwait / in case we jumped around frndint + .end +/ + .inline NAME(scalbn),0 + fildl 8(%esp) /// convert N to extended + fldl (%esp) /// push x + fscale + fstp %st(1) + .end +/ + .inline NAME(signbit),0 + movl 4(%esp),%eax /// high part of x + shrl $31,%eax + .end +/ + .inline NAME(signbitf),0 + movl (%esp),%eax + shrl $31,%eax + .end +/ + .inline NAME(sqrt),0 + fldl (%esp) + fsqrt + .end +/ + .inline NAME(sqrtf),0 + flds (%esp) + fsqrt + .end +/ + .inline NAME(sqrtl),0 + fldt (%esp) + fsqrt + .end +/ + .inline NAME(isnanl),0 + movl 8(%esp),%eax / ax <-- sign bit and exp + andl $0x00007fff,%eax + jz 1f / jump if exp is all 0 + xorl $0x00007fff,%eax + jz 2f / jump if exp is all 1 + testl $0x80000000,4(%esp) + jz 3f / jump if leading bit is 0 + movl $0,%eax + jmp 1f +2: / note that %eax = 0 from before + cmpl $0x80000000,4(%esp) / what is first half of significand? + jnz 3f / jump if not equal to 0x80000000 + testl $0xffffffff,(%esp) / is second half of significand 0? 
+ jnz 3f / jump if not equal to 0 + jmp 1f +3: + movl $1,%eax +1: + .end +/ + .inline NAME(__f95_signf),0 + sub $4,%esp + mov 4(%esp),%edx + mov (%edx),%eax + and $0x7fffffff,%eax + mov 8(%esp),%edx + mov (%edx),%ecx + and $0x80000000,%ecx + or %ecx,%eax + mov %eax,(%esp) + flds (%esp) + add $4,%esp + .end +/ + .inline NAME(__f95_sign),0 + mov (%esp),%edx + fldl (%edx) + fabs + mov 4(%esp),%edx + mov 4(%edx),%eax + test %eax,%eax + jns 1f + fchs +1: + .end +/ +ifdef(`LOCALLIBM',`',`dnl + .inline exp,0 + movl 4(%esp),%ecx + andl $0x7fffffff,%ecx + cmpl $0x3fe62e42,%ecx + jae 1f + fldl (%esp) _C(x) + fldl2e _C(log2e , x) + fmulp %st,%st(1) _C(x*log2e) + f2xm1 _C(2**(x*log2(e))-1 = exp(x)-1) + fld1 _C(1 , exp(x)-1) + faddp %st,%st(1) _C(exp(x)) + jmp 3f +1: + cmpl $0x7ff00000,%ecx + jae 1f + fldl (%esp) _C(x) + fldl2e _C(log2e , x) + fmulp %st,%st(1) _C(z:=x*log2e) + fld %st(0) _C(z , z) + frndint _C([z] , z) + fxch _C(z , [z]) + fsub %st(1),%st _C(z-[z] , [z]) + f2xm1 _C(2**(z-[z])-1 , [z]) + fld1 _C(1 , 2**(z-[z])-1 , [z]) + faddp %st,%st(1) _C(2**(z-[z]) , [z]) + fscale _C(exp(x) , [z]) + fstp %st(1) _C(exp(x)) + jmp 3f +1: + ja 2f + movl (%esp),%edx + cmpl $0,%edx + jne 2f + movl 4(%esp),%eax + andl $0x80000000,%eax + jz 2f + fldz + jmp 3f +2: + fldl (%esp) +3: + .end +')dnl diff --git a/usr/src/libm/src/i386/common/llrint.s b/usr/src/libm/src/i386/common/llrint.s new file mode 100644 index 0000000..fa49cbe --- /dev/null +++ b/usr/src/libm/src/i386/common/llrint.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)llrint.s 1.4 06/01/23 SMI" + + .file "llrint.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(llrint,function) +#include "libm_synonyms.h" + + ENTRY(llrint) + movl %esp,%ecx / ecx <-- esp at entry + subl $8,%esp / set up 8-byte storage area below it + fldl 4(%ecx) / load x + fistpll -8(%ecx) / [x]: store x as a 64-bit integer in storage area, pop + fwait + movl -8(%ecx),%eax / eax <-- lo_32([x]) + movl -4(%ecx),%edx / edx <-- hi_32([x]) + addl $8,%esp / restore stack for caller + ret + .align 4 + SET_SIZE(llrint) diff --git a/usr/src/libm/src/i386/common/llrintf.s b/usr/src/libm/src/i386/common/llrintf.s new file mode 100644 index 0000000..cf81189 --- /dev/null +++ b/usr/src/libm/src/i386/common/llrintf.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + + .ident "@(#)llrintf.s 1.4 06/01/23 SMI" + + .file "llrintf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(llrintf,function) +#include "libm_synonyms.h" + + ENTRY(llrintf) + movl %esp,%ecx / ecx <-- esp at entry + subl $8,%esp / set up 8-byte storage area below it + flds 4(%ecx) / load x + fistpll -8(%ecx) / [x]: store x as a 64-bit integer in storage area, pop + fwait + movl -8(%ecx),%eax / eax <-- lo_32([x]) + movl -4(%ecx),%edx / edx <-- hi_32([x]) + addl $8,%esp / restore stack for caller + ret + .align 4 + SET_SIZE(llrintf) diff --git a/usr/src/libm/src/i386/common/llrintl.s b/usr/src/libm/src/i386/common/llrintl.s new file mode 100644 index 0000000..fb47882 --- /dev/null +++ b/usr/src/libm/src/i386/common/llrintl.s @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)llrintl.s 1.4 06/01/23 SMI" + + .file "llrintl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(llrintl,function) +#include "libm_synonyms.h" + + ENTRY(llrintl) + movl %esp,%ecx / ecx <-- esp at entry + subl $8,%esp / set up 8-byte storage area below it + fldt 4(%ecx) / load x + fistpll -8(%ecx) / [x]: store x as a 64-bit integer in storage area, pop + fwait + movl -8(%ecx),%eax / eax <-- lo_32([x]) + movl -4(%ecx),%edx / edx <-- hi_32([x]) + addl $8,%esp / restore stack for caller + ret + .align 4 + SET_SIZE(llrintl) diff --git a/usr/src/libm/src/i386/common/log.s b/usr/src/libm/src/i386/common/log.s new file mode 100644 index 0000000..82bd2ec --- /dev/null +++ b/usr/src/libm/src/i386/common/log.s @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)log.s 1.10 06/01/23 SMI" + + .file "log.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(log) + fldln2 / loge(2) + movl 8(%esp),%eax / eax <-- hi_32(x) + testl $0x80000000,%eax / is sign bit of x set? + jnz .maybe_0_or_less / jump iff it is + testl $0x7fffffff,%eax / is hi_32(x) = 0? + jz .maybe_0 / jump iff it is + fldl 4(%esp) / arg, loge(2) + fyl2x / loge(2)*log2(arg); ln(arg) + ret + +.maybe_0: + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + je .zero / no branch if x is +denormal +.neg_nan_reentry: + fldl 4(%esp) / arg, loge(2) + fyl2x / loge(2)*log2(arg); ln(arg) + ret + +.zero_or_less: + / x =< 0 + testl $0x7fffffff,%eax + jnz .less_than_0 + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + jne .less_than_0 / branch if x is -denormal +.zero: + / x = +/-0 + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + pushl $16 / code passed to _SVID_libm_err for x = +/-0 + jmp .merge + +.maybe_0_or_less: + cmpl $0xfff00000,%eax / compare hi_32(x) with hi_32(-INF) + ja .neg_nan_reentry / branch if x is NaN with signbit = 1 + jb .zero_or_less / branch if x is -0 or negative finite + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx / is x NaN or -INF? + jne .neg_nan_reentry / branch if x is NaN with signbit = 1 + / x = -INF +.less_than_0: + pushl %ebp + movl %esp,%ebp + PIC_SETUP(2) + pushl $17 / code passed to _SVID_libm_err for x < 0 +.merge: + fstp %st(0) / stack empty + pushl 12(%ebp) / push hi_32(x) + pushl 8(%ebp) / push lo_32(x) + pushl 12(%ebp) / push hi_32(x) again + pushl 8(%ebp) / push lo_32(x) again + call PIC_F(_SVID_libm_err) + addl $20,%esp / pop the 20 bytes pushed above + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(log) diff --git a/usr/src/libm/src/i386/common/log10.s b/usr/src/libm/src/i386/common/log10.s new file mode 100644 index 0000000..09c566c --- /dev/null +++ b/usr/src/libm/src/i386/common/log10.s @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)log10.s 1.10 06/01/23 SMI" + + .file "log10.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(log10) + fldlg2 / log10(2) + movl 8(%esp),%eax / eax <-- hi_32(x) + testl $0x80000000,%eax / is sign bit of x set? + jnz .maybe_0_or_less / jump iff it is + testl $0x7fffffff,%eax / is hi_32(x) = 0? + jz .maybe_0 / jump iff it is + fldl 4(%esp) / arg, log10(2) + fyl2x / log10(2)*log2(arg); log10(arg) + ret + +.maybe_0: + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + je .zero / no branch if x is +denormal +.neg_nan_reentry: + fldl 4(%esp) / arg, log10(2) + fyl2x / log10(2)*log2(arg); log10(arg) + ret + +.zero_or_less: + / x =< 0 + testl $0x7fffffff,%eax + jnz .less_than_0 + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx + jne .less_than_0 / branch if x is -denormal +.zero: + / x = +/-0 + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + pushl $18 / code passed to _SVID_libm_err for x = +/-0 + jmp .merge + +.maybe_0_or_less: + cmpl $0xfff00000,%eax / compare hi_32(x) with hi_32(-INF) + ja .neg_nan_reentry / branch if x is NaN with signbit = 1 + jb .zero_or_less / branch if x is -0 or negative finite + movl 4(%esp),%ecx / ecx <-- lo_32(x) + cmpl $0,%ecx / is x NaN or -INF? 
+ jne .neg_nan_reentry / branch if x is NaN with signbit = 1 + / x = -INF +.less_than_0: + pushl %ebp + movl %esp,%ebp + PIC_SETUP(2) + pushl $19 / code passed to _SVID_libm_err for x < 0 +.merge: + fstp %st(0) / stack empty + pushl 12(%ebp) / push hi_32(x) + pushl 8(%ebp) / push lo_32(x) + pushl 12(%ebp) / push hi_32(x) again + pushl 8(%ebp) / push lo_32(x) again + call PIC_F(_SVID_libm_err) + addl $20,%esp / pop the 20 bytes pushed above + PIC_WRAPUP + leave + ret + .align 4 + SET_SIZE(log10) diff --git a/usr/src/libm/src/i386/common/log10f.s b/usr/src/libm/src/i386/common/log10f.s new file mode 100644 index 0000000..83c89a6 --- /dev/null +++ b/usr/src/libm/src/i386/common/log10f.s @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)log10f.s 1.9 06/01/23 SMI" + + .file "log10f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10f,function) +#include "libm_synonyms.h" +#include "libm_protos.h" + + ENTRY(log10f) + fldlg2 / st = log10(2) + flds 4(%esp) / st = arg, st(1) = log10(2) + fyl2x / st = log10(arg) = log10(2)*log2(arg) + ret / result stays in st for the caller + .align 4 + SET_SIZE(log10f) diff --git a/usr/src/libm/src/i386/common/log10l.s b/usr/src/libm/src/i386/common/log10l.s new file mode 100644 index 0000000..47a9997 --- /dev/null +++ b/usr/src/libm/src/i386/common/log10l.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)log10l.s 1.6 06/01/23 SMI" + + .file "log10l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log10l,function) +#include "libm_synonyms.h" + + ENTRY(log10l) + fldlg2 / st = log10(2) + fldt 4(%esp) / st = arg, st(1) = log10(2) + fyl2x / st = log10(arg) = log10(2)*log2(arg) + ret / result stays in st for the caller + .align 4 + SET_SIZE(log10l) diff --git a/usr/src/libm/src/i386/common/log2.s b/usr/src/libm/src/i386/common/log2.s new file mode 100644 index 0000000..e7b4f11 --- /dev/null +++ b/usr/src/libm/src/i386/common/log2.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)log2.s 1.6 06/01/23 SMI" + + .file "log2.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2,function) +#include "libm_synonyms.h" + + ENTRY(log2) + fld1 / push 1.0 + fldl 4(%esp) / push x (double) + fyl2x / st = 1.0*log2(arg) + ret / result stays in st for the caller + .align 4 + SET_SIZE(log2) diff --git a/usr/src/libm/src/i386/common/log2f.s b/usr/src/libm/src/i386/common/log2f.s new file mode 100644 index 0000000..e6af050 --- /dev/null +++ b/usr/src/libm/src/i386/common/log2f.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)log2f.s 1.6 06/01/23 SMI" + + .file "log2f.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2f,function) +#include "libm_synonyms.h" + + ENTRY(log2f) + fld1 / push 1.0 + flds 4(%esp) / push x (single precision) + fyl2x / st = 1.0*log2(arg) + ret / result stays in st for the caller + .align 4 + SET_SIZE(log2f) diff --git a/usr/src/libm/src/i386/common/log2l.s b/usr/src/libm/src/i386/common/log2l.s new file mode 100644 index 0000000..bee4d37 --- /dev/null +++ b/usr/src/libm/src/i386/common/log2l.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)log2l.s 1.6 06/01/23 SMI" + + .file "log2l.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(log2l,function) +#include "libm_synonyms.h" + + ENTRY(log2l) + fld1 / push 1.0 + fldt 4(%esp) / push x (extended precision) + fyl2x / st = 1.0*log2(arg) + ret / result stays in st for the caller + .align 4 + SET_SIZE(log2l) diff --git a/usr/src/libm/src/i386/common/logl.s b/usr/src/libm/src/i386/common/logl.s new file mode 100644 index 0000000..ac64675 --- /dev/null +++ b/usr/src/libm/src/i386/common/logl.s @@ -0,0 +1,40 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)logl.s 1.7 06/01/23 SMI" + + .file "logl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(logl,function) +#include "libm_synonyms.h" + + ENTRY(logl) + fldln2 / st = loge(2) + fldt 4(%esp) / st = arg, st(1) = loge(2) + fyl2x / st = ln(arg) = loge(2)*log2(arg) + ret / result stays in st for the caller + .align 4 + SET_SIZE(logl) diff --git a/usr/src/libm/src/i386/common/lrint.s b/usr/src/libm/src/i386/common/lrint.s new file mode 100644 index 0000000..aefab57 --- /dev/null +++ b/usr/src/libm/src/i386/common/lrint.s @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)lrint.s 1.3 06/01/23 SMI" + + .file "lrint.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lrint,function) +#include "libm_synonyms.h" + + ENTRY(lrint) + movl %esp,%ecx / ecx = esp on entry; the argument is at 4(%ecx) + subl $8,%esp / reserve 8 bytes of scratch below ecx + fldl 4(%ecx) / load x + fistpl -8(%ecx) / [x], stored as a 32-bit integer in the scratch area + fwait + movl -8(%ecx),%eax / eax = [x] + addl $8,%esp / release the scratch area + ret + .align 4 + SET_SIZE(lrint) diff --git a/usr/src/libm/src/i386/common/lrintf.s b/usr/src/libm/src/i386/common/lrintf.s new file mode 100644 index 0000000..1a65d3d --- /dev/null +++ b/usr/src/libm/src/i386/common/lrintf.s @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)lrintf.s 1.3 06/01/23 SMI" + + .file "lrintf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lrintf,function) +#include "libm_synonyms.h" + + ENTRY(lrintf) + movl %esp,%ecx / ecx = esp on entry; the argument is at 4(%ecx) + subl $8,%esp / reserve 8 bytes of scratch below ecx + flds 4(%ecx) / load x + fistpl -8(%ecx) / [x], stored as a 32-bit integer in the scratch area + fwait + movl -8(%ecx),%eax / eax = [x] + addl $8,%esp / release the scratch area + ret + .align 4 + SET_SIZE(lrintf) diff --git a/usr/src/libm/src/i386/common/lrintl.s b/usr/src/libm/src/i386/common/lrintl.s new file mode 100644 index 0000000..4a5203c --- /dev/null +++ b/usr/src/libm/src/i386/common/lrintl.s @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)lrintl.s 1.3 06/01/23 SMI" + + .file "lrintl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lrintl,function) +#include "libm_synonyms.h" + + ENTRY(lrintl) + movl %esp,%ecx / ecx = esp on entry; the argument is at 4(%ecx) + subl $8,%esp / reserve 8 bytes of scratch below ecx + fldt 4(%ecx) / load x + fistpl -8(%ecx) / [x], stored as a 32-bit integer in the scratch area + fwait + movl -8(%ecx),%eax / eax = [x] + addl $8,%esp / release the scratch area + ret + .align 4 + SET_SIZE(lrintl) diff --git a/usr/src/libm/src/i386/common/lround.s b/usr/src/libm/src/i386/common/lround.s new file mode 100644 index 0000000..981335a --- /dev/null +++ b/usr/src/libm/src/i386/common/lround.s @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)lround.s 1.3 06/01/23 SMI" + + .file "lround.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lround,function) +#include "libm_synonyms.h" +#undef fabs + + .section .rodata + .align 4 +.Lhalf: .float 0.5 + + ENTRY(lround) + movl %esp,%ecx / ecx = esp on entry; the argument is at 4(%ecx) + subl $8,%esp / scratch: -8(%ecx) and -4(%ecx) + fstcw -8(%ecx) / save the incoming control word + fldl 4(%ecx) / x + movw -8(%ecx),%dx + andw $0xf3ff,%dx / clear control word bits 10-11 + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) / x,x + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx / current control word without bits 10-11 + movw -8(%ecx),%ax + andw $0x0c00,%ax / bits 10-11 of the saved control word + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp 0f / unordered compare + je 0f / x = [x] + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(.Lhalf) + PIC_WRAPUP + fnstsw %ax + sahf + jae 2f / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch. +0: + fstp %st(0) / drop the top entry; one value is left to convert +1: + fistpl -8(%ecx) / convert to a 32-bit integer and pop + fwait + movl -8(%ecx),%eax / eax = converted value + addl $8,%esp / release the scratch area + ret +2: + / x = n+0.5, recompute lround(x) as x+sign(x)*0.5 + fldl 4(%ecx) / x, 0.5, [x] + movl 8(%ecx),%eax / high part of x + andl $0x80000000,%eax / look at sign bit + jnz 3f + fadd / x+0.5,[x] + fstp %st(1) / x+0.5 + jmp 1b +3: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + fstp %st(1) / x-0.5 + jmp 1b + .align 4 + SET_SIZE(lround) diff --git a/usr/src/libm/src/i386/common/lroundl.s b/usr/src/libm/src/i386/common/lroundl.s new file mode 100644 index 0000000..23e3acf --- /dev/null +++ b/usr/src/libm/src/i386/common/lroundl.s @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)lroundl.s 1.4 06/01/23 SMI" + + .file "lroundl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(lroundl,function) +#include "libm_synonyms.h" +#undef fabs + + .section .rodata + .align 4 +.Lhalf: .float 0.5 + + ENTRY(lroundl) + movl %esp,%ecx / ecx = esp on entry; the argument is at 4(%ecx) + subl $8,%esp / scratch: -8(%ecx) and -4(%ecx) + fstcw -8(%ecx) / save the incoming control word + fldt 4(%ecx) / x + movw -8(%ecx),%dx + andw $0xf3ff,%dx / clear control word bits 10-11 + movw %dx,-4(%ecx) + fldcw -4(%ecx) / set RD = to_nearest + fld %st(0) / x,x + frndint / [x],x + fstcw -4(%ecx) + movw -4(%ecx),%dx + andw $0xf3ff,%dx / current control word without bits 10-11 + movw -8(%ecx),%ax + andw $0x0c00,%ax / bits 10-11 of the saved control word + orw %dx,%ax + movw %ax,-8(%ecx) + fldcw -8(%ecx) / restore RD + fucom / check if x is already an integer + fstsw %ax + sahf + jp 0f / unordered compare + je 0f / x = [x] + fxch / x,[x] + fsub %st(1),%st / x-[x],[x] + fabs / |x-[x]|,[x] + PIC_SETUP(1) + fcoms PIC_L(.Lhalf) + PIC_WRAPUP + fnstsw %ax + sahf + jae 2f / if |x-[x]| = 0.5 goto halfway, + / most cases will not take branch.
+0: + fstp %st(0) / drop the top entry; one value is left to convert +1: + fistpl -8(%ecx) / convert to a 32-bit integer and pop + fwait + movl -8(%ecx),%eax / eax = converted value + addl $8,%esp / release the scratch area + ret +2: + / x = n+0.5, recompute lroundl(x) as x+sign(x)*0.5 + fldt 4(%ecx) / x, 0.5, [x] + movw 12(%ecx),%ax / sign+exp part of x + andw $0x8000,%ax / look at sign bit + jnz 3f + fadd / x+0.5,[x] + fstp %st(1) / x+0.5 + jmp 1b +3: + / here, x is negative, so return x-0.5 + fsubp %st,%st(1) / x-0.5,[x] + fstp %st(1) / x-0.5 + jmp 1b + .align 4 + SET_SIZE(lroundl) diff --git a/usr/src/libm/src/i386/common/nextafter.s b/usr/src/libm/src/i386/common/nextafter.s new file mode 100644 index 0000000..ed1975f --- /dev/null +++ b/usr/src/libm/src/i386/common/nextafter.s @@ -0,0 +1,133 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)nextafter.s 1.12 06/01/23 SMI" + + .file "nextafter.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nextafter,function) + .weak _nextafter + .type _nextafter,@function +_nextafter = __nextafter +#include "libm_synonyms.h" +#include "libm_protos.h" + + .data + .align 8 +Fmin: .long 0x1,0x0 +ftmp: .long 0,0 /// WILL WRITE INTO + + + ENTRY(nextafter) + pushl %ebp + movl %esp,%ebp + fldl 16(%ebp) / y + subl $8,%esp / -8(%ebp) and -4(%ebp) hold the result z + fldl 8(%ebp) / load x + fucom / x : y + fstsw %ax + sahf + jp .NaN + je .equal + fstp %st(1) / x + ja .bigger + / x < y + ftst / x : 0 + movl $1,%ecx /// Fmin + movl %ecx,-8(%ebp) + movl $0,%ecx /// Fmin+4 + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .final / x = 0: z = Fmin + ja .addulp + jb .subulp +.bigger: + / x > y + ftst / x : 0 + movl $1,%ecx /// Fmin + movl %ecx,-8(%ebp) + movl $0,%ecx /// Fmin+4 + xorl $0x80000000,%ecx / set the sign bit: -Fmin + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .final / x = 0: z = -Fmin + jb .addulp +.subulp: + movl 8(%ebp),%eax / low x + movl 12(%ebp),%ecx / high x + subl $1,%eax / low x - ulp + movl %eax,-8(%ebp) + sbbl $0x0,%ecx / propagate the borrow into high x + movl %ecx,-4(%ebp) + jmp .final +.addulp: + movl 8(%ebp),%eax / low x + movl 12(%ebp),%ecx / high x + addl $1,%eax / low x + ulp + movl %eax,-8(%ebp) + adcl $0x0,%ecx / propagate the carry into high x + movl %ecx,-4(%ebp) + +.final: + fstp %st(0) / stack empty + fldl -8(%ebp) / z + andl $0x7ff00000,%ecx / exponent field of high z + jz .underflow / exponent 0: z is zero or subnormal + cmpl $0x7ff00000,%ecx + je .overflow / exponent all ones + jmp .return +.overflow: + PIC_SETUP(1) + pushl $46 / last argument for _SVID_libm_err: 46 + fstp %st(0) / stack empty + pushl -4(%ebp) / high z + pushl -8(%ebp) / low z + pushl -4(%ebp) / high z again: z is passed twice + pushl -8(%ebp) / low z + call PIC_F(_SVID_libm_err) + addl $20,%esp / discard the five words pushed above + PIC_WRAPUP + jmp .return +.underflow: + PIC_SETUP(2) + fldl PIC_L(Fmin) / Fmin, z + fmul %st(0),%st / Fmin*Fmin, z + fstpl PIC_L(ftmp) / create underflow signal + PIC_WRAPUP + jmp .return +.equal: + fstp %st(0) / C99 says to return y when x == y + jmp .return +.NaN: + faddp %st,%st(1) / x+y +.return: + fwait + leave + ret + .align 4 + SET_SIZE(nextafter) diff --git a/usr/src/libm/src/i386/common/nextafterf.s b/usr/src/libm/src/i386/common/nextafterf.s new file mode 100644
index 0000000..074c549 --- /dev/null +++ b/usr/src/libm/src/i386/common/nextafterf.s @@ -0,0 +1,113 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)nextafterf.s 1.10 06/01/23 SMI" + + .file "nextafterf.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nextafterf,function) +#include "libm_synonyms.h" + + .data + .align 4 +Fmaxf: .long 0x7f7fffff +Fminf: .long 0x1 +ftmpf: .long 0 + + + ENTRY(nextafterf) + pushl %ebp + movl %esp,%ebp + movl $0,%eax /// upper half of %eax must be initialized + flds 12(%ebp) / y + subl $4,%esp / -4(%ebp) holds the result z + flds 8(%ebp) / x, y + fucom / x : y + fstsw %ax + sahf + jp .NaN + je .equal + fstp %st(1) / x + ja .bigger + / x < y + ftst / x : 0 + movl $0x1,-4(%ebp) / -4(%ebp) contains Fminf + fnstsw %ax + sahf + je .final / x = 0: z = Fminf + ja .addulp + jb .subulp +.bigger: + / x > y + ftst / x : 0 + movl $0x80000001,-4(%ebp) / -4(%ebp) contains -Fminf + fnstsw %ax + sahf + je .final / x = 0: z = -Fminf + jb .addulp +.subulp: + movl 8(%ebp),%eax / x + subl $1,%eax / x - ulp + movl %eax,-4(%ebp) + jmp .final +.addulp: + movl 8(%ebp),%eax / x + addl $1,%eax / x + ulp + movl %eax,-4(%ebp) + +.final: + fstp %st(0) / empty + flds -4(%ebp) / z + andl $0x7f800000,%eax / exponent field of z; 0 on the x = 0 paths, where only %ax was written + jz .underflow / exponent 0: z is zero or subnormal + cmpl $0x7f800000,%eax + je .overflow / exponent all ones + jmp .return +.overflow: + PIC_SETUP(1) + flds PIC_L(Fmaxf) / Fmaxf, z + fmul %st(0),%st / overflow-to-Inf, z + fstps PIC_L(ftmpf) / z & create overflow signal + PIC_WRAPUP + jmp .return +.underflow: + PIC_SETUP(2) + flds PIC_L(Fminf) / Fminf, z + fmul %st(0),%st / underflow-to-0, z + fstps PIC_L(ftmpf) / z & create underflow signal + PIC_WRAPUP + jmp .return +.equal: + fstp %st(0) / C99 says to return y when x == y + jmp .return +.NaN: + faddp %st,%st(1) / x+y +.return: + fwait + leave + ret + .align 4 + SET_SIZE(nextafterf) diff --git a/usr/src/libm/src/i386/common/nextafterl.s b/usr/src/libm/src/i386/common/nextafterl.s new file mode 100644 index 0000000..103dfce --- /dev/null +++ b/usr/src/libm/src/i386/common/nextafterl.s @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)nextafterl.s 1.14 06/01/23 SMI" + + .file "nextafterl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nextafterl,function) +#include "libm_synonyms.h" + + .section .rodata + .align 4 +.LFmaxl: .long 0xffffffff,0xffffffff,0x00007ffe +.LFminl: .long 0x1,0x0,0x0 + + + ENTRY(nextafterl) + pushl %ebp + movl %esp,%ebp + fldt 20(%ebp) / y + subl $12,%esp / -12(%ebp)..-4(%ebp) hold the result z + fldt 8(%ebp) / load x + fucom / x : y + fstsw %ax + sahf + jp .LNaN + je .Lequal + fstp %st(1) / x + ja .Lbigger + / x < y + ftst / x : 0 + movl $1,-12(%ebp) /// -12(%ebp) contains Fminl + movl $0,-8(%ebp) + movl $0,%ecx /// final needs this + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .Lfinal / x = 0: z = Fminl + ja .Laddulp + jb .Lsubulp +.Lbigger: + / x > y + ftst / x : 0 + movl $1,-12(%ebp) /// -12(%ebp) contains -Fminl + movl $0,-8(%ebp) + movl $0x00008000,%ecx /// final needs this + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .Lfinal / x = 0: z = -Fminl + jb .Laddulp +.Lsubulp: + movl 12(%ebp),%edx / high word of significand of x + movl 16(%ebp),%ecx / x's exponent + andl $0x0000ffff,%ecx / keep the sign bit and the 15 exponent bits + movl %edx,%eax + not %eax + andl $0x80000000,%eax / look at explicit leading bit + orl %ecx,%eax + andl $0x80007fff,%eax + jnz .Lnot_pseudonormal / zero value implies pseudonormal (exponent 0 with leading bit set) + addl
$1,%ecx / if pseudonormal, turn into equivalent normal +.Lnot_pseudonormal: + movl 8(%ebp),%eax / low x + subl $1,%eax / low x - ulp + movl %eax,-12(%ebp) + cmpl $0xffffffff,%eax / this means low x was 0 + jz .Lborrow + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lborrow: + cmpl $0x80000000,%edx / look at high x + je .Lsecond_borrow / only the leading bit is set: borrow from the exponent + subl $1,%edx + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lsecond_borrow: + movl %ecx,%eax + andl $0x7fff,%eax / look at exp x without sign bit + cmpl $1,%eax + jbe .Lsubnormal_result / exp > 1 ==> result will be normal + movl $0xffffffff,-8(%ebp) + subl $1,%ecx + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lsubnormal_result: + movl $0x7fffffff,-8(%ebp) / leading bit clear, all lower bits of the high word set + movl %ecx,%eax + andl $0x8000,%eax / look at sign bit + jz .Lpositive + movl $0x8000,%ecx / exponent 0, sign bit set + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lpositive: + movl $0,%ecx / exponent 0, sign bit clear + movl %ecx,-4(%ebp) + jmp .Lfinal +.Laddulp: + movl 12(%ebp),%edx / high x + movl 16(%ebp),%ecx / x's exponent + andl $0x0000ffff,%ecx / keep the sign bit and the 15 exponent bits + movl %edx,%eax + not %eax + andl $0x80000000,%eax / look at explicit leading bit + orl %ecx,%eax + andl $0x80007fff,%eax + jnz .Lnot_pseudonormal_2 / zero value implies pseudonormal (exponent 0 with leading bit set) + addl $1,%ecx / if pseudonormal, turn into equivalent normal +.Lnot_pseudonormal_2: + movl 8(%ebp),%eax / low x + addl $1,%eax / low x + ulp + movl %eax,-12(%ebp) + jz .Lcarry / jump if the content of %eax is 0 + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lcarry: + movl %edx,%eax + andl $0x7fffffff,%eax + cmpl $0x7fffffff,%eax / look at high x + je .Lsecond_carry / all bits below the leading bit are set + addl $1,%edx + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lsecond_carry: + movl $0x80000000,-8(%ebp) / high word keeps only the leading bit + addl $1,%ecx / and the exponent goes up by one + movl %ecx,-4(%ebp) +.Lfinal: + fstp %st(0) / stack empty + fldt -12(%ebp) / z + andl $0x00007fff,%ecx / exponent of z + jz .Lunderflow / exponent 0: z is zero or subnormal + cmpw $0x7fff,%cx + je .Loverflow / exponent all ones + jmp .Lreturn +.Loverflow: + PIC_SETUP(1) + fldt PIC_L(.LFmaxl) + PIC_WRAPUP + fmulp %st,%st(0) / create overflow signal + jmp .Lreturn +.Lunderflow: + PIC_SETUP(2) + fldt PIC_L(.LFminl) + PIC_WRAPUP + fmulp
%st,%st(0) / create underflow signal + jmp .Lreturn +.Lequal: + fstp %st(0) / C99 says to return y when x == y + jmp .Lreturn +.LNaN: + faddp %st,%st(1) / x+y +.Lreturn: + fwait + leave + ret + .align 4 + SET_SIZE(nextafterl) diff --git a/usr/src/libm/src/i386/common/nexttowardl.s b/usr/src/libm/src/i386/common/nexttowardl.s new file mode 100644 index 0000000..9b8999a --- /dev/null +++ b/usr/src/libm/src/i386/common/nexttowardl.s @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + + .ident "@(#)nexttowardl.s 1.4 06/01/23 SMI" + + .file "nexttowardl.s" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nexttowardl,function) +#include "libm_synonyms.h" + + .section .rodata + .align 4 +.LFmaxl: .long 0xffffffff,0xffffffff,0x00007ffe +.LFminl: .long 0x1,0x0,0x0 + + + ENTRY(nexttowardl) + pushl %ebp + movl %esp,%ebp + fldt 20(%ebp) / y + subl $12,%esp / -12(%ebp)..-4(%ebp) hold the result z + fldt 8(%ebp) / load x + fucom / x : y + fstsw %ax + sahf + jp .LNaN + je .Lequal + fstp %st(1) / x + ja .Lbigger + / x < y + ftst / x : 0 + movl $1,-12(%ebp) /// -12(%ebp) contains Fminl + movl $0,-8(%ebp) + movl $0,%ecx /// final needs this + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .Lfinal / x = 0: z = Fminl + ja .Laddulp + jb .Lsubulp +.Lbigger: + / x > y + ftst / x : 0 + movl $1,-12(%ebp) /// -12(%ebp) contains -Fminl + movl $0,-8(%ebp) + movl $0x00008000,%ecx /// final needs this + movl %ecx,-4(%ebp) + fnstsw %ax + sahf + je .Lfinal / x = 0: z = -Fminl + jb .Laddulp +.Lsubulp: + movl 12(%ebp),%edx / high word of significand of x + movl 16(%ebp),%ecx / x's exponent + andl $0x0000ffff,%ecx / keep the sign bit and the 15 exponent bits + movl %edx,%eax + not %eax + andl $0x80000000,%eax / look at explicit leading bit + orl %ecx,%eax + andl $0x80007fff,%eax + jnz .Lnot_pseudonormal / zero value implies pseudonormal (exponent 0 with leading bit set) + addl $1,%ecx / if pseudonormal, turn into equivalent normal +.Lnot_pseudonormal: + movl 8(%ebp),%eax / low x + subl $1,%eax / low x - ulp + movl %eax,-12(%ebp) + cmpl $0xffffffff,%eax / this means low x was 0 + jz .Lborrow + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lborrow: + cmpl $0x80000000,%edx / look at high x + je .Lsecond_borrow / only the leading bit is set: borrow from the exponent + subl $1,%edx + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lsecond_borrow: + movl %ecx,%eax + andl $0x7fff,%eax / look at exp x without sign bit + cmpl $1,%eax + jbe .Lsubnormal_result / exp > 1 ==> result will be normal + movl $0xffffffff,-8(%ebp) + subl $1,%ecx + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lsubnormal_result: + movl $0x7fffffff,-8(%ebp) / leading bit clear, all lower bits of the high word set + movl %ecx,%eax + andl $0x8000,%eax / look at sign bit + jz .Lpositive +
movl $0x8000,%ecx / exponent 0, sign bit set + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lpositive: + movl $0,%ecx / exponent 0, sign bit clear + movl %ecx,-4(%ebp) + jmp .Lfinal +.Laddulp: + movl 12(%ebp),%edx / high x + movl 16(%ebp),%ecx / x's exponent + andl $0x0000ffff,%ecx / keep the sign bit and the 15 exponent bits + movl %edx,%eax + not %eax + andl $0x80000000,%eax / look at explicit leading bit + orl %ecx,%eax + andl $0x80007fff,%eax + jnz .Lnot_pseudonormal_2 / zero value implies pseudonormal (exponent 0 with leading bit set) + addl $1,%ecx / if pseudonormal, turn into equivalent normal +.Lnot_pseudonormal_2: + movl 8(%ebp),%eax / low x + addl $1,%eax / low x + ulp + movl %eax,-12(%ebp) + jz .Lcarry / jump if the content of %eax is 0 + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lcarry: + movl %edx,%eax + andl $0x7fffffff,%eax + cmpl $0x7fffffff,%eax / look at high x + je .Lsecond_carry / all bits below the leading bit are set + addl $1,%edx + movl %edx,-8(%ebp) + movl %ecx,-4(%ebp) + jmp .Lfinal +.Lsecond_carry: + movl $0x80000000,-8(%ebp) / high word keeps only the leading bit + addl $1,%ecx / and the exponent goes up by one + movl %ecx,-4(%ebp) +.Lfinal: + fstp %st(0) / stack empty + fldt -12(%ebp) / z + andl $0x00007fff,%ecx / exponent of z + jz .Lunderflow / exponent 0: z is zero or subnormal + cmpw $0x7fff,%cx + je .Loverflow / exponent all ones + jmp .Lreturn +.Loverflow: + PIC_SETUP(1) + fldt PIC_L(.LFmaxl) + PIC_WRAPUP + fmulp %st,%st(0) / create overflow signal + jmp .Lreturn +.Lunderflow: + PIC_SETUP(2) + fldt PIC_L(.LFminl) + PIC_WRAPUP + fmulp %st,%st(0) / create underflow signal + jmp .Lreturn +.Lequal: + fstp %st(0) / C99 says to return y when x == y + jmp .Lreturn +.LNaN: + faddp %st,%st(1) / x+y +.Lreturn: + fwait + leave + ret + .align 4 + SET_SIZE(nexttowardl) diff --git a/usr/src/libm/src/i386/common/pow.s b/usr/src/libm/src/i386/common/pow.s new file mode 100644 index 0000000..96340b9 --- /dev/null +++ b/usr/src/libm/src/i386/common/pow.s @@ -0,0 +1,472 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)pow.s 1.14 06/01/23 SMI" + + .file "pow.s" + +/ Note: 0^NaN should not signal "invalid" but this implementation +/ does because y is placed on the NPX stack. + +/ Special cases: +/ +/ x ** 0 is 1 _SVID_libm_err if x is 0 or NaN +/ 1 ** y is 1 (C99) +/ x ** NaN is NaN +/ NaN ** y (except 0) is NaN +/ x ** 1 is x +/ +-(|x| > 1) ** +inf is +inf +/ +-(|x| > 1) ** -inf is +0 +/ +-(|x| < 1) ** +inf is +0 +/ +-(|x| < 1) ** -inf is +inf +/ (-1) ** +-inf is +1 (C99) +/ +0 ** +y (except 0, NaN) is +0 +/ -0 ** +y (except 0, NaN, odd int) is +0 +/ -0 ** +y (odd int) is -0 +/ +-0 ** -y (except 0, NaN) _SVID_libm_err +/ +inf ** +y (except 0, NaN) is +inf +/ +inf ** -y (except 0, NaN) is +0 +/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) +/ x ** -1 is 1/x +/ x ** 2 is x*x +/ -x ** y (an integer) is (-1)**(y) * (+x)**(y) +/ x ** y (x negative & y not integer) _SVID_libm_err +/ if x and y are finite and x**y = 0 _SVID_libm_err (underflow) +/ if x and y are finite and x**y = inf _SVID_libm_err (overflow) + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(pow,function) +#include "libm_synonyms.h" +#include "libm_protos.h" +#include "xpg6.h" + +#undef fabs + + .data + .align 4 +negzero: + .float -0.0 +one: + .float 1.0 +negone: + .float -1.0 +two: + .float 2.0 +Snan: + .long 0x7f800001 +pinfinity: + .long 0x7f800000 +ninfinity: + .long 0xff800000 + + + ENTRY(pow) + pushl %ebp + 
movl %esp,%ebp + PIC_SETUP(1) + + fldl 8(%ebp) / x + fxam / determine class of x + fnstsw %ax / store status in %ax + movb %ah,%dh / %dh <- condition code of x + + fldl 16(%ebp) / y , x + fxam / determine class of y + fnstsw %ax / store status in %ax + movb %ah,%dl / %dl <- condition code of y + + call .pow_main /// LOCAL + PIC_WRAPUP + leave + ret + +.pow_main: + / x ** 0 is 1 unless x is 0 or a NaN + movb %dl,%cl + andb $0x45,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=? C0=0 when +-0 + jne 1f + movb %dh,%cl + andb $0x45,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=? C0=0 when +-0 + jne 2f + / 0^0 + pushl $20 + jmp .SVIDerr / SVID error handler +2: + cmpb $0x01,%cl /// C3=0 C2=0 C1=? C0=1 when +-NaN + jne 2f + / NaN^0 + pushl $42 + jmp .SVIDerr +2: + / (not 0 or NaN)^0 + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: / y is not zero + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + + / C99: 1 ** anything is 1 + fld1 / 1, y, x + fucomp %st(2) / y, x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jp 1f / so that pow(NaN1,NaN2) returns NaN2 + jne 1f + fstp %st(0) / x + ret + +1: + / x ** NaN is NaN + movb %dl,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(1) / y + ret + +1: / y is not NaN + / NaN ** y (except 0) is NaN + movb %dh,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? 
C0=1 when +-NaN + jne 1f + fstp %st(0) / x + ret + +1: / x is not NaN + / x ** 1 is x + fcoms PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fstp %st(0) / x + ret + +1: / y is not 1 + / +-(|x| > 1) ** +inf is +inf + / +-(|x| > 1) ** -inf is +0 + / +-(|x| < 1) ** +inf is +0 + / +-(|x| < 1) ** -inf is +inf + / +-(|x| = 1) ** +-inf is NaN (+1 if the __xpg6 C99 flag is set) + movb %dl,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .yispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .yisninf + + / +0 ** +y (except 0, NaN) is +0 + / -0 ** +y (except 0, NaN, odd int) is +0 + / +0 ** -y (except 0, NaN) goes to .SVIDzerotoneg + / -0 ** -y (except 0, NaN) goes to .SVIDzerotoneg + / -0 ** +y (odd int) is -0 + movb %dh,%cl + andb $0x47,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0 + je .xispzero + cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0 + je .xisnzero + + / +inf ** +y (except 0, NaN) is +inf + / +inf ** -y (except 0, NaN) is +0 + / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .xispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .xisninf + + / x ** -1 is 1/x + fcoms PIC_L(negone) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fdivrs PIC_L(one) / 1/x , y , x + jmp .signok / check for over/underflow + +1: / y is not -1 + / x ** 2 is x*x + fcoms PIC_L(two) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fld %st(0) / x , x , y , x + fmul / x^2 , y , x + jmp .signok / check for over/underflow + +1: / y is not 2 + / make copies of x & y + fld %st(1) / x , y , x + fld %st(1) / y , x , y , x + + / -x ** y (an integer) is (-1)**(y) * (+x)**(y) + / x ** y (x negative & y not integer) goes to .SVIDerr (case 24) + movl $0,%ecx / track whether to flip sign of result + fld %st(1) / x 
, y , x , y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + fstp %st(0) / y , x , y , x + ja .merge / x > 0 + / x < 0 + call .y_is_int + cmpl $0,%ecx + jne 1f + / x < 0, y is non-integral + fstp %st(0) / x , y , x + fstp %st(0) / y , x + pushl $24 / error case number for .SVIDerr + jmp .SVIDerr / SVID error handler + +1: / x < 0 & y = int + fxch / x , y , y , x + fchs / px = -x , y , y , x + fxch / y , px , y , x +.merge: + / px > 0 + fxch / px , y , y , x + + / x**y = exp(y*ln(x)) + fyl2x / t=y*log2(px) , y , x + fld %st(0) / t , t , y , x + frndint / [t] , t , y , x + fxch / t , [t] , y , x + fucom + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je 1f / t is integral + fsub %st(1),%st / t-[t] , [t] , y , x + f2xm1 / 2**(t-[t])-1 , [t] , y , x + fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x + fscale / 2**t = px**y , [t] , y , x + jmp 2f +1: + fstp %st(0) / t=[t] , y , x + fld1 / 1 , t , y , x + fscale / 1*2**t = x**y , t , y , x +2: + fstp %st(1) / x**y , y , x + cmpl $1,%ecx + jne .signok + fchs / change sign since x<0 & y is an odd int +.signok: + subl $8,%esp + fstpl (%esp) / round to double precision + fldl (%esp) / place result on NPX stack + addl $8,%esp + + fxam / determine class of x**y + fnstsw %ax / store status in %ax + andw $0x4500,%ax + / check for overflow + cmpw $0x0500,%ax / C3=0 C2=1 C1=? C0=1 when +-inf + jne 1f + / x^y overflows + fstp %st(0) / y , x + pushl $21 / error case number for .SVIDerr + jmp .SVIDerr +1: + / check for underflow + cmpw $0x4000,%ax / C3=1 C2=0 C1=? 
C0=0 when +-0 + jne 1f + / x^y underflows + fstp %st(0) / y , x + pushl $22 / error case number for .SVIDerr + jmp .SVIDerr +1: + fstp %st(2) / y , x**y + fstp %st(0) / x**y + ret + +/ ------------------------------------------------------------------------ + +.xispinf: + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpinf / y > 0 + jmp .retpzero / y < 0 + +.xisninf: + / -inf ** +-y is -0 ** -+y + fchs / -y , x + flds PIC_L(negzero) / -0 , -y , x + fstp %st(2) / -y , -0 + jmp .xisnzero + +.yispinf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / |x| == 1 + ja .retpinf / |x| > 1 + jmp .retpzero / |x| < 1 + +.yisninf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / |x| == 1 + ja .retpzero / |x| > 1 + jmp .retpinf / |x| < 1 + +.xispzero: + / y cannot be 0 or NaN ; stack has y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = +0 & y < 0 + jmp .SVIDzerotoneg + +.xisnzero: + / y cannot be 0 or NaN ; stack has y , x + call .y_is_int + cmpl $1,%ecx + jne 1f / y is not an odd integer + / y is an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retnzero / y > 0 + / x = -0 & y < 0 (odd int) goes to .SVIDzerotoneg + / x = -inf & y != 0 or NaN return -inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? 
C0=1 when +-inf + jne .SVIDzerotoneg + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(ninfinity) / -inf + ret + +1: / y is not an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = -0 & y < 0 (not odd int) return +inf (z flag) + / x = -inf & y not 0 or NaN return +inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf + jne .SVIDzerotoneg + jmp .retpinf / return +inf (NO z flag) + +.retpzero: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz / +0 + ret + +.retnzero: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(negzero) / -0 + ret + +.retponeorinvalid: + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(Snan) / Q NaN (i flag) + fwait + ret + +.retpinf: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(pinfinity) / +inf + ret + +.SVIDzerotoneg: + pushl $23 +.SVIDerr: + / At this point the fp stack contains y , x and the number + / of the error case has been pushed on the memory stack. + subl $16,%esp + fstpl 8(%esp) / push y + fstpl (%esp) / push x; NPX stack empty + call PIC_F(_SVID_libm_err) / report result/error according to SVID + addl $20,%esp + ret + +/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer, +/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify +/ %edx. +.y_is_int: + movl 20(%ebp),%eax + andl $0x7fffffff,%eax / |y| + cmpl $0x43400000,%eax + jae 1f / |y| >= 2^53, an even int + cmpl $0x3ff00000,%eax + jb 2f / |y| < 1, can't be an int + movl %eax,%ecx + sarl $20,%ecx + subl $0x433,%ecx + negl %ecx / 52 - unbiased exponent of y + movl 16(%ebp),%eax + bsfl %eax,%eax / index of least sig. 
1 bit + jne 3f / jump if 1 bit found + movl 20(%ebp),%eax + bsfl %eax,%eax + addl $32,%eax / 32 + index of least sig. 1 bit +3: + cmpl %ecx,%eax + jb 2f + ja 1f + movl $1,%ecx + ret +1: + movl $2,%ecx + ret +2: + xorl %ecx,%ecx + ret + .align 4 + SET_SIZE(pow) diff --git a/usr/src/libm/src/i386/common/powf.s b/usr/src/libm/src/i386/common/powf.s new file mode 100644 index 0000000..1845e04 --- /dev/null +++ b/usr/src/libm/src/i386/common/powf.s @@ -0,0 +1,442 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)powf.s 1.16 06/01/23 SMI" + + .file "powf.s" + +/ Note: 0^SNaN should not signal "invalid" but this implementation +/ does because y is placed on the NPX stack. 
+ +/ Special cases: +/ +/ x ** 0 is 1 +/ 1 ** y is 1 (C99) +/ x ** NaN is NaN +/ NaN ** y (except 0) is NaN +/ x ** 1 is x +/ +-(|x| > 1) ** +inf is +inf +/ +-(|x| > 1) ** -inf is +0 +/ +-(|x| < 1) ** +inf is +0 +/ +-(|x| < 1) ** -inf is +inf +/ (-1) ** +-inf is +1 (C99) +/ +0 ** +y (except 0, NaN) is +0 +/ -0 ** +y (except 0, NaN, odd int) is +0 +/ +0 ** -y (except 0, NaN) is +inf (z flag) +/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag) +/ -0 ** y (odd int) is - (+0 ** x) +/ +inf ** +y (except 0, NaN) is +inf +/ +inf ** -y (except 0, NaN) is +0 +/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) +/ x ** -1 is 1/x +/ x ** 2 is x*x +/ -x ** y (an integer) is (-1)**(y) * (+x)**(y) +/ x ** y (x negative & y not integer) is NaN (i flag) + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(powf,function) +#include "libm_synonyms.h" +#include "libm_protos.h" +#include "xpg6.h" + +#undef fabs + + .data + .align 4 +negzero: + .float -0.0 +half: + .float 0.5 +one: + .float 1.0 +negone: + .float -1.0 +two: + .float 2.0 +Snan: + .long 0x7f800001 +pinfinity: + .long 0x7f800000 +ninfinity: + .long 0xff800000 + + + ENTRY(powf) + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + + flds 8(%ebp) / x + fxam / determine class of x + fnstsw %ax / store status in %ax + movb %ah,%dh / %dh <- condition code of x + + flds 12(%ebp) / y , x + fxam / determine class of y + fnstsw %ax / store status in %ax + movb %ah,%dl / %dl <- condition code of y + + call .pow_main /// LOCAL + PIC_WRAPUP + leave + ret + +.pow_main: + / x ** 0 is 1 + movb %dl,%cl + andb $0x45,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=? 
C0=0 when +-0 + jne 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: / y is not zero + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + + / C99: 1 ** anything is 1 + fld1 / 1, y, x + fucomp %st(2) / y, x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jp 1f / so that pow(NaN1,NaN2) returns NaN2 + jne 1f + fstp %st(0) / x + ret + +1: + / x ** NaN is NaN + movb %dl,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(1) / y + ret + +1: / y is not NaN + / NaN ** y (except 0) is NaN + movb %dh,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(0) / x + ret + +1: / x is not NaN + / x ** 1 is x + fcoms PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fstp %st(0) / x + ret + +1: / y is not 1 + / +-(|x| > 1) ** +inf is +inf + / +-(|x| > 1) ** -inf is +0 + / +-(|x| < 1) ** +inf is +0 + / +-(|x| < 1) ** -inf is +inf + / +-(|x| = 1) ** +-inf is NaN (+1 if the __xpg6 C99 flag is set) + movb %dl,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .yispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .yisninf + + / +0 ** +y (except 0, NaN) is +0 + / -0 ** +y (except 0, NaN, odd int) is +0 + / +0 ** -y (except 0, NaN) is +inf (z flag) + / -0 ** -y (except 0, NaN, odd int) is +inf (z flag) + / -0 ** y (odd int) is - (+0 ** y) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0 + je .xispzero + cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0 + je .xisnzero + + / +inf ** +y (except 0, NaN) is +inf + / +inf ** -y (except 0, NaN) is +0 + / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .xispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .xisninf + + / x ** -1 is 1/x + fcoms PIC_L(negone) / y , x + fnstsw %ax / store 
status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fdivrs PIC_L(one) / 1/x , y , x + jmp .signok / check for over/underflow + +1: / y is not -1 + / x ** 2 is square(x) + fcoms PIC_L(two) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fld %st(0) / x , x , y , x + fmul / x^2 , y , x + jmp .signok / check for over/underflow + +1: / y is not 2 + / x ** 1/2 is sqrt(x) + fcoms PIC_L(half) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fsqrt / sqrt(x) , y , x + jmp .signok / check for over/underflow + +1: / y is not 1/2 + / make copies of x & y + fld %st(1) / x , y , x + fld %st(1) / y , x , y , x + + / -x ** y (an integer) is (-1)**(y) * (+x)**(y) + / x ** y (x negative & y not integer) is NaN + movl $0,%ecx / track whether to flip sign of result + fld %st(1) / x , y , x , y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + fstp %st(0) / y , x , y , x + ja .merge / x > 0 + / x < 0 + call .y_is_int + cmpl $0,%ecx + jne 1f + / x < 0 & y != int so x**y = NaN (i flag) + fstp %st(0) / x , y , x + fstp %st(0) / y , x + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdiv %st,%st(0) / 0/0 + ret + +1: / x < 0 & y = int + fxch / x , y , y , x + fchs / px = -x , y , y , x + fxch / y , px , y , x +.merge: + / px > 0 + fxch / px , y , y , x + + / x**y = exp(y*ln(x)) + fyl2x / t=y*log2(px) , y , x + fld %st(0) / t , t , y , x + frndint / [t] , t , y , x + fxch / t , [t] , y , x + fucom + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je 1f / t is integral + fsub %st(1),%st / t-[t] , [t] , y , x + f2xm1 / 2**(t-[t])-1 , [t] , y , x + fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x + fscale / 2**t = px**y , [t] , y , x + jmp 2f +1: + fstp %st(0) / t=[t] , y , x + fld1 / 1 , t , y , x + fscale / 1*2**t = x**y , 
t , y , x +2: + fstp %st(1) / x**y , y , x + cmpl $1,%ecx + jne .signok + fchs / change sign since x<0 & y is an odd int +.signok: + subl $4,%esp + fstps (%esp) / round to single precision + flds (%esp) / place result on NPX stack + addl $4,%esp + fstp %st(2) / y , x**y + fstp %st(0) / x**y + ret + +/ ------------------------------------------------------------------------ + +.xispinf: + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpinf / y > 0 + jmp .retpzero / y < 0 + +.xisninf: + / -inf ** +-y is -0 ** -+y + fchs / -y , x + flds PIC_L(negzero) / -0 , -y , x + fstp %st(2) / -y , -0 + jmp .xisnzero + +.yispinf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / |x| == 1 + ja .retpinf / |x| > 1 + jmp .retpzero / |x| < 1 + +.yisninf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / |x| == 1 + ja .retpzero / |x| > 1 + jmp .retpinf / |x| < 1 + +.xispzero: + / y cannot be 0 or NaN ; stack has y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = +0 & y < 0 so x**y = +inf + jmp .retpinfzflag / ret +inf & z flag + +.xisnzero: + / y cannot be 0 or NaN ; stack has y , x + call .y_is_int + cmpl $1,%ecx + jne 1f / y is not an odd integer + / y is an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retnzero / y > 0 + / x = -0 & y < 0 (odd int) return -inf (z flag) + / x = -inf & y != 0 or NaN return -inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? 
C0=1 when +-inf + je 2f + fdiv %st,%st(1) / y / x, x (raise z flag) +2: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(ninfinity) / -inf + ret + +1: / y is not an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = -0 & y < 0 (not odd int) return +inf (z flag) + / x = -inf & y not 0 or NaN return +inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf + jne .retpinfzflag / ret +inf & divide-by-0 flag + jmp .retpinf / return +inf (NO z flag) + +.retpzero: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz / +0 + ret + +.retnzero: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(negzero) / -0 + ret + +.retponeorinvalid: + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(Snan) / Q NaN (i flag) + fwait + ret + +.retpinf: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(pinfinity) / +inf + ret + +.retpinfzflag: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdivrs PIC_L(one) / 1/0 + ret + +/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer, +/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify +/ %edx. +.y_is_int: + movl 12(%ebp),%eax + andl $0x7fffffff,%eax / |y| + cmpl $0x4b800000,%eax + jae 1f / |y| >= 2^24, an even int + cmpl $0x3f800000,%eax + jb 2f / |y| < 1, can't be an int + movl %eax,%ecx + sarl $23,%ecx + subl $150,%ecx + negl %ecx / 23 - unbiased exponent of y + bsfl %eax,%eax / index of least sig. 
1 bit + cmpl %ecx,%eax + jb 2f + ja 1f + movl $1,%ecx + ret +1: + movl $2,%ecx + ret +2: + xorl %ecx,%ecx + ret + .align 4 + SET_SIZE(powf) diff --git a/usr/src/libm/src/i386/common/powl.s b/usr/src/libm/src/i386/common/powl.s new file mode 100644 index 0000000..6af5fb2 --- /dev/null +++ b/usr/src/libm/src/i386/common/powl.s @@ -0,0 +1,439 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)powl.s 1.14 06/01/23 SMI" + + .file "powl.s" + +/ Special cases: +/ +/ x ** 0 is 1 +/ 1 ** y is 1 (C99) +/ x ** NaN is NaN +/ NaN ** y (except 0) is NaN +/ x ** 1 is x +/ +-(|x| > 1) ** +inf is +inf +/ +-(|x| > 1) ** -inf is +0 +/ +-(|x| < 1) ** +inf is +0 +/ +-(|x| < 1) ** -inf is +inf +/ (-1) ** +-inf is +1 (C99) +/ +0 ** +y (except 0, NaN) is +0 +/ -0 ** +y (except 0, NaN, odd int) is +0 +/ +0 ** -y (except 0, NaN) is +inf (z flag) +/ -0 ** -y (except 0, NaN, odd int) is +inf (z flag) +/ -0 ** y (odd int) is - (+0 ** x) +/ +inf ** +y (except 0, NaN) is +inf +/ +inf ** -y (except 0, NaN) is +0 +/ -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) +/ x ** -1 is 1/x +/ x ** 2 is x*x +/ -x ** y (an integer) is (-1)**(y) * (+x)**(y) +/ x ** y (x negative & y not integer) is NaN (i flag) + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(powl,function) +#include "libm_synonyms.h" +#include "xpg6.h" + +#undef fabs + + .data + .align 4 +negzero: + .float -0.0 +half: + .float 0.5 +one: + .float 1.0 +negone: + .float -1.0 +two: + .float 2.0 +Snan: + .long 0x7f800001 +pinfinity: + .long 0x7f800000 +ninfinity: + .long 0xff800000 + + + ENTRY(powl) + pushl %ebp + movl %esp,%ebp + PIC_SETUP(1) + + fldt 8(%ebp) / x + fxam / determine class of x + fnstsw %ax / store status in %ax + movb %ah,%dh / %dh <- condition code of x + + fldt 20(%ebp) / y , x + fxam / determine class of y + fnstsw %ax / store status in %ax + movb %ah,%dl / %dl <- condition code of y + + call .pow_main /// LOCAL + PIC_WRAPUP + leave + ret + +.pow_main: + / x ** 0 is 1 + movb %dl,%cl + andb $0x45,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=? 
C0=0 when +-0 + jne 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: / y is not zero + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + + / C99: 1 ** anything is 1 + fld1 / 1, y, x + fucomp %st(2) / y, x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jp 1f / so that pow(NaN1,NaN2) returns NaN2 + jne 1f + fstp %st(0) / x + ret + +1: + / x ** NaN is NaN + movb %dl,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(1) / y + ret + +1: / y is not NaN + / NaN ** y (except 0) is NaN + movb %dh,%cl + andb $0x45,%cl + cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN + jne 1f + fstp %st(0) / x + ret + +1: / x is not NaN + / x ** 1 is x + fcoms PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fstp %st(0) / x + ret + +1: / y is not 1 + / +-(|x| > 1) ** +inf is +inf + / +-(|x| > 1) ** -inf is +0 + / +-(|x| < 1) ** +inf is +0 + / +-(|x| < 1) ** -inf is +inf + / +-(|x| = 1) ** +-inf is NaN (+1 if the __xpg6 C99 flag is set) + movb %dl,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .yispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .yisninf + + / +0 ** +y (except 0, NaN) is +0 + / -0 ** +y (except 0, NaN, odd int) is +0 + / +0 ** -y (except 0, NaN) is +inf (z flag) + / -0 ** -y (except 0, NaN, odd int) is +inf (z flag) + / -0 ** y (odd int) is - (+0 ** y) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0 + je .xispzero + cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0 + je .xisnzero + + / +inf ** +y (except 0, NaN) is +inf + / +inf ** -y (except 0, NaN) is +0 + / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) + movb %dh,%cl + andb $0x47,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf + je .xispinf + cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf + je .xisninf + + / x ** -1 is 1/x + fcoms PIC_L(negone) / y , x + fnstsw %ax / store 
status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fdivrs PIC_L(one) / 1/x , y , x + jmp .signok / check for over/underflow + +1: / y is not -1 + / x ** 2 is x*x + fcoms PIC_L(two) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fld %st(0) / x , x , y , x + fmul / x^2 , y , x + jmp .signok / check for over/underflow + +1: / y is not 2 + / x ** 1/2 is sqrt(x) + fcoms PIC_L(half) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + jne 1f + fld %st(1) / x , y , x + fsqrt / sqrt(x) , y , x + jmp .signok / check for over/underflow + +1: / y is not 1/2 + / make copies of x & y + fld %st(1) / x , y , x + fld %st(1) / y , x , y , x + + / -x ** y (an integer) is (-1)**(y) * (+x)**(y) + / x ** y (x negative & y not integer) is NaN + movl $0,%ecx / track whether to flip sign of result + fld %st(1) / x , y , x , y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + fstp %st(0) / y , x , y , x + ja .merge / x > 0 + / x < 0 + call .y_is_int + cmpl $0,%ecx + jne 1f + / x < 0 & y != int so x**y = NaN (i flag) + fstp %st(0) / x , y , x + fstp %st(0) / y , x + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdiv %st,%st(0) / 0/0 + ret + +1: / x < 0 & y = int + fxch / x , y , y , x + fchs / px = -x , y , y , x + fxch / y , px , y , x +.merge: + / px > 0 + fxch / px , y , y , x + + / x**y = exp(y*ln(x)) + fyl2x / t=y*log2(px) , y , x + fld %st(0) / t , t , y , x + frndint / [t] , t , y , x + fxch / t , [t] , y , x + fucom + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je 1f / t is integral + fsub %st(1),%st / t-[t] , [t] , y , x + f2xm1 / 2**(t-[t])-1 , [t] , y , x + fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x + fscale / 2**t = px**y , [t] , y , x + jmp 2f +1: + fstp %st(0) / t=[t] , y , x + fld1 / 1 , t , y , x + fscale / 1*2**t = x**y 
, t , y , x +2: + fstp %st(1) / x**y , y , x + cmpl $1,%ecx + jne .signok + fchs / change sign since x<0 & y is an odd int +.signok: + fstp %st(2) / y , x**y + fstp %st(0) / x**y + ret + +/ ------------------------------------------------------------------------ + +.xispinf: + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpinf / y > 0 + jmp .retpzero / y < 0 + +.xisninf: + / -inf ** +-y is -0 ** -+y + fchs / -y , x + flds PIC_L(negzero) / -0 , -y , x + fstp %st(2) / -y , -0 + jmp .xisnzero + +.yispinf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / |x| == 1 + ja .retpinf / |x| > 1 + jmp .retpzero / |x| < 1 + +.yisninf: + fld %st(1) / x , y , x + fabs / |x| , y , x + fcomps PIC_L(one) / y , x + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + je .retponeorinvalid / |x| == 1 + ja .retpzero / |x| > 1 + jmp .retpinf / |x| < 1 + +.xispzero: + / y cannot be 0 or NaN ; stack has y , x + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = +0 & y < 0 so x**y = +inf + jmp .retpinfzflag / ret +inf & z flag + +.xisnzero: + / y cannot be 0 or NaN ; stack has y , x + call .y_is_int + cmpl $1,%ecx + jne 1f / y is not an odd integer + / y is an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retnzero / y > 0 + / x = -0 & y < 0 (odd int) return -inf (z flag) + / x = -inf & y != 0 or NaN return -inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? 
C0=1 when +-inf + je 2f + fdiv %st,%st(1) / y / x, x (raise z flag) +2: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(ninfinity) / -inf + ret + +1: / y is not an odd integer + ftst / compare %st(0) with 0 + fnstsw %ax / store status in %ax + sahf / 80387 flags in %ax to 80386 flags + ja .retpzero / y > 0 + / x = -0 & y < 0 (not odd int) return +inf (z flag) + / x = -inf & y not 0 or NaN return +inf (NO z flag) + movb %dh,%cl + andb $0x45,%cl + cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf + jne .retpinfzflag / ret +inf & divide-by-0 flag + jmp .retpinf / return +inf (NO z flag) + +.retpzero: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz / +0 + ret + +.retnzero: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(negzero) / -0 + ret + +.retponeorinvalid: + PIC_G_LOAD(movzwl,__xpg6,eax) + andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax + cmpl $0,%eax + je 1f + fstp %st(0) / x + fstp %st(0) / stack empty + fld1 / 1 + ret + +1: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(Snan) / Q NaN (i flag) + fwait + ret + +.retpinf: + fstp %st(0) / x + fstp %st(0) / stack empty + flds PIC_L(pinfinity) / +inf + ret + +.retpinfzflag: + fstp %st(0) / x + fstp %st(0) / stack empty + fldz + fdivrs PIC_L(one) / 1/0 + ret + +/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer, +/ 0 otherwise. Assume y is not zero. Do not raise inexact or modify +/ %edx. +.y_is_int: + movl 28(%ebp),%eax + andl $0x7fff,%eax / exponent of y + cmpl $0x403f,%eax + jae 1f / |y| >= 2^64, an even int + cmpl $0x3fff,%eax + jb 2f / |y| < 1, can't be an int + movl %eax,%ecx + subl $0x403e,%ecx + negl %ecx / 63 - unbiased exponent of y + movl 20(%ebp),%eax + bsfl %eax,%eax / index of least sig. 1 bit + jne 3f / jump if 1 bit found + movl 24(%ebp),%eax + bsfl %eax,%eax + addl $32,%eax / 32 + index of least sig. 
1 bit +3: + cmpl %ecx,%eax + jb 2f + ja 1f + movl $1,%ecx + ret +1: + movl $2,%ecx + ret +2: + xorl %ecx,%ecx + ret + .align 4 + SET_SIZE(powl) diff --git a/usr/src/libm/src/i386/common/remainder.s b/usr/src/libm/src/i386/common/remainder.s new file mode 100644 index 0000000..cd6d350 --- /dev/null +++ b/usr/src/libm/src/i386/common/remainder.s @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+	.ident	"@(#)remainder.s 1.14 06/01/23 SMI"
+
+	.file	"remainder.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainder,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+/* remainder(x, y): fprem1 loop; y == +-0 or x == +-inf goes to _SVID_libm_err case 28. */
+	ENTRY(remainder)
+	pushl	%ebp	/* frame pointer: the error path reads the arguments via %ebp */
+	movl	%esp,%ebp
+	fldl	16(%esp)		/ load arg y
+	fldl	8(%esp)			/ load arg x
+	fucom	/* compare x (st0) with y (st1) without popping */
+	fnstsw	%ax
+	sahf	/* copy the x87 condition codes into the CPU flags */
+	jp	.rem_loop		/ if x or y is NaN, use fprem1
+/* y test: (high word with sign cleared) OR (low word) is zero only for +-0 */
+	movl	20(%esp),%eax		/ eax <-- hi_32(y)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(|y|)
+	orl	16(%esp),%eax		/ eax <-- lo_32(y)|hi_32(|y|)
+	je	.yzero_or_xinf
+/* x test: high word 0x7ff00000 with a zero low word is +-inf */
+	movl	12(%esp),%eax		/ eax <-- hi_32(x)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(|x|)
+	cmpl	$0x7ff00000,%eax
+	jne	.rem_loop
+	cmpl	$0,8(%esp)	/* low 32 bits of x */
+	je	.yzero_or_xinf
+/* ordinary path: iterate fprem1 until status bit 0x400 is clear */
+.rem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check for incomplete reduction
+	jne	.rem_loop		/ while incomplete, do fprem1 again
+	fstp	%st(1)	/* overwrite y with the remainder and pop: result alone in st0 */
+	leave
+	ret
+/* exceptional path: empty the x87 stack and pass (x, y, 28) to _SVID_libm_err */
+.yzero_or_xinf:
+	PIC_SETUP(1)
+	fstp	%st(0)			/ x
+	fstp	%st(0)			/ empty NPX stack
+	pushl	$28			/ case 28 in _SVID_libm_err
+	pushl	20(%ebp)		/ pass y
+	pushl	16(%ebp)
+	pushl	12(%ebp)		/ pass x
+	pushl	8(%ebp)
+	call	PIC_F(_SVID_libm_err)
+	addl	$20,%esp	/* drop the five 32-bit words pushed above */
+	PIC_WRAPUP
+	leave
+	ret
+	.align	4
+	SET_SIZE(remainder)
diff --git a/usr/src/libm/src/i386/common/remainderf.s b/usr/src/libm/src/i386/common/remainderf.s
new file mode 100644
index 0000000..e255196
--- /dev/null
+++ b/usr/src/libm/src/i386/common/remainderf.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remainderf.s 1.7 06/01/23 SMI"
+
+	.file	"remainderf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainderf,function)
+#include "libm_synonyms.h"
+/* remainderf(x, y): repeat fprem1 until status bit 0x400 clears; no special-case checks. */
+	ENTRY(remainderf)
+	flds	8(%esp)			/ load arg y
+	flds	4(%esp)			/ load arg x
+.rem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.rem_loop		/ while reduction incomplete, do fprem1
+	fstp	%st(1)	/* overwrite y with the remainder and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(remainderf)
diff --git a/usr/src/libm/src/i386/common/remainderl.s b/usr/src/libm/src/i386/common/remainderl.s
new file mode 100644
index 0000000..a56cf34
--- /dev/null
+++ b/usr/src/libm/src/i386/common/remainderl.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remainderl.s 1.7 06/01/23 SMI"
+
+	.file	"remainderl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remainderl,function)
+#include "libm_synonyms.h"
+/* remainderl(x, y): extended-precision operands (fldt); same fprem1 loop as the other widths. */
+	ENTRY(remainderl)
+	fldt	16(%esp)		/ load arg y
+	fldt	4(%esp)			/ load arg x
+.rem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.rem_loop		/ while reduction incomplete, do fprem1
+	fstp	%st(1)	/* overwrite y with the remainder and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(remainderl)
diff --git a/usr/src/libm/src/i386/common/remquo.s b/usr/src/libm/src/i386/common/remquo.s
new file mode 100644
index 0000000..8686151
--- /dev/null
+++ b/usr/src/libm/src/i386/common/remquo.s
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remquo.s 1.6 06/01/23 SMI"
+
+	.file	"remquo.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remquo,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+/* remquo(x, y, quo): fprem1 remainder in st0; *quo gets the 3-bit quotient, negated if signs differ. */
+	ENTRY(remquo)
+	fldl	12(%esp)		/ load arg y
+	fldl	4(%esp)			/ load arg x
+.Lrem_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.Lrem_loop		/ while reduction incomplete, do fprem1
+	fstsw	%ax	/* fresh copy: ax was masked down to bit 0x400 above */
+	fwait
+	fstp	%st(1)	/* overwrite y with the remainder and pop: result alone in st0 */
+	movw	%ax,%dx
+	andw	$0x4000,%dx		/ get C3
+	sarw	$13,%dx	/* 0x4000 >> 13 = 2: C3 becomes bit 1 */
+	movw	%ax,%cx
+	andw	$0x100,%cx		/ get C0
+	sarw	$6,%cx	/* 0x100 >> 6 = 4: C0 becomes bit 2 */
+	addw	%cx,%dx
+	andw	$0x200,%ax		/ get C1
+	sarw	$9,%ax	/* 0x200 >> 9 = 1: C1 becomes bit 0 */
+	addw	%dx,%ax	/* ax = C0:C3:C1, a value in 0..7 */
+	cwtl	/* sign-extend ax into eax */
+	movl	8(%esp),%edx		/ sign and bexp of x
+	movl	16(%esp),%ecx		/ sign and bexp of y
+	andl	$0x80000000,%edx	/ edx <- sign(x)
+	andl	$0x80000000,%ecx	/ ecx <- sign(y)
+	cmpl	%edx,%ecx	/* equal sign bits: keep the quotient non-negative */
+	je	.pos
+	negl	%eax			/ negative n
+.pos:
+	movl	20(%esp),%ecx	/* third argument: pointer that receives the quotient */
+	movl	%eax,0(%ecx)		/ last 3 significant bits of quotient
+	ret
+	.align	4
+	SET_SIZE(remquo)
diff --git a/usr/src/libm/src/i386/common/remquof.s b/usr/src/libm/src/i386/common/remquof.s
new file mode 100644
index 0000000..d750571
--- /dev/null
+++ b/usr/src/libm/src/i386/common/remquof.s
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remquof.s 1.6 06/01/23 SMI"
+
+	.file	"remquof.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remquof,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+/* remquof(x, y, quo): single-precision operands; quotient bits taken from C0, C3, C1. */
+	ENTRY(remquof)
+	flds	8(%esp)			/ load arg y
+	flds	4(%esp)			/ load arg x
+.Lremf_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.Lremf_loop		/ while reduction incomplete, do fprem1
+	fstsw	%ax	/* fresh copy: ax was masked down to bit 0x400 above */
+	fwait
+	fstp	%st(1)	/* overwrite y with the remainder and pop: result alone in st0 */
+	movw	%ax,%dx
+	andw	$0x4000,%dx		/ get C3
+	sarw	$13,%dx	/* 0x4000 >> 13 = 2: C3 becomes bit 1 */
+	movw	%ax,%cx
+	andw	$0x100,%cx		/ get C0
+	sarw	$6,%cx	/* 0x100 >> 6 = 4: C0 becomes bit 2 */
+	addw	%cx,%dx
+	andw	$0x200,%ax		/ get C1
+	sarw	$9,%ax	/* 0x200 >> 9 = 1: C1 becomes bit 0 */
+	addw	%dx,%ax	/* ax = C0:C3:C1, a value in 0..7 */
+	cwtl	/* sign-extend ax into eax */
+	movl	4(%esp),%edx		/ sign and bexp of x
+	movl	8(%esp),%ecx		/ sign and bexp of y
+	andl	$0x80000000,%edx	/ edx <- sign(x)
+	andl	$0x80000000,%ecx	/ ecx <- sign(y)
+	cmpl	%edx,%ecx	/* equal sign bits: keep the quotient non-negative */
+	je	.pos
+	negl	%eax			/ negative n
+.pos:
+	movl	12(%esp),%ecx	/* third argument: pointer that receives the quotient */
+	movl	%eax,0(%ecx)		/ last 3 significant bits of quotient
+	ret
+	.align	4
+	SET_SIZE(remquof)
diff --git a/usr/src/libm/src/i386/common/remquol.s b/usr/src/libm/src/i386/common/remquol.s
new file mode 100644
index 0000000..d12b30d
--- /dev/null
+++ b/usr/src/libm/src/i386/common/remquol.s
@@ -0,0 +1,69 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)remquol.s 1.6 06/01/23 SMI"
+
+	.file	"remquol.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(remquol,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+/* remquol(x, y, quo): extended operands; the sign is bit 0x8000 of the sign/exponent word. */
+	ENTRY(remquol)
+	fldt	16(%esp)		/ load arg y
+	fldt	4(%esp)			/ load arg x
+.Lreml_loop:
+	fprem1				/ partial remainder
+	fstsw	%ax			/ store status word
+	andw	$0x400,%ax		/ check whether reduction complete
+	jne	.Lreml_loop		/ while reduction incomplete, do fprem1
+	fstsw	%ax	/* fresh copy: ax was masked down to bit 0x400 above */
+	fwait
+	fstp	%st(1)	/* overwrite y with the remainder and pop: result alone in st0 */
+	movw	%ax,%dx
+	andw	$0x4000,%dx		/ get C3
+	sarw	$13,%dx	/* 0x4000 >> 13 = 2: C3 becomes bit 1 */
+	movw	%ax,%cx
+	andw	$0x100,%cx		/ get C0
+	sarw	$6,%cx	/* 0x100 >> 6 = 4: C0 becomes bit 2 */
+	addw	%cx,%dx
+	andw	$0x200,%ax		/ get C1
+	sarw	$9,%ax	/* 0x200 >> 9 = 1: C1 becomes bit 0 */
+	addw	%dx,%ax	/* ax = C0:C3:C1, a value in 0..7 */
+	cwtl	/* sign-extend ax into eax */
+	movl	12(%esp),%edx		/ sign and bexp of x
+	movl	24(%esp),%ecx		/ sign and bexp of y
+	andl	$0x00008000,%edx	/ edx <- sign(x)
+	andl	$0x00008000,%ecx	/ ecx <- sign(y)
+	cmpl	%edx,%ecx	/* equal sign bits: keep the quotient non-negative */
+	je	.pos
+	negl	%eax			/ negative n
+.pos:
+	movl	28(%esp),%ecx	/* third argument: pointer that receives the quotient */
+	movl	%eax,0(%ecx)		/ last 3 significant bits of quotient
+	ret
+	.align	4
+	SET_SIZE(remquol)
diff --git a/usr/src/libm/src/i386/common/rint.s b/usr/src/libm/src/i386/common/rint.s
new file mode 100644
index 0000000..41c0497
--- /dev/null
+++ b/usr/src/libm/src/i386/common/rint.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)rint.s 1.7 06/01/23 SMI"
+
+	.file	"rint.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(rint,function)
+#include "libm_synonyms.h"
+/* rint(x): frndint unless the high word of |x| is >= 0x43300000; inf and NaN also skip it. */
+	ENTRY(rint)
+	fldl	4(%esp)			/ load x
+	movl	8(%esp),%eax		/ eax <-- hi_32(x)
+	andl	$0x7fffffff,%eax	/ eax <-- hi_32(|x|)
+	cmpl	$0x43300000,%eax	/ is |x| >= 2**52?
+	jae	.done			/ if so, branch (already integral)
+	frndint				/ [x], per rounding mode
+.done:
+	fwait
+	ret
+	.align	4
+	SET_SIZE(rint)
diff --git a/usr/src/libm/src/i386/common/rintf.s b/usr/src/libm/src/i386/common/rintf.s
new file mode 100644
index 0000000..10c2b1f
--- /dev/null
+++ b/usr/src/libm/src/i386/common/rintf.s
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)rintf.s 1.7 06/01/23 SMI"
+
+	.file	"rintf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(rintf,function)
+#include "libm_synonyms.h"
+/* rintf(x): frndint unless the bit pattern of |x| is >= 0x4b000000; inf and NaN also skip it. */
+	ENTRY(rintf)
+	flds	4(%esp)			/ load x
+	movl	4(%esp),%eax		/ eax <-- x
+	andl	$0x7fffffff,%eax	/ eax <-- |x|
+	cmpl	$0x4b000000,%eax	/ is |x| >= 2**23?
+	jae	.done			/ if so, branch (already integral)
+	frndint				/ [x], per rounding mode
+.done:
+	fwait
+	ret
+	.align	4
+	SET_SIZE(rintf)
diff --git a/usr/src/libm/src/i386/common/rintl.s b/usr/src/libm/src/i386/common/rintl.s
new file mode 100644
index 0000000..91014f9
--- /dev/null
+++ b/usr/src/libm/src/i386/common/rintl.s
@@ -0,0 +1,40 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)rintl.s 1.9 06/01/23 SMI"
+
+	.file	"rintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(rintl,function)
+#include "libm_synonyms.h"
+/* rintl(x): unconditional frndint on the extended argument (no magnitude shortcut here). */
+	ENTRY(rintl)
+	fldt	4(%esp)			/ load x
+	frndint				/ [x], per rounding mode
+	fwait
+	ret
+	.align	4
+	SET_SIZE(rintl)
diff --git a/usr/src/libm/src/i386/common/rndintl.s b/usr/src/libm/src/i386/common/rndintl.s
new file mode 100644
index 0000000..f1441e6
--- /dev/null
+++ b/usr/src/libm/src/i386/common/rndintl.s
@@ -0,0 +1,149 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)rndintl.s 1.12 06/01/23 SMI"
+
+	.file	"rndintl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(aintl,function)
+LIBM_ANSI_PRAGMA_WEAK(irintl,function)
+LIBM_ANSI_PRAGMA_WEAK(anintl,function)
+LIBM_ANSI_PRAGMA_WEAK(nintl,function)
+#include "libm_synonyms.h"
+#undef fabs
+/* aintl(x): frndint with control-word bits 0x0c00 forced on; caller's rounding field restored after. */
+	ENTRY(aintl)
+	movl	%esp,%eax	/* eax = entry esp: x at 4(%eax), scratch at -8 and -4 */
+	subl	$8,%esp
+	fstcw	-8(%eax)	/* save the caller's control word */
+	fldt	4(%eax)
+	movw	-8(%eax),%cx
+	orw	$0x0c00,%cx
+	movw	%cx,-4(%eax)
+	fldcw	-4(%eax)		/ set RD = to_zero
+	frndint
+	fstcw	-4(%eax)	/* re-read the control word as it stands now */
+	movw	-4(%eax),%dx
+	andw	$0xf3ff,%dx	/* current word minus its rounding field */
+	movw	-8(%eax),%cx
+	andw	$0x0c00,%cx	/* rounding field of the saved word */
+	orw	%dx,%cx
+	movw	%cx,-8(%eax)
+	fldcw	-8(%eax)		/ restore RD
+	addl	$8,%esp
+	ret
+	.align	4
+	SET_SIZE(aintl)
+/* irintl(x): fistpl stores x as a 32-bit integer in stack scratch; the value is returned in eax. */
+	ENTRY(irintl)
+	movl	%esp,%ecx
+	subl	$8,%esp
+	fldt	4(%ecx)			/ load x
+	fistpl	-8(%ecx)		/ [x]
+	fwait
+	movl	-8(%ecx),%eax
+	addl	$8,%esp
+	ret
+	.align	4
+	SET_SIZE(irintl)
+/* single-precision 0.5, compared against |x-[x]| in anintl below */
+	.data
+	.align	4
+half:	.float	0.5
+/* anintl(x): frndint with the rounding field cleared, then exact halfway cases redone as x +/- 0.5. */
+	ENTRY(anintl)
+.Lanintl:
+	movl	%esp,%ecx	/* ecx = entry esp: x at 4(%ecx), scratch at -8 and -4 */
+	subl	$8,%esp
+	fstcw	-8(%ecx)	/* save the caller's control word */
+	fldt	4(%ecx)
+	movw	-8(%ecx),%dx
+	andw	$0xf3ff,%dx	/* clear the 0x0c00 rounding field */
+	movw	%dx,-4(%ecx)
+	fldcw	-4(%ecx)		/ set RD = to_nearest
+	fld	%st(0)	/* duplicate x so a copy survives frndint */
+	frndint				/ [x],x
+	fstcw	-4(%ecx)	/* re-read the control word as it stands now */
+	movw	-4(%ecx),%dx
+	andw	$0xf3ff,%dx	/* current word minus its rounding field */
+	movw	-8(%ecx),%ax
+	andw	$0x0c00,%ax	/* rounding field of the saved word */
+	orw	%dx,%ax
+	movw	%ax,-8(%ecx)
+	fldcw	-8(%ecx)		/ restore RD
+	fucom				/ check if x is already an integer
+	fstsw	%ax
+	sahf
+	jp	.L0	/* unordered compare: pop [x] at .L0 and return the x copy */
+	je	.L0	/* [x] == x: pop [x] at .L0 and return the x copy */
+	fxch				/ x,[x]
+	fsub	%st(1),%st		/ x-[x],[x]
+	fabs				/ |x-[x]|,[x]
+	PIC_SETUP(1)
+	fcoms	PIC_L(half)
+	PIC_WRAPUP
+	fnstsw	%ax
+	sahf
+	jae	.halfway		/ if |x-[x]| = 0.5 goto halfway,
+					/ most cases will not take branch.
+.L0:
+	addl	$8,%esp
+	fstp	%st(0)	/* pop the scratch top; the entry beneath it is the return value */
+	ret
+.halfway:
+					/ x = n+0.5, recompute anint(x) as x+sign(x)*0.5
+	fldt	4(%ecx)			/ x, 0.5, [x]
+	movw	12(%ecx),%ax		/ sign+exp part of x
+	andw	$0x8000,%ax		/ look at sign bit
+	jnz	.x_neg
+	fadd	/* NOTE(review): presumably the popping form, giving x+0.5,[x] - confirm encoding */
+	addl	$8,%esp
+	fstp	%st(1)	/* overwrite [x] with the sum and pop: one value left */
+	ret
+.x_neg:
+					/ here, x is negative, so return x-0.5
+	fsubp	%st,%st(1)		/ x-0.5,[x]
+	addl	$8,%esp
+	fstp	%st(1)	/* overwrite [x] with the difference and pop: one value left */
+	ret
+	.align	4
+	SET_SIZE(anintl)
+/* nintl(x): re-push the three words of x, call anintl's local entry, convert st0 with fistpl. */
+	ENTRY(nintl)
+	pushl	%ebp
+	movl	%esp,%ebp
+	subl	$8,%esp	/* scratch slot at -8(%ebp) for the integer result */
+	pushl	16(%ebp)
+	pushl	12(%ebp)
+	pushl	8(%ebp)
+	call	.Lanintl		/// LOCAL
+	fistpl	-8(%ebp)
+	fwait
+	movl	-8(%ebp),%eax
+	leave	/* also discards the three pushed argument words */
+	ret
+	.align	4
+	SET_SIZE(nintl)
diff --git a/usr/src/libm/src/i386/common/round.s b/usr/src/libm/src/i386/common/round.s
new file mode 100644
index 0000000..491ee02
--- /dev/null
+++ b/usr/src/libm/src/i386/common/round.s
@@ -0,0 +1,94 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)round.s 1.3 06/01/23 SMI"
+
+	.file	"round.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(round,function)
+#include "libm_synonyms.h"
+#undef fabs
+/* single-precision 0.5, compared against |x-[x]| below */
+	.section .rodata
+	.align	4
+.Lhalf:	.float	0.5
+/* round(x): frndint with the rounding field cleared, then exact halfway cases redone as x +/- 0.5. */
+	ENTRY(round)
+	movl	%esp,%ecx	/* ecx = entry esp: x at 4(%ecx), scratch at -8 and -4 */
+	subl	$8,%esp
+	fstcw	-8(%ecx)	/* save the caller's control word */
+	fldl	4(%ecx)
+	movw	-8(%ecx),%dx
+	andw	$0xf3ff,%dx	/* clear the 0x0c00 rounding field */
+	movw	%dx,-4(%ecx)
+	fldcw	-4(%ecx)		/ set RD = to_nearest
+	fld	%st(0)	/* duplicate x so a copy survives frndint */
+	frndint				/ [x],x
+	fstcw	-4(%ecx)	/* re-read the control word as it stands now */
+	movw	-4(%ecx),%dx
+	andw	$0xf3ff,%dx	/* current word minus its rounding field */
+	movw	-8(%ecx),%ax
+	andw	$0x0c00,%ax	/* rounding field of the saved word */
+	orw	%dx,%ax
+	movw	%ax,-8(%ecx)
+	fldcw	-8(%ecx)		/ restore RD
+	fucom				/ check if x is already an integer
+	fstsw	%ax
+	sahf
+	jp	0f	/* unordered compare: pop [x] at 0: and return the x copy */
+	je	0f	/* [x] == x: pop [x] at 0: and return the x copy */
+	fxch				/ x,[x]
+	fsub	%st(1),%st		/ x-[x],[x]
+	fabs				/ |x-[x]|,[x]
+	PIC_SETUP(1)
+	fcoms	PIC_L(.Lhalf)
+	PIC_WRAPUP
+	fnstsw	%ax
+	sahf
+	jae	2f			/ if |x-[x]| = 0.5 goto halfway,
+					/ most cases will not take branch.
+0:
+	addl	$8,%esp
+	fstp	%st(0)	/* pop the scratch top; the entry beneath it is the return value */
+	ret
+2:
+					/ x = n+0.5, recompute round(x) as x+sign(x)*0.5
+	fldl	4(%ecx)			/ x, 0.5, [x]
+	movl	8(%ecx),%eax		/ high part of x
+	andl	$0x80000000,%eax	/* isolate the sign bit */
+	jnz	3f
+	fadd	/* NOTE(review): presumably the popping form, giving x+0.5,[x] - confirm encoding */
+	addl	$8,%esp
+	fstp	%st(1)	/* overwrite [x] with the sum and pop: one value left */
+	ret
+3:
+					/ here, x is negative, so return x-0.5
+	fsubp	%st,%st(1)		/ x-0.5,[x]
+	addl	$8,%esp
+	fstp	%st(1)	/* overwrite [x] with the difference and pop: one value left */
+	ret
+	.align	4
+	SET_SIZE(round)
diff --git a/usr/src/libm/src/i386/common/roundl.s b/usr/src/libm/src/i386/common/roundl.s
new file mode 100644
index 0000000..cbd9534
--- /dev/null
+++ b/usr/src/libm/src/i386/common/roundl.s
@@ -0,0 +1,94 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)roundl.s 1.4 06/01/23 SMI"
+
+	.file	"roundl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(roundl,function)
+#include "libm_synonyms.h"
+#undef fabs
+/* single-precision 0.5, compared against |x-[x]| below */
+	.section .rodata
+	.align	4
+.Lhalf:	.float	0.5
+/* roundl(x): frndint with the rounding field cleared, then exact halfway cases redone as x +/- 0.5. */
+	ENTRY(roundl)
+	movl	%esp,%ecx	/* ecx = entry esp: x at 4(%ecx), scratch at -8 and -4 */
+	subl	$8,%esp
+	fstcw	-8(%ecx)	/* save the caller's control word */
+	fldt	4(%ecx)
+	movw	-8(%ecx),%dx
+	andw	$0xf3ff,%dx	/* clear the 0x0c00 rounding field */
+	movw	%dx,-4(%ecx)
+	fldcw	-4(%ecx)		/ set RD = to_nearest
+	fld	%st(0)	/* duplicate x so a copy survives frndint */
+	frndint				/ [x],x
+	fstcw	-4(%ecx)	/* re-read the control word as it stands now */
+	movw	-4(%ecx),%dx
+	andw	$0xf3ff,%dx	/* current word minus its rounding field */
+	movw	-8(%ecx),%ax
+	andw	$0x0c00,%ax	/* rounding field of the saved word */
+	orw	%dx,%ax
+	movw	%ax,-8(%ecx)
+	fldcw	-8(%ecx)		/ restore RD
+	fucom				/ check if x is already an integer
+	fstsw	%ax
+	sahf
+	jp	0f	/* unordered compare: pop [x] at 0: and return the x copy */
+	je	0f	/* [x] == x: pop [x] at 0: and return the x copy */
+	fxch				/ x,[x]
+	fsub	%st(1),%st		/ x-[x],[x]
+	fabs				/ |x-[x]|,[x]
+	PIC_SETUP(1)
+	fcoms	PIC_L(.Lhalf)
+	PIC_WRAPUP
+	fnstsw	%ax
+	sahf
+	jae	2f			/ if |x-[x]| = 0.5 goto halfway,
+					/ most cases will not take branch.
+0:
+	addl	$8,%esp
+	fstp	%st(0)	/* pop the scratch top; the entry beneath it is the return value */
+	ret
+2:
+					/ x = n+0.5, recompute roundl(x) as x+sign(x)*0.5
+	fldt	4(%ecx)			/ x, 0.5, [x]
+	movw	12(%ecx),%ax		/ sign+exp of x
+	andw	$0x8000,%ax		/ look at sign bit
+	jnz	3f
+	fadd	/* NOTE(review): presumably the popping form, giving x+0.5,[x] - confirm encoding */
+	addl	$8,%esp
+	fstp	%st(1)	/* overwrite [x] with the sum and pop: one value left */
+	ret
+3:
+					/ here, x is negative, so return x-0.5
+	fsubp	%st,%st(1)		/ x-0.5,[x]
+	addl	$8,%esp
+	fstp	%st(1)	/* overwrite [x] with the difference and pop: one value left */
+	ret
+	.align	4
+	SET_SIZE(roundl)
diff --git a/usr/src/libm/src/i386/common/scalbln.s b/usr/src/libm/src/i386/common/scalbln.s
new file mode 100644
index 0000000..894bf7c
--- /dev/null
+++ b/usr/src/libm/src/i386/common/scalbln.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)scalbln.s 1.3 06/01/23 SMI"
+
+	.file	"scalbln.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalbln,function)
+#include "libm_synonyms.h"
+/* scalbln(x, n): load the 32-bit n at 12(%esp) and the double x, then fscale st0 by 2**st1. */
+	ENTRY(scalbln)
+	fildl	12(%esp)		/ convert N to extended
+	fldl	4(%esp)			/ push x
+	fscale	/* st0 = x scaled by 2 to the power held in st1 */
+	fstp	%st(1)	/* overwrite n with the result and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(scalbln)
diff --git a/usr/src/libm/src/i386/common/scalblnf.s b/usr/src/libm/src/i386/common/scalblnf.s
new file mode 100644
index 0000000..c59aa61
--- /dev/null
+++ b/usr/src/libm/src/i386/common/scalblnf.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)scalblnf.s 1.3 06/01/23 SMI"
+
+	.file	"scalblnf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalblnf,function)
+#include "libm_synonyms.h"
+/* scalblnf(x, n): load the 32-bit n at 8(%esp) and the float x, then fscale st0 by 2**st1. */
+	ENTRY(scalblnf)
+	fildl	8(%esp)			/ convert N to extended
+	flds	4(%esp)			/ push x
+	fscale	/* st0 = x scaled by 2 to the power held in st1 */
+	fstp	%st(1)	/* overwrite n with the result and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(scalblnf)
diff --git a/usr/src/libm/src/i386/common/scalblnl.s b/usr/src/libm/src/i386/common/scalblnl.s
new file mode 100644
index 0000000..c5f667d
--- /dev/null
+++ b/usr/src/libm/src/i386/common/scalblnl.s
@@ -0,0 +1,42 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)scalblnl.s 1.3 06/01/23 SMI"
+
+	.file	"scalblnl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalblnl,function)
+#include "libm_synonyms.h"
+/* scalblnl(x, n): n sits at 16(%esp) after the 12-byte extended x; fscale st0 by 2**st1. */
+	ENTRY(scalblnl)
+	fildl	16(%esp)		/ convert 32-bit integer N
+					/ to extended-double
+	fldt	4(%esp)			/ push x
+	fscale	/* st0 = x scaled by 2 to the power held in st1 */
+	fstp	%st(1)	/* overwrite n with the result and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(scalblnl)
diff --git a/usr/src/libm/src/i386/common/scalbn.s b/usr/src/libm/src/i386/common/scalbn.s
new file mode 100644
index 0000000..5a71a42
--- /dev/null
+++ b/usr/src/libm/src/i386/common/scalbn.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)scalbn.s 1.6 06/01/23 SMI"
+
+	.file	"scalbn.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalbn,function)
+#include "libm_synonyms.h"
+/* scalbn(x, n): load the 32-bit n at 12(%esp) and the double x, then fscale st0 by 2**st1. */
+	ENTRY(scalbn)
+	fildl	12(%esp)		/ convert N to extended
+	fldl	4(%esp)			/ push x
+	fscale	/* st0 = x scaled by 2 to the power held in st1 */
+	fstp	%st(1)	/* overwrite n with the result and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(scalbn)
diff --git a/usr/src/libm/src/i386/common/scalbnf.s b/usr/src/libm/src/i386/common/scalbnf.s
new file mode 100644
index 0000000..6f4753b
--- /dev/null
+++ b/usr/src/libm/src/i386/common/scalbnf.s
@@ -0,0 +1,41 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)scalbnf.s 1.3 06/01/23 SMI"
+
+	.file	"scalbnf.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalbnf,function)
+#include "libm_synonyms.h"
+/* scalbnf(x, n): load the 32-bit n at 8(%esp) and the float x, then fscale st0 by 2**st1. */
+	ENTRY(scalbnf)
+	fildl	8(%esp)			/ convert N to extended
+	flds	4(%esp)			/ push x
+	fscale	/* st0 = x scaled by 2 to the power held in st1 */
+	fstp	%st(1)	/* overwrite n with the result and pop: result alone in st0 */
+	ret
+	.align	4
+	SET_SIZE(scalbnf)
diff --git a/usr/src/libm/src/i386/common/scalbnl.s b/usr/src/libm/src/i386/common/scalbnl.s
new file mode 100644
index 0000000..a9d1451
--- /dev/null
+++ b/usr/src/libm/src/i386/common/scalbnl.s
@@ -0,0 +1,42 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)scalbnl.s 1.3 06/01/23 SMI"
+
+	.file	"scalbnl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(scalbnl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(scalbnl)			/* stack args: extended x at 4(%esp), 32-bit N at 16(%esp); result returned in st(0) */
+	fildl	16(%esp)		/ convert 32-bit integer N
+					/ to extended-double
+	fldt	4(%esp)			/ push x
+	fscale				/* st(0) = x scaled by 2 raised to the integer in st(1) */
+	fstp	%st(1)			/* copy result over N and pop, so only the result remains on the x87 stack */
+	ret
+	.align	4
+	SET_SIZE(scalbnl)
diff --git a/usr/src/libm/src/i386/common/sin.s b/usr/src/libm/src/i386/common/sin.s
new file mode 100644
index 0000000..999d595
--- /dev/null
+++ b/usr/src/libm/src/i386/common/sin.s
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)sin.s 1.10 06/01/23 SMI"
+
+	.file	"sin.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(sin,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+	ENTRY(sin)			/* dispatch on the value __reduction returns in %eax; result returned in st(0) */
+	PIC_SETUP(1)
+	call	PIC_F(__reduction)	/* NOTE(review): presumably leaves the reduced argument in st(0) and a quadrant index in %eax - confirm against __reduction */
+	PIC_WRAPUP
+	cmpl	$1,%eax
+	jl	.sin0			/* %eax below 1 (signed compare) */
+	je	.sin1			/* %eax equal to 1 */
+	cmpl	$2,%eax
+	je	.sin2			/* %eax equal to 2 */
+	fcos				/* any other value: return the negated cosine of st(0) */
+	fchs
+	ret
+.sin2:
+	fsin				/* return the negated sine of st(0) */
+	fchs
+	ret
+.sin1:
+	fcos				/* return the cosine of st(0) */
+	ret
+.sin0:
+	fsin				/* return the sine of st(0) */
+	ret
+	.align	4
+	SET_SIZE(sin)
diff --git a/usr/src/libm/src/i386/common/sincos.s b/usr/src/libm/src/i386/common/sincos.s
new file mode 100644
index 0000000..52a179c
--- /dev/null
+++ b/usr/src/libm/src/i386/common/sincos.s
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)sincos.s 1.10 06/01/23 SMI"
+
+	.file	"sincos.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(sincos,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+	ENTRY(sincos)			/* stores two doubles through the pointers at 12(%esp) and 16(%esp); in the n=0 case these receive sine and cosine respectively */
+	PIC_SETUP(1)
+	call	PIC_F(__reduction)	/* NOTE(review): presumably leaves the reduced argument in st(0) and n in %eax - confirm against __reduction */
+	PIC_WRAPUP
+	fsincos				/* st(0) = cosine, st(1) = sine of the previous st(0) */
+	cmpl	$1,%eax
+	jl	.sincos0
+	je	.sincos1
+	cmpl	$2,%eax
+	je	.sincos2
+	/ n=3
+	fchs				/* negate the cosine on top of the stack */
+	movl	12(%esp),%eax
+	fstpl	0(%eax)			/* negated cosine goes to the pointer at 12(%esp) */
+	movl	16(%esp),%eax
+	fstpl	0(%eax)			/* sine goes to the pointer at 16(%esp) */
+	fwait
+	ret
+.sincos2:
+	/ n=2
+	fchs
+	movl	16(%esp),%eax
+	fstpl	0(%eax)			/* negated cosine goes to the pointer at 16(%esp) */
+	fchs
+	movl	12(%esp),%eax
+	fstpl	0(%eax)			/* negated sine goes to the pointer at 12(%esp) */
+	fwait
+	ret
+.sincos1:
+	/ n=1
+	movl	12(%esp),%eax
+	fstpl	0(%eax)			/* cosine goes to the pointer at 12(%esp) */
+	fchs
+	movl	16(%esp),%eax
+	fstpl	0(%eax)			/* negated sine goes to the pointer at 16(%esp) */
+	fwait
+	ret
+.sincos0:
+	/ n=0
+	movl	16(%esp),%eax
+	fstpl	0(%eax)			/* cosine goes to the pointer at 16(%esp) */
+	movl	12(%esp),%eax
+	fstpl	0(%eax)			/* sine goes to the pointer at 12(%esp) */
+	fwait
+	ret
+	.align	4
+	SET_SIZE(sincos)
diff --git a/usr/src/libm/src/i386/common/sqrtl.s b/usr/src/libm/src/i386/common/sqrtl.s
new file mode 100644
index 0000000..0412f67
--- /dev/null
+++ b/usr/src/libm/src/i386/common/sqrtl.s
@@ -0,0 +1,39 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)sqrtl.s 1.9 06/01/23 SMI"
+
+	.file	"sqrtl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(sqrtl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(sqrtl)			/* extended-precision argument at 4(%esp); result returned in st(0) */
+	fldt	4(%esp)			/* push the 80-bit argument */
+	fsqrt				/* st(0) = square root of st(0) */
+	ret
+	.align	4
+	SET_SIZE(sqrtl)
diff --git a/usr/src/libm/src/i386/common/tan.s b/usr/src/libm/src/i386/common/tan.s
new file mode 100644
index 0000000..38ba163
--- /dev/null
+++ b/usr/src/libm/src/i386/common/tan.s
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)tan.s 1.11 06/01/23 SMI"
+
+	.file	"tan.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(tan,function)
+#include "libm_synonyms.h"
+#include "libm_protos.h"
+
+	ENTRY(tan)			/* dispatch on the low bit of the value __reduction returns in %eax; result returned in st(0) */
+	PIC_SETUP(1)
+	call	PIC_F(__reduction)	/* NOTE(review): presumably leaves the reduced argument in st(0) - confirm against __reduction */
+	PIC_WRAPUP
+	andl	$1,%eax			/* keep only the low bit */
+	cmpl	$0,%eax
+	je	.tan1			/* low bit clear: plain tangent */
+	fptan				/* st(1) = tangent, st(0) = 1.0 */
+	fdivp	%st,%st(1)		/* divide and pop; NOTE(review): intended to form 1.0 / tangent, which depends on the assembler operand order for fdivp - confirm */
+	fchs				/* negate the quotient */
+	ret
+.tan1:
+	fptan				/* st(1) = tangent, st(0) = 1.0 */
+	fstp	%st(0)			/* discard the 1.0, leaving the tangent in st(0) */
+	ret
+	.align	4
+	SET_SIZE(tan)
diff --git a/usr/src/libm/src/i386/common/trunc.s b/usr/src/libm/src/i386/common/trunc.s
new file mode 100644
index 0000000..8c947a7
--- /dev/null
+++ b/usr/src/libm/src/i386/common/trunc.s
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)trunc.s 1.3 06/01/23 SMI"
+
+	.file	"trunc.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(trunc,function)
+#include "libm_synonyms.h"
+
+	ENTRY(trunc)			/* 64-bit argument at 4(%esp); rounds it toward zero with frndint and returns it in st(0) */
+	movl	%esp,%eax		/* %eax = stack pointer at entry, the base for every access below */
+	subl	$8,%esp			/* reserve two scratch slots: -8(%eax) and -4(%eax) */
+	fstcw	-8(%eax)		/* save the control word in effect at entry */
+	fldl	4(%eax)			/* push the argument */
+	movw	-8(%eax),%cx
+	orw	$0x0c00,%cx		/* set both rounding-control bits (mask 0x0c00) */
+	movw	%cx,-4(%eax)
+	fldcw	-4(%eax)		/ set RD = to_zero
+	frndint				/* round st(0) to an integer using the rounding mode just loaded */
+	fstcw	-4(%eax)		/* re-read the control word */
+	movw	-4(%eax),%dx
+	andw	$0xf3ff,%dx		/* %dx = current control word with the rounding bits cleared */
+	movw	-8(%eax),%cx
+	andw	$0x0c00,%cx		/* %cx = rounding bits from the saved control word */
+	orw	%dx,%cx			/* merge: current control word with the original rounding bits */
+	movw	%cx,-8(%eax)
+	fldcw	-8(%eax)		/ restore RD
+	addl	$8,%esp			/* release the scratch slots */
+	ret
+	.align	4
+	SET_SIZE(trunc)
diff --git a/usr/src/libm/src/i386/common/truncl.s b/usr/src/libm/src/i386/common/truncl.s
new file mode 100644
index 0000000..e7f75b3
--- /dev/null
+++ b/usr/src/libm/src/i386/common/truncl.s
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.ident	"@(#)truncl.s 1.3 06/01/23 SMI"
+
+	.file	"truncl.s"
+
+#include "libm.h"
+LIBM_ANSI_PRAGMA_WEAK(truncl,function)
+#include "libm_synonyms.h"
+
+	ENTRY(truncl)			/* extended-precision argument at 4(%esp); rounds it toward zero with frndint and returns it in st(0) */
+	movl	%esp,%eax		/* %eax = stack pointer at entry, the base for every access below */
+	subl	$8,%esp			/* reserve two scratch slots: -8(%eax) and -4(%eax) */
+	fstcw	-8(%eax)		/* save the control word in effect at entry */
+	fldt	4(%eax)			/* push the 80-bit argument */
+	movw	-8(%eax),%cx
+	orw	$0x0c00,%cx		/* set both rounding-control bits (mask 0x0c00) */
+	movw	%cx,-4(%eax)
+	fldcw	-4(%eax)		/ set RD = to_zero
+	frndint				/* round st(0) to an integer using the rounding mode just loaded */
+	fstcw	-4(%eax)		/* re-read the control word */
+	movw	-4(%eax),%dx
+	andw	$0xf3ff,%dx		/* %dx = current control word with the rounding bits cleared */
+	movw	-8(%eax),%cx
+	andw	$0x0c00,%cx		/* %cx = rounding bits from the saved control word */
+	orw	%dx,%cx			/* merge: current control word with the original rounding bits */
+	movw	%cx,-8(%eax)
+	fldcw	-8(%eax)		/ restore RD
+	addl	$8,%esp			/* release the scratch slots */
+	ret
+	.align	4
+	SET_SIZE(truncl)
diff --git a/usr/src/libm/src/m9x/__fenv_amd64.il b/usr/src/libm/src/m9x/__fenv_amd64.il
new file mode 100644
index 0000000..bae2414
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_amd64.il
@@ -0,0 +1,349 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, include this CDDL HEADER in each
+/ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+/ Use is subject to license terms.
+/
+/ @(#)__fenv_amd64.il 1.5 06/01/31 SMI
+/
+	.inline	__fenv_getcwsw,1
+	fstsw	(%rdi)			/ x87 status word -> bytes 0-1 of the argument
+	fstcw	2(%rdi)			/ x87 control word -> bytes 2-3 of the argument
+	.end
+
+	.inline	__fenv_setcwsw,1
+	movw	(%rdi),%dx		/ dx = requested status word
+	movw	2(%rdi),%cx		/ cx = requested control word
+	subq	$32,%rsp
+	fstenv	(%rsp)			/ dump the current x87 environment to scratch stack space
+	movw	%cx,(%rsp)		/ patch the control word field
+	movw	%dx,4(%rsp)		/ patch the status word field
+	fldenv	(%rsp)			/ reload the patched environment
+	fwait
+	addq	$32,%rsp
+	.end
+
+	.inline	__fenv_getmxcsr,1
+	stmxcsr	(%rdi)
+	.end
+
+	.inline	__fenv_setmxcsr,1
+	ldmxcsr	(%rdi)
+	.end
+
+	.inline	f2xm1,1
+	fldt	(%rsp)
+	f2xm1
+	.end
+
+	.inline	fyl2x,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fyl2x
+	.end
+
+	.inline	fptan,1
+	fldt	(%rsp)
+	fptan
+	fstpt	(%rsp)			/ pop the value fptan pushed on top, storing it over the argument slot
+	.end
+
+	.inline	fpatan,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fpatan
+	.end
+
+	.inline	fxtract,1
+	fldt	(%rsp)
+	fxtract
+	.end
+
+	.inline	fprem1,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fprem1
+	fstp	%st(1)
+	.end
+
+	.inline	fprem,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fprem
+	fstp	%st(1)
+	.end
+
+	.inline	fyl2xp1,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fyl2xp1
+	.end
+
+	.inline	fsqrt,1
+	fldt	(%rsp)
+	fsqrt
+	.end
+
+	.inline	fsincos,1
+	fldt	(%rsp)
+	fsincos
+	.end
+
+	.inline	frndint,1
+	fldt	(%rsp)
+	frndint
+	.end
+
+	.inline	fscale,2
+	fldt	(%rsp)
+	fldt	16(%rsp)
+	fscale
+	fstp	%st(1)
+	.end
+
+	.inline	fsin,1
+	fldt	(%rsp)
+	fsin
+	.end
+
+	.inline	fcos,1
+	fldt	(%rsp)
+	fcos
+	.end
+
+	.inline	sse_cmpeqss,3
+	movss	(%rdi),%xmm0
+	cmpeqss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_cmpltss,3
+	movss	(%rdi),%xmm0
+	cmpltss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_cmpless,3
+	movss	(%rdi),%xmm0
+	cmpless	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_cmpunordss,3
+	movss	(%rdi),%xmm0
+	cmpunordss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_minss,3
+	movss	(%rdi),%xmm0
+	minss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_maxss,3
+	movss	(%rdi),%xmm0
+	maxss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_addss,3
+	movss	(%rdi),%xmm0
+	addss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_subss,3
+	movss	(%rdi),%xmm0
+	subss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_mulss,3
+	movss	(%rdi),%xmm0
+	mulss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_divss,3
+	movss	(%rdi),%xmm0
+	divss	(%rsi),%xmm0
+	movss	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_sqrtss,2
+	sqrtss	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_ucomiss,2
+	movss	(%rdi),%xmm0
+	ucomiss	(%rsi),%xmm0
+	.end
+
+	.inline	sse_comiss,2
+	movss	(%rdi),%xmm0
+	comiss	(%rsi),%xmm0
+	.end
+
+	.inline	sse_cvtss2sd,2
+	cvtss2sd	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_cvtsi2ss,2
+	cvtsi2ss	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_cvttss2si,2
+	cvttss2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cvtss2si,2
+	cvtss2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cvtsi2ssq,2
+	cvtsi2ssq	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_cvttss2siq,2
+	cvttss2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
+
+	.inline	sse_cvtss2siq,2
+	cvtss2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
+
+	.inline	sse_cmpeqsd,3
+	movsd	(%rdi),%xmm0
+	cmpeqsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_cmpltsd,3
+	movsd	(%rdi),%xmm0
+	cmpltsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_cmplesd,3
+	movsd	(%rdi),%xmm0
+	cmplesd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_cmpunordsd,3
+	movsd	(%rdi),%xmm0
+	cmpunordsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_minsd,3
+	movsd	(%rdi),%xmm0
+	minsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_maxsd,3
+	movsd	(%rdi),%xmm0
+	maxsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_addsd,3
+	movsd	(%rdi),%xmm0
+	addsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_subsd,3
+	movsd	(%rdi),%xmm0
+	subsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_mulsd,3
+	movsd	(%rdi),%xmm0
+	mulsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_divsd,3
+	movsd	(%rdi),%xmm0
+	divsd	(%rsi),%xmm0
+	movsd	%xmm0,(%rdx)
+	.end
+
+	.inline	sse_sqrtsd,2
+	sqrtsd	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_ucomisd,2
+	movsd	(%rdi),%xmm0
+	ucomisd	(%rsi),%xmm0
+	.end
+
+	.inline	sse_comisd,2
+	movsd	(%rdi),%xmm0
+	comisd	(%rsi),%xmm0
+	.end
+
+	.inline	sse_cvtsd2ss,2
+	cvtsd2ss	(%rdi),%xmm0
+	movss	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_cvtsi2sd,2
+	cvtsi2sd	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_cvttsd2si,2
+	cvttsd2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cvtsd2si,2
+	cvtsd2si	(%rdi),%ecx
+	movl	%ecx,(%rsi)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cvtsi2sdq,2
+	cvtsi2sdq	(%rdi),%xmm0
+	movsd	%xmm0,(%rsi)
+	.end
+
+	.inline	sse_cvttsd2siq,2
+	cvttsd2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
+
+	.inline	sse_cvtsd2siq,2
+	cvtsd2siq	(%rdi),%rcx
+	movq	%rcx,(%rsi)
+	.end
diff --git a/usr/src/libm/src/m9x/__fenv_i386.il b/usr/src/libm/src/m9x/__fenv_i386.il
new file mode 100644
index 0000000..253d4db
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_i386.il
@@ -0,0 +1,411 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, include this CDDL HEADER in each
+/ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+/ Use is subject to license terms.
+/
+/ @(#)__fenv_i386.il 1.8 06/01/31 SMI
+/
+	.inline	__fenv_getcwsw,1
+	movl	(%esp),%eax		/ eax = pointer argument
+	fstsw	(%eax)			/ x87 status word -> bytes 0-1 of the argument
+	fstcw	2(%eax)			/ x87 control word -> bytes 2-3 of the argument
+	.end
+
+	.inline	__fenv_setcwsw,1
+	movl	(%esp),%eax
+	movw	(%eax),%dx		/ dx = requested status word
+	movw	2(%eax),%cx		/ cx = requested control word
+	subl	$28,%esp
+	fstenv	(%esp)			/ dump the current x87 environment to scratch stack space
+	movw	%cx,(%esp)		/ patch the control word field
+	movw	%dx,4(%esp)		/ patch the status word field
+	fldenv	(%esp)			/ reload the patched environment
+	fwait
+	addl	$28,%esp
+	.end
+
+	.inline	__fenv_getmxcsr,1
+	movl	(%esp),%eax
+	stmxcsr	(%eax)
+	.end
+
+	.inline	__fenv_setmxcsr,1
+	movl	(%esp),%eax
+	ldmxcsr	(%eax)
+	.end
+
+	.inline	f2xm1,1
+	fldt	(%esp)
+	f2xm1
+	.end
+
+	.inline	fyl2x,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fyl2x
+	.end
+
+	.inline	fptan,1
+	fldt	(%esp)
+	fptan
+	fstpt	(%esp)			/ pop the value fptan pushed on top, storing it over the argument slot
+	.end
+
+	.inline	fpatan,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fpatan
+	.end
+
+	.inline	fxtract,1
+	fldt	(%esp)
+	fxtract
+	.end
+
+	.inline	fprem1,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fprem1
+	fstp	%st(1)
+	.end
+
+	.inline	fprem,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fprem
+	fstp	%st(1)
+	.end
+
+	.inline	fyl2xp1,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fyl2xp1
+	.end
+
+	.inline	fsqrt,1
+	fldt	(%esp)
+	fsqrt
+	.end
+
+	.inline	fsincos,1
+	fldt	(%esp)
+	fsincos
+	.end
+
+	.inline	frndint,1
+	fldt	(%esp)
+	frndint
+	.end
+
+	.inline	fscale,2
+	fldt	(%esp)
+	fldt	12(%esp)
+	fscale
+	fstp	%st(1)
+	.end
+
+	.inline	fsin,1
+	fldt	(%esp)
+	fsin
+	.end
+
+	.inline	fcos,1
+	fldt	(%esp)
+	fcos
+	.end
+
+	.inline	sse_cmpeqss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpeqss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_cmpltss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpltss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_cmpless,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpless	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_cmpunordss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	cmpunordss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_minss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	minss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_maxss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	maxss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_addss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	addss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_subss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	subss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_mulss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	mulss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_divss,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movss	(%eax),%xmm0
+	divss	(%edx),%xmm0
+	movss	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_sqrtss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	sqrtss	(%eax),%xmm0
+	movss	%xmm0,(%edx)
+	.end
+
+	.inline	sse_ucomiss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movss	(%eax),%xmm0
+	ucomiss	(%edx),%xmm0
+	.end
+
+	.inline	sse_comiss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movss	(%eax),%xmm0
+	comiss	(%edx),%xmm0
+	.end
+
+	.inline	sse_cvtss2sd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtss2sd	(%eax),%xmm0
+	movsd	%xmm0,(%edx)
+	.end
+
+	.inline	sse_cvtsi2ss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsi2ss	(%eax),%xmm0
+	movss	%xmm0,(%edx)
+	.end
+
+	.inline	sse_cvttss2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvttss2si	(%eax),%ecx
+	movl	%ecx,(%edx)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cvtss2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtss2si	(%eax),%ecx
+	movl	%ecx,(%edx)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cmpeqsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmpeqsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_cmpltsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmpltsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_cmplesd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmplesd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_cmpunordsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	cmpunordsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_minsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	minsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_maxsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	maxsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_addsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	addsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_subsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	subsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_mulsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	mulsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_divsd,3
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movl	8(%esp),%ecx
+	movsd	(%eax),%xmm0
+	divsd	(%edx),%xmm0
+	movsd	%xmm0,(%ecx)
+	.end
+
+	.inline	sse_sqrtsd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	sqrtsd	(%eax),%xmm0
+	movsd	%xmm0,(%edx)
+	.end
+
+	.inline	sse_ucomisd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movsd	(%eax),%xmm0
+	ucomisd	(%edx),%xmm0
+	.end
+
+	.inline	sse_comisd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	movsd	(%eax),%xmm0
+	comisd	(%edx),%xmm0
+	.end
+
+	.inline	sse_cvtsd2ss,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsd2ss	(%eax),%xmm0
+	movss	%xmm0,(%edx)
+	.end
+
+	.inline	sse_cvtsi2sd,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsi2sd	(%eax),%xmm0
+	movsd	%xmm0,(%edx)
+	.end
+
+	.inline	sse_cvttsd2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvttsd2si	(%eax),%ecx
+	movl	%ecx,(%edx)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
+
+	.inline	sse_cvtsd2si,2
+	movl	(%esp),%eax
+	movl	4(%esp),%edx
+	cvtsd2si	(%eax),%ecx
+	movl	%ecx,(%edx)		/ 32-bit store: write the whole converted integer, not just its low half
+	.end
diff --git a/usr/src/libm/src/m9x/__fenv_sparc.il b/usr/src/libm/src/m9x/__fenv_sparc.il
new file mode 100644
index 0000000..d942a33
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_sparc.il
@@ -0,0 +1,40 @@
+!
+! CDDL HEADER START
+!
+! The contents of this file are subject to the terms of the
+! Common Development and Distribution License (the "License").
+! You may not use this file except in compliance with the License.
+!
+! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+! or http://www.opensolaris.org/os/licensing.
+! See the License for the specific language governing permissions
+! and limitations under the License.
+!
+! When distributing Covered Code, include this CDDL HEADER in each
+! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+! If applicable, add the following below this CDDL HEADER, with the
+! fields enclosed by brackets "[]" replaced with your own identifying
+! information: Portions Copyright [yyyy] [name of copyright owner]
+!
+! CDDL HEADER END
+!
+! Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+! Use is subject to license terms.
+!
+! @(#)__fenv_sparc.il 1.5 06/01/31 SMI
+!
+	.inline	__fenv_getfsr,1
+	st	%fsr,[%o0]
+	.end
+
+	.inline	__fenv_setfsr,1
+	ld	[%o0],%fsr
+	.end
+
+	.inline	__fenv_getfsrx,1
+	stx	%fsr,[%o0]
+	.end
+
+	.inline	__fenv_setfsrx,1
+	ldx	[%o0],%fsr
+	.end
diff --git a/usr/src/libm/src/m9x/__fex_hdlr.c b/usr/src/libm/src/m9x/__fex_hdlr.c
new file mode 100644
index 0000000..4b87e3f
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_hdlr.c
@@ -0,0 +1,850 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__fex_hdlr.c 1.12 06/01/31 SMI"
+
+#include "fenv_synonyms.h"
+#undef lint
+#include
+#include
+#if defined(__i386) && !defined(__amd64)
+/* for now, pick up local copy of Solaris 10 sys/regset.h; we can get rid
+   of this once we no longer need to build on Solaris 8 */
+#include "regset.h"
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "fex_handler.h"
+
+#if defined(__sparc) && !defined(__sparcv9)
+#include
+#endif
+
+/* 2.x signal.h doesn't declare sigemptyset or sigismember
+   if they're #defined (see sys/signal.h) */
+extern int sigemptyset(sigset_t *);
+extern int sigismember(const sigset_t *, int);
+
+/* external globals */
+void (*__mt_fex_sync)() = NULL; /* for synchronization with libmtsk */
+#pragma weak __mt_fex_sync
+
+#ifdef LIBM_MT_FEX_SYNC
+void (*__libm_mt_fex_sync)() = NULL; /* new, improved version of above */
+#pragma weak __libm_mt_fex_sync
+#endif
+
+/* private variables */
+static fex_handler_t main_handlers;
+static int handlers_initialized = 0;
+static thread_key_t handlers_key;
+static mutex_t handlers_key_lock = DEFAULTMUTEX;
+
+static struct sigaction oact = { 0, SIG_DFL };	/* saved SIGFPE action that the handler reverts to */
+static mutex_t hdlr_lock = DEFAULTMUTEX;	/* held while oact is copied */
+static int hdlr_installed = 0;
+
+/* private const data */
+static const int te_bit[FEX_NUM_EXC] = {	/* trap-enable bit for each exception index */
+	1 << fp_trap_inexact,
+	1 << fp_trap_division,
+	1 << fp_trap_underflow,
+	1 << fp_trap_overflow,
+	1 << fp_trap_invalid,	/* this and all remaining entries share the invalid trap bit */
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid
+};
+
+/*
+* Return the traps to be enabled given the current handling modes
+* and flags (thr_handlers holds one mode per exception index; fsr
+* supplies the exception flags that are tested when logging is on)
+*/
+static int
+__fex_te_needed(struct fex_handler_data *thr_handlers, unsigned long fsr)
+{
+	int		i, ex, te;
+
+	/* set traps for handling modes */
+	te = 0;
+	for (i = 0; i < FEX_NUM_EXC; i++)
+		if (thr_handlers[i].__mode != FEX_NONSTOP)	/* every mode except nonstop enables its trap */
+			te |= te_bit[i];
+
+	/* add traps for retrospective diagnostics */
+	if (fex_get_log()) {	/* only when fex_get_log() returns non-zero */
+		ex = (int)__fenv_get_ex(fsr);
+		if (!(ex & FE_INEXACT))	/* flag not set in fsr: enable the matching trap */
+			te |= (1 << fp_trap_inexact);
+		if (!(ex & FE_UNDERFLOW))
+			te |= (1 << fp_trap_underflow);
+		if (!(ex & FE_OVERFLOW))
+			te |= (1 << fp_trap_overflow);
+		if (!(ex & FE_DIVBYZERO))
+			te |= (1 << fp_trap_division);
+		if (!(ex & FE_INVALID))
+			te |= (1 << fp_trap_invalid);
+	}
+
+	return te;	/* bit mask built from 1 << fp_trap_* values */
+}
+
+/*
+* The following function synchronizes with libmtsk (SPARC only, for now)
+*/
+static void
+__fex_sync_with_libmtsk(int begin, int master)
+{
+	static fenv_t master_env;	/* environment captured from the master */
+	static int env_initialized = 0;	/* set once master_env has been captured */
+	static mutex_t env_lock = DEFAULTMUTEX;	/* held around the accesses to the two statics above */
+
+	if (begin) {
+		mutex_lock(&env_lock);
+		if (master) {	/* master: capture its environment */
+			(void) fegetenv(&master_env);
+			env_initialized = 1;
+		}
+		else if (env_initialized)	/* non-master: install the captured environment if there is one */
+			(void) fesetenv(&master_env);
+		mutex_unlock(&env_lock);
+	}
+	else if (master && fex_get_log())	/* begin == 0: only the master acts, and only when logging */
+		__fex_update_te();
+}
+
+#ifdef LIBM_MT_FEX_SYNC
+/*
+* The following function may be used for synchronization with any
+* internal project that manages multiple threads
+*/
+enum __libm_mt_fex_sync_actions {
+	__libm_mt_fex_start_master = 0,
+	__libm_mt_fex_start_slave,
+	__libm_mt_fex_finish_master,
+	__libm_mt_fex_finish_slave
+};
+
+struct __libm_mt_fex_sync_data {
+	fenv_t	master_env;	/* environment captured by the start_master action */
+	int		initialized;	/* set to 1 once master_env has been captured */
+	mutex_t	lock;	/* held around accesses to the two fields above */
+};
+
+static void
+__fex_sync_with_threads(enum __libm_mt_fex_sync_actions action,
+	struct __libm_mt_fex_sync_data *thr_env)
+{
+	switch (action) {
+	case __libm_mt_fex_start_master:	/* capture the calling thread environment into thr_env */
+		mutex_lock(&thr_env->lock);
+		(void) fegetenv(&thr_env->master_env);
+		thr_env->initialized = 1;
+		mutex_unlock(&thr_env->lock);
+		break;
+
+	case __libm_mt_fex_start_slave:	/* install the captured environment, if any */
+		mutex_lock(&thr_env->lock);
+		if (thr_env->initialized)
+			(void) fesetenv(&thr_env->master_env);
+		mutex_unlock(&thr_env->lock);
+		break;
+
+	case __libm_mt_fex_finish_master:
+#ifdef __i386
+		__fex_update_te();	/* unconditional when __i386 is defined */
+#else
+		if (fex_get_log())	/* otherwise only when logging */
+			__fex_update_te();
+#endif
+		break;
+
+	case __libm_mt_fex_finish_slave:	/* no action unless __i386 is defined */
+#ifdef __i386
+		/* clear traps, making all accrued flags visible in status word */
+		{
+			unsigned long fsr;
+			__fenv_getfsr(&fsr);
+			__fenv_set_te(fsr, 0);
+			__fenv_setfsr(&fsr);
+		}
+#endif
+		break;
+	}
+}
+#endif
+
+#if defined(__sparc)
+
+/*
+* Code for setting or clearing interval mode on US-III and above.
+* This is embedded as data so we don't have to mark the library
+* as a v8plusb/v9b object. (I could have just used one entry and
+* modified the second word to set the bits I want, but that would
+* have required another mutex.)
+*/ +static const unsigned int siam[][2] = { + { 0x81c3e008, 0x81b01020 }, /* retl, siam 0 */ + { 0x81c3e008, 0x81b01024 }, /* retl, siam 4 */ + { 0x81c3e008, 0x81b01025 }, /* retl, siam 5 */ + { 0x81c3e008, 0x81b01026 }, /* retl, siam 6 */ + { 0x81c3e008, 0x81b01027 } /* retl, siam 7 */ +}; + +/* +* If a handling mode is in effect, apply it; otherwise invoke the +* saved handler +*/ +static void +__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap) +{ + struct fex_handler_data *thr_handlers; + struct sigaction act; + void (*handler)(), (*siamp)(); + int mode, i; + enum fex_exception e; + fex_info_t info; + unsigned long fsr, tmpfsr, addr; + unsigned int gsr; + + /* determine which exception occurred */ + switch (sip->si_code) { + case FPE_FLTDIV: + e = fex_division; + break; + case FPE_FLTOVF: + e = fex_overflow; + break; + case FPE_FLTUND: + e = fex_underflow; + break; + case FPE_FLTRES: + e = fex_inexact; + break; + case FPE_FLTINV: + if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0) + goto not_ieee; + break; + default: + /* not an IEEE exception */ + goto not_ieee; + } + + /* get the handling mode */ + mode = FEX_NOHANDLER; + handler = oact.sa_handler; /* for log; just looking, no need to lock */ + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + /* make an entry in the log of retro. diag. 
if need be */ + i = ((int)uap->uc_mcontext.fpregs.fpu_fsr >> 5) & 0x1f; + __fex_mklog(uap, (char *)sip->si_addr, i, e, mode, (void *)handler); + + /* handle the exception based on the mode */ + if (mode == FEX_NOHANDLER) + goto not_ieee; + else if (mode == FEX_ABORT) + abort(); + else if (mode == FEX_SIGNAL) { + handler(sig, sip, uap); + return; + } + + /* custom or nonstop mode; disable traps and clear flags */ + __fenv_getfsr(&fsr); + __fenv_set_te(fsr, 0); + __fenv_set_ex(fsr, 0); + + /* if interval mode was set, clear it, then substitute the + interval rounding direction and clear ns mode in the fsr */ +#ifdef __sparcv9 + gsr = uap->uc_mcontext.asrs[3]; +#else + gsr = 0; + if (uap->uc_mcontext.xrs.xrs_id == XRS_ID) + gsr = (*(unsigned long long*)((prxregset_t*)uap->uc_mcontext. + xrs.xrs_ptr)->pr_un.pr_v8p.pr_filler); +#endif + gsr = (gsr >> 25) & 7; + if (gsr & 4) { + siamp = (void (*)()) siam[0]; + siamp(); + tmpfsr = fsr; + fsr = (fsr & ~0xc0400000ul) | ((gsr & 3) << 30); + } + __fenv_setfsr(&fsr); + + /* decode the operation */ + __fex_get_op(sip, uap, &info); + + /* if a custom mode handler is installed, invoke it */ + if (mode == FEX_CUSTOM) { + /* if we got here from feraiseexcept, pass dummy info */ + addr = (unsigned long)sip->si_addr; + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept ) { + info.op = fex_other; + info.op1.type = info.op2.type = info.res.type = + fex_nodata; + } + + /* restore interval mode if it was set, and put the original + rounding direction and ns mode back in the fsr */ + if (gsr & 4) { + __fenv_setfsr(&tmpfsr); + siamp = (void (*)()) siam[1 + (gsr & 3)]; + siamp(); + } + + handler(1 << (int)e, &info); + + /* restore modes in case the user's handler changed them */ + if (gsr & 4) { + siamp = (void (*)()) siam[0]; + siamp(); + } + __fenv_setfsr(&fsr); + } + + /* stuff the result */ + __fex_st_result(sip, uap, &info); + + /* "or" in any exception flags and update traps */ + fsr = 
uap->uc_mcontext.fpregs.fpu_fsr; + fsr |= ((info.flags & 0x1f) << 5); + i = __fex_te_needed(thr_handlers, fsr); + __fenv_set_te(fsr, i); + uap->uc_mcontext.fpregs.fpu_fsr = fsr; + return; + +not_ieee: + /* revert to the saved handler (if any) */ + mutex_lock(&hdlr_lock); + act = oact; + mutex_unlock(&hdlr_lock); + switch ((unsigned long)act.sa_handler) { + case (unsigned long)SIG_DFL: + /* simulate trap with no handler installed */ + sigaction(SIGFPE, &act, NULL); + kill(getpid(), SIGFPE); + break; +#if !defined(__lint) + case (unsigned long)SIG_IGN: + break; +#endif + default: + act.sa_handler(sig, sip, uap); + } +} + +#elif defined(__i386) + +#if defined(__amd64) +#define test_sse_hw 1 +#else +extern int _sse_hw; +#define test_sse_hw &_sse_hw && _sse_hw +#endif + +#if !defined(REG_PC) +#define REG_PC EIP +#endif + +/* +* If a handling mode is in effect, apply it; otherwise invoke the +* saved handler +*/ +static void +__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap) +{ + struct fex_handler_data *thr_handlers; + struct sigaction act; + void (*handler)(), (*simd_handler[4])(); + int mode, simd_mode[4], i, len, accrued, *ap; + unsigned int cwsw, oldcwsw, mxcsr, oldmxcsr; + enum fex_exception e, simd_e[4]; + fex_info_t info, simd_info[4]; + unsigned long addr; + siginfo_t osip = *sip; + sseinst_t inst; + + /* check for an exception caused by an SSE instruction */ + if (!(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & 0x80)) { + len = __fex_parse_sse(uap, &inst); + if (len == 0) + goto not_ieee; + + /* disable all traps and clear flags */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & ~0x3f) | 0x003f0000; + __fenv_setcwsw(&cwsw); + __fenv_getmxcsr(&oldmxcsr); + mxcsr = (oldmxcsr & ~0x3f) | 0x1f80; + __fenv_setmxcsr(&mxcsr); + + if ((int)inst.op & SIMD) { + __fex_get_simd_op(uap, &inst, simd_e, simd_info); + + thr_handlers = __fex_get_thr_handlers(); + addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC]; + accrued = uap->uc_mcontext.fpregs.fp_reg_set. 
+ fpchip_state.mxcsr; + + e = (enum fex_exception)-1; + mode = FEX_NONSTOP; + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + e = simd_e[i]; + simd_mode[i] = FEX_NOHANDLER; + simd_handler[i] = oact.sa_handler; + if (thr_handlers && + thr_handlers[(int)e].__mode != + FEX_NOHANDLER) { + simd_mode[i] = + thr_handlers[(int)e].__mode; + simd_handler[i] = + thr_handlers[(int)e].__handler; + } + accrued &= ~te_bit[(int)e]; + switch (simd_mode[i]) { + case FEX_ABORT: + mode = FEX_ABORT; + break; + case FEX_SIGNAL: + if (mode != FEX_ABORT) + mode = FEX_SIGNAL; + handler = simd_handler[i]; + break; + case FEX_NOHANDLER: + if (mode != FEX_ABORT && mode != + FEX_SIGNAL) + mode = FEX_NOHANDLER; + break; + } + } + if (e == (enum fex_exception)-1) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } + accrued |= uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.status; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + __fex_mklog(uap, (char *)addr, accrued, + simd_e[i], simd_mode[i], + (void *)simd_handler[i]); + } + + if (mode == FEX_NOHANDLER) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } else if (mode == FEX_ABORT) { + abort(); + } else if (mode == FEX_SIGNAL) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + handler(sig, &osip, uap); + return; + } + + *ap = 0; + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + if (simd_mode[i] == FEX_CUSTOM) { /* call this lane's own custom handler; `handler` is only assigned for FEX_SIGNAL lanes and is unset on this path */ + simd_handler[i](1 << (int)simd_e[i], + &simd_info[i]); + __fenv_setcwsw(&cwsw); + __fenv_setmxcsr(&mxcsr); + } + } + + __fex_st_simd_result(uap, &inst, simd_e, simd_info); + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + accrued |= simd_info[i].flags; + } + + if ((int)inst.op & INTREG) { + /* set MMX mode */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set. 
+ fpchip_state.sw &= ~0x3800; + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.fctw = 0; +#else + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[1] &= ~0x3800; + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[2] = 0; +#endif + } + } else { + e = __fex_get_sse_op(uap, &inst, &info); + if ((int)e < 0) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } + + mode = FEX_NOHANDLER; + handler = oact.sa_handler; + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != + FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC]; + accrued = uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.mxcsr & ~te_bit[(int)e]; + accrued |= uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.status; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + __fex_mklog(uap, (char *)addr, accrued, e, mode, + (void *)handler); + + if (mode == FEX_NOHANDLER) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } else if (mode == FEX_ABORT) { + abort(); + } else if (mode == FEX_SIGNAL) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + handler(sig, &osip, uap); + return; + } else if (mode == FEX_CUSTOM) { + *ap = 0; + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept ) { + info.op = fex_other; + info.op1.type = info.op2.type = + info.res.type = fex_nodata; + } + handler(1 << (int)e, &info); + __fenv_setcwsw(&cwsw); + __fenv_setmxcsr(&mxcsr); + } + + __fex_st_sse_result(uap, &inst, e, &info); + accrued |= info.flags; + +#ifdef __amd64 + /* + * In 64-bit mode, the 32-bit convert-to-integer + * instructions zero the upper 32 bits of the + * destination. (We do this here and not in + * __fex_st_sse_result because __fex_st_sse_result + * can be called from __fex_st_simd_result, too.) 
+ */ + if (inst.op == cvtss2si || inst.op == cvttss2si || + inst.op == cvtsd2si || inst.op == cvttsd2si) + inst.op1->i[1] = 0; +#endif + } + + /* advance the pc past the SSE instruction */ + uap->uc_mcontext.gregs[REG_PC] += len; + goto update_state; + } + + /* determine which exception occurred */ + __fex_get_x86_exc(sip, uap); + switch (sip->si_code) { + case FPE_FLTDIV: + e = fex_division; + break; + case FPE_FLTOVF: + e = fex_overflow; + break; + case FPE_FLTUND: + e = fex_underflow; + break; + case FPE_FLTRES: + e = fex_inexact; + break; + case FPE_FLTINV: + if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0) + goto not_ieee; + break; + default: + /* not an IEEE exception */ + goto not_ieee; + } + + /* get the handling mode */ + mode = FEX_NOHANDLER; + handler = oact.sa_handler; /* for log; just looking, no need to lock */ + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + /* make an entry in the log of retro. diag. if need be */ +#if defined(__amd64) + addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.rip; +#else + addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[3]; +#endif + accrued = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & + ~te_bit[(int)e]; + if (test_sse_hw) + accrued |= uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state. 
+ mxcsr; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + __fex_mklog(uap, (char *)addr, accrued, e, mode, (void *)handler); + + /* handle the exception based on the mode */ + if (mode == FEX_NOHANDLER) + goto not_ieee; + else if (mode == FEX_ABORT) + abort(); + else if (mode == FEX_SIGNAL) { + handler(sig, &osip, uap); + return; + } + + /* disable all traps and clear flags */ + __fenv_getcwsw(&cwsw); + cwsw = (cwsw & ~0x3f) | 0x003f0000; + __fenv_setcwsw(&cwsw); + if (test_sse_hw) { + __fenv_getmxcsr(&mxcsr); + mxcsr = (mxcsr & ~0x3f) | 0x1f80; + __fenv_setmxcsr(&mxcsr); + } + *ap = 0; + + /* decode the operation */ + __fex_get_op(sip, uap, &info); + + /* if a custom mode handler is installed, invoke it */ + if (mode == FEX_CUSTOM) { + /* if we got here from feraiseexcept, pass dummy info */ + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept ) { + info.op = fex_other; + info.op1.type = info.op2.type = info.res.type = + fex_nodata; + } + + handler(1 << (int)e, &info); + + /* restore modes in case the user's handler changed them */ + __fenv_setcwsw(&cwsw); + if (test_sse_hw) + __fenv_setmxcsr(&mxcsr); + } + + /* stuff the result */ + __fex_st_result(sip, uap, &info); + accrued |= info.flags; + +update_state: + accrued &= 0x3d; + i = __fex_te_needed(thr_handlers, accrued); + *ap = accrued & i; +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw |= 0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw &= ~i; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] |= + (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] |= 0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] &= ~i; +#endif + if (test_sse_hw) { + 
uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr |= + 0x1e80 | (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= + ~(i << 7); + } + return; + +not_ieee: + /* revert to the saved handler (if any) */ + mutex_lock(&hdlr_lock); + act = oact; + mutex_unlock(&hdlr_lock); + switch ((unsigned long)act.sa_handler) { + case (unsigned long)SIG_DFL: + /* simulate trap with no handler installed */ + sigaction(SIGFPE, &act, NULL); + kill(getpid(), SIGFPE); + break; +#if !defined(__lint) + case (unsigned long)SIG_IGN: + break; +#endif + default: + act.sa_handler(sig, &osip, uap); + } +} + +#else +#error Unknown architecture +#endif + +/* +* Return a pointer to the thread-specific handler data, and +* initialize it if necessary +*/ +struct fex_handler_data * +__fex_get_thr_handlers() +{ + struct fex_handler_data *ptr; + unsigned long fsr; + int i, te; + + if (thr_main()) { + if (!handlers_initialized) { + /* initialize to FEX_NOHANDLER if trap is enabled, + FEX_NONSTOP if trap is disabled */ + __fenv_getfsr(&fsr); + te = (int)__fenv_get_te(fsr); + for (i = 0; i < FEX_NUM_EXC; i++) + main_handlers[i].__mode = + ((te & te_bit[i])? FEX_NOHANDLER : FEX_NONSTOP); + handlers_initialized = 1; + } + return main_handlers; + } + else { + ptr = NULL; + mutex_lock(&handlers_key_lock); + if (thr_getspecific(handlers_key, (void **)&ptr) != 0 && + thr_keycreate(&handlers_key, free) != 0) { + mutex_unlock(&handlers_key_lock); + return NULL; + } + mutex_unlock(&handlers_key_lock); + if (!ptr) { + if ((ptr = (struct fex_handler_data *) + malloc(sizeof(fex_handler_t))) == NULL) { + return NULL; + } + if (thr_setspecific(handlers_key, (void *)ptr) != 0) { + (void)free(ptr); + return NULL; + } + /* initialize to FEX_NOHANDLER if trap is enabled, + FEX_NONSTOP if trap is disabled */ + __fenv_getfsr(&fsr); + te = (int)__fenv_get_te(fsr); + for (i = 0; i < FEX_NUM_EXC; i++) + ptr[i].__mode = ((te & te_bit[i])? 
FEX_NOHANDLER : FEX_NONSTOP); + } + return ptr; + } +} + +/* +* Update the trap enable bits according to the selected modes +*/ +void +__fex_update_te() +{ + struct fex_handler_data *thr_handlers; + struct sigaction act, tmpact; + sigset_t blocked; + unsigned long fsr; + int te; + + /* determine which traps are needed */ + thr_handlers = __fex_get_thr_handlers(); + __fenv_getfsr(&fsr); + te = __fex_te_needed(thr_handlers, fsr); + + /* install __fex_hdlr as necessary */ + if (!hdlr_installed && te) { + act.sa_handler = __fex_hdlr; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGFPE, &act, &tmpact); + if (tmpact.sa_handler != __fex_hdlr) + { + mutex_lock(&hdlr_lock); + oact = tmpact; + mutex_unlock(&hdlr_lock); + } + hdlr_installed = 1; + } + + /* set the new trap enable bits (only if SIGFPE is not blocked) */ + if (sigprocmask(0, NULL, &blocked) == 0 && + !sigismember(&blocked, SIGFPE)) { + __fenv_set_te(fsr, te); + __fenv_setfsr(&fsr); + } + + /* synchronize with libmtsk */ + __mt_fex_sync = __fex_sync_with_libmtsk; + +#ifdef LIBM_MT_FEX_SYNC + /* synchronize with other projects */ + __libm_mt_fex_sync = __fex_sync_with_threads; +#endif +} diff --git a/usr/src/libm/src/m9x/__fex_i386.c b/usr/src/libm/src/m9x/__fex_i386.c new file mode 100644 index 0000000..62ec4ba --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_i386.c @@ -0,0 +1,1671 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_i386.c 1.15 06/01/31 SMI" + +#include "fenv_synonyms.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fex_handler.h" + +#if defined(__amd64) +#define test_sse_hw 1 +#else +/* + * The following variable lives in libc on Solaris 10, where it + * gets set to a nonzero value at startup time on systems with SSE. + */ +int _sse_hw = 0; +#pragma weak _sse_hw +#define test_sse_hw &_sse_hw && _sse_hw +#endif + +static int accrued = 0; +static thread_key_t accrued_key; +static mutex_t accrued_key_lock = DEFAULTMUTEX; + +int * +__fex_accrued() +{ + int *p; + + if (thr_main()) + return &accrued; + else { + p = NULL; + mutex_lock(&accrued_key_lock); + if (thr_getspecific(accrued_key, (void **)&p) != 0 && + thr_keycreate(&accrued_key, free) != 0) { + mutex_unlock(&accrued_key_lock); + return NULL; + } + mutex_unlock(&accrued_key_lock); + if (!p) { + if ((p = (int*) malloc(sizeof(int))) == NULL) + return NULL; + if (thr_setspecific(accrued_key, (void *)p) != 0) { + (void)free(p); + return NULL; + } + *p = 0; + } + return p; + } +} + +void +__fenv_getfsr(unsigned long *fsr) +{ + unsigned int cwsw, mxcsr; + + __fenv_getcwsw(&cwsw); + /* clear reserved bits for no particularly good reason */ + cwsw &= ~0xe0c00000u; + if (test_sse_hw) { + /* pick up exception flags (excluding denormal operand + flag) from mxcsr */ + __fenv_getmxcsr(&mxcsr); + cwsw |= (mxcsr & 0x3d); + } + cwsw |= *__fex_accrued(); + *fsr = cwsw ^ 0x003f0000u; +} + +void +__fenv_setfsr(const unsigned long *fsr) +{ + unsigned int cwsw, mxcsr; + int te; + + /* save accrued 
exception flags corresponding to enabled exceptions */ + cwsw = (unsigned int)*fsr; + te = __fenv_get_te(cwsw); + *__fex_accrued() = cwsw & te; + cwsw = (cwsw & ~te) ^ 0x003f0000; + if (test_sse_hw) { + /* propagate rounding direction, masks, and exception flags + (excluding denormal operand mask and flag) to mxcsr */ + __fenv_getmxcsr(&mxcsr); + mxcsr = (mxcsr & ~0x7ebd) | ((cwsw >> 13) & 0x6000) | + ((cwsw >> 9) & 0x1e80) | (cwsw & 0x3d); + __fenv_setmxcsr(&mxcsr); + } + __fenv_setcwsw(&cwsw); +} + +/* Offsets into the fp environment save area (assumes 32-bit protected mode) */ +#define CW 0 /* control word */ +#define SW 1 /* status word */ +#define TW 2 /* tag word */ +#define IP 3 /* instruction pointer */ +#define OP 4 /* opcode */ +#define EA 5 /* operand address */ + +/* macro for accessing fp registers in the save area */ +#if defined(__amd64) +#define fpreg(u,x) *(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.st) +#else +#define fpreg(u,x) *(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7]) +#endif + +/* +* Fix sip->si_code; the Solaris x86 kernel can get it wrong +*/ +void +__fex_get_x86_exc(siginfo_t *sip, ucontext_t *uap) +{ + unsigned sw, cw; + + sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; +#if defined(__amd64) + cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw; +#else + cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[CW]; +#endif + if ((sw & FE_INVALID) && !(cw & (1 << fp_trap_invalid))) + /* store 0 for stack fault, FPE_FLTINV for IEEE invalid op */ + sip->si_code = ((sw & 0x40)? 
0 : FPE_FLTINV); + else if ((sw & FE_DIVBYZERO) && !(cw & (1 << fp_trap_division))) + sip->si_code = FPE_FLTDIV; + else if ((sw & FE_OVERFLOW) && !(cw & (1 << fp_trap_overflow))) + sip->si_code = FPE_FLTOVF; + else if ((sw & FE_UNDERFLOW) && !(cw & (1 << fp_trap_underflow))) + sip->si_code = FPE_FLTUND; + else if ((sw & FE_INEXACT) && !(cw & (1 << fp_trap_inexact))) + sip->si_code = FPE_FLTRES; + else + sip->si_code = 0; +} + +static enum fp_class_type +my_fp_classf(float *x) +{ + int i = *(int*)x & ~0x80000000; + + if (i < 0x7f800000) { + if (i < 0x00800000) + return ((i == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7f800000) + return fp_infinity; + else if (i & 0x400000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_class(double *x) +{ + int i = *(1+(int*)x) & ~0x80000000; + + if (i < 0x7ff00000) { + if (i < 0x00100000) + return (((i | *(int*)x) == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7ff00000 && *(int*)x == 0) + return fp_infinity; + else if (i & 0x80000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_classl(long double *x) +{ + int i = *(2+(int*)x) & 0x7fff; + + if (i < 0x7fff) { + if (i < 1) { + if (*(1+(int*)x) < 0) return fp_normal; /* pseudo-denormal */ + return (((*(1+(int*)x) | *(int*)x) == 0)? + fp_zero : fp_subnormal); + } + return ((*(1+(int*)x) < 0)? 
fp_normal : + (enum fp_class_type) -1); /* unsupported format */ + } + else if (*(1+(int*)x) == 0x80000000 && *(int*)x == 0) + return fp_infinity; + else if (*(1+(unsigned*)x) >= 0xc0000000) + return fp_quiet; + else if (*(1+(int*)x) < 0) + return fp_signaling; + else + return (enum fp_class_type) -1; /* unsupported format */ +} + +/* +* Determine which type of invalid operation exception occurred +*/ +enum fex_exception +__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap) +{ + unsigned op; + unsigned long ea; + enum fp_class_type t1, t2; + + /* get the opcode and data address */ +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* if the instruction is fld, the source must be snan (it can't be + an unsupported format, since fldt doesn't raise any exceptions) */ + switch (op & 0x7f8) { + case 0x100: + case 0x140: + case 0x180: + case 0x500: + case 0x540: + case 0x580: + return fex_inv_snan; + } + + /* otherwise st is one of the operands; see if it's snan */ + t1 = my_fp_classl(&fpreg(uap, 0)); + if (t1 == fp_signaling) + return fex_inv_snan; + else if (t1 == (enum fp_class_type) -1) + return (enum fex_exception) -1; + + /* determine the class of the second operand if there is one */ + t2 = fp_normal; + switch (op & 0x7e0) { + case 0x600: + case 0x620: + case 0x640: + case 0x660: + case 0x680: + case 0x6a0: + /* short memory operand */ + if (!ea) + return (enum fex_exception) -1; + if (*(short *)ea == 0) + t2 = fp_zero; + break; + + case 0x200: + case 0x220: + case 0x240: + case 0x260: + case 0x280: + case 0x2a0: + /* int memory operand */ + if (!ea) + return (enum fex_exception) -1; + if (*(int *)ea == 0) + t2 = fp_zero; + break; + + case 0x000: + case 0x020: + case 0x040: + case 0x060: + case 0x080: + case 
0x0a0: + /* single precision memory operand */ + if (!ea) + return (enum fex_exception) -1; + t2 = my_fp_classf((float *)ea); + break; + + case 0x400: + case 0x420: + case 0x440: + case 0x460: + case 0x480: + case 0x4a0: + /* double precision memory operand */ + if (!ea) + return (enum fex_exception) -1; + t2 = my_fp_class((double *)ea); + break; + + case 0x0c0: + case 0x0e0: + case 0x3e0: + case 0x4c0: + case 0x4e0: + case 0x5e0: + case 0x6c0: + case 0x6e0: + case 0x7e0: + /* register operand determined by opcode */ + switch (op & 0x7f8) { + case 0x3e0: + case 0x3f8: + case 0x5f0: + case 0x5f8: + case 0x7e0: + case 0x7f8: + /* weed out nonexistent opcodes */ + break; + + default: + t2 = my_fp_classl(&fpreg(uap, op & 7)); + } + break; + + case 0x1e0: + case 0x2e0: + /* special forms */ + switch (op) { + case 0x1f1: /* fyl2x */ + case 0x1f3: /* fpatan */ + case 0x1f5: /* fprem1 */ + case 0x1f8: /* fprem */ + case 0x1f9: /* fyl2xp1 */ + case 0x1fd: /* fscale */ + case 0x2e9: /* fucompp */ + t2 = my_fp_classl(&fpreg(uap, 1)); + break; + } + break; + } + + /* see if the second op is snan */ + if (t2 == fp_signaling) + return fex_inv_snan; + else if (t2 == (enum fp_class_type) -1) + return (enum fex_exception) -1; + + /* determine the type of operation */ + switch (op & 0x7f8) { + case 0x000: + case 0x020: + case 0x028: + case 0x040: + case 0x060: + case 0x068: + case 0x080: + case 0x0a0: + case 0x0a8: + case 0x0c0: + case 0x0e0: + case 0x0e8: + case 0x400: + case 0x420: + case 0x428: + case 0x440: + case 0x460: + case 0x468: + case 0x480: + case 0x4a0: + case 0x4a8: + case 0x4c0: + case 0x4e0: + case 0x4e8: + case 0x6c0: + case 0x6e0: + case 0x6e8: + /* fadd, fsub, fsubr */ + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_isi; + break; + + case 0x008: + case 0x048: + case 0x088: + case 0x0c8: + case 0x208: + case 0x248: + case 0x288: + case 0x408: + case 0x448: + case 0x488: + case 0x4c8: + case 0x608: + case 0x648: + case 0x688: + case 0x6c8: + /* fmul 
*/ + if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero && + t1 == fp_infinity)) + return fex_inv_zmi; + break; + + case 0x030: + case 0x038: + case 0x070: + case 0x078: + case 0x0b0: + case 0x0b8: + case 0x0f0: + case 0x0f8: + case 0x230: + case 0x238: + case 0x270: + case 0x278: + case 0x2b0: + case 0x2b8: + case 0x430: + case 0x438: + case 0x470: + case 0x478: + case 0x4b0: + case 0x4b8: + case 0x4f0: + case 0x4f8: + case 0x630: + case 0x638: + case 0x670: + case 0x678: + case 0x6b0: + case 0x6b8: + case 0x6f0: + case 0x6f8: + /* fdiv */ + if (t1 == fp_zero && t2 == fp_zero) + return fex_inv_zdz; + else if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_idi; + break; + + case 0x1f0: + case 0x1f8: + /* fsqrt, other special ops */ + return fex_inv_sqrt; + + case 0x010: + case 0x018: + case 0x050: + case 0x058: + case 0x090: + case 0x098: + case 0x0d0: + case 0x0d8: + case 0x210: + case 0x218: + case 0x250: + case 0x258: + case 0x290: + case 0x298: + case 0x2e8: + case 0x3f0: + case 0x410: + case 0x418: + case 0x450: + case 0x458: + case 0x490: + case 0x498: + case 0x4d0: + case 0x4d8: + case 0x5e0: + case 0x5e8: + case 0x610: + case 0x618: + case 0x650: + case 0x658: + case 0x690: + case 0x698: + case 0x6d0: + case 0x6d8: + case 0x7f0: + /* fcom */ + if (t1 == fp_quiet || t2 == fp_quiet) + return fex_inv_cmp; + break; + + case 0x1e0: + /* ftst */ + if (op == 0x1e4 && t1 == fp_quiet) + return fex_inv_cmp; + break; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + case 0x710: + case 0x718: + case 0x730: + case 0x738: + case 0x750: + case 0x758: + case 0x770: + case 0x778: + case 0x790: + case 0x798: + case 0x7b0: + case 0x7b8: + /* fist, fbst */ + return fex_inv_int; + } + + return (enum fex_exception) -1; +} + +/* scale factors for exponent unwrapping */ +static const long double + two12288 = 1.139165225263043370845938579315932009e+3699l, /* 2^12288 */ + twom12288 = 
8.778357852076208839765066529179033145e-3700l, /* 2^-12288 */ + twom12288mulp = 8.778357852076208839289190796475222545e-3700l; + /* 2^-12288 * (1 - 2^-64) */ + +/* inline templates */ +extern long double f2xm1(long double); +extern long double fyl2x(long double, long double); +extern long double fptan(long double); +extern long double fpatan(long double, long double); +extern long double fxtract(long double); +extern long double fprem1(long double, long double); +extern long double fprem(long double, long double); +extern long double fyl2xp1(long double, long double); +extern long double fsqrt(long double); +extern long double fsincos(long double); +extern long double frndint(long double); +extern long double fscale(long double, long double); +extern long double fsin(long double); +extern long double fcos(long double); + +/* +* Get the operands, generate the default untrapped result with +* exceptions, and set a code indicating the type of operation +*/ +void +__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + fex_numeric_t t; + long double op2v, x; + unsigned int cwsw, ex, sw, op; + unsigned long ea; + volatile int c; + + /* get the exception type, status word, opcode, and data address */ + ex = sip->si_code; + sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* initialize res to the default untrapped result and ex to the + corresponding flags (assume trapping is disabled and flags + are clear) */ + + /* single operand instructions */ + info->op = fex_cnvt; + info->op2.type = fex_nodata; + switch (op & 0x7f8) { + /* load instructions */ + case 0x100: + case 0x140: + case 0x180: + if (!ea) { + info->op = fex_other; + info->op1.type = 
info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_float; + info->op1.val.f = *(float *)ea; + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.f; + goto done; + + case 0x500: + case 0x540: + case 0x580: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_double; + info->op1.val.d = *(double *)ea; + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.d; + goto done; + + /* store instructions */ + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + info->res.type = fex_float; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.f = *(float *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.f = (float) info->op1.val.q; + goto done; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + info->res.type = fex_int; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.i = *(int *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.i = (int) info->op1.val.q; + goto done; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + info->res.type = fex_double; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = 
fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.d = *(double *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.d = (double) info->op1.val.q; + goto done; + + case 0x710: + case 0x718: + case 0x750: + case 0x758: + case 0x790: + case 0x798: + info->res.type = fex_int; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.i = *(short *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.i = (short) info->op1.val.q; + goto done; + + case 0x730: + case 0x770: + case 0x7b0: + /* fbstp; don't bother */ + info->op = fex_other; + info->op1.type = info->res.type = fex_nodata; + info->flags = 0; + return; + + case 0x738: + case 0x778: + case 0x7b8: + info->res.type = fex_llong; + if (ex == FPE_FLTRES) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.l = *(long long *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.l = (long long) info->op1.val.q; + goto done; + } + + /* all other ops (except compares) have destinations on the stack + so overflow, underflow, and inexact will stomp their operands */ + if (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES) { + /* find the trapped result */ + info->op1.type = info->op2.type = fex_nodata; + info->res.type = fex_ldouble; + switch (op & 0x7f8) { + case 0x1f0: + /* fptan pushes 1.0 afterward, so result is in st(1) */ + info->res.val.q = ((op == 0x1f2)? 
fpreg(uap, 1) : + fpreg(uap, 0)); + break; + + case 0x4c0: + case 0x4c8: + case 0x4e0: + case 0x4e8: + case 0x4f0: + case 0x4f8: + info->res.val.q = fpreg(uap, op & 7); + break; + + case 0x6c0: + case 0x6c8: + case 0x6e0: + case 0x6e8: + case 0x6f0: + case 0x6f8: + /* stack was popped afterward */ + info->res.val.q = fpreg(uap, (op - 1) & 7); + break; + + default: + info->res.val.q = fpreg(uap, 0); + } + + /* reconstruct default untrapped result */ + if (ex == FPE_FLTOVF) { + /* generate an overflow with the sign of the result */ + x = two12288; + *(4+(short*)&x) |= (*(4+(short*)&info->res.val.q) & 0x8000); + info->res.val.q = x * two12288; + info->flags = FE_OVERFLOW | FE_INEXACT; + __fenv_getcwsw(&cwsw); + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); + } + else if (ex == FPE_FLTUND) { + /* undo the scaling; we can't distinguish a chopped result + from an exact one without futzing around to trap all in- + exact exceptions so as to keep the flag clear, so we just + punt */ + if (sw & 0x200) /* result was rounded up */ + info->res.val.q = (info->res.val.q * twom12288) * twom12288mulp; + else + info->res.val.q = (info->res.val.q * twom12288) * twom12288; + __fenv_getcwsw(&cwsw); + info->flags = (cwsw & FE_INEXACT) | FE_UNDERFLOW; + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); + } + else + info->flags = FE_INEXACT; + + /* determine the operation code */ + switch (op) { + case 0x1f0: /* f2xm1 */ + case 0x1f1: /* fyl2x */ + case 0x1f2: /* fptan */ + case 0x1f3: /* fpatan */ + case 0x1f5: /* fprem1 */ + case 0x1f8: /* fprem */ + case 0x1f9: /* fyl2xp1 */ + case 0x1fb: /* fsincos */ + case 0x1fc: /* frndint */ + case 0x1fd: /* fscale */ + case 0x1fe: /* fsin */ + case 0x1ff: /* fcos */ + info->op = fex_other; + return; + + case 0x1fa: /* fsqrt */ + info->op = fex_sqrt; + return; + } + + info->op = fex_other; + switch (op & 0x7c0) { + case 0x000: + case 0x040: + case 0x080: + case 0x0c0: + case 0x200: + case 0x240: + case 0x280: + case 0x400: + case 0x440: + case 
0x480: + case 0x4c0: + case 0x600: + case 0x640: + case 0x680: + case 0x6c0: + switch (op & 0x38) { + case 0x00: + info->op = fex_add; + break; + + case 0x08: + info->op = fex_mul; + break; + + case 0x20: + case 0x28: + info->op = fex_sub; + break; + + case 0x30: + case 0x38: + info->op = fex_div; + break; + } + } + return; + } + + /* for other exceptions, the operands are preserved, so we can + just emulate the operation with traps disabled */ + + /* one operand is always in st */ + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + + /* oddball instructions */ + info->op = fex_other; + switch (op) { + case 0x1e4: /* ftst */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = 0.0l; + info->res.type = fex_nodata; + c = (info->op1.val.q < info->op2.val.q); + goto done; + + case 0x1f0: /* f2xm1 */ + info->res.type = fex_ldouble; + info->res.val.q = f2xm1(info->op1.val.q); + goto done; + + case 0x1f1: /* fyl2x */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fyl2x(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f2: /* fptan */ + info->res.type = fex_ldouble; + info->res.val.q = fptan(info->op1.val.q); + goto done; + + case 0x1f3: /* fpatan */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fpatan(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f4: /* fxtract */ + info->res.type = fex_ldouble; + info->res.val.q = fxtract(info->op1.val.q); + goto done; + + case 0x1f5: /* fprem1 */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fprem1(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f8: /* fprem */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fprem(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f9: /* 
fyl2xp1 */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fyl2xp1(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1fa: /* fsqrt */ + info->op = fex_sqrt; + info->res.type = fex_ldouble; + info->res.val.q = fsqrt(info->op1.val.q); + goto done; + + case 0x1fb: /* fsincos */ + info->res.type = fex_ldouble; + info->res.val.q = fsincos(info->op1.val.q); + goto done; + + case 0x1fc: /* frndint */ + info->res.type = fex_ldouble; + info->res.val.q = frndint(info->op1.val.q); + goto done; + + case 0x1fd: /* fscale */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fscale(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1fe: /* fsin */ + info->res.type = fex_ldouble; + info->res.val.q = fsin(info->op1.val.q); + goto done; + + case 0x1ff: /* fcos */ + info->res.type = fex_ldouble; + info->res.val.q = fcos(info->op1.val.q); + goto done; + + case 0x2e9: /* fucompp */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_nodata; + c = (info->op1.val.q == info->op2.val.q); + goto done; + } + + /* fucom[p], fcomi[p], fucomi[p] */ + switch (op & 0x7f8) { + case 0x3e8: + case 0x5e0: + case 0x5e8: + case 0x7e8: /* unordered compares */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + info->res.type = fex_nodata; + c = (info->op1.val.q == info->op2.val.q); + goto done; + + case 0x3f0: + case 0x7f0: /* ordered compares */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + info->res.type = fex_nodata; + c = (info->op1.val.q < info->op2.val.q); + goto done; + } + + /* all other instructions come in groups of the form + fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr */ + + /* get the second operand */ + switch (op & 0x7c0) { + case 0x000: + case 0x040: + case 0x080: + if (!ea) 
{ + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_float; + info->op2.val.f = *(float *)ea; + op2v = (long double) info->op2.val.f; + break; + + case 0x0c0: + info->op2.type = fex_ldouble; + op2v = info->op2.val.q = fpreg(uap, op & 7); + break; + + case 0x200: + case 0x240: + case 0x280: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_int; + info->op2.val.i = *(int *)ea; + op2v = (long double) info->op2.val.i; + break; + + case 0x400: + case 0x440: + case 0x480: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_double; + info->op2.val.d = *(double *)ea; + op2v = (long double) info->op2.val.d; + break; + + case 0x4c0: + case 0x6c0: + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + op2v = info->op2.val.q; + break; + + case 0x600: + case 0x640: + case 0x680: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_int; + info->op2.val.i = *(short *)ea; + op2v = (long double) info->op2.val.i; + break; + + default: + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + + /* distinguish different operations in the group */ + info->res.type = fex_ldouble; + switch (op & 0x38) { + case 0x00: + info->op = fex_add; + info->res.val.q = info->op1.val.q + op2v; + break; + + case 0x08: + info->op = fex_mul; + info->res.val.q = info->op1.val.q * op2v; + break; + + case 0x10: + case 0x18: + info->op = fex_cmp; + info->res.type = fex_nodata; + c = (info->op1.val.q < op2v); + break; + + case 0x20: + info->op = fex_sub; + 
info->res.val.q = info->op1.val.q - op2v; + break; + + case 0x28: + info->op = fex_sub; + info->res.val.q = op2v - info->op1.val.q; + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + break; + + case 0x30: + info->op = fex_div; + info->res.val.q = info->op1.val.q / op2v; + break; + + case 0x38: + info->op = fex_div; + info->res.val.q = op2v / info->op1.val.q; + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + break; + + default: + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + +done: + __fenv_getcwsw(&cwsw); + info->flags = cwsw & FE_ALL_EXCEPT; + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); +} + +/* pop the saved stack: copy st(i+1) into st(i) for i = 0..6 (st(7) keeps its old contents), set both tag word bits selected by the old top-of-stack field, and advance that field (status word bits 11-13) by one, modulo 8 */ +static void pop(ucontext_t *uap) +{ + unsigned top; /* holds twice the top-of-stack field, i.e. a bit offset into the tag word */ + + fpreg(uap, 0) = fpreg(uap, 1); + fpreg(uap, 1) = fpreg(uap, 2); + fpreg(uap, 2) = fpreg(uap, 3); + fpreg(uap, 3) = fpreg(uap, 4); + fpreg(uap, 4) = fpreg(uap, 5); + fpreg(uap, 5) = fpreg(uap, 6); + fpreg(uap, 6) = fpreg(uap, 7); +#if defined(__amd64) + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10) + & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw |= (3 << top); + top = (top + 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800) + | (top << 10); +#else + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10) + & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] |= (3 << top); + top = (top + 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800) + | (top << 10); +#endif +} + +/* push x onto the saved stack: copy st(i) into st(i+1) for i = 6..0, store x in st(0), step the top-of-stack field (status word bits 11-13) back by one, modulo 8, and clear both tag word bits selected by the new value */ +static void push(long double x, ucontext_t *uap) +{ + unsigned top; /* holds twice the top-of-stack field, i.e. a bit offset into the tag word */ + + fpreg(uap, 7) = fpreg(uap, 6); + fpreg(uap, 6) = fpreg(uap, 5); + fpreg(uap, 5) = fpreg(uap, 4); + fpreg(uap, 4) = fpreg(uap, 3); + fpreg(uap, 3) = fpreg(uap, 2); + fpreg(uap, 2) = fpreg(uap, 
1); + fpreg(uap, 1) = fpreg(uap, 0); + fpreg(uap, 0) = x; +#if defined(__amd64) + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10) + & 0xe; + top = (top - 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw &= ~(3 << top); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800) + | (top << 10); +#else + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10) + & 0xe; + top = (top - 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] &= ~(3 << top); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800) + | (top << 10); +#endif +} + +/* scale factors for exponent wrapping; each one is applied twice where it is used below */ +static const float + fun = 7.922816251e+28f, /* 2^96 */ + fov = 1.262177448e-29f; /* 2^-96 */ +static const double + dun = 1.552518092300708935e+231, /* 2^768 */ + dov = 6.441148769597133308e-232; /* 2^-768 */ + +/* +* Store the specified result (info->res) in the destination of the instruction recorded in the saved context uap, adjusting the saved stack where the instruction would have; sip->si_code identifies the exception. Compare instructions only get their condition codes set to unordered. If no result is given but the exception +* is underflow or overflow, use the default trapped result (only float and double store instructions are handled in that case) +*/ +void +__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + fex_numeric_t r; + unsigned long ex, op, ea, stack; /* long so that ea is not truncated when it is loaded from the 64-bit rdp field and later cast to a pointer */ + + /* get the exception type, opcode, and data address */ + ex = sip->si_code; +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; /*???*/ +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* if the instruction is a compare, set the condition codes + to unordered and update the stack */ + switch (op & 0x7f8) { + case 0x010: + case 0x050: + case 0x090: + case 0x0d0: + case 0x210: + case 0x250: + case 0x290: + case 0x410: + case 0x450: + case 0x490: + case 0x4d0: + case 0x5e0: + case 0x610: + case 0x650: + case 0x690: + /* 
f[u]com */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + return; + + case 0x018: + case 0x058: + case 0x098: + case 0x0d8: + case 0x218: + case 0x258: + case 0x298: + case 0x418: + case 0x458: + case 0x498: + case 0x4d8: + case 0x5e8: + case 0x618: + case 0x658: + case 0x698: + case 0x6d0: + /* f[u]comp */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + pop(uap); + return; + + case 0x2e8: + case 0x6d8: + /* f[u]compp */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + pop(uap); + pop(uap); + return; + + case 0x1e0: + if (op == 0x1e4) { /* ftst */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + return; + } + break; + + case 0x3e8: + case 0x3f0: + /* f[u]comi */ +#if defined(__amd64) + uap->uc_mcontext.gregs[REG_PS] |= 0x45; +#else + uap->uc_mcontext.gregs[EFL] |= 0x45; +#endif + return; + + case 0x7e8: + case 0x7f0: + /* f[u]comip */ +#if defined(__amd64) + uap->uc_mcontext.gregs[REG_PS] |= 0x45; +#else + uap->uc_mcontext.gregs[EFL] |= 0x45; +#endif + pop(uap); + return; + } + + /* if there is no result available and the exception is overflow + or underflow, use the wrapped result */ + r = info->res; + if (r.type == fex_nodata) { + if (ex == FPE_FLTOVF || ex == FPE_FLTUND) { + /* for store instructions, do the scaling and store */ + switch (op & 0x7f8) { + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + if (!ea) + return; + if (ex == FPE_FLTOVF) + *(float *)ea = (fpreg(uap, 0) * fov) * fov; + else + *(float *)ea = (fpreg(uap, 0) * fun) 
* fun; + if ((op & 8) != 0) + pop(uap); + break; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + if (!ea) + return; + if (ex == FPE_FLTOVF) + *(double *)ea = (fpreg(uap, 0) * dov) * dov; + else + *(double *)ea = (fpreg(uap, 0) * dun) * dun; + if ((op & 8) != 0) + pop(uap); + break; + } + } +#ifdef DEBUG + else if (ex != FPE_FLTRES) + printf( "No result supplied, stack may be hosed\n" ); +#endif + return; + } + + /* otherwise convert the supplied result to the correct type, + put it in the destination, and update the stack as need be */ + + /* store instructions */ + switch (op & 0x7f8) { + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(float *)ea = (float) r.val.i; + break; + + case fex_llong: + *(float *)ea = (float) r.val.l; + break; + + case fex_float: + *(float *)ea = r.val.f; + break; + + case fex_double: + *(float *)ea = (float) r.val.d; + break; + + case fex_ldouble: + *(float *)ea = (float) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(int *)ea = r.val.i; + break; + + case fex_llong: + *(int *)ea = (int) r.val.l; + break; + + case fex_float: + *(int *)ea = (int) r.val.f; + break; + + case fex_double: + *(int *)ea = (int) r.val.d; + break; + + case fex_ldouble: + *(int *)ea = (int) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(double *)ea = (double) r.val.i; + break; + + case fex_llong: + *(double *)ea = (double) r.val.l; + break; + + case fex_float: + *(double *)ea = (double) r.val.f; + break; + + case fex_double: + *(double *)ea = r.val.d; + break; + + case 
fex_ldouble: + *(double *)ea = (double) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x710: + case 0x718: + case 0x750: + case 0x758: + case 0x790: + case 0x798: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(short *)ea = (short) r.val.i; + break; + + case fex_llong: + *(short *)ea = (short) r.val.l; + break; + + case fex_float: + *(short *)ea = (short) r.val.f; + break; + + case fex_double: + *(short *)ea = (short) r.val.d; + break; + + case fex_ldouble: + *(short *)ea = (short) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x730: + case 0x770: + case 0x7b0: + /* fbstp; don't bother */ + if (ea && ex != FPE_FLTRES) + pop(uap); + return; + + case 0x738: + case 0x778: + case 0x7b8: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(long long *)ea = (long long) r.val.i; + break; + + case fex_llong: + *(long long *)ea = r.val.l; + break; + + case fex_float: + *(long long *)ea = (long long) r.val.f; + break; + + case fex_double: + *(long long *)ea = (long long) r.val.d; + break; + + case fex_ldouble: + *(long long *)ea = (long long) r.val.q; + break; + } + if (ex != FPE_FLTRES) + pop(uap); + return; + } + + /* for all other instructions, the result goes into a register */ + switch (r.type) { + case fex_int: + r.val.q = (long double) r.val.i; + break; + + case fex_llong: + r.val.q = (long double) r.val.l; + break; + + case fex_float: + r.val.q = (long double) r.val.f; + break; + + case fex_double: + r.val.q = (long double) r.val.d; + break; + } + + /* for load instructions, push the result onto the stack */ + switch (op & 0x7f8) { + case 0x100: + case 0x140: + case 0x180: + case 0x500: + case 0x540: + case 0x580: + if (ea) + push(r.val.q, uap); + return; + } + + /* for all other instructions, if the exception is overflow, + underflow, or inexact, the stack has already been updated */ + stack = (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == 
FPE_FLTRES); + switch (op & 0x7f8) { + case 0x1f0: /* oddballs */ + switch (op) { + case 0x1f1: /* fyl2x */ + case 0x1f3: /* fpatan */ + case 0x1f9: /* fyl2xp1 */ + /* pop the stack, leaving the result in st */ + if (!stack) + pop(uap); + fpreg(uap, 0) = r.val.q; + return; + + case 0x1f2: /* fptan */ + /* fptan pushes 1.0 afterward */ + if (stack) + fpreg(uap, 1) = r.val.q; + else { + fpreg(uap, 0) = r.val.q; + push(1.0L, uap); + } + return; + + case 0x1f4: /* fxtract */ + case 0x1fb: /* fsincos */ + /* leave the supplied result in st */ + if (stack) + fpreg(uap, 0) = r.val.q; + else { + fpreg(uap, 0) = 0.0; /* punt */ + push(r.val.q, uap); + } + return; + } + + /* all others leave the stack alone and the result in st */ + fpreg(uap, 0) = r.val.q; + return; + + case 0x4c0: + case 0x4c8: + case 0x4e0: + case 0x4e8: + case 0x4f0: + case 0x4f8: + fpreg(uap, op & 7) = r.val.q; + return; + + case 0x6c0: + case 0x6c8: + case 0x6e0: + case 0x6e8: + case 0x6f0: + case 0x6f8: + /* stack is popped afterward */ + if (stack) + fpreg(uap, (op - 1) & 7) = r.val.q; + else { + fpreg(uap, op & 7) = r.val.q; + pop(uap); + } + return; + + default: + fpreg(uap, 0) = r.val.q; + return; + } +} diff --git a/usr/src/libm/src/m9x/__fex_sparc.c b/usr/src/libm/src/m9x/__fex_sparc.c new file mode 100644 index 0000000..7682afa --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_sparc.c @@ -0,0 +1,864 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_sparc.c 1.7 06/01/31 SMI" + +#if defined(__sparc) +#include "fenv_synonyms.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __sparcv9 + +#define FPreg(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X] + +#define FPREG(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_dregs[(X>>1)| \ + ((X&1)<<4)] + +#else + +#include + +#define FPxreg(X) &((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfr.pr_regs[X] + +#define FPreg(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X] + +#define FPREG(X) ((X & 1)? FPxreg(X - 1) : FPreg(X)) + +#endif /* __sparcv9 */ + +#include "fex_handler.h" + +/* classify the long double at a from its four 32-bit words, the first of which is treated as the most significant with the sign bit masked off; written locally to avoid dependence on libsunmath */ +static enum fp_class_type +my_fp_classl(long double *a) +{ + int msw = *(int*)a & ~0x80000000; + + if (msw >= 0x7fff0000) { + if (((msw & 0xffff) | *(1+(int*)a) | *(2+(int*)a) | *(3+(int*)a)) == 0) + return fp_infinity; + else if (msw & 0x8000) + return fp_quiet; + else + return fp_signaling; + } else if (msw < 0x10000) { + if ((msw | *(1+(int*)a) | *(2+(int*)a) | *(3+(int*)a)) == 0) + return fp_zero; + else + return fp_subnormal; + } else + return fp_normal; +} + +/* +* Determine which type of invalid operation exception occurred by decoding the queued instruction and classifying its rs1 and rs2 operands; returns (enum fex_exception) -1 when the instruction has integer operands or matches none of the cases below +*/ +enum fex_exception +__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap) +{ + unsigned instr, opf, rs1, rs2; + enum fp_class_type t1, t2; + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf = (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + + /* determine the classes of the operands */ + 
switch (opf & 3) { + case 1: /* single */ + t1 = fp_classf(*(float*)FPreg(rs1)); + t2 = fp_classf(*(float*)FPreg(rs2)); + break; + + case 2: /* double */ + t1 = fp_class(*(double*)FPREG(rs1)); + t2 = fp_class(*(double*)FPREG(rs2)); + break; + + case 3: /* quad */ + t1 = my_fp_classl((long double*)FPREG(rs1)); + t2 = my_fp_classl((long double*)FPREG(rs2)); + break; + + default: /* integer operands never cause an invalid operation */ + return (enum fex_exception) -1; + } + + /* if rs2 is snan, return immediately */ + if (t2 == fp_signaling) + return fex_inv_snan; + + /* determine the type of operation */ + switch ((instr >> 19) & 0x183f) { + case 0x1034: /* add, subtract, multiply, divide, square root, convert */ + switch (opf & 0x1fc) { + case 0x40: + case 0x44: /* add or subtract */ + if (t1 == fp_signaling) + return fex_inv_snan; + else + return fex_inv_isi; + + case 0x48: + case 0x68: + case 0x6c: /* multiply */ + if (t1 == fp_signaling) + return fex_inv_snan; + else + return fex_inv_zmi; + + case 0x4c: /* divide */ + if (t1 == fp_signaling) + return fex_inv_snan; + else if (t1 == fp_zero) + return fex_inv_zdz; + else + return fex_inv_idi; + + case 0x28: /* square root */ + return fex_inv_sqrt; + + case 0x80: + case 0xd0: /* convert to integer */ + return fex_inv_int; + } + break; + + case 0x1035: /* compare */ + if (t1 == fp_signaling) + return fex_inv_snan; + else + return fex_inv_cmp; + } + + return (enum fex_exception) -1; +} + +#ifdef __sparcv9 +extern void _Qp_sqrt(long double *, const long double *); +#else +extern long double _Q_sqrt(long double); +#endif + +/* +* Get the operands, generate the default untrapped result with +* exceptions, and set a code indicating the type of operation; on return info->flags holds __fenv_get_ex() of the fsr read after the recomputation, and the fsr is written back with __fenv_set_ex(fsr, 0) applied +*/ +void +__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + unsigned long fsr; + unsigned instr, opf, rs1, rs2; + volatile int c; /* receives the compare results below and is never read; presumably the comparisons are performed only for the flags they raise */ + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf 
= (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + + /* get the operands */ + switch (opf & 3) { + case 0: /* integer */ + info->op1.type = fex_nodata; + if (opf & 0x40) { + info->op2.type = fex_int; + info->op2.val.i = *(int*)FPreg(rs2); + } + else { + info->op2.type = fex_llong; + info->op2.val.l = *(long long*)FPREG(rs2); + } + break; + + case 1: /* single */ + info->op1.type = info->op2.type = fex_float; + info->op1.val.f = *(float*)FPreg(rs1); + info->op2.val.f = *(float*)FPreg(rs2); + break; + + case 2: /* double */ + info->op1.type = info->op2.type = fex_double; + info->op1.val.d = *(double*)FPREG(rs1); + info->op2.val.d = *(double*)FPREG(rs2); + break; + + case 3: /* quad */ + info->op1.type = info->op2.type = fex_ldouble; + info->op1.val.q = *(long double*)FPREG(rs1); + info->op2.val.q = *(long double*)FPREG(rs2); + break; + } + + /* initialize res to the default untrapped result and ex to the + corresponding flags (assume trapping is disabled and flags + are clear) */ + info->op = fex_other; + info->res.type = fex_nodata; + switch ((instr >> 19) & 0x183f) { + case 0x1035: /* compare */ + info->op = fex_cmp; + switch (opf) { + case 0x51: /* compare single */ + c = (info->op1.val.f == info->op2.val.f); + break; + + case 0x52: /* compare double */ + c = (info->op1.val.d == info->op2.val.d); + break; + + case 0x53: /* compare quad */ + c = (info->op1.val.q == info->op2.val.q); + break; + + case 0x55: /* compare single with exception */ + c = (info->op1.val.f < info->op2.val.f); + break; + + case 0x56: /* compare double with exception */ + c = (info->op1.val.d < info->op2.val.d); + break; + + case 0x57: /* compare quad with exception */ + c = (info->op1.val.q < info->op2.val.q); + break; + } + break; + + case 0x1034: /* add, subtract, multiply, divide, square root, convert */ + switch (opf) { + case 0x41: /* add single */ + info->op = fex_add; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f + info->op2.val.f; + 
break; + + case 0x42: /* add double */ + info->op = fex_add; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d + info->op2.val.d; + break; + + case 0x43: /* add quad */ + info->op = fex_add; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q + info->op2.val.q; + break; + + case 0x45: /* subtract single */ + info->op = fex_sub; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f - info->op2.val.f; + break; + + case 0x46: /* subtract double */ + info->op = fex_sub; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d - info->op2.val.d; + break; + + case 0x47: /* subtract quad */ + info->op = fex_sub; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q - info->op2.val.q; + break; + + case 0x49: /* multiply single */ + info->op = fex_mul; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f * info->op2.val.f; + break; + + case 0x4a: /* multiply double */ + info->op = fex_mul; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d * info->op2.val.d; + break; + + case 0x4b: /* multiply quad */ + info->op = fex_mul; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q * info->op2.val.q; + break; + + case 0x69: /* fsmuld */ + info->op = fex_mul; + info->res.type = fex_double; + info->res.val.d = (double)info->op1.val.f * (double)info->op2.val.f; + break; + + case 0x6e: /* fdmulq */ + info->op = fex_mul; + info->res.type = fex_ldouble; + info->res.val.q = (long double)info->op1.val.d * + (long double)info->op2.val.d; + break; + + case 0x4d: /* divide single */ + info->op = fex_div; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f / info->op2.val.f; + break; + + case 0x4e: /* divide double */ + info->op = fex_div; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d / info->op2.val.d; + break; + + case 0x4f: /* divide quad */ + info->op = fex_div; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q / 
info->op2.val.q; + break; + + case 0x29: /* square root single */ + info->op = fex_sqrt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + info->res.type = fex_float; + info->res.val.f = sqrtf(info->op1.val.f); + break; + + case 0x2a: /* square root double */ + info->op = fex_sqrt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + info->res.type = fex_double; + info->res.val.d = sqrt(info->op1.val.d); + break; + + case 0x2b: /* square root quad */ + info->op = fex_sqrt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + info->res.type = fex_ldouble; +#ifdef __sparcv9 + _Qp_sqrt(&info->res.val.q, &info->op1.val.q); +#else + info->res.val.q = _Q_sqrt(info->op1.val.q); +#endif + break; + + default: /* conversions */ + info->op = fex_cnvt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + switch (opf) { + case 0xd1: /* convert single to int */ + info->res.type = fex_int; + info->res.val.i = (int) info->op1.val.f; + break; + + case 0xd2: /* convert double to int */ + info->res.type = fex_int; + info->res.val.i = (int) info->op1.val.d; + break; + + case 0xd3: /* convert quad to int */ + info->res.type = fex_int; + info->res.val.i = (int) info->op1.val.q; + break; + + case 0x81: /* convert single to long long */ + info->res.type = fex_llong; + info->res.val.l = (long long) info->op1.val.f; + break; + + case 0x82: /* convert double to long long */ + info->res.type = fex_llong; + info->res.val.l = (long long) info->op1.val.d; + break; + + case 0x83: /* convert quad to long long */ + info->res.type = fex_llong; + info->res.val.l = (long long) info->op1.val.q; + break; + + case 0xc4: /* convert int to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.i; + break; + + case 0x84: /* convert long long to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.l; + break; + + case 0x88: /* convert long long to double */ + info->res.type = fex_double; + info->res.val.d = (double) 
info->op1.val.l; + break; + + case 0xc6: /* convert double to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.d; + break; + + case 0xc7: /* convert quad to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.q; + break; + + case 0xc9: /* convert single to double */ + info->res.type = fex_double; + info->res.val.d = (double) info->op1.val.f; + break; + + case 0xcb: /* convert quad to double */ + info->res.type = fex_double; + info->res.val.d = (double) info->op1.val.q; + break; + + case 0xcd: /* convert single to quad */ + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.f; + break; + + case 0xce: /* convert double to quad */ + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.d; + break; + } + } + break; + } + __fenv_getfsr(&fsr); + info->flags = (int)__fenv_get_ex(fsr); + __fenv_set_ex(fsr, 0); + __fenv_setfsr(&fsr); +} + +/* +* Store the specified result; if no result is given but the exception +* is underflow or overflow, supply the default trapped result +*/ +void +__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + unsigned instr, opf, rs1, rs2, rd; + long double qscl; + double dscl; + float fscl; + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf = (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + rd = (instr >> 25) & 0x1f; + + /* if the instruction is a compare, just set fcc to unordered */ + if (((instr >> 19) & 0x183f) == 0x1035) { + if (rd == 0) + uap->uc_mcontext.fpregs.fpu_fsr |= 0xc00; + else { +#ifdef __sparcv9 + uap->uc_mcontext.fpregs.fpu_fsr |= (3l << ((rd << 1) + 30)); +#else + ((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfsr |= (3 << ((rd - 1) << 1)); +#endif + } + return; + } + + /* if there is no result available, try to generate the untrapped + default */ + if (info->res.type == 
fex_nodata) { + /* set scale factors for exponent wrapping */ + switch (sip->si_code) { + case FPE_FLTOVF: + fscl = 1.262177448e-29f; /* 2^-96 */ + dscl = 6.441148769597133308e-232; /* 2^-768 */ + qscl = 8.778357852076208839765066529179033145e-3700l;/* 2^-12288 */ + break; + + case FPE_FLTUND: + fscl = 7.922816251e+28f; /* 2^96 */ + dscl = 1.552518092300708935e+231; /* 2^768 */ + qscl = 1.139165225263043370845938579315932009e+3699l;/* 2^12288 */ + break; + + default: + /* user may have blown away the default result by mistake, + so try to regenerate it */ + (void) __fex_get_op(sip, uap, info); + if (info->res.type != fex_nodata) + goto stuff; + /* couldn't do it */ + return; + } + + /* get the operands */ + switch (opf & 3) { + case 1: /* single */ + info->op1.val.f = *(float*)FPreg(rs1); + info->op2.val.f = *(float*)FPreg(rs2); + break; + + case 2: /* double */ + info->op1.val.d = *(double*)FPREG(rs1); + info->op2.val.d = *(double*)FPREG(rs2); + break; + + case 3: /* quad */ + info->op1.val.q = *(long double*)FPREG(rs1); + info->op2.val.q = *(long double*)FPREG(rs2); + break; + } + + /* generate the wrapped result */ + switch (opf) { + case 0x41: /* add single */ + info->res.type = fex_float; + info->res.val.f = fscl * ( fscl * info->op1.val.f + + fscl * info->op2.val.f ); + break; + + case 0x42: /* add double */ + info->res.type = fex_double; + info->res.val.d = dscl * ( dscl * info->op1.val.d + + dscl * info->op2.val.d ); + break; + + case 0x43: /* add quad */ + info->res.type = fex_ldouble; + info->res.val.q = qscl * ( qscl * info->op1.val.q + + qscl * info->op2.val.q ); + break; + + case 0x45: /* subtract single */ + info->res.type = fex_float; + info->res.val.f = fscl * ( fscl * info->op1.val.f - + fscl * info->op2.val.f ); + break; + + case 0x46: /* subtract double */ + info->res.type = fex_double; + info->res.val.d = dscl * ( dscl * info->op1.val.d - + dscl * info->op2.val.d ); + break; + + case 0x47: /* subtract quad */ + info->res.type = fex_ldouble; + 
info->res.val.q = qscl * ( qscl * info->op1.val.q - + qscl * info->op2.val.q ); + break; + + case 0x49: /* multiply single */ + info->res.type = fex_float; + info->res.val.f = ( fscl * info->op1.val.f ) * + ( fscl * info->op2.val.f ); + break; + + case 0x4a: /* multiply double */ + info->res.type = fex_double; + info->res.val.d = ( dscl * info->op1.val.d ) * + ( dscl * info->op2.val.d ); + break; + + case 0x4b: /* multiply quad */ + info->res.type = fex_ldouble; + info->res.val.q = ( qscl * info->op1.val.q ) * + ( qscl * info->op2.val.q ); + break; + + case 0x4d: /* divide single */ + info->res.type = fex_float; + info->res.val.f = ( fscl * info->op1.val.f ) / + ( info->op2.val.f / fscl ); + break; + + case 0x4e: /* divide double */ + info->res.type = fex_double; + info->res.val.d = ( dscl * info->op1.val.d ) / + ( info->op2.val.d / dscl ); + break; + + case 0x4f: /* divide quad */ + info->res.type = fex_ldouble; + info->res.val.q = ( qscl * info->op1.val.q ) / + ( info->op2.val.q / qscl ); + break; + + case 0xc6: /* convert double to single */ + info->res.type = fex_float; + info->res.val.f = (float) ( fscl * ( fscl * info->op1.val.d ) ); + break; + + case 0xc7: /* convert quad to single */ + info->res.type = fex_float; + info->res.val.f = (float) ( fscl * ( fscl * info->op1.val.q ) ); + break; + + case 0xcb: /* convert quad to double */ + info->res.type = fex_double; + info->res.val.d = (double) ( dscl * ( dscl * info->op1.val.q ) ); + break; + } + + if (info->res.type == fex_nodata) + /* couldn't do it */ + return; + } + +stuff: + /* stick the result in the destination */ + if (opf & 0x80) { /* conversion */ + if (opf & 0x10) { /* result is an int */ + switch (info->res.type) { + case fex_llong: + info->res.val.i = (int) info->res.val.l; + break; + + case fex_float: + info->res.val.i = (int) info->res.val.f; + break; + + case fex_double: + info->res.val.i = (int) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.i = (int) info->res.val.q; + break; 
+ } + *(int*)FPreg(rd) = info->res.val.i; + return; + } + + switch (opf & 0xc) { /* opf bits 2-3 select the result size */ + case 0: /* result is long long */ + switch (info->res.type) { + case fex_int: + info->res.val.l = (long long) info->res.val.i; + break; + + case fex_float: + info->res.val.l = (long long) info->res.val.f; + break; + + case fex_double: + info->res.val.l = (long long) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.l = (long long) info->res.val.q; + break; + } + *(long long*)FPREG(rd) = info->res.val.l; + break; + + case 0x4: /* result is float */ + switch (info->res.type) { + case fex_int: + info->res.val.f = (float) info->res.val.i; + break; + + case fex_llong: + info->res.val.f = (float) info->res.val.l; + break; + + case fex_double: + info->res.val.f = (float) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.f = (float) info->res.val.q; + break; + } + *(float*)FPreg(rd) = info->res.val.f; + break; + + case 0x8: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 0xc: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } + return; + } + + if ((opf & 0xf0) == 0x60) { /* fsmuld, fdmulq */ + switch (opf & 0xc) { /* same size field as for conversions: 0x8 double, 0xc long double */ + case 0x8: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double)
info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 0xc: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } + return; + } + + switch (opf & 3) { /* other arithmetic op */ + case 1: /* result is float */ + switch (info->res.type) { + case fex_int: + info->res.val.f = (float) info->res.val.i; + break; + + case fex_llong: + info->res.val.f = (float) info->res.val.l; + break; + + case fex_double: + info->res.val.f = (float) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.f = (float) info->res.val.q; + break; + } + *(float*)FPreg(rd) = info->res.val.f; + break; + + case 2: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 3: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long 
double) info->res.val.d; + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } +} +#endif /* defined(__sparc) */ diff --git a/usr/src/libm/src/m9x/__fex_sse.c b/usr/src/libm/src/m9x/__fex_sse.c new file mode 100644 index 0000000..e1743a9 --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_sse.c @@ -0,0 +1,1581 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_sse.c 1.3 06/01/31 SMI" + +#include "fenv_synonyms.h" +#if defined(__i386) && !defined(__amd64) +/* for now, pick up local copy of Solaris 10 sys/regset.h; we can get rid + of this once we no longer need to build on Solaris 8 */ +#include "regset.h" +#endif +#include +#include +#include +#include "fex_handler.h" + +#if !defined(REG_PC) +#define REG_PC EIP +#endif + +#if !defined(REG_PS) +#define REG_PS EFL +#endif + +#ifdef __amd64 +#define regno(X) ((X < 4)? REG_RAX - X : \ + ((X > 4)? REG_RAX + 1 - X : REG_RSP)) +#else +#define regno(X) (EAX - X) +#endif + +/* + * Support for SSE instructions + */ + +/* + * Decode an SSE instruction. 
Fill in *inst and return the length of the + * instruction in bytes. Return 0 if the instruction is not recognized. + */ +int +__fex_parse_sse(ucontext_t *uap, sseinst_t *inst) +{ + unsigned char *ip; + char *addr; + int i, dbl, simd, rex, modrm, sib, r; + + i = 0; + ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; + + /* look for pseudo-prefixes */ + dbl = 0; + simd = SIMD; + if (ip[i] == 0xF3) { + simd = 0; + i++; + } else if (ip[i] == 0x66) { + dbl = DOUBLE; + i++; + } else if (ip[i] == 0xF2) { + dbl = DOUBLE; + simd = 0; + i++; + } + + /* look for AMD64 REX prefix */ + rex = 0; + if (ip[i] >= 0x40 && ip[i] <= 0x4F) { + rex = ip[i]; + i++; + } + + /* parse opcode */ + if (ip[i++] != 0x0F) + return 0; + switch (ip[i++]) { + case 0x2A: + inst->op = (int)cvtsi2ss + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2C: + inst->op = (int)cvttss2si + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2D: + inst->op = (int)cvtss2si + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2E: + /* oddball: scalar instruction in a SIMD opcode group */ + if (!simd) + return 0; + inst->op = (int)ucomiss + dbl; + break; + + case 0x2F: + /* oddball: scalar instruction in a SIMD opcode group */ + if (!simd) + return 0; + inst->op = (int)comiss + dbl; + break; + + case 0x51: + inst->op = (int)sqrtss + simd + dbl; + break; + + case 0x58: + inst->op = (int)addss + simd + dbl; + break; + + case 0x59: + inst->op = (int)mulss + simd + dbl; + break; + + case 0x5A: + inst->op = (int)cvtss2sd + simd + dbl; + break; + + case 0x5B: + if (dbl) { + if (simd) + inst->op = cvtps2dq; + else + return 0; + } else { + inst->op = (simd)? 
cvtdq2ps : cvttps2dq; + } + break; + + case 0x5C: + inst->op = (int)subss + simd + dbl; + break; + + case 0x5D: + inst->op = (int)minss + simd + dbl; + break; + + case 0x5E: + inst->op = (int)divss + simd + dbl; + break; + + case 0x5F: + inst->op = (int)maxss + simd + dbl; + break; + + case 0xC2: + inst->op = (int)cmpss + simd + dbl; + break; + + case 0xE6: + if (simd) { + if (dbl) + inst->op = cvttpd2dq; + else + return 0; + } else { + inst->op = (dbl)? cvtpd2dq : cvtdq2pd; + } + break; + + default: + return 0; + } + + /* locate operands */ + modrm = ip[i++]; + + if (inst->op == cvtss2si || inst->op == cvttss2si || + inst->op == cvtsd2si || inst->op == cvttsd2si || + inst->op == cvtss2siq || inst->op == cvttss2siq || + inst->op == cvtsd2siq || inst->op == cvttsd2siq) { + /* op1 is a gp register */ + r = ((rex & 4) << 1) | ((modrm >> 3) & 7); + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; + } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || + inst->op == cvtpd2pi || inst->op == cvttpd2pi) { + /* op1 is a mmx register */ +#ifdef __amd64 + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.st[(modrm >> 3) & 7]; +#else + inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + + (char *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[7]); +#endif + } else { + /* op1 is a xmm register */ + r = ((rex & 4) << 1) | ((modrm >> 3) & 7); + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. + fp_reg_set.fpchip_state.xmm[r]; + } + + if ((modrm >> 6) == 3) { + if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || + inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { + /* op2 is a gp register */ + r = ((rex & 1) << 3) | (modrm & 7); + inst->op2 = (sseoperand_t *)&uap->uc_mcontext. + gregs[regno(r)]; + } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { + /* op2 is a mmx register */ +#ifdef __amd64 + inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 
+ fp_reg_set.fpchip_state.st[modrm & 7]; +#else + inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + + (char *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[7]); +#endif + } else { + /* op2 is a xmm register */ + r = ((rex & 1) << 3) | (modrm & 7); + inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. + fp_reg_set.fpchip_state.xmm[r]; + } + } else if ((modrm & 0xc7) == 0x05) { +#ifdef __amd64 + /* address of next instruction + offset */ + r = i + 4; + if (inst->op == cmpss || inst->op == cmpps || + inst->op == cmpsd || inst->op == cmppd) + r++; + inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); +#else + /* absolute address */ + inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); +#endif + i += 4; + } else { + /* complex address */ + if ((modrm & 7) == 4) { + /* parse sib byte */ + sib = ip[i++]; + if ((sib & 7) == 5 && (modrm >> 6) == 0) { + /* start with absolute address */ + addr = (char *)(*(int *)(ip + i)); + i += 4; + } else { + /* start with base */ + r = ((rex & 1) << 3) | (sib & 7); + addr = (char *)uap->uc_mcontext.gregs[regno(r)]; + } + r = ((rex & 2) << 2) | ((sib >> 3) & 7); + if (r != 4) { + /* add scaled index */ + addr += uap->uc_mcontext.gregs[regno(r)] + << (sib >> 6); + } + } else { + r = ((rex & 1) << 3) | (modrm & 7); + addr = (char *)uap->uc_mcontext.gregs[regno(r)]; + } + + /* add displacement, if any */ + if ((modrm >> 6) == 1) { + addr += (char)ip[i++]; + } else if ((modrm >> 6) == 2) { + addr += *(int *)(ip + i); + i += 4; + } + inst->op2 = (sseoperand_t *)addr; + } + + if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || + inst->op == cmppd) { + /* get the immediate operand */ + inst->imm = ip[i++]; + } + + return i; +} + +static enum fp_class_type +my_fp_classf(float *x) +{ + int i = *(int *)x & ~0x80000000; + + if (i < 0x7f800000) { + if (i < 0x00800000) + return ((i == 0)? 
fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7f800000) + return fp_infinity; + else if (i & 0x400000) /* top fraction bit set */ + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type /* classify a double from its two 32-bit words */ +my_fp_class(double *x) +{ + int i = *(1+(int *)x) & ~0x80000000; /* second 32-bit word of *x with its top bit cleared */ + + if (i < 0x7ff00000) { + if (i < 0x00100000) + return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7ff00000 && *(int *)x == 0) + return fp_infinity; + else if (i & 0x80000) /* top fraction bit set */ + return fp_quiet; + else + return fp_signaling; +} + +/* + * Inspect a scalar SSE instruction that incurred an invalid operation + * exception to determine which type of exception it was. Returns (enum fex_exception)-1 when none of the recognized cases applies. + */ +static enum fex_exception +__fex_get_sse_invalid_type(sseinst_t *inst) +{ + enum fp_class_type t1, t2; /* classes of op1 and op2 */ + + /* check op2 for signaling nan */ + t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : + my_fp_classf(&inst->op2->f[0]); + if (t2 == fp_signaling) + return fex_inv_snan; + + /* eliminate all single-operand instructions */ + switch (inst->op) { + case cvtsd2ss: + case cvtss2sd: + /* hmm, this shouldn't have happened */ + return (enum fex_exception) -1; + + case sqrtss: + case sqrtsd: + return fex_inv_sqrt; + + case cvtss2si: + case cvtsd2si: + case cvttss2si: + case cvttsd2si: + case cvtss2siq: + case cvtsd2siq: + case cvttss2siq: + case cvttsd2siq: + return fex_inv_int; + } + + /* check op1 for signaling nan */ + t1 = ((int)inst->op & DOUBLE)?
my_fp_class(&inst->op1->d[0]) : + my_fp_classf(&inst->op1->f[0]); + if (t1 == fp_signaling) + return fex_inv_snan; + + /* check two-operand instructions for other cases */ + switch (inst->op) { + case cmpss: + case cmpsd: + case minss: + case minsd: + case maxss: + case maxsd: + case comiss: + case comisd: + return fex_inv_cmp; + + case addss: + case addsd: + case subss: + case subsd: + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_isi; /* both operands infinite */ + break; + + case mulss: + case mulsd: + if ((t1 == fp_zero && t2 == fp_infinity) || + (t2 == fp_zero && t1 == fp_infinity)) + return fex_inv_zmi; /* zero times infinity, either order */ + break; + + case divss: + case divsd: + if (t1 == fp_zero && t2 == fp_zero) + return fex_inv_zdz; /* zero / zero */ + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_idi; /* infinity / infinity */ + } + + return (enum fex_exception)-1; +} + +/* inline templates: as called below, each takes pointers to its operand(s) followed, except for the comi/ucomi forms, by a pointer that receives the result */ +extern void sse_cmpeqss(float *, float *, int *); +extern void sse_cmpltss(float *, float *, int *); +extern void sse_cmpless(float *, float *, int *); +extern void sse_cmpunordss(float *, float *, int *); +extern void sse_minss(float *, float *, float *); +extern void sse_maxss(float *, float *, float *); +extern void sse_addss(float *, float *, float *); +extern void sse_subss(float *, float *, float *); +extern void sse_mulss(float *, float *, float *); +extern void sse_divss(float *, float *, float *); +extern void sse_sqrtss(float *, float *); +extern void sse_ucomiss(float *, float *); +extern void sse_comiss(float *, float *); +extern void sse_cvtss2sd(float *, double *); +extern void sse_cvtsi2ss(int *, float *); +extern void sse_cvttss2si(float *, int *); +extern void sse_cvtss2si(float *, int *); +#ifdef __amd64 +extern void sse_cvtsi2ssq(long long *, float *); +extern void sse_cvttss2siq(float *, long long *); +extern void sse_cvtss2siq(float *, long long *); +#endif +extern void sse_cmpeqsd(double *, double *, long long *); +extern void sse_cmpltsd(double *, double *, long long *); +extern void sse_cmplesd(double *, double *, long
long *); +extern void sse_cmpunordsd(double *, double *, long long *); +extern void sse_minsd(double *, double *, double *); +extern void sse_maxsd(double *, double *, double *); +extern void sse_addsd(double *, double *, double *); +extern void sse_subsd(double *, double *, double *); +extern void sse_mulsd(double *, double *, double *); +extern void sse_divsd(double *, double *, double *); +extern void sse_sqrtsd(double *, double *); +extern void sse_ucomisd(double *, double *); +extern void sse_comisd(double *, double *); +extern void sse_cvtsd2ss(double *, float *); +extern void sse_cvtsi2sd(int *, double *); +extern void sse_cvttsd2si(double *, int *); +extern void sse_cvtsd2si(double *, int *); +#ifdef __amd64 +extern void sse_cvtsi2sdq(long long *, double *); +extern void sse_cvttsd2siq(double *, long long *); +extern void sse_cvtsd2siq(double *, long long *); +#endif + +/* + * Fill in *info with the operands, default untrapped result, and + * flags produced by a scalar SSE instruction, and return the type + * of trapped exception (if any). On entry, the mxcsr must have + * all exceptions masked and all flags clear. The same conditions + * will hold on exit. Returns (enum fex_exception)-1 when no trapped exception is identified. + * + * This routine does not work if the instruction specified by *inst + * is not a scalar instruction. + */ +enum fex_exception +__fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) +{ + unsigned int e, te, mxcsr, oldmxcsr, subnorm; /* te: flags whose trap is enabled in uap; e: flags raised here that are also in te; subnorm: result was subnormal */ + + /* + * Perform the operation with traps disabled and check the + * exception flags. If the underflow trap was enabled, also + * check for an exact subnormal result.
+ */ + __fenv_getmxcsr(&oldmxcsr); + subnorm = 0; + if ((int)inst->op & DOUBLE) { + if (inst->op == cvtsi2sd) { + info->op1.type = fex_int; + info->op1.val.i = inst->op2->i[0]; /* the single source operand is the instruction's op2 */ + info->op2.type = fex_nodata; + } else if (inst->op == cvtsi2sdq) { + info->op1.type = fex_llong; + info->op1.val.l = inst->op2->l[0]; + info->op2.type = fex_nodata; + } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || + inst->op == cvttsd2si || inst->op == cvtsd2si || + inst->op == cvttsd2siq || inst->op == cvtsd2siq) { + info->op1.type = fex_double; + info->op1.val.d = inst->op2->d[0]; + info->op2.type = fex_nodata; + } else { + info->op1.type = fex_double; + info->op1.val.d = inst->op1->d[0]; + info->op2.type = fex_double; + info->op2.val.d = inst->op2->d[0]; + } + info->res.type = fex_double; /* default; overridden in the cases below that produce another type */ + switch (inst->op) { + case cmpsd: + info->op = fex_cmp; + info->res.type = fex_llong; + switch (inst->imm & 3) { + case 0: + sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 1: + sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 2: + sse_cmplesd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 3: + sse_cmpunordsd(&info->op1.val.d, + &info->op2.val.d, &info->res.val.l); + } + if (inst->imm & 4) + info->res.val.l ^= 0xffffffffffffffffull; /* imm bit 2 selects the complement of predicates 0-3 */ + break; + + case minsd: + info->op = fex_other; + sse_minsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + break; + + case maxsd: + info->op = fex_other; + sse_maxsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + break; + + case addsd: + info->op = fex_add; + sse_addsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case subsd: + info->op = fex_sub; + sse_subsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case mulsd: + info->op = fex_mul; +
sse_mulsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case divsd: + info->op = fex_div; + sse_divsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case sqrtsd: + info->op = fex_sqrt; + sse_sqrtsd(&info->op1.val.d, &info->res.val.d); + break; + + case cvtsd2ss: + info->op = fex_cnvt; + info->res.type = fex_float; + sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case cvtsi2sd: + info->op = fex_cnvt; + sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); + break; + + case cvttsd2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); + break; + + case cvtsd2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); + break; + +#ifdef __amd64 + case cvtsi2sdq: + info->op = fex_cnvt; + sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); + break; + + case cvttsd2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); + break; + + case cvtsd2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); + break; +#endif + + case ucomisd: + info->op = fex_cmp; + info->res.type = fex_nodata; /* no result value is recorded for the comi/ucomi compares */ + sse_ucomisd(&info->op1.val.d, &info->op2.val.d); + break; + + case comisd: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_comisd(&info->op1.val.d, &info->op2.val.d); + break; + } + } else { + if (inst->op == cvtsi2ss) { + info->op1.type = fex_int; + info->op1.val.i = inst->op2->i[0]; + info->op2.type = fex_nodata; + } else if (inst->op == cvtsi2ssq) { + info->op1.type = fex_llong; + info->op1.val.l = inst->op2->l[0]; + info->op2.type = fex_nodata; + } else if (inst->op == sqrtss || inst->op == cvtss2sd || +
inst->op == cvttss2si || inst->op == cvtss2si || + inst->op == cvttss2siq || inst->op == cvtss2siq) { + info->op1.type = fex_float; + info->op1.val.f = inst->op2->f[0]; + info->op2.type = fex_nodata; + } else { + info->op1.type = fex_float; + info->op1.val.f = inst->op1->f[0]; + info->op2.type = fex_float; + info->op2.val.f = inst->op2->f[0]; + } + info->res.type = fex_float; /* default; overridden in the cases below that produce another type */ + switch (inst->op) { + case cmpss: + info->op = fex_cmp; + info->res.type = fex_int; + switch (inst->imm & 3) { + case 0: + sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 1: + sse_cmpltss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 2: + sse_cmpless(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 3: + sse_cmpunordss(&info->op1.val.f, + &info->op2.val.f, &info->res.val.i); + } + if (inst->imm & 4) + info->res.val.i ^= 0xffffffffu; /* imm bit 2 selects the complement of predicates 0-3 */ + break; + + case minss: + info->op = fex_other; + sse_minss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + break; + + case maxss: + info->op = fex_other; + sse_maxss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + break; + + case addss: + info->op = fex_add; + sse_addss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case subss: + info->op = fex_sub; + sse_subss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case mulss: + info->op = fex_mul; + sse_mulss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case divss: + info->op = fex_div; + sse_divss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case sqrtss: + info->op = fex_sqrt; + sse_sqrtss(&info->op1.val.f, &info->res.val.f); + break; + +
case cvtss2sd: + info->op = fex_cnvt; + info->res.type = fex_double; + sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); + break; + + case cvtsi2ss: + info->op = fex_cnvt; + sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); + break; + + case cvttss2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvttss2si(&info->op1.val.f, &info->res.val.i); + break; + + case cvtss2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvtss2si(&info->op1.val.f, &info->res.val.i); + break; + +#ifdef __amd64 + case cvtsi2ssq: + info->op = fex_cnvt; + sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); + break; + + case cvttss2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); + break; + + case cvtss2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); + break; +#endif + + case ucomiss: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_ucomiss(&info->op1.val.f, &info->op2.val.f); + break; + + case comiss: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_comiss(&info->op1.val.f, &info->op2.val.f); + break; + } + } + __fenv_getmxcsr(&mxcsr); + info->flags = mxcsr & 0x3d; /* keep flag bits 0 and 2-5; bit 1 (denormal operand in the MXCSR layout) is dropped */ + __fenv_setmxcsr(&oldmxcsr); /* put back the value read on entry */ + + /* determine which exception would have been trapped */ + te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr + >> 7) & 0x3d; /* shifting by 7 lines the saved mask bits up with the flag bits; a clear mask bit means that trap was enabled */ + e = mxcsr & te; + if (e & FE_INVALID) + return __fex_get_sse_invalid_type(inst); + if (e & FE_DIVBYZERO) + return fex_division; + if (e & FE_OVERFLOW) + return fex_overflow; + if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) + return fex_underflow; + if (e & FE_INEXACT) + return fex_inexact; + return (enum fex_exception)-1; +} + +/* + * Emulate a SIMD SSE instruction to determine which exceptions occur + * in each part.
For i = 0, 1, 2, and 3, set e[i] to indicate the + * trapped exception that would occur if the i-th part of the SIMD + * instruction were executed in isolation; set e[i] to -1 if no + * trapped exception would occur in this part. Also fill in info[i] + * with the corresponding operands, default untrapped result, and + * flags. Instructions handled in two parts leave e[2] and e[3] at -1 and do not touch info[2] or info[3]. + * + * This routine does not work if the instruction specified by *inst + * is not a SIMD instruction. + */ +void +__fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, + fex_info_t *info) +{ + sseinst_t dummy; /* scalar stand-in re-pointed at one part at a time */ + int i; + + e[0] = e[1] = e[2] = e[3] = -1; /* default: no trapped exception in any part */ + + /* perform each part of the SIMD operation */ + switch (inst->op) { + case cmpps: + dummy.op = cmpss; + dummy.imm = inst->imm; /* only the cmp forms set imm */ + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case minps: + dummy.op = minss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case maxps: + dummy.op = maxss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case addps: + dummy.op = addss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case subps: + dummy.op = subss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case mulps: + dummy.op = mulss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] =
__fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case divps: + dummy.op = divss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case sqrtps: + dummy.op = sqrtss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtdq2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttps2dq: + dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2dq: + dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpi2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { /* two parts only from here on */ + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttps2pi: + dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2pi: + dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cmppd: + dummy.op = cmpsd; + dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t
*)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case minpd: + dummy.op = minsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case maxpd: + dummy.op = maxsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case addpd: + dummy.op = addsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case subpd: + dummy.op = subsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case mulpd: + dummy.op = mulsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case divpd: + dummy.op = divsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case sqrtpd: + dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpi2pd: + case cvtdq2pd: + dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttpd2pi: + case cvttpd2dq: + dummy.op =
cvttsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpd2pi: + case cvtpd2dq: + dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2pd: + dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpd2ps: + dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + } +} + +/* + * Store the result value from *info in the destination of the scalar + * SSE instruction specified by *inst. If no result is given but the + * exception is underflow or overflow, supply the default trapped result. + * + * This routine does not work if the instruction specified by *inst + * is not a scalar instruction.
+ */ +void +__fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, + fex_info_t *info) +{ + int i; /* i, l, d, f: the result converted to the destination's type */ + long long l; + float f, fscl; /* fscl, dscl: scale factors for the wrapped result */ + double d, dscl; + + /* for compares that write eflags, just set the flags + to indicate "unordered" */ + if (inst->op == ucomiss || inst->op == comiss || + inst->op == ucomisd || inst->op == comisd) { + uap->uc_mcontext.gregs[REG_PS] |= 0x45; /* bits 0, 2 and 6 of the saved flags word (CF, PF, ZF in the EFLAGS layout) */ + return; + } + + /* if info doesn't specify a result value, try to generate + the default trapped result */ + if (info->res.type == fex_nodata) { + /* set scale factors for exponent wrapping */ + switch (e) { + case fex_overflow: + fscl = 1.262177448e-29f; /* 2^-96; applied twice below */ + dscl = 6.441148769597133308e-232; /* 2^-768; applied twice below */ + break; + + case fex_underflow: + fscl = 7.922816251e+28f; /* 2^96; applied twice below */ + dscl = 1.552518092300708935e+231; /* 2^768; applied twice below */ + break; + + default: + (void) __fex_get_sse_op(uap, inst, info); /* any other exception: recompute the untrapped result into *info */ + if (info->res.type == fex_nodata) + return; + goto stuff; + } + + /* generate the wrapped result */ + if (inst->op == cvtsd2ss) { + info->op1.type = fex_double; + info->op1.val.d = inst->op2->d[0]; + info->op2.type = fex_nodata; + info->res.type = fex_float; + info->res.val.f = (float)(fscl * (fscl * + info->op1.val.d)); + } else if ((int)inst->op & DOUBLE) { + info->op1.type = fex_double; + info->op1.val.d = inst->op1->d[0]; + info->op2.type = fex_double; + info->op2.val.d = inst->op2->d[0]; + info->res.type = fex_double; + switch (inst->op) { + case addsd: + info->res.val.d = dscl * (dscl * + info->op1.val.d + dscl * info->op2.val.d); + break; + + case subsd: + info->res.val.d = dscl * (dscl * + info->op1.val.d - dscl * info->op2.val.d); + break; + + case mulsd: + info->res.val.d = (dscl * info->op1.val.d) * + (dscl * info->op2.val.d); + break; + + case divsd: + info->res.val.d = (dscl * info->op1.val.d) / + (info->op2.val.d / dscl); + break; + + default: + return; /* no wrapped result for other instructions; nothing is stored */ + } + } else { + info->op1.type = fex_float; + info->op1.val.f = inst->op1->f[0]; + info->op2.type = fex_float; +
info->op2.val.f = inst->op2->f[0]; + info->res.type = fex_float; + switch (inst->op) { + case addss: + info->res.val.f = fscl * (fscl * + info->op1.val.f + fscl * info->op2.val.f); + break; + + case subss: + info->res.val.f = fscl * (fscl * + info->op1.val.f - fscl * info->op2.val.f); + break; + + case mulss: + info->res.val.f = (fscl * info->op1.val.f) * + (fscl * info->op2.val.f); + break; + + case divss: + info->res.val.f = (fscl * info->op1.val.f) / + (info->op2.val.f / fscl); + break; + + default: + return; /* no wrapped result for other instructions; nothing is stored */ + } + } + } + + /* put the result in the destination */ +stuff: + if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si + || inst->op == cvttsd2si || inst->op == cvtsd2si) { + switch (info->res.type) { /* NOTE(review): no default case -- i is stored as-is if res.type is none of the five listed */ + case fex_int: + i = info->res.val.i; + break; + + case fex_llong: + i = info->res.val.l; + break; + + case fex_float: + i = info->res.val.f; + break; + + case fex_double: + i = info->res.val.d; + break; + + case fex_ldouble: + i = info->res.val.q; + break; + } + inst->op1->i[0] = i; /* destination holds a 32-bit integer */ + } else if (inst->op == cmpsd || inst->op == cvttss2siq || + inst->op == cvtss2siq || inst->op == cvttsd2siq || + inst->op == cvtsd2siq) { + switch (info->res.type) { + case fex_int: + l = info->res.val.i; + break; + + case fex_llong: + l = info->res.val.l; + break; + + case fex_float: + l = info->res.val.f; + break; + + case fex_double: + l = info->res.val.d; + break; + + case fex_ldouble: + l = info->res.val.q; + break; + } + inst->op1->l[0] = l; /* destination holds a 64-bit integer */ + } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || + inst->op == cvtss2sd) { + switch (info->res.type) { + case fex_int: + d = info->res.val.i; + break; + + case fex_llong: + d = info->res.val.l; + break; + + case fex_float: + d = info->res.val.f; + break; + + case fex_double: + d = info->res.val.d; + break; + + case fex_ldouble: + d = info->res.val.q; + break; + } + inst->op1->d[0] = d; /* destination holds a double */ + } else { + switch (info->res.type) { + case fex_int: + f = info->res.val.i; + break; + + case fex_llong: + f =
info->res.val.l; + break; + + case fex_float: + f = info->res.val.f; + break; + + case fex_double: + f = info->res.val.d; + break; + + case fex_ldouble: + f = info->res.val.q; + break; + } + inst->op1->f[0] = f; /* everything else: destination holds a float */ + } +} + +/* + * Store the results from a SIMD instruction. For each i, store + * the result value from info[i] in the i-th part of the destination + * of the SIMD SSE instruction specified by *inst. If no result + * is given but the exception indicated by e[i] is underflow or + * overflow, supply the default trapped result. + * + * This routine does not work if the instruction specified by *inst + * is not a SIMD instruction. + */ +void +__fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, + fex_info_t *info) +{ + sseinst_t dummy; + int i; + + /* store each part */ + switch (inst->op) { + case cmpps: + dummy.op = cmpss; + dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case minps: + dummy.op = minss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case maxps: + dummy.op = maxss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case addps: + dummy.op = addss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case subps: + dummy.op = subss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case mulps: +
dummy.op = mulss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case divps: + dummy.op = divss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case sqrtps: + dummy.op = sqrtss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtdq2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttps2dq: + dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtps2dq: + dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpi2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttps2pi: + dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtps2pi: + dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + 
__fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cmppd: + dummy.op = cmpsd; + dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case minpd: + dummy.op = minsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case maxpd: + dummy.op = maxsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case addpd: + dummy.op = addsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case subpd: + dummy.op = subsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case mulpd: + dummy.op = mulsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case divpd: + dummy.op = divsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case sqrtpd: + dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpi2pd: + case cvtdq2pd: + dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = 
(sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttpd2pi: + case cvttpd2dq: + dummy.op = cvttsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* for cvttpd2dq, zero the high 64 bits of the destination */ + if (inst->op == cvttpd2dq) + inst->op1->l[1] = 0ll; + break; + + case cvtpd2pi: + case cvtpd2dq: + dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* for cvtpd2dq, zero the high 64 bits of the destination */ + if (inst->op == cvtpd2dq) + inst->op1->l[1] = 0ll; + break; + + case cvtps2pd: + dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpd2ps: + dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* zero the high 64 bits of the destination */ + inst->op1->l[1] = 0ll; + } +} diff --git a/usr/src/libm/src/m9x/__fex_sym.c b/usr/src/libm/src/m9x/__fex_sym.c new file mode 100644 index 0000000..7942493 --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_sym.c @@ -0,0 +1,306 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_sym.c 1.7 06/01/31 SMI" + +#include "fenv_synonyms.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__sparcv9) || defined(__amd64) + +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_BIND ELF64_ST_BIND +#define ELF_ST_TYPE ELF64_ST_TYPE + +#else + +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_BIND ELF32_ST_BIND +#define ELF_ST_TYPE ELF32_ST_TYPE + +#endif /* __sparcv9 */ + +/* semi-permanent data established by __fex_sym_init */ +static prmap_t *pm = NULL; /* prmap_t array */ +static int npm = 0; /* number of entries in pm */ + +/* transient data modified by __fex_sym */ +static prmap_t *lpm = NULL; /* prmap_t found in last call */ +static Elf_Phdr *ph = NULL; /* program header array */ +static int phsize = 0; /* allocated size of ph, in bytes */ +static int nph; /* number of entries in ph */ +static char *stbuf = NULL; /* symbol and string table buffer */ +static int stbufsize = 0; /* allocated size of stbuf, in bytes */ +static int stoffset; /* offset of string table in stbuf */ +static int nsyms; /* number of symbols in stbuf */ + +/* get a current prmap_t list (must call this before each stack trace); on any failure pm is left NULL and npm 0, so __fex_sym finds no mapping and returns NULL */ +void +__fex_sym_init() +{ + struct stat statbuf; + long n; + int i; + +
/* clear out the previous prmap_t list (lpm points into that list, so it is reset as well) */ + if (pm != NULL) + free(pm); + pm = lpm = NULL; + npm = 0; + + /* get the current prmap_t list */ + if (stat("/proc/self/map", &statbuf) < 0 || statbuf.st_size <= 0 || + (pm = (prmap_t*)malloc(statbuf.st_size)) == NULL) + return; + if ((i = open("/proc/self/map", O_RDONLY)) < 0) + { + free(pm); + pm = NULL; + return; + } + n = read(i, pm, statbuf.st_size); + close(i); + if (n != statbuf.st_size) /* a short read discards the whole list */ + { + free(pm); + pm = NULL; + } + else + npm = (int) (n / sizeof(prmap_t)); +} + +/* read ELF program headers and symbols; return -1 on error, 0 otherwise. On success ph/nph describe the program headers and stbuf holds the symbol table (nsyms entries) followed at stoffset by its string table */ +static int +__fex_read_syms(int fd) +{ + Elf_Ehdr h; + Elf_Shdr *sh; + int i, size; + + /* read the ELF header */ + if (read(fd, &h, sizeof(h)) != sizeof(h)) + return -1; + if (h.e_ident[EI_MAG0] != ELFMAG0 || + h.e_ident[EI_MAG1] != ELFMAG1 || + h.e_ident[EI_MAG2] != ELFMAG2 || + h.e_ident[EI_MAG3] != ELFMAG3 || + h.e_phentsize != sizeof(Elf_Phdr) || + h.e_shentsize != sizeof(Elf_Shdr)) + return -1; + + /* get space for the program headers; the buffer only grows and is reused across calls */ + size = h.e_phnum * h.e_phentsize; + if (size > phsize) + { + if (ph) + free(ph); + phsize = nph = 0; + if ((ph = (Elf_Phdr*)malloc(size)) == NULL) + return -1; + phsize = size; + } + + /* read the program headers */ + if (lseek(fd, h.e_phoff, SEEK_SET) != h.e_phoff || + read(fd, ph, size) != (ssize_t)size) + { + nph = 0; + return -1; + } + nph = h.e_phnum; + + /* read the section headers (sh is a temporary, freed on every exit path below) */ + size = h.e_shnum * h.e_shentsize; + if ((sh = (Elf_Shdr*)malloc(size)) == NULL) + return -1; + if (lseek(fd, h.e_shoff, SEEK_SET) != h.e_shoff || + read(fd, sh, size) != (ssize_t)size) + { + free(sh); + return -1; + } + + /* find the symtab section header; its sh_link must name a nonempty string table section */ + for (i = 0; i < h.e_shnum; i++) + { + if (sh[i].sh_type == SHT_SYMTAB) + break; /* assume there is only one */ + } + if (i == h.e_shnum || sh[i].sh_size == 0 || + sh[i].sh_entsize != sizeof(Elf_Sym) || + sh[i].sh_link < 1 || sh[i].sh_link >= h.e_shnum || + sh[sh[i].sh_link].sh_type !=
SHT_STRTAB || + sh[sh[i].sh_link].sh_size == 0) + { + free(sh); + return -1; + } + + /* get space for the symbol and string tables; like ph, this buffer only grows and is reused */ + size = (int) (sh[i].sh_size + sh[sh[i].sh_link].sh_size); + if (size > stbufsize) + { + if (stbuf) + free(stbuf); + stbufsize = nsyms = 0; + if ((stbuf = (char*)malloc(size)) == NULL) + { + free(sh); + return -1; + } + stbufsize = size; + } + + /* read the symbol table, then the string table directly after it in stbuf */ + if (lseek(fd, sh[i].sh_offset, SEEK_SET) != sh[i].sh_offset || + read(fd, stbuf, sh[i].sh_size) != sh[i].sh_size || + lseek(fd, sh[sh[i].sh_link].sh_offset, SEEK_SET) != + sh[sh[i].sh_link].sh_offset || + read(fd, stbuf + sh[i].sh_size, sh[sh[i].sh_link].sh_size) != + sh[sh[i].sh_link].sh_size) + { + free(sh); + return -1; + } + nsyms = (int) (sh[i].sh_size / sh[i].sh_entsize); + stoffset = (int) sh[i].sh_size; + + free(sh); + return 0; +} + +/* find the symbol corresponding to the given text address; + return NULL on error, symbol address otherwise; on success *name points into stbuf, so it is only valid until a later call reloads the tables */ +char * +__fex_sym(char *a, char **name) +{ + Elf_Sym *s; + unsigned long fo, va, value; + int fd, i, j, nm; + char fname[PRMAPSZ+20]; + + /* see if the last prmap_t found contains the indicated address (if so the tables already loaded are reused) */ + if (lpm) + { + if (a >= (char*)lpm->pr_vaddr && a < (char*)lpm->pr_vaddr + + lpm->pr_size) + goto cont; + } + + /* look for a prmap_t that contains the indicated address */ + for (i = 0; i < npm; i++) + { + if (a >= (char*)pm[i].pr_vaddr && a < (char*)pm[i].pr_vaddr + + pm[i].pr_size) + break; + } + if (i == npm) + return NULL; + + /* get an open file descriptor for the mapped object */ + if (pm[i].pr_mapname[0] == '\0') + return NULL; + strcpy(fname, "/proc/self/object/"); + strncat(fname, pm[i].pr_mapname, PRMAPSZ); /* 18-byte prefix, at most PRMAPSZ more bytes and the NUL fit in fname */ + fd = open(fname, O_RDONLY); + if (fd < 0) + return NULL; + + /* read the program headers and symbols; lpm stays NULL if this fails so stale tables are never used */ + lpm = NULL; + j = __fex_read_syms(fd); + close(fd); + if (j < 0) + return NULL; + lpm = &pm[i]; + +cont: + /* compute the file offset corresponding to the mapped address */ + fo
= (a - (char*)lpm->pr_vaddr) + lpm->pr_offset; + + /* find the program header containing the file offset */ + for (i = 0; i < nph; i++) + { + if (ph[i].p_type == PT_LOAD && fo >= ph[i].p_offset && + fo < ph[i].p_offset + ph[i].p_filesz) + break; + } + if (i == nph) + return NULL; + + /* compute the virtual address corresponding to the file offset */ + va = (fo - ph[i].p_offset) + ph[i].p_vaddr; + + /* find the symbol in this segment with the highest value + less than or equal to the virtual address */ + s = (Elf_Sym*)stbuf; + value = nm = 0; /* nm == 0 means no candidate has been found */ + for (j = 0; j < nsyms; j++) + { + if (s[j].st_name == 0 || s[j].st_shndx == SHN_UNDEF || + (ELF_ST_BIND(s[j].st_info) != STB_LOCAL && + ELF_ST_BIND(s[j].st_info) != STB_GLOBAL && + ELF_ST_BIND(s[j].st_info) != STB_WEAK) || + (ELF_ST_TYPE(s[j].st_info) != STT_NOTYPE && + ELF_ST_TYPE(s[j].st_info) != STT_OBJECT && + ELF_ST_TYPE(s[j].st_info) != STT_FUNC)) + { + continue; + } + + if (s[j].st_value < ph[i].p_vaddr || s[j].st_value >= ph[i].p_vaddr + + ph[i].p_memsz) + { + continue; + } + + if (s[j].st_value < value || s[j].st_value > va) + continue; + + value = s[j].st_value; + nm = s[j].st_name; + } + if (nm == 0) + return NULL; + + /* pass back the name and return the mapped address of the symbol. NOTE(review): nm is not checked against the size of the string table, and the table is not forced to end in a NUL */ + *name = stbuf + stoffset + nm; + fo = (value - ph[i].p_vaddr) + ph[i].p_offset; + return (char*)lpm->pr_vaddr + (fo - lpm->pr_offset); +} diff --git a/usr/src/libm/src/m9x/fdim.c b/usr/src/libm/src/m9x/fdim.c new file mode 100644 index 0000000..5f888e1 --- /dev/null +++ b/usr/src/libm/src/m9x/fdim.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fdim.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fdim = __fdim +#endif + +/* + * fdim(x,y) returns x - y if x > y, +0 if x <= y, and NaN if x and + * y are unordered. + * + * fdim(x,y) raises overflow or inexact if x > y and x - y overflows + * or is inexact. It raises invalid if either operand is a signaling + * NaN. Otherwise, it raises no exceptions. + */ + +#include "libm.h" /* for islessequal macro */ + +double +__fdim(double x, double y) { +#if defined(COMPARISON_MACRO_BUG) + if (x == x && y == y && x <= y) { /* } */ +#else + if (islessequal(x, y)) { +#endif + x = 0.0; + y = -x; /* y = -0, so the result below is (+0) - (-0), which is a positive zero in every rounding mode */ + } + return (x - y); /* unordered operands skip the branch above, so a NaN propagates through the subtraction */ +} diff --git a/usr/src/libm/src/m9x/fdimf.c b/usr/src/libm/src/m9x/fdimf.c new file mode 100644 index 0000000..84f56e5 --- /dev/null +++ b/usr/src/libm/src/m9x/fdimf.c @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fdimf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fdimf = __fdimf +#endif + +#include "libm.h" /* for islessequal macro */ + +float +__fdimf(float x, float y) { /* returns x - y if x > y, positive zero if x <= y, and NaN if x and y are unordered */ + /* + * On SPARC v8plus/v9, this could be implemented as follows + * (assuming %f0 = x, %f1 = y, return value left in %f0): + * + * fcmps %fcc0,%f0,%f1 + * st %g0,[scratch] ! use fzero instead of st/ld + * ld [scratch],%f2 ! if VIS is available + * fnegs %f2,%f3 + * fmovsle %fcc0,%f2,%f0 + * fmovsle %fcc0,%f3,%f1 + * fsubs %f0,%f1,%f0 + */ +#if defined(COMPARISON_MACRO_BUG) + if (x == x && y == y && x <= y) { /* } */ +#else + if (islessequal(x, y)) { +#endif + x = 0.0f; + y = -x; /* y = -0, so the result below is (+0) - (-0), which is a positive zero in every rounding mode */ + } + return (x - y); /* unordered operands skip the branch above, so a NaN propagates through the subtraction */ +} diff --git a/usr/src/libm/src/m9x/fdiml.c b/usr/src/libm/src/m9x/fdiml.c new file mode 100644 index 0000000..3fffdc4 --- /dev/null +++ b/usr/src/libm/src/m9x/fdiml.c @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fdiml.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fdiml = __fdiml +#endif + +#include "libm.h" /* for islessequal macro */ + +long double +__fdiml(long double x, long double y) { /* returns x - y if x > y, positive zero if x <= y, and NaN if x and y are unordered */ +#if defined(COMPARISON_MACRO_BUG) + if (x == x && y == y && x <= y) { /* } */ +#else + if (islessequal(x, y)) { +#endif + x = 0.0l; + y = -x; /* y = -0, so the result below is (+0) - (-0), which is a positive zero in every rounding mode */ + } + return (x - y); /* unordered operands skip the branch above, so a NaN propagates through the subtraction */ +} diff --git a/usr/src/libm/src/m9x/feexcept.c b/usr/src/libm/src/m9x/feexcept.c new file mode 100644 index 0000000..c4979f1 --- /dev/null +++ b/usr/src/libm/src/m9x/feexcept.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)feexcept.c 1.8 06/01/31 SMI" + +#pragma weak feclearexcept = __feclearexcept +#pragma weak feraiseexcept = __feraiseexcept +#pragma weak fetestexcept = __fetestexcept +#pragma weak fegetexceptflag = __fegetexceptflag +#pragma weak fesetexceptflag = __fesetexceptflag + +#pragma weak feclearexcept96 = __feclearexcept +#pragma weak feraiseexcept96 = __feraiseexcept +#pragma weak fetestexcept96 = __fetestexcept +#pragma weak fegetexceptflag96 = __fegetexceptflag +#pragma weak fesetexceptflag96 = __fesetexceptflag + +#include "fenv_synonyms.h" +#include +#include +#include +#include +#include "fex_handler.h" + +int feclearexcept(int e) /* clear the flags named in e from the exception flags held in fsr and write fsr back; always returns 0 */ +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, __fenv_get_ex(fsr) & ~e); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); /* NOTE(review): presumably refreshes the trap enables used for logging; confirm in the fex handler code */ + return 0; +} + +/* +* note - __fex_hdlr depends on fetestexcept following feraiseexcept +*/ +int feraiseexcept(int e) /* raise each exception named in e, tested in the order invalid, divide-by-zero, overflow, underflow, inexact; always returns 0 */ +{ + volatile double t; /* volatile so the arithmetic below is not optimized away */ + unsigned long fsr; + + if (e & FE_INVALID) { + t = 0.0; + t /= 0.0; /* 0/0 */ + } + if (e & FE_DIVBYZERO) { + t = 1.0e300; + t /= 0.0; /* nonzero/0 */ + } + if (e & FE_OVERFLOW) { + /* if overflow is not trapped, avoid raising inexact: set the flag directly instead of doing arithmetic */ + __fenv_getfsr(&fsr); + if (!(__fenv_get_te(fsr) & (1 << fp_trap_overflow))) { + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_OVERFLOW); + __fenv_setfsr(&fsr); + } + else { + t = 1.0e300; + t *= 1.0e300; /* product exceeds the double range */ + } + } + if (e & FE_UNDERFLOW) { + /* if underflow is not trapped, avoid raising inexact: set the flag directly instead of doing arithmetic */ + __fenv_getfsr(&fsr); + if (!(__fenv_get_te(fsr) & (1 << fp_trap_underflow))) { + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_UNDERFLOW); + __fenv_setfsr(&fsr); + } + else { + t = 1.0e-307; + t -= 1.001e-307; /* difference is subnormal */ + } + } + if (e & FE_INEXACT) { + t = 1.0e300; + t += 1.0e-307; /* sum is not exactly representable */ + } + return 0; +} + +int fetestexcept(int e) /* return the flags named in e that are currently set */ +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return (int)__fenv_get_ex(fsr) & e; +} + +int fegetexceptflag(fexcept_t *p, int e) /* store in *p the current state of the flags named in e; always returns 0 */ +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + *p =
(int)__fenv_get_ex(fsr) & e; + return 0; +} + +int fesetexceptflag(const fexcept_t *p, int e) /* set the flags named in e to their state in *p, leaving the other flags unchanged; always returns 0 */ +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, (((int)__fenv_get_ex(fsr) & ~e) | (*p & e)) & + FE_ALL_EXCEPT); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); /* NOTE(review): presumably refreshes the trap enables used for logging; confirm in the fex handler code */ + return 0; +} diff --git a/usr/src/libm/src/m9x/fenv.c b/usr/src/libm/src/m9x/fenv.c new file mode 100644 index 0000000..0054871 --- /dev/null +++ b/usr/src/libm/src/m9x/fenv.c @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)fenv.c 1.9 06/01/31 SMI" + +#pragma weak fex_merge_flags = __fex_merge_flags + +#pragma weak feholdexcept = __feholdexcept +#pragma weak feupdateenv = __feupdateenv +#pragma weak fegetenv = __fegetenv +#pragma weak fesetenv = __fesetenv + +#pragma weak feholdexcept96 = __feholdexcept96 +#pragma weak feupdateenv96 = __feupdateenv +#pragma weak fegetenv96 = __fegetenv +#pragma weak fesetenv96 = __fesetenv + +#include "fenv_synonyms.h" +#include +#include +#include +#include "fex_handler.h" + +const fenv_t __fenv_dfl_env = { /* default environment: twelve handler entries, each FEX_NONSTOP with a null handler function, then the initial status word */ + { + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + }, +#ifdef __i386 + 0x13000000 +#else + 0 +#endif +}; + +int feholdexcept(fenv_t *p) /* save the environment in *p, clear all flags and select nonstop handling; returns the logical negation of the fex_set_handling result */ +{ + (void) fegetenv(p); + (void) feclearexcept(FE_ALL_EXCEPT); + return !fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL); +} + +int feholdexcept96(fenv_t *p) /* same steps as feholdexcept, but returns the fex_set_handling result unchanged */ +{ + (void) fegetenv(p); + (void) feclearexcept(FE_ALL_EXCEPT); + return fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL); +} + +int feupdateenv(const fenv_t *p) /* install *p, then re-raise the exceptions that were flagged beforehand; always returns 0 */ +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); /* capture the current flags before fesetenv replaces them */ + (void) fesetenv(p); + (void) feraiseexcept((int)__fenv_get_ex(fsr)); + return 0; +} + +int fegetenv(fenv_t *p) /* save the current handlers and fsr in *p; always returns 0 */ +{ + fex_getexcepthandler(&p->__handlers, FEX_ALL); + __fenv_getfsr(&p->__fsr); + return 0; +} + +int fesetenv(const fenv_t *p) /* install the fsr and then the handlers saved in *p; always returns 0 */ +{ + __fenv_setfsr(&p->__fsr); + fex_setexcepthandler(&p->__handlers, FEX_ALL); + return 0; +} + +void fex_merge_flags(const fenv_t *p) /* OR the exception flags recorded in p->__fsr into the current flags */ +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | __fenv_get_ex(p->__fsr)); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); /* NOTE(review): presumably refreshes the trap enables used for logging; confirm in the fex handler code */ +} diff --git
a/usr/src/libm/src/m9x/fenv_synonyms.h b/usr/src/libm/src/m9x/fenv_synonyms.h new file mode 100644 index 0000000..14b32fe --- /dev/null +++ b/usr/src/libm/src/m9x/fenv_synonyms.h @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fenv_synonyms.h 1.7 06/01/31 SMI" + +/* feexcept.c */ +#define feclearexcept __feclearexcept +#define feraiseexcept __feraiseexcept +#define fetestexcept __fetestexcept +#define fegetexceptflag __fegetexceptflag +#define fesetexceptflag __fesetexceptflag + +/* fenv.c */ +#define feholdexcept __feholdexcept +#define feholdexcept96 __feholdexcept96 +#define feupdateenv __feupdateenv +#define fegetenv __fegetenv +#define fesetenv __fesetenv +#define fex_merge_flags __fex_merge_flags + +#ifdef __i386 +/* feprec.c */ +#define fegetprec __fegetprec +#define fesetprec __fesetprec +#endif + +/* feround.c */ +#define fegetround __fegetround +#define fesetround __fesetround +#define fesetround96 __fesetround96 + +/* fex_handler.c */ +#define fex_get_handling __fex_get_handling +#define fex_set_handling __fex_set_handling +#define fex_getexcepthandler __fex_getexcepthandler +#define fex_setexcepthandler __fex_setexcepthandler + +/* fex_log.c */ +#define fex_get_log __fex_get_log +#define fex_set_log __fex_set_log +#define fex_get_log_depth __fex_get_log_depth +#define fex_set_log_depth __fex_set_log_depth +#define fex_log_entry __fex_log_entry + +/* libc, libthread */ +#define close _close +#define getcontext _getcontext +#define getpid _getpid +#define kill _kill +#define lseek _lseek +#define mutex_lock _mutex_lock +#define mutex_unlock _mutex_unlock +#define open _open +#define read _read +#define sigaction _sigaction +#define sigemptyset _sigemptyset +#define sigismember _sigismember +#define sigprocmask _sigprocmask +#define stat _stat +#define thr_getspecific _thr_getspecific +#define thr_keycreate _thr_keycreate +#define thr_main _thr_main +#define thr_setspecific _thr_setspecific +#define write _write + +/* ??? 
see V9 /usr/include/stdio.h */ +#ifdef __sparcv9 +#define fileno _fileno +#endif + +#ifdef __sparc +/* libm, libsunmath */ +#define fp_class __fp_class +#define fp_classf __fp_classf +#define sqrt __sqrt +#define sqrtf __sqrtf +#endif diff --git a/usr/src/libm/src/m9x/feprec.c b/usr/src/libm/src/m9x/feprec.c new file mode 100644 index 0000000..56a64e2 --- /dev/null +++ b/usr/src/libm/src/m9x/feprec.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)feprec.c 1.5 06/01/31 SMI" + +#pragma weak fegetprec = __fegetprec +#pragma weak fesetprec = __fesetprec + +#include "fenv_synonyms.h" +#include +#include +#include +#include "fex_handler.h" + +int fegetprec(void) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return __fenv_get_rp(fsr); +} + +int fesetprec(int r) +{ + unsigned long fsr; + + if (r != FE_FLTPREC && r != FE_DBLPREC && r != FE_LDBLPREC) + return 0; + __fenv_getfsr(&fsr); + __fenv_set_rp(fsr, r); + __fenv_setfsr(&fsr); + return 1; +} diff --git a/usr/src/libm/src/m9x/feround.c b/usr/src/libm/src/m9x/feround.c new file mode 100644 index 0000000..2f0bc99 --- /dev/null +++ b/usr/src/libm/src/m9x/feround.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)feround.c 1.9 06/01/31 SMI" + +#pragma weak fegetround = __fegetround +#pragma weak fesetround = __fesetround + +#pragma weak fegetround96 = __fegetround +#pragma weak fesetround96 = __fesetround96 + +#include "fenv_synonyms.h" +#include +#include +#include +#include "fex_handler.h" + +#if defined(__i386) && !defined(__amd64) +#include +#endif + +int fegetround(void) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return (int)__fenv_get_rd(fsr); +} + +int fesetround(int r) +{ + unsigned long fsr; + + if (r & ~3) + return -1; + __fenv_getfsr(&fsr); + __fenv_set_rd(fsr, r); + __fenv_setfsr(&fsr); +#if defined(__i386) && !defined(__amd64) + FLT_ROUNDS = (0x2D >> (r << 1)) & 3; /* 0->1, 1->3, 2->2, 3->0 */ +#endif + return 0; +} + +int fesetround96(int r) +{ + unsigned long fsr; + + if (r & ~3) + return 0; + __fenv_getfsr(&fsr); + __fenv_set_rd(fsr, r); + __fenv_setfsr(&fsr); +#if defined(__i386) && !defined(__amd64) + FLT_ROUNDS = (0x2D >> (r << 1)) & 3; /* 0->1, 1->3, 2->2, 3->0 */ +#endif + return 1; +} diff --git a/usr/src/libm/src/m9x/fex_handler.c b/usr/src/libm/src/m9x/fex_handler.c new file mode 100644 index 0000000..3491e1c --- /dev/null +++ b/usr/src/libm/src/m9x/fex_handler.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fex_handler.c 1.5 06/01/31 SMI" + +#pragma weak fex_get_handling = __fex_get_handling +#pragma weak fex_set_handling = __fex_set_handling +#pragma weak fex_getexcepthandler = __fex_getexcepthandler +#pragma weak fex_setexcepthandler = __fex_setexcepthandler + +#include "fenv_synonyms.h" +#include +#include +#include +#include "fex_handler.h" + +int fex_get_handling(int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + return thr_handlers[i].__mode; + return FEX_NOHANDLER; +} + +int fex_set_handling(int e, int mode, void (*handler)()) +{ + struct fex_handler_data *thr_handlers; + int i; + + if (e & ~((1 << FEX_NUM_EXC) - 1)) + return 0; + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) { + if (e & (1 << i)) { + thr_handlers[i].__mode = mode; + thr_handlers[i].__handler = handler; + } + } + __fex_update_te(); + return 1; +} + +void fex_getexcepthandler(fex_handler_t *buf, int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + (*buf)[i] = thr_handlers[i]; +} + +void fex_setexcepthandler(const fex_handler_t *buf, int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + thr_handlers[i] = (*buf)[i]; + __fex_update_te(); +} diff --git a/usr/src/libm/src/m9x/fex_handler.h b/usr/src/libm/src/m9x/fex_handler.h new file mode 100644 index 0000000..9f8c259 --- 
/dev/null +++ b/usr/src/libm/src/m9x/fex_handler.h @@ -0,0 +1,215 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fex_handler.h 1.8 06/01/31 SMI" + +/* the following enums must match the bit positions in fenv.h */ +enum fex_exception { + fex_inexact = 0, + fex_division = 1, + fex_underflow = 2, + fex_overflow = 3, + fex_inv_zdz = 4, + fex_inv_idi = 5, + fex_inv_isi = 6, + fex_inv_zmi = 7, + fex_inv_sqrt = 8, + fex_inv_snan = 9, + fex_inv_int = 10, + fex_inv_cmp = 11 +}; + + +/* auxiliary functions in __fex_hdlr.c */ +extern struct fex_handler_data *__fex_get_thr_handlers(void); +extern void __fex_update_te(void); + +/* auxiliary functions in __fex_sym.c */ +extern void __fex_sym_init(void); +extern char *__fex_sym(char *, char **); + +/* auxiliary functions in fex_log.c */ +extern void __fex_mklog(ucontext_t *, char *, int, enum fex_exception, + int, void *); + +/* system-dependent auxiliary functions */ +extern enum fex_exception __fex_get_invalid_type(siginfo_t *, ucontext_t *); +extern void __fex_get_op(siginfo_t *, ucontext_t *, fex_info_t *); +extern void __fex_st_result(siginfo_t *, ucontext_t *, fex_info_t *); + +/* inline templates and macros for accessing fp state */ +#ifdef __sparcv9 +#define __fenv_getfsr __fenv_getfsrx +#define __fenv_setfsr __fenv_setfsrx +#endif +extern void __fenv_getfsr(unsigned long *); +extern void __fenv_setfsr(const unsigned long *); + +#if defined(__sparc) + +#define __fenv_get_rd(X) ((X>>30)&0x3) +#define __fenv_set_rd(X,Y) X=(X&~0xc0000000ul)|((Y)<<30) + +#define __fenv_get_te(X) ((X>>23)&0x1f) +#define __fenv_set_te(X,Y) X=(X&~0x0f800000ul)|((Y)<<23) + +#define __fenv_get_ex(X) ((X>>5)&0x1f) +#define __fenv_set_ex(X,Y) X=(X&~0x000003e0ul)|((Y)<<5) + +#elif defined(__i386) + +extern void __fenv_getcwsw(unsigned int *); +extern void __fenv_setcwsw(const unsigned int *); + +extern void __fenv_getmxcsr(unsigned int *); +extern void __fenv_setmxcsr(const unsigned int *); + +#define __fenv_get_rd(X) ((X>>26)&3) +#define __fenv_set_rd(X,Y) X=(X&~0x0c000000)|((Y)<<26) + +#define __fenv_get_rp(X) ((X>>24)&3) +#define 
__fenv_set_rp(X,Y) X=(X&~0x03000000)|((Y)<<24) + +#define __fenv_get_te(X) ((X>>16)&0x3d) +#define __fenv_set_te(X,Y) X=(X&~0x003d0000)|((Y)<<16) + +#define __fenv_get_ex(X) (X&0x3d) +#define __fenv_set_ex(X,Y) X=(X&~0x0000003d)|(Y) + +/* + * These macros define some useful distinctions between various + * SSE instructions. In some cases, distinctions are made for + * the purpose of simplifying the decoding of instructions, while + * in other cases, they are made for the purpose of simplying the + * emulation. Note that these values serve as bit flags within + * the enum values in sseinst_t. + */ +#define DOUBLE 0x100 +#define SIMD 0x080 +#define INTREG 0x040 + +typedef union { + double d[2]; + long long l[2]; + float f[4]; + int i[4]; +} sseoperand_t; + +/* structure to hold a decoded SSE instruction */ +typedef struct { + enum { + /* single precision scalar instructions */ + cmpss = 0, + minss = 1, + maxss = 2, + addss = 3, + subss = 4, + mulss = 5, + divss = 6, + sqrtss = 7, + ucomiss = 16, + comiss = 17, + cvtss2sd = 32, + cvtsi2ss = INTREG + 0, + cvttss2si = INTREG + 1, + cvtss2si = INTREG + 2, + cvtsi2ssq = INTREG + 8, + cvttss2siq = INTREG + 9, + cvtss2siq = INTREG + 10, + + /* single precision SIMD instructions */ + cmpps = SIMD + 0, + minps = SIMD + 1, + maxps = SIMD + 2, + addps = SIMD + 3, + subps = SIMD + 4, + mulps = SIMD + 5, + divps = SIMD + 6, + sqrtps = SIMD + 7, + cvtps2pd = SIMD + 32, + cvtdq2ps = SIMD + 34, + cvttps2dq = SIMD + 35, + cvtps2dq = SIMD + 36, + cvtpi2ps = SIMD + INTREG + 0, + cvttps2pi = SIMD + INTREG + 1, + cvtps2pi = SIMD + INTREG + 2, + + /* double precision scalar instructions */ + cmpsd = DOUBLE + 0, + minsd = DOUBLE + 1, + maxsd = DOUBLE + 2, + addsd = DOUBLE + 3, + subsd = DOUBLE + 4, + mulsd = DOUBLE + 5, + divsd = DOUBLE + 6, + sqrtsd = DOUBLE + 7, + ucomisd = DOUBLE + 16, + comisd = DOUBLE + 17, + cvtsd2ss = DOUBLE + 32, + cvtsi2sd = DOUBLE + INTREG + 0, + cvttsd2si = DOUBLE + INTREG + 1, + cvtsd2si = DOUBLE + INTREG + 2, 
+ cvtsi2sdq = DOUBLE + INTREG + 8, + cvttsd2siq = DOUBLE + INTREG + 9, + cvtsd2siq = DOUBLE + INTREG + 10, + + /* double precision SIMD instructions */ + cmppd = DOUBLE + SIMD + 0, + minpd = DOUBLE + SIMD + 1, + maxpd = DOUBLE + SIMD + 2, + addpd = DOUBLE + SIMD + 3, + subpd = DOUBLE + SIMD + 4, + mulpd = DOUBLE + SIMD + 5, + divpd = DOUBLE + SIMD + 6, + sqrtpd = DOUBLE + SIMD + 7, + cvtpd2ps = DOUBLE + SIMD + 32, + cvtdq2pd = DOUBLE + SIMD + 34, + cvttpd2dq = DOUBLE + SIMD + 35, + cvtpd2dq = DOUBLE + SIMD + 36, + cvtpi2pd = DOUBLE + SIMD + INTREG + 0, + cvttpd2pi = DOUBLE + SIMD + INTREG + 1, + cvtpd2pi = DOUBLE + SIMD + INTREG + 2, + } op; + int imm; + sseoperand_t *op1, *op2; +} sseinst_t; + +/* x86-specific auxiliary functions */ +extern int *__fex_accrued(void); +extern void __fex_get_x86_exc(siginfo_t *, ucontext_t *); +extern int __fex_parse_sse(ucontext_t *, sseinst_t *); +extern enum fex_exception __fex_get_sse_op(ucontext_t *, sseinst_t *, + fex_info_t *); +extern void __fex_get_simd_op(ucontext_t *, sseinst_t *, + enum fex_exception *, fex_info_t *); +extern void __fex_st_sse_result(ucontext_t *, sseinst_t *, + enum fex_exception, fex_info_t *); +extern void __fex_st_simd_result(ucontext_t *, sseinst_t *, + enum fex_exception *, fex_info_t *); + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fex_log.c b/usr/src/libm/src/m9x/fex_log.c new file mode 100644 index 0000000..62a0939 --- /dev/null +++ b/usr/src/libm/src/m9x/fex_log.c @@ -0,0 +1,398 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)fex_log.c 1.13 06/01/31 SMI"
+
+#pragma weak fex_get_log = __fex_get_log
+#pragma weak fex_set_log = __fex_set_log
+#pragma weak fex_get_log_depth = __fex_get_log_depth
+#pragma weak fex_set_log_depth = __fex_set_log_depth
+#pragma weak fex_log_entry = __fex_log_entry
+/*
+ * NOTE(review): the <...> header names on the bare #include lines below
+ * appear to have been lost from this copy of the patch; restore them
+ * from upstream.
+ */
+#include "fenv_synonyms.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "fex_handler.h"
+/*
+ * The stack-walking code below indexes gregs[] with PC; fall back to
+ * REG_PC when the system headers do not provide PC.
+ */
+#if !defined(PC)
+#if defined(REG_PC)
+#define PC REG_PC
+#else
+#error Neither PC nor REG_PC is defined!
+#endif
+#endif
+/*
+ * Logging state.  The accessors below and the logging routines in this
+ * file touch these only while holding log_lock.
+ *   log_fp    - stream whose descriptor receives log entries; NULL
+ *               means logging is disabled
+ *   log_depth - upper bound on the stack frames recorded and printed
+ *               per entry (initially 100)
+ */
+static FILE *log_fp = NULL;
+static mutex_t log_lock = DEFAULTMUTEX;
+static int log_depth = 100;
+/*
+ * Return the current log stream (NULL when logging is disabled).
+ */
+FILE *fex_get_log(void)
+{
+	FILE *fp;
+
+	mutex_lock(&log_lock);
+	fp = log_fp;
+	mutex_unlock(&log_lock);
+	return fp;
+}
+/*
+ * Make fp the log stream (NULL is stored like any other value) and then
+ * call __fex_update_te() -- presumably so the trap enables reflect
+ * whether logging is on; confirm against __fex_hdlr.c.  Always
+ * returns 1.
+ */
+int fex_set_log(FILE *fp)
+{
+	mutex_lock(&log_lock);
+	log_fp = fp;
+	mutex_unlock(&log_lock);
+	__fex_update_te();	/* called after the lock is dropped */
+	return 1;
+}
+/*
+ * Return the current stack trace depth limit.
+ */
+int fex_get_log_depth(void)
+{
+	int d;
+
+	mutex_lock(&log_lock);
+	d = log_depth;
+	mutex_unlock(&log_lock);
+	return d;
+}
+/*
+ * Set the stack trace depth limit to d.  Negative values are rejected
+ * with a return of 0 and leave the limit unchanged; otherwise the limit
+ * is stored and 1 is returned.
+ */
+int fex_set_log_depth(int d)
+{
+	if (d < 0)
+		return 0;
+	mutex_lock(&log_lock);
+	log_depth = d;
+	mutex_unlock(&log_lock);
+	return 1;
+}
+/*
+ * Singly linked list of the (address, code, call stack) combinations
+ * that have already been logged.  Nodes are allocated with malloc() in
+ * check_exc_list() and appended at the tail; nothing in the code shown
+ * here ever removes or frees them.
+ */
+static struct exc_list {
+	struct exc_list *next;
+	char *addr;
+	unsigned long code;
+	int nstack;
+	char *stack[1]; /* actual length is max(1,nstack) */
+} *list = NULL;
+/*
+ * FRAMEP converts a saved frame/stack pointer value into a struct frame
+ * pointer.  On __sparcv9 it adds 2047 when the low bit of the value is
+ * set (presumably the SPARC V9 stack bias -- confirm); elsewhere it is
+ * a plain cast.
+ */
+#ifdef __sparcv9
+#define FRAMEP(X) (struct frame *)((char*)(X)+(((long)(X)&1)?2047:0))
+#else
+#define FRAMEP(X) (struct frame *)(X)
+#endif
+/*
+ * PDIG is the zero-padded hex digit count used when printing addresses:
+ * 16 under _LP64, 8 otherwise.
+ */
+#ifdef _LP64
+#define PDIG "16"
+#else
+#define PDIG "8"
+#endif
+
+/*
+ * Look for an exc_list entry matching (addr, code) and the call stack
+ * described by stk (innermost return address) and fp (next frame).
+ * Return 1 if one is found; otherwise append a new entry describing
+ * this stack and return 0.
+ *
+ * Reads log_depth and updates the list without taking log_lock itself;
+ * both callers visible in this file invoke it with log_lock held.
+ * If malloc() fails nothing is recorded and 0 is still returned, so
+ * the caller logs the event (and a later identical event is logged
+ * again).
+ */
+static int check_exc_list(char *addr, unsigned long code, char *stk,
+    struct frame *fp)
+{
+	struct exc_list *l, *ll;
+	struct frame *f;
+	int i, n;
+
+	if (list) {
+		/* ll trails l, so after a full pass it is the last node */
+		for (l = list; l; ll = l, l = l->next) {
+			if (l->addr != addr || l->code != code)
+				continue;
+			/* no stack to compare: addr+code alone is a match */
+			if (log_depth < 1 || l->nstack < 1)
+				return 1;
+			if (l->stack[0] != stk)
+				continue;
+			n = 1;	/* n doubles as "still matching" flag here */
+			for (i = 1, f = fp; i < log_depth && i < l->nstack &&
+			    f && f->fr_savpc; i++, f = FRAMEP(f->fr_savfp))
+				if (l->stack[i] != (char *)f->fr_savpc) {
+					n = 0;
+					break;
+				}
+			if (n)
+				return 1;
+		}
+	}
+
+	/* create a new exc_list structure and tack it on the list */
+	/* first count the entries to store: stk plus up to log_depth-1 frames */
+	for (n = 1, f = fp; n < log_depth && f && f->fr_savpc;
+	    n++, f = FRAMEP(f->fr_savfp)) ;
+	if ((l = (struct exc_list *)malloc(sizeof(struct exc_list) +
+	    (n - 1) * sizeof(char *))) != NULL) {
+		l->next = NULL;
+		l->addr = addr;
+		l->code = code;
+		l->nstack = ((log_depth < 1)? 0 : n);
+		l->stack[0] = stk;
+		for (i = 1; i < n; i++) {
+			l->stack[i] = (char *)fp->fr_savpc;
+			fp = FRAMEP(fp->fr_savfp);
+		}
+		if (list)
+			ll->next = l;	/* ll was set by the search loop above */
+		else
+			list = l;
+	}
+	return 0;
+}
+
+/*
+* Warning: cleverness ahead
+*
+* In the following code, the use of sprintf+write rather than fprintf
+* to send output to the log file is intentional. The reason is that
+* fprintf is not async-signal-safe. "But," you protest, "SIGFPE is
+* not an asynchronous signal! It's always handled by the same thread
+* that executed the fpop that provoked it." That's true, but a prob-
+* lem arises because (i) base conversion in fprintf can cause a fp
+* exception and (ii) my signal handler acquires a mutex lock before
+* sending output to the log file (so that outputs for entries from
+* different threads aren't interspersed). Therefore, if the code
+* were to use fprintf, a deadlock could occur as follows:
+*
+*	Thread A			Thread B
+*
+*	Incurs a fp exception,		Calls fprintf,
+*	acquires log_lock		acquires file rmutex lock
+*
+*	Calls fprintf,			Incurs a fp exception,
+*	waits for file rmutex lock	waits for log_lock
+*
+* (I could just verify that fprintf doesn't hold the rmutex lock while
+* it's doing the base conversion, but since efficiency is of little
+* concern here, I opted for the safe and dumb route.)
+*/
+/*
+ * Write a stack trace to descriptor fd, one address per line, starting
+ * with addr and then following the saved PCs reachable from fp.  When
+ * __fex_sym() resolves an address the symbol name is appended.  The
+ * walk stops after log_depth lines, at a NULL address or frame, or
+ * after printing a frame whose symbol is "main".
+ */
+static void print_stack(int fd, char *addr, struct frame *fp)
+{
+	int i;
+	char *name, buf[30];	/* buf holds one formatted address */
+
+	for (i = 0; i < log_depth && addr != NULL; i++) {
+		if (__fex_sym(addr, &name) != NULL) {
+			write(fd, buf, sprintf(buf, " 0x%0" PDIG "lx ",
+			    (long)addr));
+			write(fd, name, strlen(name));
+			write(fd, "\n", 1);
+			if (!strcmp(name, "main"))
+				break;
+		} else {
+			write(fd, buf, sprintf(buf, " 0x%0" PDIG "lx\n",
+			    (long)addr));
+		}
+		if (fp == NULL)
+			break;
+		addr = (char *)fp->fr_savpc;
+		fp = FRAMEP(fp->fr_savfp);
+	}
+}
+/*
+ * Write "fex_log_entry: <msg>" and a stack trace of the caller to the
+ * log, unless logging is disabled (log_fp is NULL) or the same msg
+ * pointer has already been logged from the same call stack.  Note that
+ * the duplicate check keys on the pointer value of msg, not on the
+ * string contents.  msg is passed to strlen(), so it must not be NULL.
+ */
+void fex_log_entry(const char *msg)
+{
+	ucontext_t uc;
+	struct frame *fp;
+	char *stk;
+	int fd;
+
+	/* if logging is disabled, just return */
+	mutex_lock(&log_lock);
+	if (log_fp == NULL) {
+		mutex_unlock(&log_lock);
+		return;
+	}
+
+	/* get the frame pointer from the current context and
+	   pop our own frame.
+	   NOTE(review): for __amd64 this reads REG_SP, whereas
+	   __fex_mklog() below reads REG_RBP -- confirm which register
+	   is intended here. */
+	getcontext(&uc);
+#if defined(__sparc) || defined(__amd64)
+	fp = FRAMEP(uc.uc_mcontext.gregs[REG_SP]);
+#elif defined(__i386) /* !defined(__amd64) */
+	fp = FRAMEP(uc.uc_mcontext.gregs[EBP]);
+#else
+#error Unknown architecture
+#endif
+	if (fp == NULL) {
+		mutex_unlock(&log_lock);
+		return;
+	}
+	stk = (char *)fp->fr_savpc;	/* return address into our caller */
+	fp = FRAMEP(fp->fr_savfp);
+
+	/* if we've already logged this message here, don't make an entry */
+	if (check_exc_list(stk, (unsigned long)msg, stk, fp)) {
+		mutex_unlock(&log_lock);
+		return;
+	}
+
+	/* make an entry */
+	fd = fileno(log_fp);
+	write(fd, "fex_log_entry: ", 15);
+	write(fd, msg, strlen(msg));
+	write(fd, "\n", 1);
+	__fex_sym_init();
+	print_stack(fd, stk, fp);
+	mutex_unlock(&log_lock);
+}
+/*
+ * Text for each exception, indexed by enum fex_exception; the order
+ * must stay in step with that enum in fex_handler.h.
+ */
+static const char *exception[FEX_NUM_EXC] = {
+	"inexact result",
+	"division by zero",
+	"underflow",
+	"overflow",
+	"invalid operation (0/0)",
+	"invalid operation (inf/inf)",
+	"invalid operation (inf-inf)",
+	"invalid operation (0*inf)",
+	"invalid operation (sqrt)",
+	"invalid operation (snan)",
+	"invalid operation (int)",
+	"invalid operation (cmp)"
+};
+/*
+ * Log one floating point exception.
+ *   uap  - context of the faulting code; supplies the PC and frame
+ *          pointer used for the stack trace
+ *   addr - address reported in the entry and used as the duplicate key
+ *   f    - exception flag bits (tested against the FE_* masks)
+ *   e    - which exception occurred; indexes exception[]
+ *   m    - handling mode (FEX_NONSTOP, FEX_ABORT, FEX_NOHANDLER, or
+ *          anything else, which is reported as a handler)
+ *   p    - handler address printed for the handler cases
+ *
+ * Nothing is written when logging is disabled, when m is FEX_NONSTOP
+ * and the flag for e is already set in f, or when the same exception
+ * at the same address and call stack has been logged before.
+ */
+void
+__fex_mklog(ucontext_t *uap, char *addr, int f, enum fex_exception e,
+    int m, void *p)
+{
+	struct frame *fp;
+	char *stk, *name, buf[30];
+	int fd;
+
+	/* if logging is disabled, just return */
+	mutex_lock(&log_lock);
+	if (log_fp == NULL) {
+		mutex_unlock(&log_lock);
+		return;
+	}
+
+	/* get stack info */
+#if defined(__sparc)
+	stk = (char*)uap->uc_mcontext.gregs[REG_PC];
+	fp = FRAMEP(uap->uc_mcontext.gregs[REG_SP]);
+#elif defined(__amd64)
+	stk = (char*)uap->uc_mcontext.gregs[REG_PC];
+	fp = FRAMEP(uap->uc_mcontext.gregs[REG_RBP]);
+#elif defined(__i386) /* !defined(__amd64) */
+	stk = (char*)uap->uc_mcontext.gregs[PC];
+	fp = FRAMEP(uap->uc_mcontext.gregs[EBP]);
+#else
+#error Unknown architecture
+#endif
+
+	/* if the handling mode is the default and this exception's
+	   flag is already raised, don't make an entry */
+	if (m == FEX_NONSTOP) {
+		switch (e) {
+		case fex_inexact:
+			if (f & FE_INEXACT) {
+				mutex_unlock(&log_lock);
+				return;
+			}
+			break;
+		case fex_underflow:
+			if (f & FE_UNDERFLOW) {
+				mutex_unlock(&log_lock);
+				return;
+			}
+			break;
+		case fex_overflow:
+			if (f & FE_OVERFLOW) {
+				mutex_unlock(&log_lock);
+				return;
+			}
+			break;
+		case fex_division:
+			if (f & FE_DIVBYZERO) {
+				mutex_unlock(&log_lock);
+				return;
+			}
+			break;
+		default:	/* every fex_inv_* variant maps to FE_INVALID */
+			if (f & FE_INVALID) {
+				mutex_unlock(&log_lock);
+				return;
+			}
+			break;
+		}
+	}
+
+	/* if we've already logged this exception at this address,
+	   don't make an entry */
+	if (check_exc_list(addr, (unsigned long)e, stk, fp)) {
+		mutex_unlock(&log_lock);
+		return;
+	}
+
+	/* make an entry */
+	fd = fileno(log_fp);
+	write(fd, "Floating point ", 15);
+	write(fd, exception[e], strlen(exception[e]));
+	write(fd, buf, sprintf(buf, " at 0x%0" PDIG "lx", (long)addr));
+	__fex_sym_init();
+	if (__fex_sym(addr, &name) != NULL) {
+		write(fd, " ", 1);
+		write(fd, name, strlen(name));
+	}
+	switch (m) {
+	case FEX_NONSTOP:
+		write(fd, ", nonstop mode\n", 15);
+		break;
+
+	case FEX_ABORT:
+		write(fd, ", abort\n", 8);
+		break;
+
+	case FEX_NOHANDLER:
+		if (p == (void *)SIG_DFL) {
+			write(fd, ", handler: SIG_DFL\n", 19);
+			break;
+		}
+		else if (p == (void *)SIG_IGN) {
+			write(fd, ", handler: SIG_IGN\n", 19);
+			break;
+		}
+		/* fall through: any other handler address is printed below */
+	default:
+		write(fd, ", handler: ", 11);
+		if (__fex_sym((char *)p, &name) != NULL) {
+			write(fd, name, strlen(name));
+			write(fd, "\n", 1);
+		} else {
+			write(fd, buf, sprintf(buf, "0x%0" PDIG "lx\n",
+			    (long)p));
+		}
+		break;
+	}
+	print_stack(fd, stk, fp);
+	mutex_unlock(&log_lock);
+}
diff --git a/usr/src/libm/src/m9x/fma.c b/usr/src/libm/src/m9x/fma.c
new file mode 100644
index 0000000..ff13ee3
--- /dev/null
+++ b/usr/src/libm/src/m9x/fma.c
@@ -0,0 +1,608 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "@(#)fma.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fma = __fma +#endif + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0x3fe00000u, 0 }, + { 0x40000000u, 0 }, + { 0x43300000u, 0 }, + { 0x41a00000u, 0 }, + { 0x3e500000u, 0 }, + { 0x3df00000u, 0 }, + { 0x3bf00000u, 0 }, + { 0x7fe00000u, 0 }, + { 0x00100000u, 0 }, + { 0x00100001u, 0 } +}; + +#define half C[0].d +#define two C[1].d +#define two52 C[2].d +#define two27 C[3].d +#define twom26 C[4].d +#define twom32 C[5].d +#define twom64 C[6].d +#define huge C[7].d +#define tiny C[8].d +#define tiny2 C[9].d + +static const unsigned fsr_rm = 0xc0000000u; + +/* + * fma for SPARC: 64-bit double precision, big-endian + */ +double +__fma(double x, double y, double z) { + union { + unsigned i[2]; + double d; + } xx, yy, zz; + double xhi, yhi, xlo, ylo, t; + unsigned xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky; + int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; + volatile double dummy; + + /* extract the high order words of the arguments */ + xx.d = x; + yy.d = y; + zz.d = z; + hx = xx.i[0] & ~0x80000000; + hy = yy.i[0] & ~0x80000000; + hz = zz.i[0] & ~0x80000000; + + /* dispense with inf, nan, and zero cases */ + if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 || + (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */ + return (x * y + z); + + if (hz >= 0x7ff00000) /* z is inf or nan */ + return (x + z); /* avoid spurious under/overflow in x * y */ + + if ((hz | zz.i[1]) == 0) /* z is zero */ + /* + * x * y isn't zero but could underflow to zero, + * so don't add z, lest we perturb the sign + */ + return (x * y); + + /* + * now x, y, and z are all finite and nonzero; save the fsr and + * set round-to-negative-infinity mode (and clear nonstandard + * mode before we try to scale subnormal operands) + */ + __fenv_getfsr(&fsr); + __fenv_setfsr(&fsr_rm); + + /* extract signs and 
exponents, and normalize subnormals */ + sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; + sz = zz.i[0] & 0x80000000; + ex = hx >> 20; + if (!ex) { + xx.d = x * two52; + ex = ((xx.i[0] & ~0x80000000) >> 20) - 52; + } + ey = hy >> 20; + if (!ey) { + yy.d = y * two52; + ey = ((yy.i[0] & ~0x80000000) >> 20) - 52; + } + ez = hz >> 20; + if (!ez) { + zz.d = z * two52; + ez = ((zz.i[0] & ~0x80000000) >> 20) - 52; + } + + /* multiply x*y to 106 bits */ + exy = ex + ey - 0x3ff; + xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000; + yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000; + x = xx.d; + y = yy.d; + xhi = ((x + twom26) + two27) - two27; + yhi = ((y + twom26) + two27) - two27; + xlo = x - xhi; + ylo = y - yhi; + x *= y; + y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; + if (x >= two) { + x *= half; + y *= half; + exy++; + } + + /* extract the significands */ + xx.d = x; + xy0 = (xx.i[0] & 0xfffff) | 0x100000; + xy1 = xx.i[1]; + yy.d = t = y + twom32; + xy2 = yy.i[1]; + yy.d = (y - (t - twom32)) + twom64; + xy3 = yy.i[1]; + z0 = (zz.i[0] & 0xfffff) | 0x100000; + z1 = zz.i[1]; + z2 = z3 = 0; + + /* + * now x*y is represented by sxy, exy, and xy[0-3], and z is + * represented likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && + (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + z2 = xy2; xy2 = 0; + z3 = xy3; xy3 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 116) { + xy0 = xy1 = xy2 = 0; + xy3 = 1; + } else if (e >= 96) { + sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e)); + xy3 = xy0 >> (e - 96); + if (sticky) + xy3 |= 1; + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e)); + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + if (sticky) + xy3 |= 1; + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) 
{ + sticky = xy3 | ((xy2 << 1) << (63 - e)); + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + if (sticky) + xy3 |= 1; + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + sticky = (xy3 << 1) << (31 - e); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + if (sticky) + xy3 |= 1; + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* if this is a magnitude subtract, negate the significand of xy */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = -xy3; + if (xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z3 += xy3; + e = (z3 < xy3); + z2 += xy2; + if (e) { + z2++; + e = (z2 <= xy2); + } else + e = (z2 < xy2); + z1 += xy1; + if (e) { + z1++; + e = (z1 <= xy1); + } else + e = (z1 < xy1); + z0 += xy0; + if (e) + z0++; + + /* postnormalize and collect rounding information into z2 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 56) { + z2 = 1; /* result can't be exactly zero */ + z0 = z1 = 0; + } else if (e >= 32) { + sticky = z3 | z2 | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + if (sticky) + z2 |= 1; + z1 = z0 >> (e - 32); + z0 = 0; + } else { + sticky = z3 | (z2 << 1) << (31 - e); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + if (sticky) + z2 |= 1; + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 >>= e; + } + ez = 1; + } else if (z0 >= 0x200000) { + /* carry out; shift right by one */ + sticky = (z2 & 1) | z3; + z2 = (z2 >> 1) | (z1 << 31); + if (sticky) + z2 |= 1; + z1 = (z1 >> 1) | (z0 << 31); + z0 >>= 1; + ez++; + } else { + if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) { + /* + * borrow/cancellation; shift left as much as + * exponent allows + */ + while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) { + z0 = z1; + z1 = z2; + z2 = z3; + z3 = 0; + ez -= 32; + } + while (z0 < 0x100000 && ez 
> 1) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 <<= 1; + ez--; + } + } + if (z3) + z2 |= 1; + } + + /* get the rounding mode and clear current exceptions */ + rm = fsr >> 30; + fsr &= ~FSR_CEXC; + + /* strip off the integer bit, if there is one */ + ibit = z0 & 0x100000; + if (ibit) + z0 -= 0x100000; + else { + ez = 0; + if (!(z0 | z1 | z2)) { /* exact zero */ + zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; + zz.i[1] = 0; + __fenv_setfsr(&fsr); + return (zz.d); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz) + rm ^= rm >> 1; + + /* round and raise exceptions */ + if (z2) { + fsr |= FSR_NXC; + + /* decide whether to round the fraction up */ + if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u || + (z2 == 0x80000000u && (z1 & 1))))) { + /* round up and renormalize if necessary */ + if (++z1 == 0) { + if (++z0 == 0x100000) { + z0 = 0; + ez++; + } + } + } + } + + /* check for under/overflow */ + if (ez >= 0x7ff) { + if (rm == FSR_RN || rm == FSR_RP) { + zz.i[0] = sz | 0x7ff00000; + zz.i[1] = 0; + } else { + zz.i[0] = sz | 0x7fefffff; + zz.i[1] = 0xffffffff; + } + fsr |= FSR_OFC | FSR_NXC; + } else { + zz.i[0] = sz | (ez << 20) | z0; + zz.i[1] = z1; + + /* + * !ibit => exact result was tiny before rounding, + * z2 nonzero => result delivered is inexact + */ + if (!ibit) { + if (z2) + fsr |= FSR_UFC | FSR_NXC; + else if (fsr & FSR_UFM) + fsr |= FSR_UFC; + } + } + + /* restore the fsr and emulate exceptions as needed */ + if ((fsr & FSR_CEXC) & (fsr >> 23)) { + __fenv_setfsr(&fsr); + if (fsr & FSR_OFC) { + dummy = huge; + dummy *= huge; + } else if (fsr & FSR_UFC) { + dummy = tiny; + if (fsr & FSR_NXC) + dummy *= tiny; + else + dummy -= tiny2; + } else { + dummy = huge; + dummy += tiny; + } + } else { + fsr |= (fsr & 0x1f) << 5; + __fenv_setfsr(&fsr); + } + return (zz.d); +} + +#elif defined(__i386) + +#if 
defined(__amd64) +#define NI 4 +#else +#define NI 3 +#endif + +/* + * fma for x86: 64-bit double precision, little-endian + * + * Returns x * y + z as a double. The operands are widened to long + * double, x * y is formed as a head/tail pair (xe, ye), and that pair + * is summed with z through a chain of (sum, error) pairs. If the low + * 10 bits of the final sum are zero while a nonzero residual remains, + * the sum is perturbed by one unit in its last place, in the direction + * of the residual, before the conversion to double. + */ +double +__fma(double x, double y, double z) { + union { + unsigned i[NI]; + long double e; + } xx, yy, zz; + long double xe, ye, xhi, xlo, yhi, ylo; + int ex, ey, ez; + unsigned cwsw, oldcwsw, rm; + + /* convert the operands to double extended */ + xx.e = (long double) x; + yy.e = (long double) y; + zz.e = (long double) z; + + /* extract the exponents of the arguments */ + ex = xx.i[2] & 0x7fff; + ey = yy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + + /* dispense with inf, nan, and zero cases */ + if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0) + /* x or y is inf, nan, or zero */ + return ((double) (xx.e * yy.e + zz.e)); + + if (ez >= 0x7fff) /* z is inf or nan */ + return ((double) (xx.e + zz.e)); + /* avoid spurious inexact in x * y */ + + /* + * save the control and status words, mask all exceptions, and + * set rounding to 64-bit precision and to-nearest + * (0xf0c0ffff clears the rounding-direction, precision, and + * exception-mask fields; 0x033f0000 is FCW_P64 | FCW_ALLM, and + * FCW_RN is zero) + */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; + __fenv_setcwsw(&cwsw); + + /* multiply x*y to 106 bits */ + xe = xx.e; + xx.i[0] = 0; + xhi = xx.e; /* hi 32 bits */ + xlo = xe - xhi; /* lo 21 bits */ + ye = yy.e; + yy.i[0] = 0; + yhi = yy.e; + ylo = ye - yhi; + xe = xe * ye; + ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo; + + /* distill the sum of xe, ye, and z */ + xhi = ye + zz.e; + yhi = xhi - ye; + xlo = (zz.e - yhi) + (ye - (xhi - yhi)); + /* now (xhi,xlo) = ye + z */ + + yhi = xe + xhi; + ye = yhi - xe; + ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */ + + xhi = xlo + ylo; + xe = xhi - xlo; + xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */ + + yy.e = yhi + xhi; + ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */ + + if (yy.i[1] != 0) { /* yy.e is nonzero */ + /* perturb yy.e if its least significant 10 bits are zero */ + if (!(yy.i[0] & 0x3ff)) { + xx.e = ylo + xlo; + if 
(xx.i[1] != 0) { + xx.i[2] = (xx.i[2] & 0x8000) | + ((yy.i[2] & 0x7fff) - 63); + xx.i[1] = 0x80000000; + xx.i[0] = 0; + yy.e += xx.e; /* xx.e was rebuilt as +-1.0 * 2^(exponent of yy.e - 63), keeping the sign of ylo + xlo */ + } + } + } else { + /* set sign of zero result according to rounding direction */ + rm = oldcwsw & 0x0c000000; + yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0); + } + + /* + * restore the control and status words and convert the result + * to double + */ + __fenv_setcwsw(&oldcwsw); + return ((double) yy.e); +} + +#if 0 +/* + * another fma for x86: assumes return value will be left in + * long double (80-bit double extended) precision + * (compiled out by the enclosing "if 0" conditional) + */ +long double +__fma(double x, double y, double z) { + union { + unsigned i[3]; + long double e; + } xx, yy, zz, tt; + long double xe, ye, xhi, xlo, yhi, ylo, zhi, zlo; + int ex, ey, ez; + unsigned cwsw, oldcwsw, s; + + /* convert the operands to double extended */ + xx.e = (long double) x; + yy.e = (long double) y; + zz.e = (long double) z; + + /* extract the exponents of the arguments */ + ex = xx.i[2] & 0x7fff; + ey = yy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + + /* dispense with inf, nan, and zero cases */ + if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0) + /* x or y is inf, nan, or zero */ + return (xx.e * yy.e + zz.e); + + if (ez >= 0x7fff) /* z is inf or nan */ + return (xx.e + zz.e); /* avoid spurious inexact in x * y */ + + if (ez == 0) /* z is zero */ + return (xx.e * yy.e); /* x * y isn't zero; no need to add z */ + + /* + * save the control and status words, mask all exceptions, and + * set rounding to 64-bit precision and to-nearest + */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; + __fenv_setcwsw(&cwsw); + + /* multiply x*y to 106 bits */ + xe = xx.e; + xx.i[0] = 0; + xhi = xx.e; /* hi 32 bits */ + xlo = xe - xhi; /* lo 21 bits */ + ye = yy.e; + yy.i[0] = 0; + yhi = yy.e; + ylo = ye - yhi; + xx.e = xe * ye; + xx.i[0] &= ~0x7ff; /* 53 bits of x*y */ + yy.e = ((xhi * yhi - xx.e) + xhi * ylo + xlo * yhi) + xlo * ylo; + + /* reduce to a sum of two terms */ + 
if (yy.e != 0.0) { + ex = xx.i[2] & 0x7fff; + if (ez - ex > 10) { + /* collapse y into a single bit and add to x */ + yy.i[0] = 0; + yy.i[1] = 0x80000000; + yy.i[2] = (yy.i[2] & 0x8000) | (ex - 60); + xx.e += yy.e; + } else if (ex - ez <= 10) { + xx.e += zz.e; /* exact */ + zz.e = yy.e; + } else if (ex - ez <= 42) { + /* split z into two pieces */ + tt.i[0] = 0; + tt.i[1] = 0x80000000; + tt.i[2] = ex + 11; + zhi = (zz.e + tt.e) - tt.e; + zlo = zz.e - zhi; + xx.e += zhi; + zz.e = yy.e + zlo; + } else if (ex - ez <= 63) { + zz.e += yy.e; /* exact */ + } else if (ex - ez <= 106) { + /* + * collapse the tail of z into a sticky bit and add z + * to y without error + */ + if (ex - ez <= 81) { + s = 1 << (ex - ez - 50); /* NOTE(review): the count is 31 when ex - ez == 81, and 1 << 31 overflows a signed int */ + if (zz.i[0] & (s - 1)) + zz.i[0] |= s; + zz.i[0] &= ~(s - 1); + } else { + s = 1 << (ex - ez - 82); + if ((zz.i[1] & (s - 1)) | zz.i[0]) + zz.i[1] |= s; + zz.i[1] &= ~(s - 1); + zz.i[0] = 0; + } + zz.e += yy.e; + } else { + /* collapse z into a single bit and add to y */ + zz.i[0] = 0; + zz.i[1] = 0x80000000; + zz.i[2] = (zz.i[2] & 0x8000) | (ex - 113); + zz.e += yy.e; + } + } + + /* restore the control and status words, and sum */ + __fenv_setcwsw(&oldcwsw); + return (xx.e + zz.e); +} +#endif + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fma.h b/usr/src/libm/src/m9x/fma.h new file mode 100644 index 0000000..9e2b718 --- /dev/null +++ b/usr/src/libm/src/m9x/fma.h @@ -0,0 +1,125 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _FMA_H +#define _FMA_H + +#pragma ident "@(#)fma.h 1.3 06/01/31 SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __sparc + +/* + * Common definitions for fma routines (SPARC) + */ + +/* fsr fields */ + +/* current exception bits */ +#define FSR_NXC 0x1 +#define FSR_DZC 0x2 +#define FSR_UFC 0x4 +#define FSR_OFC 0x8 +#define FSR_NVC 0x10 +#define FSR_CEXC 0x1f /* mask for all cexc bits */ + +/* accrued exception bits (each is the matching cexc bit shifted left by 5) */ +#define FSR_NXA 0x20 +#define FSR_DZA 0x40 +#define FSR_UFA 0x80 +#define FSR_OFA 0x100 +#define FSR_NVA 0x200 + +/* trap enable bits (each is the matching cexc bit shifted left by 23) */ +#define FSR_NXM 0x00800000 +#define FSR_DZM 0x01000000 +#define FSR_UFM 0x02000000 +#define FSR_OFM 0x04000000 +#define FSR_NVM 0x08000000 + +/* rounding directions (right-adjusted: the fma routines compare these against fsr >> 30) */ +#define FSR_RN 0 +#define FSR_RZ 1 +#define FSR_RP 2 +#define FSR_RM 3 + +/* inline templates */ +extern void __fenv_getfsr(unsigned int *); +extern void __fenv_setfsr(const unsigned int *); + +#endif /* __sparc */ + + +#ifdef __i386 + +/* + * Common definitions for fma routines (x86) + */ + +/* control and status word fields (NOTE(review): the FCW_* values all lie in the upper 16 bits and the FSW_* flags in the lower 16, so the word passed to __fenv_getcwsw/__fenv_setcwsw presumably packs the control word above the status word -- confirm) */ + +/* exception flags */ +#define FSW_NV 0x1 +#define FSW_DN 0x2 +#define FSW_DZ 0x4 +#define FSW_OF 0x8 +#define FSW_UF 0x10 +#define FSW_NX 0x20 + +/* exception masks */ +#define FCW_NVM 0x00010000 +#define FCW_DNM 0x00020000 +#define FCW_DZM 0x00040000 +#define FCW_OFM 0x00080000 +#define FCW_UFM 0x00100000 +#define FCW_NXM 0x00200000 +#define FCW_ALLM 0x003f0000 + +/* rounding directions */ +#define 
FCW_RN 0x00000000 +#define FCW_RM 0x04000000 +#define FCW_RP 0x08000000 +#define FCW_RZ 0x0c000000 + +/* rounding precisions */ +#define FCW_P24 0x00000000 +#define FCW_P53 0x02000000 +#define FCW_P64 0x03000000 + +/* inline templates */ +extern void __fenv_getcwsw(unsigned int *); +extern void __fenv_setcwsw(const unsigned int *); + +#endif /* __i386 */ + +#ifdef __cplusplus +} +#endif + +#endif /* _FMA_H */ diff --git a/usr/src/libm/src/m9x/fmaf.c b/usr/src/libm/src/m9x/fmaf.c new file mode 100644 index 0000000..f0799b7 --- /dev/null +++ b/usr/src/libm/src/m9x/fmaf.c @@ -0,0 +1,241 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)fmaf.c 1.5 06/01/31 SMI"
+
+#if defined(ELFOBJ)
+#pragma weak fmaf = __fmaf
+#endif
+
+#include "libm.h"
+#include "fma.h"
+
+#if defined(__sparc)
+
+/*
+ * fmaf for SPARC: 32-bit single precision, big-endian
+ *
+ * Returns x * y + z converted to float.  The product and z are both
+ * held in doubles, viewed through a union whose i[0] is the word that
+ * carries the sign and exponent.  When the two exponents are close
+ * enough the doubles are simply added; otherwise the low-order bits of
+ * the smaller summand are first folded into a single "sticky" bit so
+ * that the double addition can be computed without error, and the sum
+ * is then converted to float.
+ */
+float
+__fmaf(float x, float y, float z) {
+	union {
+		unsigned i[2];
+		double d;
+	} xy, zz;
+	unsigned u, s;
+	int exy, ez;
+
+	/*
+	 * the following operations can only raise the invalid exception,
+	 * and then only if either x*y is of the form Inf*0 or one of x,
+	 * y, or z is a signaling NaN
+	 */
+	xy.d = (double) x * y;
+	zz.d = (double) z;
+
+	/*
+	 * if the sum xy + z will be exact, just compute it and cast the
+	 * result to float
+	 */
+	exy = (xy.i[0] >> 20) & 0x7ff;
+	ez = (zz.i[0] >> 20) & 0x7ff;
+	if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
+	    ez == 0x7ff || ez == 0) {
+		return ((float) (xy.d + zz.d));
+	}
+
+	/*
+	 * collapse the tail of the smaller summand into a "sticky bit"
+	 * so that the sum can be computed without error
+	 *
+	 * In each case s is the sticky-bit position: if any bit below s
+	 * is set, s itself is set, and then every bit below s is cleared.
+	 * The shifts use unsigned constants because the count in the
+	 * "< 31" cases can reach 30, and 2 << 30 does not fit in a
+	 * signed int.
+	 */
+	if (ez > exy) {
+		if (ez - exy < 31) {
+			u = xy.i[1];
+			s = 2u << (ez - exy);
+			if (u & (s - 1))
+				u |= s;
+			xy.i[1] = u & ~(s - 1);
+		} else if (ez - exy < 51) {
+			u = xy.i[0];
+			s = 1u << (ez - exy - 31);
+			if ((u & (s - 1)) | xy.i[1])
+				u |= s;
+			xy.i[0] = u & ~(s - 1);
+			xy.i[1] = 0;
+		} else {
+			/* collapse all of xy into a single bit */
+			xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
+			xy.i[1] = 0;
+		}
+	} else {
+		/* exy - ez is at least 29 here (smaller gaps took the exact path) */
+		if (exy - ez < 31) {
+			u = zz.i[1];
+			s = 2u << (exy - ez);
+			if (u & (s - 1))
+				u |= s;
+			zz.i[1] = u & ~(s - 1);
+		} else if (exy - ez < 51) {
+			u = zz.i[0];
+			s = 1u << (exy - ez - 31);
+			if ((u & (s - 1)) | zz.i[1])
+				u |= s;
+			zz.i[0] = u & ~(s - 1);
+			zz.i[1] = 0;
+		} else {
+			/* collapse all of zz into a single bit */
+			zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
+			zz.i[1] = 0;
+		}
+	}
+
+	return ((float) (xy.d + zz.d));
+}
+
+#elif defined(__i386)
+
+#if defined(__amd64)
+#define NI 4
+#else
+#define NI 3
+#endif
+
+/*
+ * 
fmaf for x86: 32-bit single precision, little-endian
+ *
+ * Returns x * y + z converted to float.  The product and z are held
+ * in long doubles, viewed through a union whose i[2] carries the sign
+ * and exponent and whose i[1]/i[0] are the high/low significand words.
+ * The rounding precision is switched to 64 bits for the duration of
+ * the computation.  When the two exponents are close enough the values
+ * are simply added; otherwise the low-order bits of the smaller
+ * summand are first folded into a single "sticky" bit so that the
+ * addition can be computed without error.
+ */
+float
+__fmaf(float x, float y, float z) {
+	union {
+		unsigned i[NI];
+		long double e;
+	} xy, zz;
+	unsigned u, s, cwsw, oldcwsw;
+	int exy, ez;
+
+	/* set rounding precision to 64 bits (0x03000000 is FCW_P64) */
+	__fenv_getcwsw(&oldcwsw);
+	cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
+	__fenv_setcwsw(&cwsw);
+
+	/*
+	 * the following operations can only raise the invalid exception,
+	 * and then only if either x*y is of the form Inf*0 or one of x,
+	 * y, or z is a signaling NaN
+	 */
+	xy.e = (long double) x * y;
+	zz.e = (long double) z;
+
+	/*
+	 * if the sum xy + z will be exact, just compute it and cast the
+	 * result to float
+	 */
+	exy = xy.i[2] & 0x7fff;
+	ez = zz.i[2] & 0x7fff;
+	if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
+	    ez == 0x7fff || ez == 0) {
+		goto cont;
+	}
+
+	/*
+	 * collapse the tail of the smaller summand into a "sticky bit"
+	 * so that the sum can be computed without error
+	 *
+	 * In each case s is the sticky-bit position: if any bit below s
+	 * is set, s itself is set, and then every bit below s is cleared.
+	 * The shifts use unsigned constants because the count in the
+	 * "< 31" case can reach 30, and 2 << 30 does not fit in a
+	 * signed int.
+	 */
+	if (ez > exy) {
+		if (ez - exy < 31) {
+			u = xy.i[0];
+			s = 2u << (ez - exy);
+			if (u & (s - 1))
+				u |= s;
+			xy.i[0] = u & ~(s - 1);
+		} else if (ez - exy < 62) {
+			u = xy.i[1];
+			s = 1u << (ez - exy - 31);
+			if ((u & (s - 1)) | xy.i[0])
+				u |= s;
+			xy.i[1] = u & ~(s - 1);
+			xy.i[0] = 0;
+		} else {
+			/* collapse all of xy into a single bit */
+			xy.i[0] = 0;
+			xy.i[1] = 0x80000000;
+			xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
+		}
+	} else {
+		/*
+		 * exy - ez is at least 40 here (smaller gaps took the
+		 * exact path), so the shift count below is at least 9
+		 * and no "< 31" case is needed
+		 */
+		if (exy - ez < 62) {
+			u = zz.i[1];
+			s = 1u << (exy - ez - 31);
+			if ((u & (s - 1)) | zz.i[0])
+				u |= s;
+			zz.i[1] = u & ~(s - 1);
+			zz.i[0] = 0;
+		} else {
+			/* collapse all of zz into a single bit */
+			zz.i[0] = 0;
+			zz.i[1] = 0x80000000;
+			zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
+		}
+	}
+
+cont:
+	xy.e += zz.e;
+
+	/*
+	 * restore the rounding precision: the word is read again and only
+	 * its precision field is replaced with the saved one, so the other
+	 * fields keep their current values
+	 */
+	__fenv_getcwsw(&cwsw);
+	cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
+	__fenv_setcwsw(&cwsw);
+
+	return ((float) xy.e);
+}
+
+#if 0
+/*
+ * another fmaf for x86: assumes return value will be left in
+ * 
long double (80-bit double extended) precision + */ +long double +__fmaf(float x, float y, float z) { + /* + * Note: This implementation assumes the rounding precision mode + * is set to the default, rounding to 64 bit precision. If this + * routine must work in non-default rounding precision modes, do + * the following instead: + * + * long double t; + * + * (set the rounding precision to 64 bits) + * t = x * y; + * (restore the previous rounding precision) + * return t + z; + * + * Note that the code to change rounding precision must not alter + * the exception masks or flags, since the product x * y may raise + * an invalid operation exception. + */ + return ((long double) x * y + z); +} +#endif + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fmal.c b/usr/src/libm/src/m9x/fmal.c new file mode 100644 index 0000000..7fb9a62 --- /dev/null +++ b/usr/src/libm/src/m9x/fmal.c @@ -0,0 +1,1224 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fmal.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmal = __fmal +#endif + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0x3fe00000u, 0 }, + { 0x40000000u, 0 }, + { 0x3ef00000u, 0 }, + { 0x3e700000u, 0 }, + { 0x41300000u, 0 }, + { 0x3e300000u, 0 }, + { 0x3b300000u, 0 }, + { 0x38300000u, 0 }, + { 0x42300000u, 0 }, + { 0x3df00000u, 0 }, + { 0x7fe00000u, 0 }, + { 0x00100000u, 0 }, + { 0x00100001u, 0 }, + { 0, 0 }, + { 0x7ff00000u, 0 }, + { 0x7ff00001u, 0 } +}; + +#define half C[0].d +#define two C[1].d +#define twom16 C[2].d +#define twom24 C[3].d +#define two20 C[4].d +#define twom28 C[5].d +#define twom76 C[6].d +#define twom124 C[7].d +#define two36 C[8].d +#define twom32 C[9].d +#define huge C[10].d +#define tiny C[11].d +#define tiny2 C[12].d +#define zero C[13].d +#define inf C[14].d +#define snan C[15].d + +static const unsigned fsr_rm = 0xc0000000u; + +/* + * fmal for SPARC: 128-bit quad precision, big-endian + */ +long double +__fmal(long double x, long double y, long double z) { + union { + unsigned i[4]; + long double q; + } xx, yy, zz; + union { + unsigned i[2]; + double d; + } u; + double dx[5], dy[5], dxy[9], c, s; + unsigned xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7; + unsigned z0, z1, z2, z3, z4, z5, z6, z7; + unsigned fsr, rm, sticky; + int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; + int cx, cy, cz; + volatile double dummy; + + /* extract the high order words of the arguments */ + xx.q = x; + yy.q = y; + zz.q = z; + hx = xx.i[0] & ~0x80000000; + hy = yy.i[0] & ~0x80000000; + hz = zz.i[0] & ~0x80000000; + + /* + * distinguish zero, finite nonzero, infinite, and quiet nan + * arguments; raise invalid and return for signaling nans + */ + if (hx >= 0x7fff0000) { + if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) { + if (!(hx & 0x8000)) { + /* signaling nan, raise invalid */ + dummy = snan; + dummy += snan; + xx.i[0] |= 0x8000; + 
return (xx.q); + } + cx = 3; /* quiet nan */ + } else + cx = 2; /* inf */ + } else if (hx == 0) { + cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0; + /* subnormal or zero */ + } else + cx = 1; /* finite nonzero */ + + if (hy >= 0x7fff0000) { + if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) { + if (!(hy & 0x8000)) { + dummy = snan; + dummy += snan; + yy.i[0] |= 0x8000; + return (yy.q); + } + cy = 3; + } else + cy = 2; + } else if (hy == 0) { + cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0; + } else + cy = 1; + + if (hz >= 0x7fff0000) { + if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) { + if (!(hz & 0x8000)) { + dummy = snan; + dummy += snan; + zz.i[0] |= 0x8000; + return (zz.q); + } + cz = 3; + } else + cz = 2; + } else if (hz == 0) { + cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0; + } else + cz = 1; + + /* get the fsr and clear current exceptions */ + __fenv_getfsr(&fsr); + fsr &= ~FSR_CEXC; + + /* handle all other zero, inf, and nan cases */ + if (cx != 1 || cy != 1 || cz != 1) { + /* if x or y is a quiet nan, return it */ + if (cx == 3) { + __fenv_setfsr(&fsr); + return (x); + } + if (cy == 3) { + __fenv_setfsr(&fsr); + return (y); + } + + /* if x*y is 0*inf, raise invalid and return the default nan */ + if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) { + dummy = zero; + dummy *= inf; + zz.i[0] = 0x7fffffff; + zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; + return (zz.q); + } + + /* if z is a quiet nan, return it */ + if (cz == 3) { + __fenv_setfsr(&fsr); + return (z); + } + + /* + * now none of x, y, or z is nan; handle cases where x or y + * is inf + */ + if (cx == 2 || cy == 2) { + /* + * if z is also inf, either we have inf-inf or + * the result is the same as z depending on signs + */ + if (cz == 2) { + if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) { + dummy = inf; + dummy -= inf; + zz.i[0] = 0x7fffffff; + zz.i[1] = zz.i[2] = zz.i[3] = + 0xffffffff; + return (zz.q); + } + __fenv_setfsr(&fsr); + return (z); + } + + /* otherwise the result is inf with appropriate 
sign */ + zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) | + 0x7fff0000; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + __fenv_setfsr(&fsr); + return (zz.q); + } + + /* if z is inf, return it */ + if (cz == 2) { + __fenv_setfsr(&fsr); + return (z); + } + + /* + * now x, y, and z are all finite; handle cases where x or y + * is zero + */ + if (cx == 0 || cy == 0) { + /* either we have 0-0 or the result is the same as z */ + if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < + 0) { + zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 : + 0; + __fenv_setfsr(&fsr); + return (zz.q); + } + __fenv_setfsr(&fsr); + return (z); + } + + /* if we get here, x and y are nonzero finite, z must be zero */ + return (x * y); + } + + /* + * now x, y, and z are all finite and nonzero; set round-to- + * negative-infinity mode + */ + __fenv_setfsr(&fsr_rm); + + /* + * get the signs and exponents and normalize the significands + * of x and y + */ + sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; + ex = hx >> 16; + hx &= 0xffff; + if (!ex) { + if (hx | (xx.i[1] & 0xfffe0000)) { + ex = 1; + } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) { + hx = xx.i[1]; + xx.i[1] = xx.i[2]; + xx.i[2] = xx.i[3]; + xx.i[3] = 0; + ex = -31; + } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) { + hx = xx.i[2]; + xx.i[1] = xx.i[3]; + xx.i[2] = xx.i[3] = 0; + ex = -63; + } else { + hx = xx.i[3]; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + ex = -95; + } + while ((hx & 0x10000) == 0) { + hx = (hx << 1) | (xx.i[1] >> 31); + xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); + xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); + xx.i[3] <<= 1; + ex--; + } + } else + hx |= 0x10000; + ey = hy >> 16; + hy &= 0xffff; + if (!ey) { + if (hy | (yy.i[1] & 0xfffe0000)) { + ey = 1; + } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) { + hy = yy.i[1]; + yy.i[1] = yy.i[2]; + yy.i[2] = yy.i[3]; + yy.i[3] = 0; + ey = -31; + } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) { + hy = yy.i[2]; + yy.i[1] = yy.i[3]; + yy.i[2] = yy.i[3] = 0; + ey = -63; + } else { + hy = yy.i[3]; + yy.i[1] = 
yy.i[2] = yy.i[3] = 0; + ey = -95; + } + while ((hy & 0x10000) == 0) { + hy = (hy << 1) | (yy.i[1] >> 31); + yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31); + yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31); + yy.i[3] <<= 1; + ey--; + } + } else + hy |= 0x10000; + exy = ex + ey - 0x3fff; + + /* convert the significands of x and y to doubles */ + c = twom16; + dx[0] = (double) ((int) hx) * c; + dy[0] = (double) ((int) hy) * c; + + c *= twom24; + dx[1] = (double) ((int) (xx.i[1] >> 8)) * c; + dy[1] = (double) ((int) (yy.i[1] >> 8)) * c; + + c *= twom24; + dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) & + 0xffffff)) * c; + dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) & + 0xffffff)) * c; + + c *= twom24; + dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) & + 0xffffff)) * c; + dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) & + 0xffffff)) * c; + + c *= twom24; + dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c; + dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c; + + /* form the "digits" of the product */ + dxy[0] = dx[0] * dy[0]; + dxy[1] = dx[0] * dy[1] + dx[1] * dy[0]; + dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0]; + dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + + dx[3] * dy[0]; + dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + + dx[3] * dy[1] + dx[4] * dy[0]; + dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + + dx[4] * dy[1]; + dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2]; + dxy[7] = dx[3] * dy[4] + dx[4] * dy[3]; + dxy[8] = dx[4] * dy[4]; + + /* split odd-numbered terms and combine into even-numbered terms */ + c = (dxy[1] + two20) - two20; + dxy[0] += c; + dxy[1] -= c; + c = (dxy[3] + twom28) - twom28; + dxy[2] += c + dxy[1]; + dxy[3] -= c; + c = (dxy[5] + twom76) - twom76; + dxy[4] += c + dxy[3]; + dxy[5] -= c; + c = (dxy[7] + twom124) - twom124; + dxy[6] += c + dxy[5]; + dxy[8] += (dxy[7] - c); + + /* propagate carries, adjusting the exponent if need be */ + dxy[7] = 
dxy[6] + dxy[8]; + dxy[5] = dxy[4] + dxy[7]; + dxy[3] = dxy[2] + dxy[5]; + dxy[1] = dxy[0] + dxy[3]; + if (dxy[1] >= two) { + dxy[0] *= half; + dxy[1] *= half; + dxy[2] *= half; + dxy[3] *= half; + dxy[4] *= half; + dxy[5] *= half; + dxy[6] *= half; + dxy[7] *= half; + dxy[8] *= half; + exy++; + } + + /* extract the significand of x*y */ + s = two36; + u.d = c = dxy[1] + s; + xy0 = u.i[1]; + c -= s; + dxy[1] -= c; + dxy[0] -= c; + + s *= twom32; + u.d = c = dxy[1] + s; + xy1 = u.i[1]; + c -= s; + dxy[2] += (dxy[0] - c); + dxy[3] = dxy[2] + dxy[5]; + + s *= twom32; + u.d = c = dxy[3] + s; + xy2 = u.i[1]; + c -= s; + dxy[4] += (dxy[2] - c); + dxy[5] = dxy[4] + dxy[7]; + + s *= twom32; + u.d = c = dxy[5] + s; + xy3 = u.i[1]; + c -= s; + dxy[4] -= c; + dxy[5] = dxy[4] + dxy[7]; + + s *= twom32; + u.d = c = dxy[5] + s; + xy4 = u.i[1]; + c -= s; + dxy[6] += (dxy[4] - c); + dxy[7] = dxy[6] + dxy[8]; + + s *= twom32; + u.d = c = dxy[7] + s; + xy5 = u.i[1]; + c -= s; + dxy[8] += (dxy[6] - c); + + s *= twom32; + u.d = c = dxy[8] + s; + xy6 = u.i[1]; + c -= s; + dxy[8] -= c; + + s *= twom32; + u.d = c = dxy[8] + s; + xy7 = u.i[1]; + + /* extract the sign, exponent, and significand of z */ + sz = zz.i[0] & 0x80000000; + ez = hz >> 16; + z0 = hz & 0xffff; + if (!ez) { + if (z0 | (zz.i[1] & 0xfffe0000)) { + z1 = zz.i[1]; + z2 = zz.i[2]; + z3 = zz.i[3]; + ez = 1; + } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) { + z0 = zz.i[1]; + z1 = zz.i[2]; + z2 = zz.i[3]; + z3 = 0; + ez = -31; + } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) { + z0 = zz.i[2]; + z1 = zz.i[3]; + z2 = z3 = 0; + ez = -63; + } else { + z0 = zz.i[3]; + z1 = z2 = z3 = 0; + ez = -95; + } + while ((z0 & 0x10000) == 0) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 <<= 1; + ez--; + } + } else { + z0 |= 0x10000; + z1 = zz.i[1]; + z2 = zz.i[2]; + z3 = zz.i[3]; + } + z4 = z5 = z6 = z7 = 0; + + /* + * now x*y is represented by sxy, exy, and xy[0-7], and z is + * represented 
likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 || + (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || + (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + e = xy2; xy2 = z2; z2 = e; + e = xy3; xy3 = z3; z3 = e; + z4 = xy4; xy4 = 0; + z5 = xy5; xy5 = 0; + z6 = xy6; xy6 = 0; + z7 = xy7; xy7 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 236) { + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; + xy7 = 1; + } else if (e >= 224) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | + ((xy0 << 1) << (255 - e)); + xy7 = xy0 >> (e - 224); + if (sticky) + xy7 |= 1; + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; + } else if (e >= 192) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | + ((xy1 << 1) << (223 - e)); + xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e)); + if (sticky) + xy7 |= 1; + xy6 = xy0 >> (e - 192); + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0; + } else if (e >= 160) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | + ((xy2 << 1) << (191 - e)); + xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e)); + xy5 = xy0 >> (e - 160); + xy0 = xy1 = xy2 = xy3 = xy4 = 0; + } else if (e >= 128) { + sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e)); + xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e)); + xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e)); + xy4 = xy0 >> (e - 128); + xy0 = xy1 = xy2 = xy3 = 0; + } else if (e >= 96) { + sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e)); + xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e)); + xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e)); 
+ xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); + xy3 = xy0 >> (e - 96); + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e)); + xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e)); + xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e)); + xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) { + sticky = xy7 | ((xy6 << 1) << (63 - e)); + xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e)); + xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e)); + xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + sticky = (xy7 << 1) << (31 - e); + xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e)); + xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e)); + xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e)); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* if this is a magnitude subtract, negate the significand of xy */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = ~xy3; + xy4 = ~xy4; + xy5 = ~xy5; + xy6 = ~xy6; + xy7 = -xy7; + if (xy7 == 0) + if (++xy6 == 0) + if (++xy5 == 0) + if (++xy4 == 0) + if (++xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z7 += xy7; + e = (z7 < xy7); + z6 += xy6; + if (e) { + z6++; + e = (z6 <= xy6); + } else + e = (z6 < xy6); + z5 += xy5; + if (e) { + z5++; + e = (z5 <= xy5); + } else + e = (z5 < xy5); + z4 += xy4; + if (e) { + z4++; + e = (z4 <= 
xy4); + } else + e = (z4 < xy4); + z3 += xy3; + if (e) { + z3++; + e = (z3 <= xy3); + } else + e = (z3 < xy3); + z2 += xy2; + if (e) { + z2++; + e = (z2 <= xy2); + } else + e = (z2 < xy2); + z1 += xy1; + if (e) { + z1++; + e = (z1 <= xy1); + } else + e = (z1 < xy1); + z0 += xy0; + if (e) + z0++; + + /* postnormalize and collect rounding information into z4 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 116) { + z4 = 1; /* result can't be exactly zero */ + z0 = z1 = z2 = z3 = 0; + } else if (e >= 96) { + sticky = z7 | z6 | z5 | z4 | z3 | z2 | + ((z1 << 1) << (127 - e)); + z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e)); + if (sticky) + z4 |= 1; + z3 = z0 >> (e - 96); + z0 = z1 = z2 = 0; + } else if (e >= 64) { + sticky = z7 | z6 | z5 | z4 | z3 | + ((z2 << 1) << (95 - e)); + z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e)); + if (sticky) + z4 |= 1; + z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e)); + z2 = z0 >> (e - 64); + z0 = z1 = 0; + } else if (e >= 32) { + sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e)); + z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e)); + if (sticky) + z4 |= 1; + z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + z1 = z0 >> (e - 32); + z0 = 0; + } else { + sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e); + z4 = (z4 >> e) | ((z3 << 1) << (31 - e)); + if (sticky) + z4 |= 1; + z3 = (z3 >> e) | ((z2 << 1) << (31 - e)); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 >>= e; + } + ez = 1; + } else if (z0 >= 0x20000) { + /* carry out; shift right by one */ + sticky = (z4 & 1) | z5 | z6 | z7; + z4 = (z4 >> 1) | (z3 << 31); + if (sticky) + z4 |= 1; + z3 = (z3 >> 1) | (z2 << 31); + z2 = (z2 >> 1) | (z1 << 31); + z1 = (z1 >> 1) | (z0 << 31); + z0 >>= 1; + ez++; + } else { + if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) + != 0) { + /* + * borrow/cancellation; shift left as much as + 
* exponent allows + */ + while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) { + z0 = z1; + z1 = z2; + z2 = z3; + z3 = z4; + z4 = z5; + z5 = z6; + z6 = z7; + z7 = 0; + ez -= 32; + } + while (z0 < 0x10000 && ez > 1) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 = (z3 << 1) | (z4 >> 31); + z4 = (z4 << 1) | (z5 >> 31); + z5 = (z5 << 1) | (z6 >> 31); + z6 = (z6 << 1) | (z7 >> 31); + z7 <<= 1; + ez--; + } + } + if (z5 | z6 | z7) + z4 |= 1; + } + + /* get the rounding mode */ + rm = fsr >> 30; + + /* strip off the integer bit, if there is one */ + ibit = z0 & 0x10000; + if (ibit) + z0 -= 0x10000; + else { + ez = 0; + if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */ + zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + __fenv_setfsr(&fsr); + return (zz.q); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz) + rm ^= rm >> 1; + + /* round and raise exceptions */ + if (z4) { + fsr |= FSR_NXC; + + /* decide whether to round the fraction up */ + if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || + (z4 == 0x80000000u && (z3 & 1))))) { + /* round up and renormalize if necessary */ + if (++z3 == 0) + if (++z2 == 0) + if (++z1 == 0) + if (++z0 == 0x10000) { + z0 = 0; + ez++; + } + } + } + + /* check for under/overflow */ + if (ez >= 0x7fff) { + if (rm == FSR_RN || rm == FSR_RP) { + zz.i[0] = sz | 0x7fff0000; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + } else { + zz.i[0] = sz | 0x7ffeffff; + zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; + } + fsr |= FSR_OFC | FSR_NXC; + } else { + zz.i[0] = sz | (ez << 16) | z0; + zz.i[1] = z1; + zz.i[2] = z2; + zz.i[3] = z3; + + /* + * !ibit => exact result was tiny before rounding, + * z4 nonzero => result delivered is inexact + */ + if (!ibit) { + if (z4) + fsr |= FSR_UFC | FSR_NXC; + else if (fsr & FSR_UFM) + fsr |= FSR_UFC; + } + } + + /* restore the fsr and emulate exceptions as 
needed */ + if ((fsr & FSR_CEXC) & (fsr >> 23)) { + __fenv_setfsr(&fsr); + if (fsr & FSR_OFC) { + dummy = huge; + dummy *= huge; + } else if (fsr & FSR_UFC) { + dummy = tiny; + if (fsr & FSR_NXC) + dummy *= tiny; + else + dummy -= tiny2; + } else { + dummy = huge; + dummy += tiny; + } + } else { + fsr |= (fsr & 0x1f) << 5; + __fenv_setfsr(&fsr); + } + return (zz.q); +} + +#elif defined(__i386) + +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0, 0x3fe00000u }, /* 0.5 */ + { 0, 0x40000000u }, /* 2.0 */ + { 0, 0x3df00000u }, /* 2^-32 */ + { 0, 0x3bf00000u }, /* 2^-64 */ + { 0, 0x41f00000u }, /* 2^32 */ + { 0, 0x43e00000u }, /* 2^63 */ + { 0, 0x7fe00000u }, /* 2^1023 */ + { 0, 0x00100000u }, /* 2^-1022, the smallest normal double */ + { 0, 0x00100001u } /* 2^-1022 * (1 + 2^-20) */ +}; + +#define half C[0].d +#define two C[1].d +#define twom32 C[2].d +#define twom64 C[3].d +#define two32 C[4].d +#define two63 C[5].d +#define huge C[6].d +#define tiny C[7].d +#define tiny2 C[8].d + +#if defined(__amd64) +#define NI 4 /* NI = number of 32-bit words overlaid on a long double in the unions below */ +#else +#define NI 3 +#endif + +/* + * fmal for x86: 80-bit extended double precision, little-endian; forms x * y to 128 bits, adds z, and rounds the sum once using the rounding mode found in the caller's control word + */ +long double +__fmal(long double x, long double y, long double z) { + union { + unsigned i[NI]; + long double e; + } xx, yy, zz; /* i[0], i[1] = low, high significand words; i[2] = sign and exponent */ + long double xhi, yhi, xlo, ylo, t; + unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4; /* five-word significands, word 0 most significant */ + unsigned oldcwsw, cwsw, rm, sticky, carry; + int ex, ey, ez, exy, sxy, sz, e, tinyafter; + volatile double dummy; /* volatile so the flag-raising arithmetic at the end is really executed */ + + /* extract the exponents of the arguments */ + xx.e = x; + yy.e = y; + zz.e = z; + ex = xx.i[2] & 0x7fff; + ey = yy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + + /* dispense with inf, nan, and zero cases */ + if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 || + (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */ + return (x * y + z); + + if (ez == 0x7fff) /* z is inf or nan */ + return (x + z); /* avoid spurious under/overflow in x * y */ + + if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */ + /* + * x * y isn't zero but could underflow to zero, + * so don't add z, lest we perturb the sign + */ + return (x * y); + + /*
+ * now x, y, and z are all finite and nonzero; extract signs and + * normalize the significands (this will raise the denormal operand + * exception if need be) + */ + sxy = (xx.i[2] ^ yy.i[2]) & 0x8000; + sz = zz.i[2] & 0x8000; + if (!ex) { + xx.e = x * two63; + ex = (xx.i[2] & 0x7fff) - 63; /* undo the 2^63 scaling in the exponent */ + } + if (!ey) { + yy.e = y * two63; + ey = (yy.i[2] & 0x7fff) - 63; + } + if (!ez) { + zz.e = z * two63; + ez = (zz.i[2] & 0x7fff) - 63; + } + + /* + * save the control and status words, mask all exceptions, and + * set rounding to 64-bit precision and toward-zero + */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000; + __fenv_setcwsw(&cwsw); + + /* multiply x*y to 128 bits */ + exy = ex + ey - 0x3fff; /* biased exponent of the product */ + xx.i[2] = 0x3fff; + yy.i[2] = 0x3fff; /* both operands now carry the exponent of 1.0 and a clear sign, i.e. lie in [1,2) */ + x = xx.e; + y = yy.e; + xhi = ((x + twom32) + two32) - two32; /* x rounded to a multiple of 2^-31 (relies on the 64-bit, toward-zero mode set above) */ + yhi = ((y + twom32) + two32) - two32; + xlo = x - xhi; + ylo = y - yhi; + x *= y; /* leading 64 bits of the product, truncated */ + y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; /* low-order part: the product minus the truncated x */ + if (x >= two) { /* product landed in [2,4): halve it and bump the exponent */ + x *= half; + y *= half; + exy++; + } + + /* extract the significands */ + xx.e = x; + xy0 = xx.i[1]; + xy1 = xx.i[0]; + yy.e = t = y + twom32; + xy2 = yy.i[0]; + yy.e = (y - (t - twom32)) + twom64; + xy3 = yy.i[0]; + xy4 = 0; + z0 = zz.i[1]; + z1 = zz.i[0]; + z2 = z3 = z4 = 0; + + /* + * now x*y is represented by sxy, exy, and xy[0-4], and z is + * represented likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && + (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + z2 = xy2; xy2 = 0; + z3 = xy3; xy3 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 130) { /* xy lies entirely below the five words kept: only a sticky bit survives */ + xy0 = xy1 = xy2 = xy3 = 0; + xy4 = 1; + } else if (e >= 128) { + sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e)); /* two-step shift: a total count of 32 yields 0 instead of being undefined */ + xy4 = xy0 >> (e - 128); + if (sticky) + xy4 |= 1; + xy0 = xy1 = xy2 = xy3 = 0; + } else if (e >= 96) { +
sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e)); + xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); + if (sticky) + xy4 |= 1; + xy3 = xy0 >> (e - 96); + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy3 | ((xy2 << 1) << (95 - e)); + xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); + if (sticky) + xy4 |= 1; + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) { + sticky = (xy3 << 1) << (63 - e); + xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); + if (sticky) + xy4 |= 1; + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + xy4 = (xy3 << 1) << (31 - e); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* if this is a magnitude subtract, negate the significand of xy */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = ~xy3; + xy4 = -xy4; /* two's complement over five words: invert, then add one at the bottom */ + if (xy4 == 0) + if (++xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z4 += xy4; + carry = (z4 < xy4); /* unsigned wraparound: the sum is smaller than an addend */ + z3 += xy3; + if (carry) { + z3++; + carry = (z3 <= xy3); /* with a carry in, wraparound shows as sum <= addend */ + } else + carry = (z3 < xy3); + z2 += xy2; + if (carry) { + z2++; + carry = (z2 <= xy2); + } else + carry = (z2 < xy2); + z1 += xy1; + if (carry) { + z1++; + carry = (z1 <= xy1); + } else + carry = (z1 < xy1); + z0 += xy0; + if (carry) { + z0++; + carry = (z0 <= xy0); + } else + carry = (z0 < xy0); + + /* for a magnitude subtract, ignore the last carry out */ + if (sxy ^ sz) + carry = 0; + + /* postnormalize and collect rounding information into z2 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 67) { + z2 = 1; /* result can't be exactly zero */ + z0 = z1 = 0; + } else if (e >= 64) { + sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e)); + z2 =
(z0 >> (e - 64)) | ((carry << 1) << (95 - e)); + if (sticky) + z2 |= 1; + z1 = carry >> (e - 64); + z0 = 0; + } else if (e >= 32) { + sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + if (sticky) + z2 |= 1; + z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e)); + z0 = carry >> (e - 32); + } else { + sticky = z4 | z3 | (z2 << 1) << (31 - e); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + if (sticky) + z2 |= 1; + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 = (z0 >> e) | ((carry << 1) << (31 - e)); + } + ez = 1; + } else if (carry) { + /* carry out; shift right by one */ + sticky = (z2 & 1) | z3 | z4; + z2 = (z2 >> 1) | (z1 << 31); + if (sticky) + z2 |= 1; + z1 = (z1 >> 1) | (z0 << 31); + z0 = (z0 >> 1) | 0x80000000; /* the carry becomes the new leading bit */ + ez++; + } else { + if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) { + /* + * borrow/cancellation; shift left as much as + * exponent allows + */ + while (!z0 && ez >= 33) { /* whole words first */ + z0 = z1; + z1 = z2; + z2 = z3; + z3 = z4; + z4 = 0; + ez -= 32; + } + while (z0 < 0x80000000u && ez > 1) { /* then one bit at a time */ + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 = (z3 << 1) | (z4 >> 31); + z4 <<= 1; + ez--; + } + } + if (z3 | z4) + z2 |= 1; /* fold the words below z2 into a sticky bit */ + } + + /* get the rounding mode */ + rm = oldcwsw & 0x0c000000; + + /* adjust exponent if result is subnormal */ + tinyafter = 0; + if (!(z0 & 0x80000000)) { + ez = 0; + tinyafter = 1; + if (!(z0 | z1 | z2)) { /* exact zero */ + zz.i[2] = rm == FCW_RM ? 
0x8000 : 0; /* an exact zero gets its sign bit set only when rm is FCW_RM */ + zz.i[1] = zz.i[0] = 0; + __fenv_setcwsw(&oldcwsw); + return (zz.e); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz && (rm == FCW_RM || rm == FCW_RP)) + rm = (FCW_RM + FCW_RP) - rm; /* swaps FCW_RM and FCW_RP */ + + /* round */ + if (z2) { + if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || + (z2 == 0x80000000u && (z1 & 1))))) { /* FCW_RN: above half, or exactly half with an odd z1 */ + /* round up and renormalize if necessary */ + if (++z1 == 0) { + if (++z0 == 0) { + z0 = 0x80000000; + ez++; + } else if (z0 == 0x80000000) { + /* rounded up to smallest normal */ + ez = 1; + if ((rm == FCW_RP && z2 > + 0x80000000u) || (rm == FCW_RN && + z2 >= 0xc0000000u)) + /* + * would have rounded up to + * smallest normal even with + * unbounded range + */ + tinyafter = 0; + } + } + } + } + + /* restore the control and status words, check for over/underflow */ + __fenv_setcwsw(&oldcwsw); + if (ez >= 0x7fff) { + if (rm == FCW_RN || rm == FCW_RP) { + zz.i[2] = sz | 0x7fff; + zz.i[1] = 0x80000000; + zz.i[0] = 0; /* signed infinity */ + } else { + zz.i[2] = sz | 0x7ffe; + zz.i[1] = 0xffffffff; + zz.i[0] = 0xffffffff; /* largest finite magnitude */ + } + dummy = huge; + dummy *= huge; /* evaluated at run time because dummy is volatile; presumably there to raise overflow and inexact */ + } else { + zz.i[2] = sz | ez; + zz.i[1] = z0; + zz.i[0] = z1; + + /* + * tinyafter => result rounded w/ unbounded range would be tiny, + * z2 nonzero => result delivered is inexact + */ + if (tinyafter) { + dummy = tiny; + if (z2) + dummy *= tiny; + else + dummy -= tiny2; + } else if (z2) { + dummy = huge; + dummy += tiny; + } + } + + return (zz.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fmax.c b/usr/src/libm/src/m9x/fmax.c new file mode 100644 index 0000000..454a38b --- /dev/null +++ b/usr/src/libm/src/m9x/fmax.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fmax.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmax = __fmax +#endif + +/* + * fmax(x,y) returns the larger of x and y. If just one of the + * arguments is NaN, fmax returns the other argument. If both + * arguments are NaN, fmax returns NaN. + * + * See fmaxf.c for a discussion of implementation trade-offs. 
+ */ + +#include "libm.h" /* for isgreaterequal macro */ + +double +__fmax(double x, double y) { + union { + unsigned i[2]; + double d; + } xx, yy; /* give access to the 32-bit words of x and y */ + unsigned s; /* sign-bit mask to clear; 0 when both signs are set */ + + /* if y is nan, replace it by x */ + if (y != y) + y = x; + + /* if x is less than y or x and y are unordered, replace x by y */ +#if defined(COMPARISON_MACRO_BUG) + if (x != x || x < y) /* same test with plain comparisons: after the step above, y can only be NaN if x is */ +#else + if (!isgreaterequal(x, y)) +#endif + x = y; + + /* + * now x and y are either both NaN or both numeric; clear the + * sign of the result if either x or y has its sign clear + * (in effect this only changes the result for x = -0, y = +0) + */ + xx.d = x; + yy.d = y; +#if defined(__sparc) + s = ~(xx.i[0] & yy.i[0]) & 0x80000000; /* here the sign bit is in i[0] */ + xx.i[0] &= ~s; +#elif defined(__i386) + s = ~(xx.i[1] & yy.i[1]) & 0x80000000; /* here the sign bit is in i[1] */ + xx.i[1] &= ~s; +#else +#error Unknown architecture +#endif + return (xx.d); +} diff --git a/usr/src/libm/src/m9x/fmaxf.c b/usr/src/libm/src/m9x/fmaxf.c new file mode 100644 index 0000000..f5b570d --- /dev/null +++ b/usr/src/libm/src/m9x/fmaxf.c @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)fmaxf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmaxf = __fmaxf +#endif + +/* + * fmax(x,y) returns the larger of x and y. If just one of the + * arguments is NaN, fmax returns the other argument. If both + * arguments are NaN, fmax returns NaN (ideally, one of the + * argument NaNs). + * + * C99 does not require that fmax(-0,+0) = fmax(+0,-0) = +0, but + * ideally fmax should satisfy this. + * + * C99 makes no mention of exceptions for fmax. I suppose ideally + * either fmax never raises any exceptions or else it raises the + * invalid operation exception if and only if some argument is a + * signaling NaN. In the former case, fmax should always return + * one of its arguments. In the latter, fmax shouldn't return a + * signaling NaN, although when both arguments are signaling NaNs, + * this ideal is at odds with the stipulation that fmax should + * always return one of its arguments. + * + * Commutativity of fmax follows from the properties listed above + * except when both arguments are NaN. In that case, fmax may be + * declared commutative by fiat because there is no portable way + * to tell different NaNs apart. Ideally fmax would be truly com- + * mutative for all arguments. + * + * On SPARC V8, fmax must involve tests and branches. Ideally, + * an implementation on SPARC V9 should avoid branching, using + * conditional moves instead where necessary, and be as efficient + * as possible in its use of other resources. + * + * It appears to be impossible to attain all of the aforementioned + * ideals simultaneously. The implementation below satisfies the + * following (on SPARC): + * + * 1. fmax(x,y) returns the larger of x and y if neither x nor y + * is NaN and the non-NaN argument if just one of x or y is NaN. + * If both x and y are NaN, fmax(x,y) returns x unchanged. + * 2. fmax(-0,+0) = fmax(+0,-0) = +0. + * 3. If either argument is a signaling NaN, fmax raises the invalid + * operation exception. 
Otherwise, it raises no exceptions. + */ + +#include "libm.h" /* for isgreaterequal macro */ + +float +__fmaxf(float x, float y) { + /* + * On SPARC v8plus/v9, this could be implemented as follows + * (assuming %f0 = x, %f1 = y, return value left in %f0): + * + * fcmps %fcc0,%f1,%f1 + * fmovsu %fcc0,%f0,%f1 + * fcmps %fcc0,%f0,%f1 + * fmovsul %fcc0,%f1,%f0 + * st %f0,[x] + * st %f1,[y] + * ld [x],%l0 + * ld [y],%l1 + * and %l0,%l1,%l2 + * sethi %hi(0x80000000),%l3 + * andn %l3,%l2,%l2 + * andn %l0,%l2,%l0 + * st %l0,[x] + * ld [x],%f0 + * + * If VIS instructions are available, use this code instead: + * + * fcmps %fcc0,%f1,%f1 + * fmovsu %fcc0,%f0,%f1 + * fcmps %fcc0,%f0,%f1 + * fmovsul %fcc0,%f1,%f0 + * fands %f0,%f1,%f2 + * fzeros %f3 + * fnegs %f3,%f3 + * fandnot2s %f3,%f2,%f2 + * fandnot2s %f0,%f2,%f0 + * + * If VIS 3.0 instructions are available, use this: + * + * flcmps %fcc0,%f0,%f1 + * fmovslg %fcc0,%f1,%f0 ! move if %fcc0 is 1 or 2 + */ + + union { + unsigned i; + float f; + } xx, yy; /* give access to the bit patterns of x and y */ + unsigned s; /* sign-bit mask to clear; 0 when both signs are set */ + + /* if y is nan, replace it by x */ + if (y != y) + y = x; + + /* if x is less than y or x and y are unordered, replace x by y */ +#if defined(COMPARISON_MACRO_BUG) + if (x != x || x < y) /* same test with plain comparisons: after the step above, y can only be NaN if x is */ +#else + if (!isgreaterequal(x, y)) +#endif + x = y; + + /* + * now x and y are either both NaN or both numeric; clear the + * sign of the result if either x or y has its sign clear + * (in effect this only changes the result for x = -0, y = +0) + */ + xx.f = x; + yy.f = y; + s = ~(xx.i & yy.i) & 0x80000000; /* 0x80000000 unless both sign bits are set */ + xx.i &= ~s; + + return (xx.f); +} diff --git a/usr/src/libm/src/m9x/fmaxl.c b/usr/src/libm/src/m9x/fmaxl.c new file mode 100644 index 0000000..68a236e --- /dev/null +++ b/usr/src/libm/src/m9x/fmaxl.c @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fmaxl.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmaxl = __fmaxl +#endif + +#include "libm.h" /* for isgreaterequal macro */ + +long double +__fmaxl(long double x, long double y) { + union { +#if defined(__sparc) + unsigned i[4]; +#elif defined(__i386) + unsigned i[3]; +#else +#error Unknown architecture +#endif + long double ld; + } xx, yy; /* give access to the 32-bit words of x and y */ + unsigned s; /* sign-bit mask to clear; 0 when both signs are set */ + + /* if y is nan, replace it by x */ + if (y != y) + y = x; + + /* if x is less than y or x and y are unordered, replace x by y */ +#if defined(COMPARISON_MACRO_BUG) + if (x != x || x < y) /* same test with plain comparisons: after the step above, y can only be NaN if x is */ +#else + if (!isgreaterequal(x, y)) +#endif + x = y; + + /* + * now x and y are either both NaN or both numeric; clear the + * sign of the result if either x or y has its sign clear + * (in effect this only changes the result for x = -0, y = +0) + */ + xx.ld = x; + yy.ld = y; +#if defined(__sparc) + s = ~(xx.i[0] & yy.i[0]) & 0x80000000; /* here the sign is the top bit of i[0] */ + xx.i[0] &= ~s; +#elif defined(__i386) + s = ~(xx.i[2] & yy.i[2]) & 0x8000; /* here the sign is bit 15 of i[2] */ + xx.i[2] &= ~s; +#else +#error Unknown architecture +#endif + + return (xx.ld); +} diff --git a/usr/src/libm/src/m9x/fmin.c b/usr/src/libm/src/m9x/fmin.c new file mode 100644 index 0000000..24ad2b5 --- /dev/null +++ b/usr/src/libm/src/m9x/fmin.c @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject
to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fmin.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmin = __fmin +#endif + +/* + * fmin(x,y) returns the smaller of x and y. If just one of the + * arguments is NaN, fmin returns the other argument. If both + * arguments are NaN, fmin returns NaN. + * + * See fmaxf.c for a discussion of implementation trade-offs. 
+ */ + +#include "libm.h" /* for islessequal macro */ + +double +__fmin(double x, double y) { + union { + unsigned i[2]; + double d; + } xx, yy; /* give access to the 32-bit words of x and y */ + unsigned s; /* sign bit to set; 0 when both signs are clear */ + + /* if y is nan, replace it by x */ + if (y != y) + y = x; + + /* if x is greater than y or x and y are unordered, replace x by y */ +#if defined(COMPARISON_MACRO_BUG) + if (x != x || x > y) /* same test with plain comparisons: after the step above, y can only be NaN if x is */ +#else + if (!islessequal(x, y)) +#endif + x = y; + + /* + * now x and y are either both NaN or both numeric; set the + * sign of the result if either x or y has its sign set + * (in effect this only changes the result for x = +0, y = -0) + */ + xx.d = x; + yy.d = y; +#if defined(__sparc) + s = (xx.i[0] | yy.i[0]) & 0x80000000; /* here the sign bit is in i[0] */ + xx.i[0] |= s; +#elif defined(__i386) + s = (xx.i[1] | yy.i[1]) & 0x80000000; /* here the sign bit is in i[1] */ + xx.i[1] |= s; +#else +#error Unknown architecture +#endif + + return (xx.d); +} diff --git a/usr/src/libm/src/m9x/fminf.c b/usr/src/libm/src/m9x/fminf.c new file mode 100644 index 0000000..8c1c7c8 --- /dev/null +++ b/usr/src/libm/src/m9x/fminf.c @@ -0,0 +1,102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)fminf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fminf = __fminf +#endif + +#include "libm.h" /* for islessequal macro */ + +float +__fminf(float x, float y) { + /* + * On SPARC v8plus/v9, this could be implemented as follows + * (assuming %f0 = x, %f1 = y, return value left in %f0): + * + * fcmps %fcc0,%f1,%f1 + * fmovsu %fcc0,%f0,%f1 + * fcmps %fcc0,%f0,%f1 + * fmovsug %fcc0,%f1,%f0 + * st %f0,[x] + * st %f1,[y] + * ld [x],%l0 + * ld [y],%l1 + * or %l0,%l1,%l2 + * sethi %hi(0x80000000),%l3 + * and %l3,%l2,%l2 + * or %l0,%l2,%l0 + * st %l0,[x] + * ld [x],%f0 + * + * If VIS instructions are available, use this code instead: + * + * fcmps %fcc0,%f1,%f1 + * fmovsu %fcc0,%f0,%f1 + * fcmps %fcc0,%f0,%f1 + * fmovsug %fcc0,%f1,%f0 + * fors %f0,%f1,%f2 + * fzeros %f3 + * fnegs %f3,%f3 + * fands %f3,%f2,%f2 + * fors %f0,%f2,%f0 + * + * If VIS 3.0 instructions are available, use this: + * + * flcmps %fcc0,%f0,%f1 + * fmovsge %fcc0,%f1,%f0 ! move if %fcc0 is 0 or 2 + */ + + union { + unsigned i; + float f; + } xx, yy; /* give access to the bit patterns of x and y */ + unsigned s; /* sign bit to set; 0 when both signs are clear */ + + /* if y is nan, replace it by x */ + if (y != y) + y = x; + + /* if x is greater than y or x and y are unordered, replace x by y */ +#if defined(COMPARISON_MACRO_BUG) + if (x != x || x > y) /* same test with plain comparisons: after the step above, y can only be NaN if x is */ +#else + if (!islessequal(x, y)) +#endif + x = y; + + /* + * now x and y are either both NaN or both numeric; set the + * sign of the result if either x or y has its sign set + * (in effect this only changes the result for x = +0, y = -0) + */ + xx.f = x; + yy.f = y; + s = (xx.i | yy.i) & 0x80000000; /* the sign bit if either operand has it set */ + xx.i |= s; + + return (xx.f); +} diff --git a/usr/src/libm/src/m9x/fminl.c b/usr/src/libm/src/m9x/fminl.c new file mode 100644 index 0000000..07944b6 --- /dev/null +++ b/usr/src/libm/src/m9x/fminl.c @@ -0,0 +1,78 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fminl.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fminl = __fminl +#endif + +#include "libm.h" /* for islessequal macro */ + +long double +__fminl(long double x, long double y) { + union { +#if defined(__sparc) + unsigned i[4]; +#elif defined(__i386) + unsigned i[3]; +#else +#error Unknown architecture +#endif + long double ld; + } xx, yy; /* give access to the 32-bit words of x and y */ + unsigned s; /* sign bit to set; 0 when both signs are clear */ + + /* if y is nan, replace it by x */ + if (y != y) + y = x; + + /* if x is greater than y or x and y are unordered, replace x by y */ +#if defined(COMPARISON_MACRO_BUG) + if (x != x || x > y) /* same test with plain comparisons: after the step above, y can only be NaN if x is */ +#else + if (!islessequal(x, y)) +#endif + x = y; + + /* + * now x and y are either both NaN or both numeric; set the + * sign of the result if either x or y has its sign set + * (in effect this only changes the result for x = +0, y = -0) + */ + xx.ld = x; + yy.ld = y; +#if defined(__sparc) + s = (xx.i[0] | yy.i[0]) & 0x80000000; /* here the sign is the top bit of i[0] */ + xx.i[0] |= s; +#elif defined(__i386) + s = (xx.i[2] | yy.i[2]) & 0x8000; /* here the sign is bit 15 of i[2] */ + xx.i[2] |= s; +#else +#error Unknown architecture +#endif + + return (xx.ld); +} diff --git a/usr/src/libm/src/m9x/frexp.c b/usr/src/libm/src/m9x/frexp.c new file mode 100644 index 0000000..a5f7ebc --- /dev/null +++ b/usr/src/libm/src/m9x/frexp.c @@ -0,0 +1,102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the
terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)frexp.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak frexp = __frexp +#endif + +/* + * frexp(x, exp) returns the normalized significand of x and sets + * *exp so that x = r*2^(*exp) where r is the return value. If x + * is finite and nonzero, 1/2 <= |r| < 1. + * + * If x is zero, infinite or NaN, frexp returns x and sets *exp = 0. + * (The relevant standards do not specify *exp when x is infinite or + * NaN, but this code sets it anyway.) + * + * If x is a signaling NaN, this code returns x without attempting + * to raise the invalid operation exception. If x is subnormal, + * this code treats it as nonzero regardless of nonstandard mode. 
+ */ + +#include "libm.h" + +double +__frexp(double x, int *exp) { + union { + unsigned i[2]; + double d; + } xx, yy; + double t; + unsigned hx; /* high word of x with the sign bit removed */ + int e; /* extra exponent contributed by normalizing a subnormal */ + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx >= 0x7ff00000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00100000) { /* x is subnormal or zero */ + if ((hx | xx.i[LOWORD]) == 0) { + *exp = 0; + return (x); + } + + /* + * normalize x by regarding it as an integer + * + * Here we use 32-bit integer arithmetic to avoid trapping + * or emulating 64-bit arithmetic. If 64-bit arithmetic is + * available (e.g., in SPARC V9), do this instead: + * + * long lx = ((long) hx << 32) | xx.i[LOWORD]; + * xx.d = ((int)xx.i[HIWORD] < 0)? -lx : lx; + * + * If subnormal arithmetic doesn't trap, just multiply x by + * a power of two. + */ + yy.i[HIWORD] = 0x43300000 | hx; + yy.i[LOWORD] = xx.i[LOWORD]; /* yy.d = 2^52 + (fraction bits of x read as an integer) */ + t = yy.d; + yy.i[HIWORD] = 0x43300000; + yy.i[LOWORD] = 0; /* yy.d = 2^52 */ + t -= yy.d; /* t = |x| scaled */ + xx.d = ((int)xx.i[HIWORD] < 0)? -t : t; /* reapply the sign of x */ + hx = xx.i[HIWORD] & ~0x80000000; + e = -1074; /* a subnormal |x| equals t * 2^-1074 */ + } + + /* now xx.d is normal */ + xx.i[HIWORD] = (xx.i[HIWORD] & ~0x7ff00000) | 0x3fe00000; /* overwrite the exponent field with that of 1/2 */ + *exp = e + (hx >> 20) - 0x3fe; /* exponent of the input relative to [1/2, 1) */ + return (xx.d); +} diff --git a/usr/src/libm/src/m9x/frexpf.c b/usr/src/libm/src/m9x/frexpf.c new file mode 100644 index 0000000..f137adc --- /dev/null +++ b/usr/src/libm/src/m9x/frexpf.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)frexpf.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak frexpf = __frexpf +#endif + +#include "libm.h" + +float +__frexpf(float x, int *exp) { + union { + unsigned i; + float f; + } xx; + unsigned hx; /* bit pattern of x with the sign bit removed */ + int e; /* extra exponent contributed by normalizing a subnormal */ + + xx.f = x; + hx = xx.i & ~0x80000000; + + if (hx >= 0x7f800000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00800000) { /* x is subnormal or zero */ + if (hx == 0) { + *exp = 0; + return (x); + } + + /* normalize x by regarding it as an integer */ + xx.f = (int) xx.i < 0 ? -(int) hx : (int) hx; /* int-to-float conversion; exact because hx < 2^23 */ + hx = xx.i & ~0x80000000; + e = -149; /* a subnormal float equals its fraction bits times 2^-149 */ + } + + /* now xx.f is normal */ + xx.i = (xx.i & ~0x7f800000) | 0x3f000000; /* overwrite the exponent field with that of 1/2 */ + *exp = e + (hx >> 23) - 0x7e; /* exponent of the input relative to [1/2, 1) */ + return (xx.f); +} diff --git a/usr/src/libm/src/m9x/frexpl.c b/usr/src/libm/src/m9x/frexpl.c new file mode 100644 index 0000000..a0dd032 --- /dev/null +++ b/usr/src/libm/src/m9x/frexpl.c @@ -0,0 +1,126 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)frexpl.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak frexpl = __frexpl +#endif + +#include "libm.h" + +#if defined(__sparc) + +long double +__frexpl(long double x, int *exp) { + union { + unsigned i[4]; + long double q; + } xx; /* i[0] is the word holding sign and exponent */ + unsigned hx; /* i[0] with the sign bit removed */ + int e, s; /* e: exponent adjustment from normalizing; s: saved sign bit */ + + xx.q = x; + hx = xx.i[0] & ~0x80000000; + + if (hx >= 0x7fff0000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00010000) { /* x is subnormal or zero */ + if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) { + *exp = 0; + return (x); + } + + /* normalize x */ + s = xx.i[0] & 0x80000000; + while ((hx | (xx.i[1] & 0xffff0000)) == 0) { /* whole-word shifts while hx would stay below 0x10000 */ + hx = xx.i[1]; + xx.i[1] = xx.i[2]; + xx.i[2] = xx.i[3]; + xx.i[3] = 0; + e -= 32; + } + while (hx < 0x10000) { /* then one bit at a time until the exponent field becomes nonzero */ + hx = (hx << 1) | (xx.i[1] >> 31); + xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); + xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); + xx.i[3] <<= 1; + e--; + } + xx.i[0] = s | hx; + } + + /* now xx.q is normal */ + xx.i[0] = (xx.i[0] & ~0x7fff0000) | 0x3ffe0000; /* overwrite the exponent field with that of 1/2 */ + *exp = e + (hx >> 16) - 0x3ffe; + return (xx.q); +} + +#elif defined(__i386) + +long double +__frexpl(long double x, int *exp) { + union { + unsigned i[3]; + long double e; + } xx; /* i[0], i[1]: significand words; i[2]: sign and exponent */ + unsigned hx; /* exponent field of x */ + int e; /* exponent adjustment from normalizing a subnormal */ + + xx.e = x; + hx = xx.i[2] & 0x7fff; + + if (hx >= 0x7fff) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x0001) { /* x is subnormal or zero */ + if ((xx.i[0] | xx.i[1]) == 0) { /* all significand bits are zero: x is +-0 */ + *exp = 0; + return (x); + } + + /* normalize x */ +
xx.e *= 18446744073709551616.0L; /* 2^64 */ + hx = xx.i[2] & 0x7fff; + e = -64; /* compensates for the 2^64 scaling */ + } + + /* now xx.e is normal */ + xx.i[2] = (xx.i[2] & 0x8000) | 0x3ffe; /* keep the sign, set the exponent of 1/2 */ + *exp = e + hx - 0x3ffe; + return (xx.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/ldexp.c b/usr/src/libm/src/m9x/ldexp.c new file mode 100644 index 0000000..60fb5b6 --- /dev/null +++ b/usr/src/libm/src/m9x/ldexp.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ldexp.c 1.13 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak ldexp = __ldexp +#endif + +#include "libm.h" +#include <errno.h> /* for errno and ERANGE, both used below */ + +double +ldexp(double x, int n) { + int *px = (int *) &x, ix = px[HIWORD] & ~0x80000000; /* NOTE(review): reads the words of x through an int pointer; this is not alias-safe under ISO C rules - confirm the build disables strict-aliasing optimizations */ + + if (ix >= 0x7ff00000 || (px[LOWORD] | ix) == 0) /* x is inf, NaN, or zero: nothing to scale */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + x = scalbn(x, n); + ix = px[HIWORD] & ~0x80000000; /* re-read the high word: px still points at the updated x */ + /* + * SVID3 requires both overflow and underflow cases to set errno + * XPG3/XPG4/XPG4.2/SUSv2 requires overflow to set errno + */ + if (ix >= 0x7ff00000 || (px[LOWORD] | ix) == 0) /* the scaled result became infinite or zero */ + errno = ERANGE; + return (x); +} diff --git a/usr/src/libm/src/m9x/ldexpf.c b/usr/src/libm/src/m9x/ldexpf.c new file mode 100644 index 0000000..881874c --- /dev/null +++ b/usr/src/libm/src/m9x/ldexpf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
/*
 * ldexpf(x, n) is a direct wrapper: it returns scalbnf(x, n).
 * Unlike the double-precision ldexp it does no errno handling of its own.
 */
float
ldexpf(float x, int n) {
	return (scalbnf(x, n));
}

/*
 * ldexpl(x, n) is a direct wrapper: it returns scalbnl(x, n).
 * Unlike the double-precision ldexp it does no errno handling of its own.
 */
long double
ldexpl(long double x, int n) {
	return (scalbnl(x, n));
}
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)llrint.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak llrint = __llrint +#if defined(__sparcv9) || defined(__amd64) +#pragma weak lrint = __llrint +#pragma weak __lrint = __llrint +#endif +#endif + +/* + * llrint(x) rounds its argument to the nearest integer according + * to the current rounding direction and converts the result to a + * 64 bit signed integer. + * + * If x is NaN, infinite, or so large that the nearest integer would + * exceed 64 bits, the invalid operation exception is raised. If x + * is not an integer, the inexact exception is raised. + */ + +#include "libm.h" + +long long +llrint(double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpll instruction + * instead. 
+ */ + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) || defined(__amd64) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43300000; +#elif defined(__i386) /* !defined(__amd64) */ + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43e00000; +#else +#error Unknown architecture +#endif + yy.i[LOWORD] = 0; + x = (x + yy.d) - yy.d; + } + + /* now x is nan, inf, or integral */ + return ((long long) x); +} diff --git a/usr/src/libm/src/m9x/llrintf.c b/usr/src/libm/src/m9x/llrintf.c new file mode 100644 index 0000000..70287f3 --- /dev/null +++ b/usr/src/libm/src/m9x/llrintf.c @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * llrintf(x) converts x to a long long after rounding it to an integral
 * value with ordinary floating point arithmetic, so the rounding follows
 * whatever rounding direction is currently in effect.
 *
 * Note: on x86 this relies on the default rounding precision mode; the
 * fistpll instruction would do the same job directly.
 */
long long
llrintf(float x) {
	union {
		unsigned i;
		float f;
	} arg, magic;

	arg.f = x;

	/* nan, inf, or |x| >= 2^23: nothing left to round */
	if ((arg.i & ~0x80000000) >= 0x4b000000)
		return ((long long) x);

	/*
	 * Build a power of two carrying the sign of x; adding and then
	 * subtracting it pushes the fraction bits of x out of the result.
	 */
#if defined(__sparc) || defined(__amd64)
	magic.i = (arg.i & 0x80000000) | 0x4b000000;
#elif defined(__i386)
	/* assume 64-bit precision */
	magic.i = (arg.i & 0x80000000) | 0x5f000000;
#else
#error Unknown architecture
#endif
	x = (x + magic.f) - magic.f;

	/*
	 * |x| is below 2^23 here, so a plain long holds the value and
	 * the conversion to long long merely sign-extends it.
	 */
	return ((long) x);
}
#if defined(__sparc)

#include "fma.h"

/*
 * llrintl(x) rounds x to an integer and returns it as a long long.
 *
 * This variant does the work in integer arithmetic on the four 32-bit
 * words of the long double (xx.i[0] holds sign, exponent and the top 16
 * fraction bits).  The rounding mode is taken from the top two bits of
 * the word filled in by __fenv_getfsr() and compared against FSR_RP and
 * FSR_RN (both defined outside this file; presumably round-to-positive
 * and round-to-nearest).
 *
 * Out-of-range and NaN arguments are handled by converting a huge float
 * with the sign of x to long long.  The inexact exception is raised
 * through arithmetic on a volatile float.
 */
long long
llrintl(long double x) {
	union {
		unsigned i[4];
		long double q;
	} xx;
	union {
		unsigned i[2];
		long long l;
	} zz;
	union {
		unsigned i;
		float f;
	} tt;
	unsigned int hx, sx, frac, fsr;
	int rm, j;
	volatile float dummy;

	xx.q = x;
	sx = xx.i[0] & 0x80000000;	/* sign bit */
	hx = xx.i[0] & ~0x80000000;	/* exponent and top fraction bits */

	/* handle trivial cases */
	if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */
		/* convert an out-of-range float */
		tt.i = sx | 0x7f000000;
		return ((long long) tt.f);
	} else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */
		return (0LL);

	/* get the rounding mode */
	__fenv_getfsr(&fsr);
	rm = fsr >> 30;

	/* flip the sense of directed roundings if x is negative */
	if (sx)
		rm ^= rm >> 1;

	/* handle |x| < 1 */
	if (hx < 0x3fff0000) {
		dummy = 1.0e30f; /* x is nonzero, so raise inexact */
		dummy += 1.0e-30f;
		/* round up in magnitude, or to nearest when |x| > 1/2 */
		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 &&
		    ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]))))
			return (sx ? -1LL : 1LL);
		return (0LL);
	}

	/* extract the integer and fractional parts of x */
	j = 0x406f - (hx >> 16);	/* number of fraction bits below 1 */
	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make the leading 1 explicit */
	/*
	 * zz receives the integer part; frac receives the next 32 bits,
	 * with its lowest bit also set when any lower bit of x is nonzero.
	 */
	if (j >= 96) {
		zz.i[0] = 0;
		zz.i[1] = xx.i[0] >> (j - 96);
		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
			frac |= 1;
	} else if (j >= 64) {
		zz.i[0] = xx.i[0] >> (j - 64);
		zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64));
		frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64));
		if (((xx.i[2] << 1) << (95 - j)) | xx.i[3])
			frac |= 1;
	} else {
		zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32));
		zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32));
		frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32));
		if ((xx.i[3] << 1) << (63 - j))
			frac |= 1;
	}

	/* round */
	/* to nearest: up when above 1/2, or exactly 1/2 with an odd integer part */
	if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u ||
	    (frac == 0x80000000 && (zz.i[1] & 1)))))) {
		if (++zz.i[1] == 0)
			zz.i[0]++;	/* carry into the high word */
	}

	/* check for result out of range (note that z is |x| at this point) */
	if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] ||
	    !sx))) {
		tt.i = sx | 0x7f000000;
		return ((long long) tt.f);
	}

	/* raise inexact if need be */
	if (frac) {
		dummy = 1.0e30F;
		dummy += 1.0e-30F;
	}

	/* negate result if need be */
	if (sx) {
		/* two's complement negation of the 64-bit value in zz */
		zz.i[0] = ~zz.i[0];
		zz.i[1] = -zz.i[1];
		if (zz.i[1] == 0)
			zz.i[0]++;
	}
	return (zz.l);
}
#elif defined(__i386) || defined(__amd64)
/*
 * llrintl(x) rounds x to an integer and returns it as a long long.
 *
 * This variant adds and subtracts a power of two with the sign of x so
 * that the floating point arithmetic itself rounds x to an integral
 * value, then converts the result.
 */
long long
llrintl(long double x) {
	/*
	 * Note: The following code works on x86 (in the default rounding
	 * precision mode), but one ought to just use the fistpll instruction
	 * instead.
	 */
	union {
		unsigned i[3];
		long double e;
	} xx, yy;
	int ex;

	xx.e = x;
	ex = xx.i[2] & 0x7fff;	/* biased exponent without the sign */

	if (ex < 0x403e) { /* |x| < 2^63 */
		/* add and subtract a power of two to round x to an integer */
		yy.i[2] = (xx.i[2] & 0x8000) | 0x403e;	/* sign of x, exponent 0x403e */
		yy.i[1] = 0x80000000;
		yy.i[0] = 0;
		x = (x + yy.e) - yy.e;
	}

	/* now x is nan, inf, or integral */
	return ((long long) x);
}
#else
#error Unknown architecture
#endif
+ * + * If x is NaN, infinite, or so large that the nearest integer + * would exceed 64 bits, the invalid operation exception is raised. + */ + +#include "libm.h" + +long long +llround(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* handle |x| < 1 */ + if (hx < 0x3ff00000) { + if (hx >= 0x3fe00000) + return (sx ? -1LL : 1LL); + return (0LL); + } + + /* round x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] += i; + if (xx.i[LOWORD] < i) + xx.i[HIWORD]++; + xx.i[LOWORD] &= ~(i | (i - 1)); + } + } + + /* now x is nan, inf, or integral */ + return ((long long) xx.d); +} diff --git a/usr/src/libm/src/m9x/llroundf.c b/usr/src/libm/src/m9x/llroundf.c new file mode 100644 index 0000000..7bdc977 --- /dev/null +++ b/usr/src/libm/src/m9x/llroundf.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. 
/*
 * llroundf(x) rounds x to the nearest integer, with ties going away from
 * zero, and returns the result as a long long.
 *
 * For 1 <= |x| < 2^23 the rounding is done on the bit pattern of x: half
 * a unit is added at the position just below the integer bit and every
 * bit from that position down is then cleared.
 */
long long
llroundf(float x) {
	union {
		unsigned i;
		float f;
	} bits;
	unsigned mag, half;
	int negative;

	bits.f = x;
	mag = bits.i & ~0x80000000;
	negative = (bits.i & 0x80000000) != 0;

	/* nan, inf, or |x| >= 2^23: x is already integral */
	if (mag >= 0x4b000000)
		return ((long long) x);

	/* |x| < 1/2 rounds to zero */
	if (mag < 0x3f000000)
		return (0LL);

	/* 1/2 <= |x| < 1 rounds away from zero */
	if (mag < 0x3f800000)
		return (negative ? -1LL : 1LL);

	/* add one half at the integer boundary, then drop the fraction */
	half = 1 << (0x95 - (mag >> 23));
	bits.i = (bits.i + half) & ~((half << 1) - 1);

	/*
	 * the magnitude is at most 2^23 here, so a plain long holds the
	 * value and the conversion to long long merely sign-extends it
	 */
	return ((long) bits.f);
}
#if defined(__sparc)
/*
 * llroundl(x) rounds x to the nearest integer, with ties going away from
 * zero, and returns the result as a long long.
 *
 * This variant works in integer arithmetic on the four 32-bit words of
 * the long double (xx.i[0] holds sign, exponent and the top 16 fraction
 * bits).  Out-of-range and NaN arguments are handled by converting a
 * huge float with the sign of x to long long.
 */
long long
llroundl(long double x) {
	union {
		unsigned i[4];
		long double q;
	} xx;
	union {
		unsigned i[2];
		long long l;
	} zz;
	union {
		unsigned i;
		float f;
	} tt;
	unsigned hx, sx, frac;
	int j;

	xx.q = x;
	sx = xx.i[0] & 0x80000000;	/* sign bit */
	hx = xx.i[0] & ~0x80000000;	/* exponent and top fraction bits */

	/* handle trivial cases */
	if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */
		/* convert an out-of-range float */
		tt.i = sx | 0x7f000000;
		return ((long long) tt.f);
	}

	/* handle |x| < 1 */
	if (hx < 0x3fff0000) {
		if (hx >= 0x3ffe0000)	/* 1/2 <= |x| < 1 */
			return (sx ? -1LL : 1LL);
		return (0LL);
	}

	/* extract the integer and fractional parts of x */
	j = 0x406f - (hx >> 16);	/* number of fraction bits below 1 */
	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make the leading 1 explicit */
	/*
	 * zz receives the integer part; frac receives the next 32 bits,
	 * with its lowest bit also set when any lower bit of x is nonzero.
	 */
	if (j >= 96) {
		zz.i[0] = 0;
		zz.i[1] = xx.i[0] >> (j - 96);
		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
			frac |= 1;
	} else if (j >= 64) {
		zz.i[0] = xx.i[0] >> (j - 64);
		zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64));
		frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64));
		if (((xx.i[2] << 1) << (95 - j)) | xx.i[3])
			frac |= 1;
	} else {
		zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32));
		zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32));
		frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32));
		if ((xx.i[3] << 1) << (63 - j))
			frac |= 1;
	}

	/* round */
	/* a fraction of 1/2 or more bumps the magnitude by one */
	if (frac >= 0x80000000u) {
		if (++zz.i[1] == 0)
			zz.i[0]++;	/* carry into the high word */
	}

	/* check for result out of range (note that z is |x| at this point) */
	if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] ||
	    !sx))) {
		tt.i = sx | 0x7f000000;
		return ((long long) tt.f);
	}

	/* negate result if need be */
	if (sx) {
		/* two's complement negation of the 64-bit value in zz */
		zz.i[0] = ~zz.i[0];
		zz.i[1] = -zz.i[1];
		if (zz.i[1] == 0)
			zz.i[0]++;
	}

	return (zz.l);
}
#elif defined(__i386) || defined(__amd64)
/*
 * llroundl(x) rounds x to the nearest integer, with ties going away from
 * zero, and returns the result as a long long.
 *
 * This variant views the long double as three 32-bit words: xx.i[2]
 * carries sign and exponent, xx.i[1] and xx.i[0] the 64-bit significand.
 * For 1 <= |x| < 2^63 half a unit is added just below the integer bit and
 * every bit from that position down is cleared.  NaN, infinite and larger
 * arguments are passed straight to the integer conversion.
 */
long long
llroundl(long double x) {
	union {
		unsigned i[3];
		long double e;
	} xx;
	int ex, sx;
	/*
	 * The rounding increment must be unsigned: its shift count reaches
	 * 31, and with a signed int both (1 << 31) and the following
	 * (i - 1) would overflow, which is undefined behavior.
	 */
	unsigned i;

	xx.e = x;
	ex = xx.i[2] & 0x7fff;	/* biased exponent */
	sx = xx.i[2] & 0x8000;	/* sign bit */

	if (ex < 0x403e) { /* |x| < 2^63 */
		/* handle |x| < 1 */
		if (ex < 0x3fff) {
			if (ex >= 0x3ffe)	/* 1/2 <= |x| < 1 */
				return (sx ? -1LL : 1LL);
			return (0LL);
		}

		/* round x at the integer bit */
		if (ex < 0x401e) {
			/* the 1/2 bit lies in the high significand word */
			i = 1U << (0x401d - ex);
			xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1));
			xx.i[0] = 0;
		} else {
			/* the 1/2 bit lies in the low significand word */
			i = 1U << (0x403d - ex);
			xx.i[0] += i;
			if (xx.i[0] < i)
				xx.i[1]++;	/* carry out of the low word */
			xx.i[0] &= ~(i | (i - 1));
		}
		/* the significand overflowed: renormalize to 1.0 * 2^(ex+1) */
		if (xx.i[1] == 0) {
			xx.i[2] = sx | ++ex;
			xx.i[1] = 0x80000000U;
		}
	}

	/* now x is nan, inf, or integral */
	return ((long long) xx.e);
}
#else
#error Unknown architecture
#endif
+ * + * If x is NaN, infinite, or so large that the nearest integer + * would exceed 32 bits, the invalid operation exception is raised. + * If x is not an integer, the inexact exception is raised. + */ + +#include /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lrint(double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpl instruction + * instead. + */ + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43300000; +#elif defined(__i386) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43e00000; +#else +#error Unknown architecture +#endif + yy.i[LOWORD] = 0; + x = (x + yy.d) - yy.d; + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/lrintf.c b/usr/src/libm/src/m9x/lrintf.c new file mode 100644 index 0000000..92bb664 --- /dev/null +++ b/usr/src/libm/src/m9x/lrintf.c @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lrintf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak lrintf = __lrintf +#endif + +#include /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lrintf(float x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpl instruction + * instead. + */ + union { + unsigned i; + float f; + } xx, yy; + unsigned hx; + + xx.f = x; + hx = xx.i & ~0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) + yy.i = (xx.i & 0x80000000) | 0x4b000000; +#elif defined(__i386) + /* assume 64-bit precision */ + yy.i = (xx.i & 0x80000000) | 0x5f000000; +#else +#error Unknown architecture +#endif + x = (x + yy.f) - yy.f; + return ((long) x); + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/lrintl.c b/usr/src/libm/src/m9x/lrintl.c new file mode 100644 index 0000000..d827195 --- /dev/null +++ b/usr/src/libm/src/m9x/lrintl.c @@ -0,0 +1,154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
#if defined(_ILP32)
#if defined(__sparc)

#include "fma.h"

/*
 * lrintl(x) rounds x to an integer and returns it as a long.
 *
 * This variant does the work in integer arithmetic on the four 32-bit
 * words of the long double (xx.i[0] holds sign, exponent and the top 16
 * fraction bits).  The rounding mode is taken from the top two bits of
 * the word filled in by __fenv_getfsr() and compared against FSR_RP and
 * FSR_RN (both defined outside this file; presumably round-to-positive
 * and round-to-nearest).
 *
 * Out-of-range and NaN arguments are handled by converting a huge float
 * with the sign of x to long.  The inexact exception is raised through
 * arithmetic on a volatile float.
 */
long
lrintl(long double x) {
	union {
		unsigned i[4];
		long double q;
	} xx;
	union {
		unsigned i;
		float f;
	} tt;
	unsigned hx, sx, frac, fsr, l;
	int rm, j;
	volatile float dummy;

	xx.q = x;
	sx = xx.i[0] & 0x80000000;	/* sign bit */
	hx = xx.i[0] & ~0x80000000;	/* exponent and top fraction bits */

	/* handle trivial cases */
	if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */
		/* convert an out-of-range float */
		tt.i = sx | 0x7f000000;
		return ((long) tt.f);
	} else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */
		return (0L);

	/* get the rounding mode */
	__fenv_getfsr(&fsr);
	rm = fsr >> 30;

	/* flip the sense of directed roundings if x is negative */
	if (sx)
		rm ^= rm >> 1;

	/* handle |x| < 1 */
	if (hx < 0x3fff0000) {
		dummy = 1.0e30F; /* x is nonzero, so raise inexact */
		dummy += 1.0e-30F;
		/* round up in magnitude, or to nearest when |x| > 1/2 */
		if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 &&
		    ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]))))
			return (sx ? -1L : 1L);
		return (0L);
	}

	/* extract the integer and fractional parts of x */
	/* hx >> 16 lies in [0x3fff, 0x401e] here, so j = 0x406f - that */
	j = 0x406f - (hx >> 16); /* 81 <= j <= 112 */
	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make the leading 1 explicit */
	/*
	 * l receives the integer part; frac receives the next 32 bits,
	 * with its lowest bit also set when any lower bit of x is nonzero.
	 */
	if (j >= 96) { /* 96 <= j <= 112 */
		l = xx.i[0] >> (j - 96);
		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
			frac |= 1;
	} else { /* 81 <= j <= 95 */
		l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64));
		frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64));
		if ((xx.i[2] << (96 - j)) | xx.i[3])
			frac |= 1;
	}

	/* round */
	/* to nearest: up when above 1/2, or exactly 1/2 with an odd integer part */
	if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000U ||
	    (frac == 0x80000000 && (l & 1))))))
		l++;

	/* check for result out of range (note that l is |x| at this point) */
	if (l > 0x80000000U || (l == 0x80000000U && !sx)) {
		tt.i = sx | 0x7f000000;
		return ((long) tt.f);
	}

	/* raise inexact if need be */
	if (frac) {
		dummy = 1.0e30F;
		dummy += 1.0e-30F;
	}

	/* negate result if need be */
	if (sx)
		l = -l;
	return ((long) l);
}
#elif defined(__i386)
/*
 * lrintl(x) rounds x to an integer and returns it as a long.
 *
 * This variant adds and subtracts a power of two with the sign of x so
 * that the floating point arithmetic itself rounds x to an integral
 * value, then converts the result.
 */
long
lrintl(long double x) {
	/*
	 * Note: The following code works on x86 (in the default rounding
	 * precision mode), but one ought to just use the fistpl instruction
	 * instead.
	 */
	union {
		unsigned i[3];
		long double e;
	} xx, yy;
	int ex;

	xx.e = x;
	ex = xx.i[2] & 0x7fff;	/* biased exponent without the sign */
	if (ex < 0x403e) { /* |x| < 2^63 */
		/* add and subtract a power of two to round x to an integer */
		yy.i[2] = (xx.i[2] & 0x8000) | 0x403e;	/* sign of x, exponent 0x403e */
		yy.i[1] = 0x80000000;
		yy.i[0] = 0;
		x = (x + yy.e) - yy.e;
	}

	/* now x is nan, inf, or integral */
	return ((long) x);
}
#else
#error Unknown architecture
#endif	/* defined(__sparc) || defined(__i386) */
#else
#error Unsupported architecture
#endif	/* defined(_ILP32) */
+ * + * If x is NaN, infinite, or so large that the nearest integer + * would exceed 32 bits, the invalid operation exception is raised. + */ + +#include /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lround(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) { /* |x| < 1 */ + if (hx >= 0x3fe00000) + return (sx ? -1L : 1L); + return (0L); + } + + /* round x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] += i; + if (xx.i[LOWORD] < i) + xx.i[HIWORD]++; + xx.i[LOWORD] &= ~(i | (i - 1)); + } + } + + /* now x is nan, inf, or integral */ + return ((long) xx.d); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/lroundf.c b/usr/src/libm/src/m9x/lroundf.c new file mode 100644 index 0000000..a00652c --- /dev/null +++ b/usr/src/libm/src/m9x/lroundf.c @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
/*
 * lroundf(x) rounds x to the nearest integer, with halfway cases
 * rounded away from zero, and returns the result as a long.
 *
 * Arguments of magnitude 2^23 or more, infinities and NaNs are handed
 * straight to the float-to-long conversion.
 */
long
lroundf(float x) {
	union {
		unsigned i;
		float f;
	} bits;
	unsigned mag, neg, half;

	bits.f = x;
	neg = bits.i & 0x80000000;
	mag = bits.i & 0x7fffffff;

	/* |x| >= 2^23, inf or nan: nothing to round */
	if (mag >= 0x4b000000)
		return ((long) x);

	/* |x| < 1/2 rounds to zero */
	if (mag < 0x3f000000)
		return (0L);

	/* 1/2 <= |x| < 1 rounds to one with the sign of x */
	if (mag < 0x3f800000) {
		if (neg)
			return (-1L);
		return (1L);
	}

	/*
	 * 1 <= |x| < 2^23: add one half in units of the integer bit,
	 * then clear the fraction bits
	 */
	half = 1U << (0x95 - (mag >> 23));
	bits.i = (bits.i + half) & ~((half << 1) - 1);
	return ((long) bits.f);
}
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lroundl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak lroundl = __lroundl +#endif + +#include /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +#if defined(__sparc) +long +lroundl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + union { + unsigned i; + float f; + } tt; + unsigned hx, sx, frac, l; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */ + /* convert an out-of-range float */ + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (hx >= 0x3ffe0000) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* extract the integer and fractional parts of x */ + j = 0x406f - (hx >> 16); /* 91 <= j <= 112 */ + xx.i[0] = 0x10000 | (xx.i[0] & 0xffff); + if (j >= 96) { /* 96 <= j <= 112 */ + l = xx.i[0] >> (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3]) + frac |= 1; + } else { /* 91 <= j <= 95 */ + l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64)); + frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] << (96 - j)) | xx.i[3]) + frac |= 1; + } + + /* round */ + if (frac >= 0x80000000U) + l++; + + /* check for result out of range (note that z is |x| at this point) */ + if (l > 0x80000000U || (l == 0x80000000U && !sx)) { + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* negate result if need be */ + if (sx) + l = -l; + return ((long) l); +} +#elif defined(__i386) +long +lroundl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) { /* |x| < 1 */ + if (ex >= 0x3ffe) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* round x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] += i; + if (xx.i[0] < i) + xx.i[1]++; + xx.i[0] &= ~(i | (i - 1)); + } + if (xx.i[1] == 0) { + xx.i[2] = sx | ++ex; + xx.i[1] = 0x80000000U; + } + } + + /* now x is nan, inf, or integral */ + return ((long) xx.e); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__i386) */ +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/modf.c b/usr/src/libm/src/m9x/modf.c new file mode 100644 index 0000000..66ce2bd --- /dev/null +++ b/usr/src/libm/src/m9x/modf.c @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)modf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak modf = __modf +#pragma weak _modf = __modf +#endif + +/* + * modf(x, iptr) decomposes x into an integral part and a fractional + * part both having the same sign as x. 
It stores the integral part + * in *iptr and returns the fractional part. + * + * If x is infinite, modf sets *iptr to x and returns copysign(0.0,x). + * If x is NaN, modf sets *iptr to x and returns x. + * + * If x is a signaling NaN, this code does not attempt to raise the + * invalid operation exception. + */ + +#include "libm.h" + +double +__modf(double x, double *iptr) { + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx, s; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx >= 0x43300000) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7ff00000 || (hx == 0x7ff00000 && + xx.i[LOWORD] == 0)) { + xx.i[HIWORD] &= 0x80000000; + xx.i[LOWORD] = 0; + } + return (xx.d); + } + + if (hx < 0x3ff00000) { /* |x| < 1 */ + xx.i[HIWORD] &= 0x80000000; + xx.i[LOWORD] = 0; + *iptr = xx.d; + return (x); + } + + /* split x at the binary point */ + s = xx.i[HIWORD] & 0x80000000; + if (hx < 0x41400000) { + yy.i[HIWORD] = xx.i[HIWORD] & ~((1 << (0x413 - (hx >> 20))) - + 1); + yy.i[LOWORD] = 0; + } else { + yy.i[HIWORD] = xx.i[HIWORD]; + yy.i[LOWORD] = xx.i[LOWORD] & ~((1 << (0x433 - (hx >> 20))) - + 1); + } + *iptr = yy.d; + xx.d -= yy.d; + xx.i[HIWORD] = (xx.i[HIWORD] & ~0x80000000) | s; + /* keep sign of x */ + return (xx.d); +} diff --git a/usr/src/libm/src/m9x/modff.c b/usr/src/libm/src/m9x/modff.c new file mode 100644 index 0000000..4931cc3 --- /dev/null +++ b/usr/src/libm/src/m9x/modff.c @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
/*
 * modff(x, iptr) splits x into an integral part, stored in *iptr, and
 * a fractional part, which is returned.  Both parts carry the sign of
 * x.  When x is infinite or already integral the returned fraction is
 * a zero with the sign of x; when x is NaN both results are x.
 */
float
__modff(float x, float *iptr) {
	union {
		unsigned i;
		float f;
	} in, whole, part;
	unsigned sign, mag, fracbits;

	in.f = x;
	sign = in.i & 0x80000000;
	mag = in.i & 0x7fffffff;

	/* |x| < 1: the integral part is a zero with the sign of x */
	if (mag < 0x3f800000) {
		whole.i = sign;
		*iptr = whole.f;
		return (x);
	}

	/* |x| >= 2^23, inf or nan: there are no fraction bits */
	if (mag >= 0x4b000000) {
		*iptr = x;
		if (mag > 0x7f800000)	/* nan is passed through */
			return (in.f);
		part.i = sign;
		return (part.f);
	}

	/* clear the bits below the binary point to get the integral part */
	fracbits = 0x96 - (mag >> 23);
	whole.i = in.i & ~((1U << fracbits) - 1);
	*iptr = whole.f;

	/* the difference may be +0; give it back the sign of x */
	part.f = in.f - whole.f;
	part.i = (part.i & 0x7fffffff) | sign;
	return (part.f);
}
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)modfl.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak modfl = __modfl +#endif + +#include "libm.h" + +#if defined(__sparc) + +long double +__modfl(long double x, long double *iptr) { + union { + unsigned i[4]; + long double q; + } xx, yy; + unsigned hx, s; + + xx.q = x; + hx = xx.i[0] & ~0x80000000; + + if (hx >= 0x406f0000) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7fff0000 || (hx == 0x7fff0000 && + (xx.i[1] | xx.i[2] | xx.i[3]) == 0)) { + xx.i[0] &= 0x80000000; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } + return (xx.q); + } + + if (hx < 0x3fff0000) { /* |x| < 1 */ + xx.i[0] &= 0x80000000; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + *iptr = xx.q; + return (x); + } + + /* split x at the binary point */ + s = xx.i[0] & 0x80000000; + if (hx < 0x40100000) { + yy.i[0] = xx.i[0] & ~((1 << (0x400f - (hx >> 16))) - 1); + yy.i[1] = yy.i[2] = yy.i[3] = 0; + } else if (hx < 0x40300000) { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1] & ~((1 << (0x402f - (hx >> 16))) - 1); + yy.i[2] = yy.i[3] = 0; + } else if (hx < 0x40500000) { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1]; + yy.i[2] = xx.i[2] & ~((1 << (0x404f - (hx >> 16))) - 1); + yy.i[3] = 0; + } else { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1]; + yy.i[2] = xx.i[2]; + yy.i[3] = xx.i[3] & ~((1 << (0x406f - (hx >> 16))) - 1); + } + *iptr = yy.q; + + /* + * we could implement the following more efficiently than by using + * software emulation of fsubq, but we'll do it this way for now + * (and hope hardware 
support becomes commonplace) + */ + xx.q -= yy.q; + xx.i[0] = (xx.i[0] & ~0x80000000) | s; /* keep sign of x */ + return (xx.q); +} + +#elif defined(__i386) + +long double +__modfl(long double x, long double *iptr) { + union { + unsigned i[3]; + long double e; + } xx, yy; + unsigned hx, s; + + /* + * It might be faster to use one of the x86 fpops instead of + * the following. + */ + xx.e = x; + hx = xx.i[2] & 0x7fff; + + if (hx >= 0x403e) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7fff || (hx == 0x7fff && + ((xx.i[1] << 1) | xx.i[0]) == 0)) { + xx.i[2] &= 0x8000; + xx.i[1] = xx.i[0] = 0; + } + return (xx.e); + } + + if (hx < 0x3fff) { /* |x| < 1 */ + xx.i[2] &= 0x8000; + xx.i[1] = xx.i[0] = 0; + *iptr = xx.e; + return (x); + } + + /* split x at the binary point */ + s = xx.i[2] & 0x8000; + yy.i[2] = xx.i[2]; + if (hx < 0x401f) { + yy.i[1] = xx.i[1] & ~((1 << (0x401e - hx)) - 1); + yy.i[0] = 0; + } else { + yy.i[1] = xx.i[1]; + yy.i[0] = xx.i[0] & ~((1 << (0x403e - hx)) - 1); + } + *iptr = yy.e; + xx.e -= yy.e; + xx.i[2] = (xx.i[2] & ~0x8000) | s; /* keep sign of x */ + return (xx.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nan.c b/usr/src/libm/src/m9x/nan.c new file mode 100644 index 0000000..6a994ca --- /dev/null +++ b/usr/src/libm/src/m9x/nan.c @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nan.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nan = __nan +#endif + +/* + * nan(c) returns a NaN. This implementation ignores c. + */ + +#include "libm.h" + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} __nan_union = { 0x7fffffff, 0xffffffff }; + +#elif defined(__i386) + +static const union { + unsigned i[2]; + double d; +} __nan_union = { 0xffffffff, 0x7fffffff }; + +#else +#error Unknown architecture +#endif + +/* ARGSUSED0 */ +double +__nan(const char *c) { + return (__nan_union.d); +} diff --git a/usr/src/libm/src/m9x/nanf.c b/usr/src/libm/src/m9x/nanf.c new file mode 100644 index 0000000..08a029e --- /dev/null +++ b/usr/src/libm/src/m9x/nanf.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. 
/*
 * Bit image of the value returned by __nanf: every bit except the
 * sign bit is set.
 */
static const union {
	unsigned i;
	float f;
} nanf_bits = { 0x7fffffff };

/*
 * nanf(c) returns a single precision NaN.  The argument is ignored.
 */
/* ARGSUSED0 */
float
__nanf(const char *c) {
	return (nanf_bits.f);
}
+ */ + +#pragma ident "@(#)nanl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nanl = __nanl +#endif + +#include "libm.h" + +#if defined(__sparc) + +static const union { + unsigned i[4]; + long double ld; +} __nanl_union = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +#elif defined(__i386) + +static const union { + unsigned i[3]; + long double ld; +} __nanl_union = { 0xffffffff, 0xffffffff, 0x7fff }; + +#else +#error Unknown architecture +#endif + +/* ARGSUSED0 */ +long double +__nanl(const char *c) { + return (__nanl_union.ld); +} diff --git a/usr/src/libm/src/m9x/nearbyint.c b/usr/src/libm/src/m9x/nearbyint.c new file mode 100644 index 0000000..d977522 --- /dev/null +++ b/usr/src/libm/src/m9x/nearbyint.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nearbyint.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nearbyint = __nearbyint +#endif + +/* + * nearbyint(x) returns the nearest fp integer to x in the direction + * corresponding to the current rounding direction without raising + * the inexact exception. 
+ * + * nearbyint(x) is x unchanged if x is +/-0 or +/-inf. If x is NaN, + * nearbyint(x) is also NaN. + */ + +#include "libm.h" +#include "fenv_synonyms.h" +#include + +double +__nearbyint(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i, frac; + int rm, j; + + xx.d = x; + sx = xx.i[HIWORD] & 0x80000000; + hx = xx.i[HIWORD] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x43300000) { /* x is nan, inf, or already integral */ + if (hx >= 0x7ff00000) /* x is inf or nan */ +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + return (x); + } else if ((hx | xx.i[LOWORD]) == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + rm = fegetround(); + + /* flip the sense of directed roundings if x is negative */ + if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD)) + rm = (FE_UPWARD + FE_DOWNWARD) - rm; + + /* handle |x| < 1 */ + if (hx < 0x3ff00000) { + if (rm == FE_UPWARD || (rm == FE_TONEAREST && + (hx >= 0x3fe00000 && ((hx & 0xfffff) | xx.i[LOWORD])))) + xx.i[HIWORD] = sx | 0x3ff00000; + else + xx.i[HIWORD] = sx; + xx.i[LOWORD] = 0; + return (xx.d); + } + + /* round x at the integer bit */ + j = 0x433 - (hx >> 20); + if (j >= 32) { + i = 1 << (j - 32); + frac = ((xx.i[HIWORD] << 1) << (63 - j)) | + (xx.i[LOWORD] >> (j - 32)); + if (xx.i[LOWORD] & (i - 1)) + frac |= 1; + if (!frac) + return (x); + xx.i[LOWORD] = 0; + xx.i[HIWORD] &= ~(i - 1); + if (rm == FE_UPWARD || (rm == FE_TONEAREST && + (frac > 0x80000000u || (frac == 0x80000000) && + (xx.i[HIWORD] & i)))) + xx.i[HIWORD] += i; + } else { + i = 1 << j; + frac = (xx.i[LOWORD] << 1) << (31 - j); + if (!frac) + return (x); + xx.i[LOWORD] &= ~(i - 1); + if (rm == FE_UPWARD || (rm == FE_TONEAREST && + (frac > 0x80000000u || (frac == 0x80000000) && + (xx.i[LOWORD] & i)))) { + xx.i[LOWORD] += i; + if (xx.i[LOWORD] == 0) + xx.i[HIWORD]++; + } + } + return (xx.d); +} + +#if 0 
+ +/* +* Alternate implementations for SPARC, x86, using fp ops. These may +* be faster depending on how expensive saving and restoring the fp +* modes and status flags is. +*/ + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +double +__nearbyint(double x) { + union { + unsigned i[2]; + double d; + } xx, yy; + double z; + unsigned hx, sx, fsr, oldfsr; + int rm; + + xx.d = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x43300000) /* x is nan, inf, or already integral */ + return (x + 0.0); + else if ((hx | xx.i[1]) == 0) /* x is zero */ + return (x); + + /* save the fsr */ + __fenv_getfsr(&oldfsr); + + /* handle |x| < 1 */ + if (hx < 0x3ff00000) { + /* flip the sense of directed roundings if x is negative */ + rm = oldfsr >> 30; + if (sx) + rm ^= rm >> 1; + if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3fe00000 && + ((hx & 0xfffff) | xx.i[1])))) + xx.i[0] = sx | 0x3ff00000; + else + xx.i[0] = sx; + xx.i[1] = 0; + return (xx.d); + } + + /* clear the inexact trap */ + fsr = oldfsr & ~FSR_NXM; + __fenv_setfsr(&fsr); + + /* round x at the integer bit */ + yy.i[0] = sx | 0x43300000; + yy.i[1] = 0; + z = (x + yy.d) - yy.d; + + /* restore the old fsr */ + __fenv_setfsr(&oldfsr); + + return (z); +} + +#elif defined(__i386) + +/* inline template */ +extern long double frndint(long double); + +double +__nearbyint(double x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + __fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint((long double) x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + /* note: the value of z is representable in double precision */ + return (z); +} + +#else +#error Unknown architecture +#endif + +#endif diff --git 
a/usr/src/libm/src/m9x/nearbyintf.c b/usr/src/libm/src/m9x/nearbyintf.c new file mode 100644 index 0000000..2251c89 --- /dev/null +++ b/usr/src/libm/src/m9x/nearbyintf.c @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)nearbyintf.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nearbyintf = __nearbyintf +#endif + +#include "libm.h" +#include "fenv_synonyms.h" +#include + +float +__nearbyintf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i, frac; + int rm; + + xx.f = x; + sx = xx.i & 0x80000000; + hx = xx.i & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x4b000000) { /* x is nan, inf, or already integral */ + if (hx > 0x7f800000) /* x is nan */ + return (x * x); /* + -> * for Cheetah */ + return (x); + } else if (hx == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + rm = fegetround(); + + /* flip the sense of directed roundings if x is negative */ + if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD)) + rm = (FE_UPWARD + FE_DOWNWARD) - rm; + + /* handle |x| < 1 */ + if (hx < 0x3f800000) { + if (rm == FE_UPWARD || (rm == FE_TONEAREST && hx > 0x3f000000)) + xx.i = sx | 0x3f800000; + else + xx.i = sx; + return (xx.f); + } + + /* round x at the integer bit */ + i = 1 << (0x96 - (hx >> 23)); + frac = hx & (i - 1); + if (!frac) + return (x); + + hx &= ~(i - 1); + if (rm == FE_UPWARD || (rm == FE_TONEAREST && (frac > (i >> 1) || + (frac == (i >> 1)) && (hx & i)))) + xx.i = sx | (hx + i); + else + xx.i = sx | hx; + return (xx.f); +} + +#if 0 + +/* + * Alternate implementations for SPARC, x86, using fp ops. These may + * be faster depending on how expensive saving and restoring the fp + * modes and status flags is. 
+ */ + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +float +__nearbyintf(float x) { + union { + unsigned i; + float f; + } xx, yy; + float z; + unsigned hx, sx, fsr, oldfsr; + int rm; + + xx.f = x; + sx = xx.i & 0x80000000; + hx = xx.i & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x4b000000) /* x is nan, inf, or already integral */ + return (x + 0.0f); + else if (hx == 0) /* x is zero */ + return (x); + + /* save the fsr */ + __fenv_getfsr(&oldfsr); + + /* handle |x| < 1 */ + if (hx < 0x3f800000) { + /* flip the sense of directed roundings if x is negative */ + rm = oldfsr >> 30; + if (sx) + rm ^= rm >> 1; + if (rm == FSR_RP || (rm == FSR_RN && hx > 0x3f000000)) + xx.i = sx | 0x3f800000; + else + xx.i = sx; + return (xx.f); + } + + /* clear the inexact trap */ + fsr = oldfsr & ~FSR_NXM; + __fenv_setfsr(&fsr); + + /* round x at the integer bit */ + yy.i = sx | 0x4b000000; + z = (x + yy.f) - yy.f; + + /* restore the old fsr */ + __fenv_setfsr(&oldfsr); + + return (z); +} + +#elif defined(__i386) + +/* inline template */ +extern long double frndint(long double); + +float +__nearbyintf(float x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + __fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint((long double) x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + /* note: the value of z is representable in single precision */ + return (z); +} + +#else +#error Unknown architecture +#endif + +#endif diff --git a/usr/src/libm/src/m9x/nearbyintl.c b/usr/src/libm/src/m9x/nearbyintl.c new file mode 100644 index 0000000..98def46 --- /dev/null +++ b/usr/src/libm/src/m9x/nearbyintl.c @@ -0,0 +1,183 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nearbyintl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nearbyintl = __nearbyintl +#endif + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +static union { + unsigned i; + float f; +} snan = { 0x7f800001 }; + +long double +__nearbyintl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx, i, frac, fsr; + int rm, j; + volatile float dummy; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) { /* x is nan, inf, or already integral */ + /* check for signaling nan */ + if ((hx > 0x7fff0000 || (hx == 0x7fff0000 && + (xx.i[1] | xx.i[2] | xx.i[3]))) && !(hx & 0x8000)) { + dummy = snan.f; + dummy += snan.f; + xx.i[0] = sx | hx | 0x8000; + } + return (xx.q); + } else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + __fenv_getfsr(&fsr); + rm = fsr >> 30; + + /* flip the sense of directed roundings if x is negative */ + if (sx) + rm ^= rm >> 1; + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (rm == FSR_RP || (rm == FSR_RN 
&& (hx >= 0x3ffe0000 && + ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3])))) + xx.i[0] = sx | 0x3fff0000; + else + xx.i[0] = sx; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + return (xx.q); + } + + /* round x at the integer bit */ + j = 0x406f - (hx >> 16); + if (j >= 96) { + i = 1 << (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if ((xx.i[1] & (i - 1)) | xx.i[2] | xx.i[3]) + frac |= 1; + if (!frac) + return (x); + xx.i[1] = xx.i[2] = xx.i[3] = 0; + xx.i[0] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[0] & i))))) + xx.i[0] += i; + } else if (j >= 64) { + i = 1 << (j - 64); + frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] & (i - 1)) | xx.i[3]) + frac |= 1; + if (!frac) + return (x); + xx.i[2] = xx.i[3] = 0; + xx.i[1] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[1] & i))))) { + xx.i[1] += i; + if (xx.i[1] == 0) + xx.i[0]++; + } + } else if (j >= 32) { + i = 1 << (j - 32); + frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32)); + if (xx.i[3] & (i - 1)) + frac |= 1; + if (!frac) + return (x); + xx.i[3] = 0; + xx.i[2] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[2] & i))))) { + xx.i[2] += i; + if (xx.i[2] == 0) + if (++xx.i[1] == 0) + xx.i[0]++; + } + } else { + i = 1 << j; + frac = (xx.i[3] << 1) << (31 - j); + if (!frac) + return (x); + xx.i[3] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[3] & i))))) { + xx.i[3] += i; + if (xx.i[3] == 0) + if (++xx.i[2] == 0) + if (++xx.i[1] == 0) + xx.i[0]++; + } + } + + return (xx.q); +} + +#elif defined(__i386) + +/* inline template */ +extern long double frndint(long double); + +long double +__nearbyintl(long double x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + 
__fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint(x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + return (z); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nexttoward.c b/usr/src/libm/src/m9x/nexttoward.c new file mode 100644 index 0000000..d9bbb55 --- /dev/null +++ b/usr/src/libm/src/m9x/nexttoward.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nexttoward.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nexttoward = __nexttoward +#endif + +/* + * nexttoward(x, y) delivers the next representable number after x + * in the direction of y. If x and y are both zero, the result is + * zero with the same sign as y. If either x or y is NaN, the result + * is NaN. + * + * If x != y and the result is infinite, overflow is raised; if + * x != y and the result is subnormal or zero, underflow is raised. 
+ * (This is wrong, but it's what C99 apparently wants.) + */ + +#include "libm.h" + +#if defined(__sparc) + +static union { + unsigned i[2]; + double d; +} C[] = { + 0x00100000, 0, + 0x7fe00000, 0, + 0x7fffffff, 0xffffffff +}; + +#define tiny C[0].d +#define huge C[1].d +#define qnan C[2].d + +enum fcc_type { + fcc_equal = 0, + fcc_less = 1, + fcc_greater = 2, + fcc_unordered = 3 +}; + +#ifdef __sparcv9 +#define _Q_cmp _Qp_cmp +#endif + +extern enum fcc_type _Q_cmp(const long double *, const long double *); + +double +__nexttoward(double x, long double y) { + union { + unsigned i[2]; + double d; + } xx; + union { + unsigned i[4]; + long double q; + } yy; + long double lx; + unsigned hx; + volatile double dummy; + enum fcc_type rel; + + /* + * It would be somewhat more efficient to check for NaN and + * zero operands before converting x to long double and then + * to code the comparison in line rather than calling _Q_cmp. + * However, since this code probably won't get used much, + * I'm opting in favor of simplicity instead. 
+ */ + lx = xx.d = x; + hx = (xx.i[0] & ~0x80000000) | xx.i[1]; + + /* check for each of four possible orderings */ + rel = _Q_cmp(&lx, &y); + if (rel == fcc_unordered) + return (qnan); + + if (rel == fcc_equal) { + if (hx == 0) { /* x is zero; return zero with y's sign */ + yy.q = y; + xx.i[0] = yy.i[0]; + return (xx.d); + } + return (x); + } + + if (rel == fcc_less) { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0; + xx.i[1] = 0x00000001; + } else if ((int)xx.i[0] >= 0) { /* x is positive */ + if (++xx.i[1] == 0) + xx.i[0]++; + } else { + if (xx.i[1]-- == 0) + xx.i[0]--; + } + } else { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x80000000; + xx.i[1] = 0x00000001; + } else if ((int)xx.i[0] >= 0) { /* x is positive */ + if (xx.i[1]-- == 0) + xx.i[0]--; + } else { + if (++xx.i[1] == 0) + xx.i[0]++; + } + } + + /* raise exceptions as needed */ + hx = xx.i[0] & ~0x80000000; + if (hx == 0x7ff00000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00100000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.d); +} + +#elif defined(__i386) + +static union { + unsigned i[2]; + double d; +} C[] = { + 0, 0x00100000, + 0, 0x7fe00000, +}; + +#define tiny C[0].d +#define huge C[1].d + +double +__nexttoward(double x, long double y) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx; + long double lx; + volatile double dummy; + + lx = xx.d = x; + hx = (xx.i[1] & ~0x80000000) | xx.i[0]; + + /* check for each of four possible orderings */ + if (isunordered(lx, y)) + return ((double) (lx + y)); + + if (lx == y) + return ((double) y); + + if (lx < y) { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x00000001; + xx.i[1] = 0; + } else if ((int)xx.i[1] >= 0) { /* x is positive */ + if (++xx.i[0] == 0) + xx.i[1]++; + } else { + if (xx.i[0]-- == 0) + xx.i[1]--; + } + } else { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x00000001; + xx.i[1] = 0x80000000; + } else if ((int)xx.i[1] >= 0) { /* x is positive */ + if (xx.i[0]-- == 0) + xx.i[1]--; + } else { + if 
(++xx.i[0] == 0) + xx.i[1]++; + } + } + + /* raise exceptions as needed */ + hx = xx.i[1] & ~0x80000000; + if (hx == 0x7ff00000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00100000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.d); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nexttowardf.c b/usr/src/libm/src/m9x/nexttowardf.c new file mode 100644 index 0000000..0bf8a05 --- /dev/null +++ b/usr/src/libm/src/m9x/nexttowardf.c @@ -0,0 +1,184 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)nexttowardf.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nexttowardf = __nexttowardf +#endif + +#include "libm.h" + +static union { + unsigned i; + float f; +} C[] = { + 0x00800000, + 0x7f000000, + 0x7fffffff +}; + +#define tiny C[0].f +#define huge C[1].f +#define qnan C[2].f + +#if defined(__sparc) + +enum fcc_type { + fcc_equal = 0, + fcc_less = 1, + fcc_greater = 2, + fcc_unordered = 3 +}; + +#ifdef __sparcv9 +#define _Q_cmp _Qp_cmp +#endif + +extern enum fcc_type _Q_cmp(const long double *, const long double *); + +float +__nexttowardf(float x, long double y) { + union { + unsigned i; + float f; + } xx; + union { + unsigned i[4]; + long double q; + } yy; + long double lx; + unsigned hx; + volatile float dummy; + enum fcc_type rel; + + /* + * It would be somewhat more efficient to check for NaN and + * zero operands before converting x to long double and then + * to code the comparison in line rather than calling _Q_cmp. + * However, since this code probably won't get used much, + * I'm opting in favor of simplicity instead. 
+ */ + lx = xx.f = x; + hx = xx.i & ~0x80000000; + + /* check for each of four possible orderings */ + rel = _Q_cmp(&lx, &y); + if (rel == fcc_unordered) + return (qnan); + + if (rel == fcc_equal) { + if (hx == 0) { /* x is zero; return zero with y's sign */ + yy.q = y; + xx.i = yy.i[0]; + return (xx.f); + } + return (x); + } + + if (rel == fcc_less) { + if (hx == 0) /* x is zero */ + xx.i = 0x00000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i++; + else + xx.i--; + } else { + if (hx == 0) /* x is zero */ + xx.i = 0x80000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i--; + else + xx.i++; + } + + /* raise exceptions as needed */ + hx = xx.i & ~0x80000000; + if (hx == 0x7f800000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00800000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.f); +} + +#elif defined(__i386) + +float +__nexttowardf(float x, long double y) { + union { + unsigned i; + float f; + } xx; + unsigned hx; + long double lx; + volatile float dummy; + + lx = xx.f = x; + hx = xx.i & ~0x80000000; + + /* check for each of four possible orderings */ + if (isunordered(lx, y)) + return ((float) (lx + y)); + + if (lx == y) + return ((float) y); + + if (lx < y) { + if (hx == 0) /* x is zero */ + xx.i = 0x00000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i++; + else + xx.i--; + } else { + if (hx == 0) /* x is zero */ + xx.i = 0x80000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i--; + else + xx.i++; + } + + /* raise exceptions as needed */ + hx = xx.i & ~0x80000000; + if (hx == 0x7f800000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00800000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.f); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nexttowardl.c b/usr/src/libm/src/m9x/nexttowardl.c new file mode 100644 index 0000000..4578738 --- /dev/null +++ b/usr/src/libm/src/m9x/nexttowardl.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The 
contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nexttowardl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nexttowardl = __nexttowardl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define n0 0 +#define n1 1 +#define n2 2 +#define n3 3 +#define X86PDNRM1(x) +#define INC(px) { \ + if (++px[n3] == 0) \ + if (++px[n2] == 0) \ + if (++px[n1] == 0) \ + ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n3] == 0xffffffff) \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0xffffffff) \ + --px[n0]; \ + } +#elif defined(__i386) +#define n0 2 +#define n1 1 +#define n2 0 +#define n3 0 +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM1(x) if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \ + 0x80000000) != 0) \ + ((int *) &x)[2] |= 1 +#define INC(px) { \ + if (++px[n2] == 0) \ + if ((++px[n1] & ~0x80000000) == 0) \ + px[n1] = 0x80000000, ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0x7fffffff) \ + if ((--px[n0] & 0x7fff) != 0) \ + px[n1] |= 0x80000000; \ + } +#endif + +long double 
+nexttowardl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); + + if (ISZEROL(x)) { /* x == 0.0 */ + px[n0] = py[n0] & XSGNMSK; + px[n1] = px[n2] = 0; + px[n3] = 1; + } else { + X86PDNRM1(x); + if ((px[n0] & XSGNMSK) == 0) { /* x > 0.0 */ + if (x > y) /* x > y */ + DEC(px) + else + INC(px) + } else { + if (x < y) /* x < y */ + DEC(px) + else + INC(px) + } + } +#ifndef lint + { + volatile long double dummy; + int k = XBIASED_EXP(x); + + if (k == 0) + dummy = LDBL_MIN * copysignl(LDBL_MIN, x); + else if (k == 0x7fff) + dummy = LDBL_MAX * copysignl(LDBL_MAX, x); + } +#endif + return (x); +} diff --git a/usr/src/libm/src/m9x/regset.h b/usr/src/libm/src/m9x/regset.h new file mode 100644 index 0000000..54c9306 --- /dev/null +++ b/usr/src/libm/src/m9x/regset.h @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * Parts of Solaris 10 x86 /usr/include/sys/regset.h + */ + +#ifndef _SYS_REGSET_H +#define _SYS_REGSET_H + +#pragma ident "@(#)regset.h 1.3 06/01/31 SMI" + +#include <stdint.h> /* uint8_t, uint32_t */ + +typedef union { + long double _q; + uint32_t _l[4]; +} myupad128_t; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The names and offsets defined here are specified by i386 ABI suppl. + */ + +#define SS 18 /* only stored on a privilege transition */ +#define UESP 17 /* only stored on a privilege transition */ +#define EFL 16 +#define CS 15 +#define EIP 14 +#define ERR 13 +#define TRAPNO 12 +#define EAX 11 +#define ECX 10 +#define EDX 9 +#define EBX 8 +#define ESP 7 +#define EBP 6 +#define ESI 5 +#define EDI 4 +#define DS 3 +#define ES 2 +#define FS 1 +#define GS 0 + +/* aliases for portability */ + +#define REG_PC EIP +#define REG_FP EBP +#define REG_SP UESP +#define REG_PS EFL +#define REG_R0 EAX +#define REG_R1 EDX + +/* + * A gregset_t is defined as an array type for compatibility with the reference + * source. This is important due to differences in the way the C language + * treats arrays and structures as parameters. + */ +#define _NGREG 19 + +typedef int greg_t; +typedef greg_t gregset_t[_NGREG]; + +/* + * This definition of the floating point structure is binary + * compatible with the Intel386 psABI definition, and source + * compatible with that specification for x87-style floating point. + * It also allows SSE/SSE2 state to be accessed on machines that + * possess such hardware capabilities. 
+ */ +typedef struct fpu { + union { + struct fpchip_state { + uint32_t state[27]; /* 287/387 saved state */ + uint32_t status; /* saved at exception */ + uint32_t mxcsr; /* SSE control and status */ + uint32_t xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + myupad128_t xmm[8]; /* %xmm0-%xmm7 */ + } fpchip_state; + struct fp_emul_space { /* for emulator(s) */ + uint8_t fp_emul[246]; + uint8_t fp_epad[2]; + } fp_emul_space; + uint32_t f_fpregs[95]; /* union of the above */ + } fp_reg_set; +} fpregset_t; + +/* + * Structure mcontext defines the complete hardware machine state. + * (This structure is specified in the i386 ABI suppl.) + */ +typedef struct { + gregset_t gregs; /* general register set */ + fpregset_t fpregs; /* floating point register set */ +} mcontext_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_REGSET_H */ diff --git a/usr/src/libm/src/m9x/remquo.c b/usr/src/libm/src/m9x/remquo.c new file mode 100644 index 0000000..25d501e --- /dev/null +++ b/usr/src/libm/src/m9x/remquo.c @@ -0,0 +1,267 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "@(#)remquo.c 1.10 06/01/31 SMI" + +#pragma weak remquo = __remquo + +/* INDENT OFF */ +/* + * double remquo(double x, double y, int *quo) return remainder(x,y) and an + * integer pointer quo such that *quo = N mod {2**31}, where N is the + * exact integral part of x/y rounded to nearest even. + * + * remquo call internal fmodquo + */ +/* INDENT ON */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include /* fabs() */ + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif +#define __HI(x) ((int *) &x)[HIWORD] +#define __LO(x) ((int *) &x)[LOWORD] + +static const double one = 1.0, Zero[] = {0.0, -0.0}; + +static double +fmodquo(double x, double y, int *quo) { + int n, hx, hy, hz, ix, iy, sx, sq, i, m; + unsigned lx, ly, lz; + + hx = __HI(x); /* high word of x */ + lx = __LO(x); /* low word of x */ + hy = __HI(y); /* high word of y */ + ly = __LO(y); /* low word of y */ + sx = hx & 0x80000000; /* sign of x */ + sq = (hx ^ hy) & 0x80000000; /* sign of x/y */ + hx ^= sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + *quo = 0; + if ((hy | ly) == 0 || hx >= 0x7ff00000 || /* y=0, or x !finite */ + (hy | ((ly | -ly) >> 31)) > 0x7ff00000) /* or y is NaN */ + return ((x * y) / (x * y)); + if (hx <= hy) { + if (hx < hy || lx < ly) + return (x); /* |x|<|y| return x */ + if (lx == ly) { + *quo = 1 + (sq >> 30); + /* |x|=|y| return x*0 */ + return (Zero[(unsigned) sx >> 31]); + } + } + + /* determine ix = ilogb(x) */ + if (hx < 0x00100000) { /* subnormal x */ + if (hx == 0) { + for (ix = -1043, i = lx; i > 0; i <<= 1) + ix -= 1; + } else { + for (ix = -1022, i = (hx << 11); i > 0; i <<= 1) + ix -= 1; + } + } else + ix = (hx >> 20) - 1023; + + /* determine iy = ilogb(y) */ + if (hy < 0x00100000) { /* subnormal y */ + if (hy == 0) { + for (iy = -1043, i = 
ly; i > 0; i <<= 1) + iy -= 1; + } else { + for (iy = -1022, i = (hy << 11); i > 0; i <<= 1) + iy -= 1; + } + } else + iy = (hy >> 20) - 1023; + + /* set up {hx,lx}, {hy,ly} and align y to x */ + if (ix >= -1022) + hx = 0x00100000 | (0x000fffff & hx); + else { /* subnormal x, shift x to normal */ + n = -1022 - ix; + if (n <= 31) { + hx = (hx << n) | (lx >> (32 - n)); + lx <<= n; + } else { + hx = lx << (n - 32); + lx = 0; + } + } + if (iy >= -1022) + hy = 0x00100000 | (0x000fffff & hy); + else { /* subnormal y, shift y to normal */ + n = -1022 - iy; + if (n <= 31) { + hy = (hy << n) | (ly >> (32 - n)); + ly <<= n; + } else { + hy = ly << (n - 32); + ly = 0; + } + } + + /* fix point fmod */ + n = ix - iy; + m = 0; + while (n--) { + hz = hx - hy; + lz = lx - ly; + if (lx < ly) + hz -= 1; + if (hz < 0) { + hx = hx + hx + (lx >> 31); + lx = lx + lx; + } else { + m += 1; + if ((hz | lz) == 0) { /* return sign(x)*0 */ + if (n < 31) + m <<= 1 + n; + else + m = 0; + m &= 0x7fffffff; + *quo = sq >= 0 ? m : -m; + return (Zero[(unsigned) sx >> 31]); + } + hx = hz + hz + (lz >> 31); + lx = lz + lz; + } + m += m; + } + hz = hx - hy; + lz = lx - ly; + if (lx < ly) + hz -= 1; + if (hz >= 0) { + hx = hz; + lx = lz; + m += 1; + } + m &= 0x7fffffff; + *quo = sq >= 0 ? 
m : -m; + + /* convert back to floating value and restore the sign */ + if ((hx | lx) == 0) { /* return sign(x)*0 */ + return (Zero[(unsigned) sx >> 31]); + } + while (hx < 0x00100000) { /* normalize x */ + hx = hx + hx + (lx >> 31); + lx = lx + lx; + iy -= 1; + } + if (iy >= -1022) { /* normalize output */ + hx = (hx - 0x00100000) | ((iy + 1023) << 20); + __HI(x) = hx | sx; + __LO(x) = lx; + } else { /* subnormal output */ + n = -1022 - iy; + if (n <= 20) { + lx = (lx >> n) | ((unsigned) hx << (32 - n)); + hx >>= n; + } else if (n <= 31) { + lx = (hx << (32 - n)) | (lx >> n); + hx = sx; + } else { + lx = hx >> (n - 32); + hx = sx; + } + __HI(x) = hx | sx; + __LO(x) = lx; + x *= one; /* create necessary signal */ + } + return (x); /* exact output */ +} + +#define zero Zero[0] + +double +remquo(double x, double y, int *quo) { + int hx, hy, sx, sq; + double v; + unsigned ly; + + hx = __HI(x); /* high word of x */ + hy = __HI(y); /* high word of y */ + ly = __LO(y); /* low word of y */ + sx = hx & 0x80000000; /* sign of x */ + sq = (hx ^ hy) & 0x80000000; /* sign of x/y */ + hx ^= sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values */ + *quo = 0; + if ((hy | ly) == 0 || hx >= 0x7ff00000 || /* y=0, or x !finite */ + (hy | ((ly | -ly) >> 31)) > 0x7ff00000) /* or y is NaN */ + return ((x * y) / (x * y)); + + y = fabs(y); + x = fabs(x); + if (hy <= 0x7fdfffff) { + x = fmodquo(x, y + y, quo); + *quo = ((*quo) & 0x3fffffff) << 1; + } + if (hy < 0x00200000) { + if (x + x > y) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x + x >= y) { + x -= y; + *quo += 1; + } + } + } else { + v = 0.5 * y; + if (x > v) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x >= v) { + x -= y; + *quo += 1; + } + } + } + if (sq != 0) + *quo = -(*quo); + return (sx == 0 ? 
x : -x); +} diff --git a/usr/src/libm/src/m9x/remquof.c b/usr/src/libm/src/m9x/remquof.c new file mode 100644 index 0000000..14a2f73 --- /dev/null +++ b/usr/src/libm/src/m9x/remquof.c @@ -0,0 +1,267 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)remquof.c 1.10 06/01/31 SMI" + +#pragma weak remquof = __remquof + +/* INDENT OFF */ +/* + * float remquof(float x, float y, int *quo) return remainderf(x,y) and an + * integer pointer quo such that *quo = N mod (2**31), where N is the + * exact integral part of x/y rounded to nearest even. 
+ * + * remquof calls the internal helper fmodquof + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include "libm_protos.h" +#include <math.h> +extern float fabsf(float); + +static const int + is = (int) 0x80000000, + im = 0x007fffff, + ii = 0x7f800000, + iu = 0x00800000; + +static const float zero = 0.0F, half = 0.5F; +/* INDENT ON */ + +static float +fmodquof(float x, float y, int *quo) { + float w; + int hx, ix, iy, iz, k, ny, nd, m, sq; + + hx = *(int *) &x; + ix = hx & 0x7fffffff; + iy = *(int *) &y; + sq = (iy ^ hx) & is; /* sign of x/y */ + iy &= 0x7fffffff; + + /* purge off exception values */ + *quo = 0; + if (ix >= ii || iy > ii || iy == 0) { + w = x * y; + w = w / w; + } else if (ix <= iy) { + if (ix < iy) + w = x; /* return x if |x|<|y| */ + else { + *quo = 1 + (sq >> 30); + w = zero * x; /* return sign(x)*0.0 */ + } + } else { + /* INDENT OFF */ + /* + * scale x,y to "normal" with + * ny = exponent of y + * nd = exponent of x minus exponent of y + */ + /* INDENT ON */ + ny = iy >> 23; + k = ix >> 23; + + /* special case for subnormal y or x */ + if (ny == 0) { + ny = 1; + while (iy < iu) { + ny -= 1; + iy += iy; + } + nd = k - ny; + if (k == 0) { + nd += 1; + while (ix < iu) { + nd -= 1; + ix += ix; + } + } else + ix = iu | (ix & im); + } else { + nd = k - ny; + ix = iu | (ix & im); + iy = iu | (iy & im); + } + /* INDENT OFF */ + /* fix point fmod for normalized ix and iy */ + /* + * while (nd--) { + * iz = ix - iy; + * if (iz < 0) + * ix = ix + ix; + * else if (iz == 0) { + * *(int *) &w = is & hx; + * return w; + * } else + * ix = iz + iz; + * } + */ + /* INDENT ON */ + /* unroll the above loop 4 times to gain performance */ + m = 0; + k = nd >> 2; + nd -= (k << 2); + while (k--) { + iz = ix - iy; + if (iz >= 0) { + m += 1; + ix = iz + iz; + } else + ix += ix; + m += m; + iz = ix - iy; + if (iz >= 0) { + m += 1; + ix = iz + iz; + } else + ix += ix; + m += m; + iz = ix - iy; + if (iz >= 0) { + m += 1; + ix = iz + iz; + } else + ix += ix; + m += m; + iz = ix - iy; + 
if (iz >= 0) { + m += 1; + ix = iz + iz; + } else + ix += ix; + m += m; + if (iz == 0) { + iz = (k << 2) + nd; + if (iz < 32) + m <<= iz; + else + m = 0; + m &= 0x7fffffff; + *quo = sq >= 0 ? m : -m; + *(int *) &w = is & hx; + return (w); + } + } + while (nd--) { + iz = ix - iy; + if (iz >= 0) { + m += 1; + ix = iz + iz; + } else + ix += ix; + m += m; + } + /* end of unrolling */ + + iz = ix - iy; + if (iz >= 0) { + m += 1; + ix = iz; + } + m &= 0x7fffffff; + *quo = sq >= 0 ? m : -m; + + /* convert back to floating value and restore the sign */ + if (ix == 0) { + *(int *) &w = is & hx; + return (w); + } + while (ix < iu) { + ix += ix; + ny -= 1; + } + while (ix > (iu + iu)) { + ny += 1; + ix >>= 1; + } + if (ny > 0) + *(int *) &w = (is & hx) | (ix & im) | (ny << 23); + else { /* subnormal output */ + k = -ny + 1; + ix >>= k; + *(int *) &w = (is & hx) | ix; + } + } + return (w); +} + +float +remquof(float x, float y, int *quo) { + int hx, hy, sx, sq; + float v; + + hx = *(int *) &x; /* high word of x */ + hy = *(int *) &y; /* high word of y */ + sx = hx & is; /* sign of x */ + sq = (hx ^ hy) & is; /* sign of x/y */ + hx ^= sx; /* |x| */ + hy &= 0x7fffffff; /* |y| */ + + /* purge off exception values: y is 0 or NaN, x is Inf or NaN */ + *quo = 0; + if (hx >= ii || hy > ii || hy == 0) { + v = x * y; + return (v / v); + } + + y = fabsf(y); + x = fabsf(x); + if (hy <= 0x7f7fffff) { + x = fmodquof(x, y + y, quo); + *quo = ((*quo) & 0x3fffffff) << 1; + } + if (hy < 0x01000000) { + if (x + x > y) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x + x >= y) { + x -= y; + *quo += 1; + } + } + } else { + v = half * y; + if (x > v) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x >= v) { + x -= y; + *quo += 1; + } + } + } + if (sq != 0) + *quo = -(*quo); + return (sx == 0 ? 
x : -x); +} diff --git a/usr/src/libm/src/m9x/remquol.c b/usr/src/libm/src/m9x/remquol.c new file mode 100644 index 0000000..5d24a86 --- /dev/null +++ b/usr/src/libm/src/m9x/remquol.c @@ -0,0 +1,344 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)remquol.c 1.8 06/01/31 SMI" + +#pragma weak remquol = __remquol + +#include "libm.h" +#include "libm_synonyms.h" +#include /* fabsl */ +/* INDENT OFF */ +static const int + is = -0x7fffffff - 1, + im = 0x0000ffff, + iu = 0x00010000; + +static const long double zero = 0.0L, one = 1.0L; +/* INDENT ON */ + +#if defined(__sparc) +#define __H0(x) ((int *) &x)[0] +#define __H1(x) ((int *) &x)[1] +#define __H2(x) ((int *) &x)[2] +#define __H3(x) ((int *) &x)[3] +#else +#error Unsupported architecture +#endif + +/* + * On entrance: *quo is initialized to 0, x finite and y non-zero & ordered + */ +static long double +fmodquol(long double x, long double y, int *quo) { + long double a, b; + int n, ix, iy, k, sx, sq, m; + int hx; + int x0, y0, z0, carry; + unsigned x1, x2, x3, y1, y2, y3, z1, z2, z3; + + hx = __H0(x); + x1 = __H1(x); + x2 = __H2(x); + x3 = __H3(x); + y0 = __H0(y); + y1 = __H1(y); + y2 = __H2(y); + y3 = __H3(y); + + sx = hx & is; + sq = (hx ^ y0) & is; + x0 = hx ^ sx; + y0 &= ~0x80000000; + + a = fabsl(x); + b = fabsl(y); + if (a <= b) { + if (a < b) + return (x); + else { + *quo = 1 + (sq >> 30); + return (zero * x); + } + } + /* determine ix = ilogbl(x) */ + if (x0 < iu) { /* subnormal x */ + ix = 0; + ix = -16382; + while (x0 == 0) { + ix -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } + while (x0 < iu) { + ix -= 1; + x0 = (x0 << 1) | (x1 >> 31); + x1 = (x1 << 1) | (x2 >> 31); + x2 = (x2 << 1) | (x3 >> 31); + x3 <<= 1; + } + } else { + ix = (x0 >> 16) - 16383; + x0 = iu | (x0 & im); + } + + /* determine iy = ilogbl(y) */ + if (y0 < iu) { /* subnormal y */ + iy = -16382; + while (y0 == 0) { + iy -= 16; + y0 = y1 >> 16; + y1 = (y1 << 16) | (y2 >> 16); + y2 = (y2 << 16) | (y3 >> 16); + y3 = (y3 << 16); + } + while (y0 < iu) { + iy -= 1; + y0 = (y0 << 1) | (y1 >> 31); + y1 = (y1 << 1) | (y2 >> 31); + y2 = (y2 << 1) | (y3 >> 31); + y3 <<= 1; + } + } else { + iy = (y0 >> 16) 
- 16383; + y0 = iu | (y0 & im); + } + + + /* fix point fmod */ + n = ix - iy; + m = 0; + while (n--) { + while (x0 == 0 && n >= 16) { + m <<= 16; + n -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } + while (x0 < iu && n >= 1) { + m += m; + n -= 1; + x0 = (x0 << 1) | (x1 >> 31); + x1 = (x1 << 1) | (x2 >> 31); + x2 = (x2 << 1) | (x3 >> 31); + x3 = (x3 << 1); + } + carry = 0; + z3 = x3 - y3; + carry = z3 > x3; + if (carry == 0) { + z2 = x2 - y2; + carry = z2 > x2; + } else { + z2 = x2 - y2 - 1; + carry = z2 >= x2; + } + if (carry == 0) { + z1 = x1 - y1; + carry = z1 > x1; + } else { + z1 = x1 - y1 - 1; + carry = z1 >= x1; + } + z0 = x0 - y0 - carry; + if (z0 < 0) { /* double x */ + x0 = x0 + x0 + ((x1 & is) != 0); + x1 = x1 + x1 + ((x2 & is) != 0); + x2 = x2 + x2 + ((x3 & is) != 0); + x3 = x3 + x3; + m += m; + } else { + m += 1; + if (z0 == 0) { + if ((z1 | z2 | z3) == 0) { + /* 0: we are done */ + if (n < 31) + m <<= (1 + n); + else + m = 0; + m &= ~0x80000000; + *quo = sq >= 0 ? m : -m; + __H0(a) = hx & is; + __H1(a) = __H2(a) = __H3(a) = 0; + return (a); + } + } + /* x = z << 1 */ + z0 = z0 + z0 + ((z1 & is) != 0); + z1 = z1 + z1 + ((z2 & is) != 0); + z2 = z2 + z2 + ((z3 & is) != 0); + z3 = z3 + z3; + x0 = z0; + x1 = z1; + x2 = z2; + x3 = z3; + m += m; + } + } + carry = 0; + z3 = x3 - y3; + carry = z3 > x3; + if (carry == 0) { + z2 = x2 - y2; + carry = z2 > x2; + } else { + z2 = x2 - y2 - 1; + carry = z2 >= x2; + } + if (carry == 0) { + z1 = x1 - y1; + carry = z1 > x1; + } else { + z1 = x1 - y1 - 1; + carry = z1 >= x1; + } + z0 = x0 - y0 - carry; + if (z0 >= 0) { + x0 = z0; + x1 = z1; + x2 = z2; + x3 = z3; + m += 1; + } + m &= ~0x80000000; + *quo = sq >= 0 ? 
m : -m; + + /* convert back to floating value and restore the sign */ + if ((x0 | x1 | x2 | x3) == 0) { + __H0(a) = hx & is; + __H1(a) = __H2(a) = __H3(a) = 0; + return (a); + } + while (x0 < iu) { + if (x0 == 0) { + iy -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } else { + x0 = x0 + x0 + ((x1 & is) != 0); + x1 = x1 + x1 + ((x2 & is) != 0); + x2 = x2 + x2 + ((x3 & is) != 0); + x3 = x3 + x3; + iy -= 1; + } + } + + /* normalize output */ + if (iy >= -16382) { + __H0(a) = sx | (x0 - iu) | ((iy + 16383) << 16); + __H1(a) = x1; + __H2(a) = x2; + __H3(a) = x3; + } else { /* subnormal output */ + n = -16382 - iy; + k = n & 31; + if (k <= 16) { + x3 = (x2 << (32 - k)) | (x3 >> k); + x2 = (x1 << (32 - k)) | (x2 >> k); + x1 = (x0 << (32 - k)) | (x1 >> k); + x0 >>= k; + } else { + x3 = (x2 << (32 - k)) | (x3 >> k); + x2 = (x1 << (32 - k)) | (x2 >> k); + x1 = (x0 << (32 - k)) | (x1 >> k); + x0 = 0; + } + while (n >= 32) { + n -= 32; + x3 = x2; + x2 = x1; + x1 = x0; + x0 = 0; + } + __H0(a) = x0 | sx; + __H1(a) = x1; + __H2(a) = x2; + __H3(a) = x3; + a *= one; + } + return (a); +} + +long double +remquol(long double x, long double y, int *quo) { + int hx, hy, sx, sq; + long double v; + + hx = __H0(x); /* high word of x */ + hy = __H0(y); /* high word of y */ + sx = hx & is; /* sign of x */ + sq = (hx ^ hy) & is; /* sign of x/y */ + hx ^= sx; /* |x| */ + hy &= ~0x80000000; + + /* purge off exception values */ + *quo = 0; + /* y=0, y is NaN, x is NaN or inf */ + if (y == 0.0L || y != y || hx >= 0x7fff0000) + return ((x * y) / (x * y)); + + y = fabsl(y); + x = fabsl(x); + if (hy <= 0x7ffdffff) { + x = fmodquol(x, y + y, quo); + *quo = ((*quo) & 0x3fffffff) << 1; + } + if (hy < 0x00020000) { + if (x + x > y) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x + x >= y) { + x -= y; + *quo += 1; + } + } + } else { + v = 0.5L * y; + if (x > v) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x 
>= v) { + x -= y; + *quo += 1; + } + } + } + if (sq != 0) + *quo = -(*quo); + return (sx == 0 ? x : -x); +} diff --git a/usr/src/libm/src/m9x/round.c b/usr/src/libm/src/m9x/round.c new file mode 100644 index 0000000..f635830 --- /dev/null +++ b/usr/src/libm/src/m9x/round.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)round.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak round = __round +#endif + +#include "libm.h" + +double +round(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) { /* |x| < 1 */ + if (hx >= 0x3fe00000) + return (sx ? -1.0 : 1.0); + return (sx ? 
-0.0 : 0.0); + } + + /* round x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] += i; + if (xx.i[LOWORD] < i) + xx.i[HIWORD]++; + xx.i[LOWORD] &= ~(i | (i - 1)); + } + return (xx.d); + } else if (hx < 0x7ff00000) + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif +} diff --git a/usr/src/libm/src/m9x/roundf.c b/usr/src/libm/src/m9x/roundf.c new file mode 100644 index 0000000..6d6adb7 --- /dev/null +++ b/usr/src/libm/src/m9x/roundf.c @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)roundf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak roundf = __roundf +#endif + +#include "libm.h" + +float +roundf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i; + + xx.f = x; + hx = xx.i & ~0x80000000; + sx = xx.i & 0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + if (hx < 0x3f800000) { /* |x| < 1 */ + if (hx >= 0x3f000000) + return (sx ? -1.0F : 1.0F); + return (sx ? -0.0F : 0.0F); + } + + /* round x at the integer bit */ + i = 1 << (0x95 - (hx >> 23)); + xx.i = (xx.i + i) & ~((i << 1) - 1); + return (xx.f); + } else if (hx < 0x7f800000) /* |x| is integral */ + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx > 0x7f800000 ? x * x : x + x); +#else + return (x + x); +#endif +} diff --git a/usr/src/libm/src/m9x/roundl.c b/usr/src/libm/src/m9x/roundl.c new file mode 100644 index 0000000..c4859b2 --- /dev/null +++ b/usr/src/libm/src/m9x/roundl.c @@ -0,0 +1,165 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)roundl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak roundl = __roundl +#endif + +#include "libm.h" + +#if defined(__sparc) +long double +roundl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx, v; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) /* |x| >= 2^112 + ... or x is nan */ + return (hx >= 0x7fff0000 ? x + x : x); + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (hx >= 0x3ffe0000) + return (sx ? -1.0L : 1.0L); + return (sx ? -0.0L : 0.0L); + } + + xx.i[0] = hx; + j = 0x406f - (hx >> 16); /* 1 <= j <= 112 */ + if (j >= 96) { /* 96 <= j <= 112 */ + v = (1U << (j - 96)) >> 1; + if (v) { + if (xx.i[0] & v) + xx.i[0] += v; + xx.i[0] &= ~(v - 1); + } else if (xx.i[1] & 0x80000000) + ++xx.i[0]; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } else if (j >= 64) { /* 64 <= j <= 95 */ + v = (1U << (j - 64)) >> 1; + if (v) { + if (xx.i[1] & v) { + xx.i[1] += v; + if (xx.i[1] < v) + ++xx.i[0]; + } + xx.i[1] &= ~(v - 1); + } else if (xx.i[2] & 0x80000000) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + xx.i[2] = xx.i[3] = 0; + } else if (j >= 32) { /* 32 <= j <= 63 */ + v = (1U << (j - 32)) >> 1; + if (v) { + if (xx.i[2] & v) { + xx.i[2] += v; + if (xx.i[2] < v) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + } + xx.i[2] &= ~(v - 1); + } else if (xx.i[3] & 0x80000000) { + if (++xx.i[2] == 0) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + } + xx.i[3] = 0; + } else { /* 1 <= j <= 31 */ + v = 1U << (j - 1); + if (xx.i[3] & v) { + xx.i[3] += v; + if (xx.i[3] < v) { + if (++xx.i[2] == 0) { + if (++xx.i[1] == 0) + ++xx.i[0]; + } + } + } + xx.i[3] &= ~(v - 1); + } + + /* negate result if need be */ + if (sx) + xx.i[0] |= 0x80000000; + return (xx.q); +} +#elif defined(__i386) +long double +roundl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] 
& 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) { /* |x| < 1 */ + if (ex >= 0x3ffe) + return (sx ? -1.0L : 1.0L); + return (sx ? -0.0L : 0.0L); + } + + /* round x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] += i; + if (xx.i[0] < i) + xx.i[1]++; + xx.i[0] &= ~(i | (i - 1)); + } + if (xx.i[1] == 0) { + xx.i[2] = sx | ++ex; + xx.i[1] = 0x80000000U; + } + return (xx.e); + } else if (ex < 0x7fff) /* x is integral */ + return (x); + else /* inf or nan */ + return (x + x); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__i386) */ diff --git a/usr/src/libm/src/m9x/scalbln.c b/usr/src/libm/src/m9x/scalbln.c new file mode 100644 index 0000000..731d531 --- /dev/null +++ b/usr/src/libm/src/m9x/scalbln.c @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)scalbln.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak scalbln = __scalbln +#endif + +#include "libm.h" +#include /* DBL_MAX, DBL_MIN */ + +static const double twom54 = 5.5511151231257827021181583404541015625e-17; +#if defined(USE_FPSCALE) || defined(__i386) +static const double two52 = 4503599627370496.0; +#else +/* + * Normalize non-zero subnormal x and return biased exponent of x in [-51,0] + */ +static int +ilogb_biased(unsigned *px) { + int s = 52; + unsigned v = px[HIWORD] & ~0x80000000, w = px[LOWORD], t = v; + + if (t) + s -= 32; + else + t = w; + if (t & 0xffff0000) + s -= 16, t >>= 16; + if (t & 0xff00) + s -= 8, t >>= 8; + if (t & 0xf0) + s -= 4, t >>= 4; + t <<= 1; + s -= (0xffffaa50 >> t) & 0x3; + if (s < 32) { + v = (v << s) | w >> (32 - s); + w <<= s; + } else { + v = w << (s - 32); + w = 0; + } + px[HIWORD] = (px[HIWORD] & 0x80000000) | v; + px[LOWORD] = w; + return (1 - s); +} +#endif /* defined(USE_FPSCALE) */ + +double +scalbln(double x, long n) { + int *px = (int *) &x, ix, k; + + ix = px[HIWORD] & ~0x80000000; + k = ix >> 20; + if (k == 0x7ff) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return ((px[HIWORD] & 0x80000) != 0 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + if ((px[LOWORD] | ix) == 0 || n == 0) + return (x); + if (k == 0) { +#if defined(USE_FPSCALE) || defined(__i386) + x *= two52; + k = ((px[HIWORD] & ~0x80000000) >> 20) - 52; +#else + k = ilogb_biased((unsigned *) px); +#endif + } + k += (int) n; + if (n > 5000 || k > 0x7fe) + return (DBL_MAX * copysign(DBL_MAX, x)); + if (n < -5000 || k <= -54) + return (DBL_MIN * copysign(DBL_MIN, x)); + if (k > 0) { + px[HIWORD] = (px[HIWORD] & ~0x7ff00000) | (k << 20); + return (x); + } + k += 54; + px[HIWORD] = (px[HIWORD] & ~0x7ff00000) | (k << 20); + return (x * twom54); +} diff --git a/usr/src/libm/src/m9x/scalblnf.c b/usr/src/libm/src/m9x/scalblnf.c new file mode 100644 index 0000000..ae69036 --- /dev/null +++ b/usr/src/libm/src/m9x/scalblnf.c @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)scalblnf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak scalblnf = __scalblnf +#endif + +#include "libm.h" +#include /* FLT_MAX, FLT_MIN */ + +static const float twom25f = 2.98023223876953125e-8F; +#if defined(USE_FPSCALE) || defined(__i386) +static const float two23f = 8388608.0F; +#else +/* + * v: a non-zero subnormal |x|; returns [-22, 0] + */ +static int +ilogbf_biased(unsigned v) { + int r = -22; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +float +scalblnf(float x, long n) { + int *px = (int *) &x, ix, k; + + ix = *px & ~0x80000000; + k = ix >> 23; + if (k == 0xff) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix > 0x7f800000 ? x * x : x); +#else + return (x + x); +#endif + if (ix == 0 || n == 0) + return (x); + if (k == 0) { +#if defined(USE_FPSCALE) || defined(__i386) + x *= two23f; + k = ((*px & ~0x80000000) >> 23) - 23; +#else + k = ilogbf_biased(ix); + *px = (*px & 0x80000000) | (ix << (-k + 1)); +#endif + } + k += (int) n; + if (n > 5000 || k > 0xfe) + return (FLT_MAX * copysignf(FLT_MAX, x)); + if (n < -5000 || k <= -25) + return (FLT_MIN * copysignf(FLT_MIN, x)); + if (k > 0) { + *px = (*px & ~0x7f800000) | (k << 23); + return (x); + } + k += 25; + *px = (*px & ~0x7f800000) | (k << 23); + return (x * twom25f); +} diff --git a/usr/src/libm/src/m9x/scalblnl.c b/usr/src/libm/src/m9x/scalblnl.c new file mode 100644 index 0000000..f017495 --- /dev/null +++ b/usr/src/libm/src/m9x/scalblnl.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)scalblnl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak scalblnl = __scalblnl +#endif + +#include "libm.h" +#include /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define XSET_EXP(k, x) ((int *) &x)[0] = (((int *) &x)[0] & ~0x7fff0000) | \ + (k << 16) +#define ISINFNANL(k, x) (k == 0x7fff) +#define XTWOT_OFFSET 113 +static const long double xtwot = 10384593717069655257060992658440192.0L, + /* 2^113 */ + twomtm1 = 4.814824860968089632639944856462318296E-35L; /* 2^-114 */ +#elif defined(__i386) +#define XSET_EXP(k, x) ((int *) &x)[2] = (((int *) &x)[2] & ~0x7fff) | k +#if defined(HANDLE_UNSUPPORTED) +#define ISINFNANL(k, x) (k == 0x7fff || k != 0 && \ + (((int *) &x)[1] & 0x80000000) == 0) +#else +#define ISINFNANL(k, x) (k == 0x7fff) +#endif +#define XTWOT_OFFSET 64 +static const long double xtwot = 18446744073709551616.0L, /* 2^64 */ + twomtm1 = 2.7105054312137610850186E-20L; /* 2^-65 */ +#endif + +long double +scalblnl(long double x, long n) { + int k = XBIASED_EXP(x); + + if (ISINFNANL(k, x)) + return (x + x); + if (ISZEROL(x) || n == 0) + return (x); + if (k == 0) { + x *= xtwot; + k = XBIASED_EXP(x) - XTWOT_OFFSET; + } + k += (int) n; + if (n > 50000 || k > 0x7ffe) + return (LDBL_MAX * copysignl(LDBL_MAX, x)); + if (n < -50000 || k <= -XTWOT_OFFSET - 1) + return (LDBL_MIN * copysignl(LDBL_MIN, x)); + if (k > 0) { + XSET_EXP(k, x); + 
return (x); + } + k += XTWOT_OFFSET + 1; + XSET_EXP(k, x); + return (x * twomtm1); +} diff --git a/usr/src/libm/src/m9x/tgamma.c b/usr/src/libm/src/m9x/tgamma.c new file mode 100644 index 0000000..4e5253f --- /dev/null +++ b/usr/src/libm/src/m9x/tgamma.c @@ -0,0 +1,1703 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tgamma.c 1.13 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak tgamma = __tgamma +#endif + +/* INDENT OFF */ +/* + * True gamma function + * double tgamma(double x) + * + * Error: + * ------ + * Less that one ulp for both positive and negative arguments. + * + * Algorithm: + * --------- + * A: For negative argument + * (1) gamma(-n or -inf) is NaN + * (2) Underflow Threshold + * (3) Reduction to gamma(1+x) + * B: For x between 1 and 2 + * C: For x between 0 and 1 + * D: For x between 2 and 8 + * E: Overflow thresold {see over.c} + * F: For overflow_threshold >= x >= 8 + * + * Implementation details + * ----------------------- + * -pi + * (A) For negative argument, use gamma(-x) = ------------------------. 
+ * (sin(pi*x)*gamma(1+x)) + * + * (1) gamma(-n or -inf) is NaN with invalid signal by SUSv3 spec. + * (Ideally, gamma(-n) = 1/sinpi(n) = (-1)**(n+1) * inf.) + * + * (2) Underflow Threshold. For each precision, there is a value T + * such that when x>T and when x is not an integer, gamma(-x) will + * always underflow. A table of the underflow threshold value is given + * below. For proof, see file "under.c". + * + * Precision underflow threshold T = + * ---------------------------------------------------------------------- + * single 41.000041962 = 41 + 11 ULP + * (machine format) 4224000B + * double 183.000000000000312639 = 183 + 11 ULP + * (machine format) 4066E000 0000000B + * quad 1774.0000000000000000000000000000017749370 = 1774 + 9 ULP + * (machine format) 4009BB80000000000000000000000009 + * ---------------------------------------------------------------------- + * + * (3) Reduction to gamma(1+x). + * Because of (1) and (2), we need only consider non-integral x + * such that 0 < x < T. + * NOTE(review): the derivation that belongs here (the definitions of kpsin() and kpcos() and of the z-ranges (I), (II), (III)) is missing from this copy; the text between a '<' and a '>' sign was lost. Consistent with (A) and the three cases below, kpsin(t) = sin(pi*t)/pi and kpcos(t) = cos(pi*t)/pi; the bounds of ranges (I)-(III) cannot be recovered from the remaining text. + * The computation formula for gamma(-x), x > 0, is: + * Let k = int(x), z = x-k. + * For z in (I), + * gamma(-x) = (-1)**(k+1) / (kpsin(z)*gamma(1+x)); + * otherwise, for z in (II), + * gamma(-x) = (-1)**(k+1) / (kpcos(0.5-z)*gamma(1+x)); + * otherwise, for z in (III), + * gamma(-x) = (-1)**(k+1) / (kpsin(1-z)*gamma(1+x)). + * + * Thus, the computation of gamma(-x) is reduced to the computation of + * gamma(1+x) and kpsin(), kpcos(). + * + * (B) For x between 1 and 2. 
We break [1,2] into three parts: + * GT1 = [1.0000, 1.2845] + * GT2 = [1.2844, 1.6374] + * GT3 = [1.6373, 2.0000] + * + * For x in GTi, i=1,2,3, let + * z1 = 1.134861805732790769689793935774652917006 + * gz1 = gamma(z1) = 0.9382046279096824494097535615803269576988 + * tz1 = gamma'(z1) = -0.3517214357852935791015625000000000000000 + * + * z2 = 1.461632144968362341262659542325721328468e+0000 + * gz2 = gamma(z2) = 0.8856031944108887002788159005825887332080 + * tz2 = gamma'(z2) = 0.00 + * + * z3 = 1.819773101100500601787868704921606996312e+0000 + * gz3 = gamma(z3) = 0.9367814114636523216188468970808378497426 + * tz3 = gamma'(z3) = 0.2805306315422058105468750000000000000000 + * + * and + * y = x-zi ... for extra precision, write y = y.h + y.l + * Then + * gamma(x) = gzi + tzi*(y.h+y.l) + y*y*Ri(y), + * = gzi.h + (tzi*y.h + ((tzi*y.l+gzi.l) + y*y*Ri(y))) + * = gy.h + gy.l + * where + * (I) For double precision + * + * Ri(y) = Pi(y)/Qi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[4]*y^4 + * Q1(y) = q1[0] + q1[1]*y + ... + q1[5]*y^5 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[3]*y^3 + * Q2(y) = q2[0] + q2[1]*y + ... + q2[6]*y^6 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4 + * Q3(y) = q3[0] + q3[1]*y + ... + q3[5]*y^5 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-62.3 ... for i = 1 + * <= 2**-59.4 ... for i = 2 + * <= 2**-62.1 ... for i = 3 + * + * (II) For quad precision + * + * Ri(y) = Pi(y)/Qi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[9]*y^9 + * Q1(y) = q1[0] + q1[1]*y + ... + q1[8]*y^8 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[9]*y^9 + * Q2(y) = q2[0] + q2[1]*y + ... + q2[9]*y^9 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[9]*y^9 + * Q3(y) = q3[0] + q3[1]*y + ... + q3[9]*y^9 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-118.2 ... for i = 1 + * <= 2**-126.8 ... for i = 2 + * <= 2**-119.5 ... 
for i = 3 + * + * (III) For single precision + * + * Ri(y) = Pi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[5]*y^5 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[5]*y^5 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-30.8 ... for i = 1 + * <= 2**-31.6 ... for i = 2 + * <= 2**-29.5 ... for i = 3 + * + * Notes. (1) GTi and zi are choosen to balance the interval width and + * minimize the distant between gamma(x) and the tangent line at + * zi. In particular, we have + * |gamma(x)-(gzi+tzi*(x-zi))| <= 0.01436... for x in [1,z2] + * <= 0.01265... for x in [z2,2] + * + * (2) zi are slightly adjusted so that tzi=gamma'(zi) is very + * close to a single precision value. + * + * Coefficents: Single precision + * i= 1: + * P1[0] = 7.09087253435088360271451613398019280077561279443e-0001 + * P1[1] = -5.17229560788652108545141978238701790105241761089e-0001 + * P1[2] = 5.23403394528150789405825222323770647162337764327e-0001 + * P1[3] = -4.54586308717075010784041566069480411732634814899e-0001 + * P1[4] = 4.20596490915239085459964590559256913498190955233e-0001 + * P1[5] = -3.57307589712377520978332185838241458642142185789e-0001 + * + * i = 2: + * p2[0] = 4.28486983980295198166056119223984284434264344578e-0001 + * p2[1] = -1.30704539487709138528680121627899735386650103914e-0001 + * p2[2] = 1.60856285038051955072861219352655851542955430871e-0001 + * p2[3] = -9.22285161346010583774458802067371182158937943507e-0002 + * p2[4] = 7.19240511767225260740890292605070595560626179357e-0002 + * p2[5] = -4.88158265593355093703112238534484636193260459574e-0002 + * + * i = 3 + * p3[0] = 3.82409531118807759081121479786092134814808872880e-0001 + * p3[1] = 2.65309888180188647956400403013495759365167853426e-0002 + * p3[2] = 8.06815109775079171923561169415370309376296739835e-0002 + * p3[3] = -1.54821591666137613928840890835174351674007764799e-0002 + * p3[4] = 
1.76308239242717268530498313416899188157165183405e-0002 + * + * Coefficents: Double precision + * i = 1: + * p1[0] = 0.70908683619977797008004927192814648151397705078125000 + * p1[1] = 1.71987061393048558089579513384356441668351720061e-0001 + * p1[2] = -3.19273345791990970293320316122813960527705450671e-0002 + * p1[3] = 8.36172645419110036267169600390549973563534476989e-0003 + * p1[4] = 1.13745336648572838333152213474277971244629758101e-0003 + * q1[0] = 1.0 + * q1[1] = 9.71980217826032937526460731778472389791321968082e-0001 + * q1[2] = -7.43576743326756176594084137256042653497087666030e-0002 + * q1[3] = -1.19345944932265559769719470515102012246995255372e-0001 + * q1[4] = 1.59913445751425002620935120470781382215050284762e-0002 + * q1[5] = 1.12601136853374984566572691306402321911547550783e-0003 + * i = 2: + * p2[0] = 0.42848681585558601181418225678498856723308563232421875 + * p2[1] = 6.53596762668970816023718845105667418483122103629e-0002 + * p2[2] = -6.97280829631212931321050770925128264272768936731e-0003 + * p2[3] = 6.46342359021981718947208605674813260166116632899e-0003 + * q2[0] = 1.0 + * q2[1] = 4.57572620560506047062553957454062012327519313936e-0001 + * q2[2] = -2.52182594886075452859655003407796103083422572036e-0001 + * q2[3] = -1.82970945407778594681348166040103197178711552827e-0002 + * q2[4] = 2.43574726993169566475227642128830141304953840502e-0002 + * q2[5] = -5.20390406466942525358645957564897411258667085501e-0003 + * q2[6] = 4.79520251383279837635552431988023256031951133885e-0004 + * i = 3: + * p3[0] = 0.382409479734567459008331979930517263710498809814453125 + * p3[1] = 1.42876048697668161599069814043449301572928034140e-0001 + * p3[2] = 3.42157571052250536817923866013561760785748899071e-0003 + * p3[3] = -5.01542621710067521405087887856991700987709272937e-0004 + * p3[4] = 8.89285814866740910123834688163838287618332122670e-0004 + * q3[0] = 1.0 + * q3[1] = 3.04253086629444201002215640948957897906299633168e-0001 + * q3[2] = 
-2.23162407379999477282555672834881213873185520006e-0001 + * q3[3] = -1.05060867741952065921809811933670131427552903636e-0002 + * q3[4] = 1.70511763916186982473301861980856352005926669320e-0002 + * q3[5] = -2.12950201683609187927899416700094630764182477464e-0003 + * + * Note that all pi0 are exact in double, which is obtained by a + * special Remez Algorithm. + * + * Coefficents: Quad precision + * i = 1: + * p1[0] = 0.709086836199777919037185741507610124611513720557 + * p1[1] = 4.45754781206489035827915969367354835667391606951e-0001 + * p1[2] = 3.21049298735832382311662273882632210062918153852e-0002 + * p1[3] = -5.71296796342106617651765245858289197369688864350e-0003 + * p1[4] = 6.04666892891998977081619174969855831606965352773e-0003 + * p1[5] = 8.99106186996888711939627812174765258822658645168e-0004 + * p1[6] = -6.96496846144407741431207008527018441810175568949e-0005 + * p1[7] = 1.52597046118984020814225409300131445070213882429e-0005 + * p1[8] = 5.68521076168495673844711465407432189190681541547e-0007 + * p1[9] = 3.30749673519634895220582062520286565610418952979e-0008 + * q1[0] = 1.0+0000 + * q1[1] = 1.35806511721671070408570853537257079579490650668e+0000 + * q1[2] = 2.97567810153429553405327140096063086994072952961e-0001 + * q1[3] = -1.52956835982588571502954372821681851681118097870e-0001 + * q1[4] = -2.88248519561420109768781615289082053597954521218e-0002 + * q1[5] = 1.03475311719937405219789948456313936302378395955e-0002 + * q1[6] = 4.12310203243891222368965360124391297374822742313e-0004 + * q1[7] = -3.12653708152290867248931925120380729518332507388e-0004 + * q1[8] = 2.36672170850409745237358105667757760527014332458e-0005 + * + * i = 2: + * p2[0] = 0.428486815855585429730209907810650616737756697477 + * p2[1] = 2.63622124067885222919192651151581541943362617352e-0001 + * p2[2] = 3.85520683670028865731877276741390421744971446855e-0002 + * p2[3] = 3.05065978278128549958897133190295325258023525862e-0003 + * p2[4] = 
2.48232934951723128892080415054084339152450445081e-0003 + * p2[5] = 3.67092777065632360693313762221411547741550105407e-0004 + * p2[6] = 3.81228045616085789674530902563145250532194518946e-0006 + * p2[7] = 4.61677225867087554059531455133839175822537617677e-0006 + * p2[8] = 2.18209052385703200438239200991201916609364872993e-0007 + * p2[9] = 1.00490538985245846460006244065624754421022542454e-0008 + * q2[0] = 1.0 + * q2[1] = 9.20276350207639290567783725273128544224570775056e-0001 + * q2[2] = -4.79533683654165107448020515733883781138947771495e-0003 + * q2[3] = -1.24538337585899300494444600248687901947684291683e-0001 + * q2[4] = 4.49866050763472358547524708431719114204535491412e-0003 + * q2[5] = 7.20715455697920560621638325356292640604078591907e-0003 + * q2[6] = -8.68513169029126780280798337091982780598228096116e-0004 + * q2[7] = -1.25104431629401181525027098222745544809974229874e-0004 + * q2[8] = 3.10558344839000038489191304550998047521253437464e-0005 + * q2[9] = -1.76829227852852176018537139573609433652506765712e-0006 + * + * i = 3 + * p3[0] = 0.3824094797345675048502747661075355640070439388902 + * p3[1] = 3.42198093076618495415854906335908427159833377774e-0001 + * p3[2] = 9.63828189500585568303961406863153237440702754858e-0002 + * p3[3] = 8.76069421042696384852462044188520252156846768667e-0003 + * p3[4] = 1.86477890389161491224872014149309015261897537488e-0003 + * p3[5] = 8.16871354540309895879974742853701311541286944191e-0004 + * p3[6] = 6.83783483674600322518695090864659381650125625216e-0005 + * p3[7] = -1.10168269719261574708565935172719209272190828456e-0006 + * p3[8] = 9.66243228508380420159234853278906717065629721016e-0007 + * p3[9] = 2.31858885579177250541163820671121664974334728142e-0008 + * q3[0] = 1.0 + * q3[1] = 8.25479821168813634632437430090376252512793067339e-0001 + * q3[2] = -1.62251363073937769739639623669295110346015576320e-0002 + * q3[3] = -1.10621286905916732758745130629426559691187579852e-0001 + * q3[4] = 
3.48309693970985612644446415789230015515365291459e-0003 + * q3[5] = 6.73553737487488333032431261131289672347043401328e-0003 + * q3[6] = -7.63222008393372630162743587811004613050245128051e-0004 + * q3[7] = -1.35792670669190631476784768961953711773073251336e-0004 + * q3[8] = 3.19610150954223587006220730065608156460205690618e-0005 + * q3[9] = -1.82096553862822346610109522015129585693354348322e-0006 + * + * (C) For x between 0 and 1. + * Let P stand for the number of significant bits in the working precision. + * (1) For 0 <= x <= 2**-P, gamma(x) is computed by 1/x rounded to nearest. + * The error is bound by 0.739 ulp(gamma(x)) in IEEE double precision. + * Proof. + * Since 1/gamma(x) ~ x + 0.577...*x**2 - ..., we have, for small x, + * 1/(x*(1+0.578x)) < gamma(x) < 1/x and + * 0 < 1/x - gamma(x) <= 1/x - 1/(x*(1+0.578x)) < 0.578. + * The error is thus bounded by (1/2)*ulp(1/x) + 0.578. Since x <= 2**-P, + * 1/x >= 2**P, ulp(1/x) >= ulp(2**P) >= 2. Thus 0.578 = 0.289*2 <= 0.289*ulp(1/x). + * Thus + * | gamma(x) - [1/x] rounded | <= (0.5+0.289)*ulp(1/x). + * Note that for x <= 2**-P, it is easy to see that ulp(1/x) = ulp(gamma(x)) + * except only when x = 2**n, (n <= -53). In such cases, 1/x is exact + * and therefore the error is bounded by + * 0.289*ulp(1/x) = 0.289*2*ulp(gamma(x)) = 0.578*ulp(gamma(x)). + * Thus we conclude that the error in gamma is less than 0.739 ulp. + * + * (2) Otherwise, for x in GTi-1 (see B), let y = x-(zi-1). From (B) we obtain + * gamma(1+x) = gy.h + gy.l, then compute gamma(x) by gamma(1+x)/x. + * Implementation note. Write x = x.h+x.l, and let th = gy.h/x chopped to + * 20 bits, then + * gamma(x) = th + ((gy.h+gy.l)/x - th) + * = th + (1/x)*(gy.h-th*x.h+gy.l-th*x.l) + * + * (D) For x between 2 and 8. Let n = x-1 chopped to an integer. 
Then + * + * gamma(x)=(x-1)*(x-2)*...*(x-n)*gamma(x-n) + * + * Since x-n is between 1 and 2, we can apply (B) to compute gamma(x). + * + * Implementation detail. The computation of (x-1)(x-2)...(x-n) in simulated + * higher precision arithmetic can be somewhat optimized. For example, in + * computing (x-1)*(x-2)*(x-3)*(x-4), if we compute (x-1)*(x-4) = z.h+z.l, + * then (x-2)(x-3) = z.h+2+z.l readily. Below, we list the expressions + * used to compute gamma(x). + * + * Assume x-n is in GTi (i=1,2, or 3, see B for detail). Let y = x - n - zi. + * By (B) we have gamma(x-n) = gy.h+gy.l. If x = x.h+x.l, then we have + * n=1 (x in [2,3]): + * gamma(x) = (x-1)*gamma(x-1) = (x-1)*(gy.h+gy.l) + * = [(x.h-1)+x.l]*(gy.h+gy.l) + * n=2 (x in [3,4]): + * gamma(x) = (x-1)(x-2)*gamma(x-2) = (x-1)*(x-2)*(gy.h+gy.l) + * = ((x.h-2)+x.l)*((x.h-1)+x.l)*(gy.h+gy.l) + * = [x.h*(x.h-3)+2+x.l*(x+(x.h-3))]*(gy.h+gy.l) + * n=3 (x in [4,5]) + * gamma(x) = (x-1)(x-2)(x-3)*(gy.h+gy.l) + * = (x.h*(x.h-3)+2+x.l*(x+(x.h-3)))*[((x.h-3)+x.l)(gy.h+gy.l)] + * n=4 (x in [5,6]) + * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*(gy.h+gy.l) + * = [(x.h*(x.h-5)+4+x.l(x+(x.h-5)))]*[(x-2)*(x-3)]*(gy.h+gy.l) + * = (y.h+y.l)*(y.h+2+y.l)*(gy.h+gy.l), where here y.h+y.l = (x-1)(x-4) + * n=5 (x in [6,7]) + * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*[(x-5)*(gy.h+gy.l)] + * n=6 (x in [7,8]) + * gamma(x) = [(x-1)(x-6)]*[(x-2)(x-5)]*[(x-3)(x-4)*(gy.h+gy.l)] + * = [(y.h+y.l)(y.h+4+y.l)][(y.h+6+y.l)(gy.h+gy.l)], where here y.h+y.l = (x-1)(x-6) + * + * (E) Overflow Threshold. For x > overflow threshold of gamma, + * return huge*huge (overflow). + * + * By checking whether lgamma(x) >= log(2**{128,1024,16384}), one can + * determine the overflow threshold for x in single, double, and + * quad precision. See over.c for details. 
+ * + * The overflow thresholds of gamma(x) are + * + * single: x = 3.5040096283e+01 + * = 0x420C290F (IEEE single) + * double: x = 1.71624376956302711505e+02 + * = 0x406573FAE561F647 (IEEE double) + * quad: x = 1.7555483429044629170038892160702032034177e+03 + * = 0x4009B6E3180CD66A5C4206F128BA77F4 (quad) + * + * (F)For overflow_threshold >= x >= 8, we use asymptotic approximation. + * (1) Stirling's formula + * + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + * where + * L1(x) = (x-.5)*(log(x)-1), + * L2 = .5(log(2pi)-1) = 0.41893853...., + * L3(x) = (1/x)P(1/(x*x)), + * + * The ranges of L1, L2, and L3 are as follows: + * + * ------------------------------------------------------------------ + * Range(L1) = (single) [8.09..,88.30..] =[2** 3.01..,2** 6.46..] + * (double) [8.09..,709.3..] =[2** 3.01..,2** 9.47..] + * (quad) [8.09..,11356.10..]=[2** 3.01..,2** 13.47..] + * Range(L2) = 0.41893853..... + * Range(L3) = [0.0104...., 0.00048....] =[2**-6.58..,2**-11.02..] + * ------------------------------------------------------------------ + * + * Gamma(x) is then computed by exp(L1+L2+L3). + * + * (2) Error analysis of (F): + * -------------------------- + * The error in Gamma(x) depends on the error inherited in the computation + * of L= L1+L2+L3. Let L' be the computed value of L. The absolute error + * in L' is t = L-L'. Since exp(L') = exp(L-t) = exp(L)*exp(-t) ~ + * (1-t)*exp(L), the relative error in exp(L') is approximately |t|. + * + * To guarantee the relative accuracy in exp(L'), we would like + * |t| < 2**(-P-5) where P denotes the number of significant bits + * of the working precision. Consequently, each of L1, L2, and L3 + * must be computed with absolute error bounded by 2**(-P-5) in absolute + * value. + * + * Since L2 is a constant, it can be pre-computed to the desired accuracy. + * Also |L3| < 2**-6; therefore, it suffices to compute L3 with the + * working precision.
That is, + * L3(x) approximates log(G(x))-(x-.5)(log(x)-1)-.5(log(2pi)-1) + * to a precision bounded by 2**(-P-5). + * + * 2**(-6) + * _________V___________________ + * L1(x): |_________|___________________| + * __ ________________________ + * L2: |__|________________________| + * __________________________ + * + L3(x): |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + * + * For L1(x)=(x-0.5)*(log(x)-1), we need ilogb(L1(x))+5 extra bits for + * both multiplicands to guarantee L1(x)'s absolute error is bounded by + * 2**(-P-5) in absolute value. Here ilogb(y) is defined to be the unbiased + * binary exponent of y in IEEE format. We can get x-0.5 to the desired + * accuracy easily. It remains to compute log(x)-1 with ilogb(L1(x))+5 + * extra bits of accuracy. Since the upper bound of L1 is 88.30.., 709.3.., and + * 11356.10... for single, double, and quadruple precision, we have + * + * single double quadruple + * ------------------------------------ + * ilogb(L1(x))+5 <= 11 14 18 + * ------------------------------------ + * + * (3) Table Driven Method for log(x)-1: + * -------------------------------------- + * Let x = 2**n * y, where 1 <= y < 2. Let Z={z(i),i=1,...,m} + * be a set of predetermined evenly distributed floating point numbers + * in [1, 2]. Let z(j) be the closest one to y, then + * log(x)-1 = n*log(2)-1 + log(y) + * = n*log(2)-1 + log(z(j)*y/z(j)) + * = n*log(2)-1 + log(z(j)) + log(y/z(j)) + * = T1(n) + T2(j) + T3, + * + * where T1(n) = n*log(2)-1 and T2(j) = log(z(j)). Both T1 and T2 can be + * pre-calculated and be looked-up in a table. Note that 8 <= x < 1756 + * implies 3<=n<=10 implies 1.079.. < T1(n) < 6.931. + * + * + * For T3, let s = (y-z(j))/(y+z(j)); then y/z(j) = (1+s)/(1-s) and + * T3 = log((1+s)/(1-s)) = 2s + (2/3)*s**3 + (2/5)*s**5 + .... + * + * Suppose the first term 2s is computed in extra precision.
The + * dominating error in T3 would then be the rounding error of the + * second term 2/3*s**3. To force the rounding error to be bounded by + * the required accuracy, we have + * single: |2/3*s**3| < 2**-11 ==> |s|<0.09014... + * double: |2/3*s**3| < 2**-14 ==> |s|<0.04507... + * quad : |2/3*s**3| < 2**-18 ==> |s|<0.01788... = 2**(-5.80..) + * + * Based on this analysis, we choose Z = {z(i)|z(i)=1+i/64+1/128, 0<=i<=63}. + * For any y in [1,2), let j = [64*y] chopped to integer, then z(j) is + * the closest to y, and it is not difficult to see that |s| < 2**(-8). + * Please note that the polynomial approximation of T3 must be accurate + * to 2**(-24-11) = 2**(-35), 2**(-53-14) = 2**(-67), and 2**(-113-18) = 2**(-131) + * for single, double, and quadruple precision respectively. + * + * Implementation notes. + * (1) Table look-up entries for T1(n) and T2(j), as well as the calculation + * of the leading term 2s in T3, are broken up into leading and trailing + * parts such that (leading part)* 2**24 will always be an integer. That + * will guarantee the addition of the leading parts will be exact. + * + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + * + * (2) How to compute 2s accurately. + * (A) Compute v = 2s to the working precision. If |v| < 2**(-18), + * stop. + * (B) chop v to 2**(-24): v = ((int)(v*2**24))/2**24 + * (C) 2s = v + (2s - v), where + * 2s - v = (1/(y+z)) * (2(y-z) - v*(y+z) ) + * = (1/(y+z)) * ( [2(y-z) - v*(y+z)_h ] - v*(y+z)_l ) + * where (y+z)_h = (y+z) rounded to 24 bits by (double)(float), + * and (y+z)_l = ((z+z)-(y+z)_h)+(y-z). Note that the quantity + * in [] is exact.
+ * + * (3) Remez approximation for (T3(s)-2s)/s = T3[0]*s**2 + T3[1]*s**4 + ...: + * Single precision: 1 term (computed in double precision arithmetic) + * T3(s) = 2s + S1*s^3, S1 = 0.6666717231848518054693623697539230 + * Remez error: |T3(s)/s - (2s+S1*s^3)| < 2**(-35.87) + * Double precision: 3 terms, Remez error is bounded by 2**(-72.40), + * see "tgamma_log" + * Quad precision: 7 terms, Remez error is bounded by 2**(-136.54), + * see "tgammal_log" + * + * The computation of 0.5*(ln(2pi)-1): + * 0.5*(ln(2pi)-1) = 0.4189385332046727417803297364056176398614... + * split 0.5*(ln(2pi)-1) to hln2pi_h + hln2pi_l, where hln2pi_h is the + * leading 21 bits of the constant. + * hln2pi_h= 0.4189383983612060546875 + * hln2pi_l= 1.348434666870928297364056176398612173648e-07 + * (The double precision table c[] below stores a different split of the + * same constant: hln2pi_h = 4.18937683105468750000e-01 and + * hln2pi_l = 8.50099203991780279640e-07.) + * + * The computation of 1/x*P(1/x^2) = log(G(x))-(x-.5)(ln(x)-1)-.5(ln(2pi)-1): + * Let s = 1/x <= 1/8 = 0.125. We have + * quad precision + * |GP(s) - s*P(s^2)| <= 2**(-120.6), where + * 3 5 39 + * GP(s) = GP0*s+GP1*s +GP2*s +...
+GP19*s , + * GP0 = 0.083333333333333333333333333333333172839171301 + * hex 0x3ffe5555 55555555 55555555 55555548 + * GP1 = -2.77777777777777777777777777492501211999399424104e-0003 + * GP2 = 7.93650793650793650793635650541638236350020883243e-0004 + * GP3 = -5.95238095238095238057299772679324503339241961704e-0004 + * GP4 = 8.41750841750841696138422987977683524926142600321e-0004 + * GP5 = -1.91752691752686682825032547823699662178842123308e-0003 + * GP6 = 6.41025641022403480921891559356473451161279359322e-0003 + * GP7 = -2.95506535798414019189819587455577003732808185071e-0002 + * GP8 = 1.79644367229970031486079180060923073476568732136e-0001 + * GP9 = -1.39243086487274662174562872567057200255649290646e+0000 + * GP10 = 1.34025874044417962188677816477842265259608269775e+0001 + * GP11 = -1.56803713480127469414495545399982508700748274318e+0002 + * GP12 = 2.18739841656201561694927630335099313968924493891e+0003 + * GP13 = -3.55249848644100338419187038090925410976237921269e+0004 + * GP14 = 6.43464880437835286216768959439484376449179576452e+0005 + * GP15 = -1.20459154385577014992600342782821389605893904624e+0007 + * GP16 = 2.09263249637351298563934942349749718491071093210e+0008 + * GP17 = -2.96247483183169219343745316433899599834685703457e+0009 + * GP18 = 2.88984933605896033154727626086506756972327292981e+0010 + * GP19 = -1.40960434146030007732838382416230610302678063984e+0011 + * + * double precision + * |GP(s) - s*P(s^2)| <= 2**(-63.5), where + * 3 5 7 9 11 13 15 + * GP(s) = GP0*s+GP1*s +GP2*s +GP3*s +GP4*s +GP5*s +GP6*s +GP7*s , + * + * GP0= 0.0833333333333333287074040640618477 (3FB55555 55555555) + * GP1= -2.77777777776649355200565611114627670089130772843e-0003 + * GP2= 7.93650787486083724805476194170211775784158551509e-0004 + * GP3= -5.95236628558314928757811419580281294593903582971e-0004 + * GP4= 8.41566473999853451983137162780427812781178932540e-0004 + * GP5= -1.90424776670441373564512942038926168175921303212e-0003 + * GP6= 
5.84933161530949666312333949534482303007354299178e-0003 + * GP7= -1.59453228931082030262124832506144392496561694550e-0002 + * single precision + * |GP(s) - s*P(s^2)| <= 2**(-37.78), where + * 3 5 + * GP(s) = GP0*s+GP1*s +GP2*s + * GP0 = 8.33333330959694065245736888749042811909994573178e-0002 + * GP1 = -2.77765545601667179767706600890361535225507762168e-0003 + * GP2 = 7.77830853479775281781085278324621033523037489883e-0004 + * + * + * Implementation note: + * z = (1/x), z2 = z*z, z4 = z2*z2; + * p = z*(GP0+z2*(GP1+....+z2*GP7)) + * = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7))))) + * + * Adding everything up: + * t = rr.h*ww.h+hln2pi_h ... exact + * w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p + * + * Computing exp(t+w): + * s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then + * exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where + * expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and + * 2**(j/32) is obtained by table look-up S[j]+S_trail[j]. + * Remez error bound: + * |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63). 
+ */ + +#include "libm.h" + +#define __HI(x) ((int *) &x)[HIWORD] +#define __LO(x) ((unsigned *) &x)[LOWORD] + +struct Double { + double h; + double l; +}; + +/* Hex value of GP0 shoule be 3FB55555 55555555 */ +static const double c[] = { + +1.0, + +2.0, + +0.5, + +1.0e-300, + +6.66666666666666740682e-01, /* A1=T3[0] */ + +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */ + +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */ + +0.0833333333333333287074040640618477, /* GP[0] */ + -2.77777777776649355200565611114627670089130772843e-03, + +7.93650787486083724805476194170211775784158551509e-04, + -5.95236628558314928757811419580281294593903582971e-04, + +8.41566473999853451983137162780427812781178932540e-04, + -1.90424776670441373564512942038926168175921303212e-03, + +5.84933161530949666312333949534482303007354299178e-03, + -1.59453228931082030262124832506144392496561694550e-02, + +4.18937683105468750000e-01, /* hln2pi_h */ + +8.50099203991780279640e-07, /* hln2pi_l */ + +4.18938533204672741744150788368695779923320328369e-01, /* hln2pi */ + +2.16608493865351192653e-02, /* ln2_32hi */ + +5.96317165397058656257e-12, /* ln2_32lo */ + +4.61662413084468283841e+01, /* invln2_32 */ + +5.0000000000000000000e-1, /* Et1 */ + +1.66666666665223585560605991943703896196054020060e-01, /* Et2 */ + +4.16666666665895103520154073534275286743788421687e-02, /* Et3 */ + +8.33336844093536520775865096538773197505523826029e-03, /* Et4 */ + +1.38889201930843436040204096950052984793587640227e-03, /* Et5 */ +}; + +#define one c[0] +#define two c[1] +#define half c[2] +#define tiny c[3] +#define A1 c[4] +#define A2 c[5] +#define A3 c[6] +#define GP0 c[7] +#define GP1 c[8] +#define GP2 c[9] +#define GP3 c[10] +#define GP4 c[11] +#define GP5 c[12] +#define GP6 c[13] +#define GP7 c[14] +#define hln2pi_h c[15] +#define hln2pi_l c[16] +#define hln2pi c[17] +#define ln2_32hi c[18] +#define ln2_32lo c[19] +#define invln2_32 c[20] +#define Et1 c[21] +#define Et2 c[22] +#define Et3 
c[23] +#define Et4 c[24] +#define Et5 c[25] + +/* + * double precision coefficients for computing log(x)-1 in tgamma. + * See "algorithm" for details + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + * = 2s + A1*s^3 + A2*s^5 + A3*s^7 (see const A1,A2,A3) + * Note + * (1) the leading entries are truncated to 24 binary point. + * See Remezpak/sun/tgamma_log_64.c + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * See mpremez/work/Log/tgamma_log_4_outr2 + */ + +static const double T1[] = { + -1.00000000000000000000e+00, /* 0xBFF00000 0x00000000 */ + +0.00000000000000000000e+00, /* 0x00000000 0x00000000 */ + -3.06852817535400390625e-01, /* 0xBFD3A37A 0x00000000 */ + -1.90465429995776763166e-09, /* 0xBE205C61 0x0CA86C38 */ + +3.86294305324554443359e-01, /* 0x3FD8B90B 0xC0000000 */ + +5.57953361754750897367e-08, /* 0x3E6DF473 0xDE6AF279 */ + +1.07944148778915405273e+00, /* 0x3FF14564 0x70000000 */ + +5.38906818755173187963e-08, /* 0x3E6CEEAD 0xCDA06BB5 */ + +1.77258867025375366211e+00, /* 0x3FFC5C85 0xF0000000 */ + +5.19860275755595544734e-08, /* 0x3E6BE8E7 0xBCD5E4F2 */ + +2.46573585271835327148e+00, /* 0x4003B9D3 0xB8000000 */ + +5.00813732756017835330e-08, /* 0x3E6AE321 0xAC0B5E2E */ + +3.15888303518295288086e+00, /* 0x40094564 0x78000000 */ + +4.81767189756440192100e-08, /* 0x3E69DD5B 0x9B40D76B */ + +3.85203021764755249023e+00, /* 0x400ED0F5 0x38000000 */ + +4.62720646756862482697e-08, /* 0x3E68D795 0x8A7650A7 */ + +4.54517740011215209961e+00, /* 0x40122E42 0xFC000000 */ + +4.43674103757284839467e-08, /* 0x3E67D1CF 0x79ABC9E4 */ + +5.23832458257675170898e+00, /* 0x4014F40B 0x5C000000 */ + +4.24627560757707130063e-08, /* 0x3E66CC09 0x68E14320 */ + +5.93147176504135131836e+00, /* 0x4017B9D3 0xBC000000 */ + +4.05581017758129486834e-08, /* 0x3E65C643 
0x5816BC5D */ +}; + +static const double T2[] = { + +7.78210163116455078125e-03, /* 0x3F7FE020 0x00000000 */ + +3.88108903981662140884e-08, /* 0x3E64D620 0xCF11F86F */ + +2.31670141220092773438e-02, /* 0x3F97B918 0x00000000 */ + +4.51595251008850513740e-08, /* 0x3E683EAD 0x88D54940 */ + +3.83188128471374511719e-02, /* 0x3FA39E86 0x00000000 */ + +5.14549991480218823411e-08, /* 0x3E6B9FEB 0xD5FA9016 */ + +5.32444715499877929688e-02, /* 0x3FAB42DC 0x00000000 */ + +4.29688244898971182165e-08, /* 0x3E671197 0x1BEC28D1 */ + +6.79506063461303710938e-02, /* 0x3FB16536 0x00000000 */ + +5.55623773783008185114e-08, /* 0x3E6DD46F 0x5C1D0C4C */ + +8.24436545372009277344e-02, /* 0x3FB51B07 0x00000000 */ + +1.46738736635337847313e-08, /* 0x3E4F830C 0x1FB493C7 */ + +9.67295765876770019531e-02, /* 0x3FB8C345 0x00000000 */ + +4.98708741103424492282e-08, /* 0x3E6AC633 0x641EB597 */ + +1.10814332962036132812e-01, /* 0x3FBC5E54 0x00000000 */ + +3.33782539813823062226e-08, /* 0x3E61EB78 0xE862BAC3 */ + +1.24703466892242431641e-01, /* 0x3FBFEC91 0x00000000 */ + +1.16087148042227818450e-08, /* 0x3E48EDF5 0x5D551729 */ + +1.38402283191680908203e-01, /* 0x3FC1B72A 0x80000000 */ + +3.96674382274822001957e-08, /* 0x3E654BD9 0xE80A4181 */ + +1.51916027069091796875e-01, /* 0x3FC371FC 0x00000000 */ + +1.49567501781968021494e-08, /* 0x3E500F47 0xBA1DE6CB */ + +1.65249526500701904297e-01, /* 0x3FC526E5 0x80000000 */ + +4.63946052585787334062e-08, /* 0x3E68E86D 0x0DE8B900 */ + +1.78407609462738037109e-01, /* 0x3FC6D60F 0x80000000 */ + +4.80100802600100279538e-08, /* 0x3E69C674 0x8723551E */ + +1.91394805908203125000e-01, /* 0x3FC87FA0 0x00000000 */ + +4.70914263296092971436e-08, /* 0x3E694832 0x44240802 */ + +2.04215526580810546875e-01, /* 0x3FCA23BC 0x00000000 */ + +1.48478803446288209001e-08, /* 0x3E4FE2B5 0x63193712 */ + +2.16873884201049804688e-01, /* 0x3FCBC286 0x00000000 */ + +5.40995645549315919488e-08, /* 0x3E6D0B63 0x358A7E74 */ + +2.29374051094055175781e-01, /* 0x3FCD5C21 0x00000000 */ + 
+4.99707906542102284117e-08, /* 0x3E6AD3EE 0xE456E443 */ + +2.41719901561737060547e-01, /* 0x3FCEF0AD 0x80000000 */ + +3.53254081075974352804e-08, /* 0x3E62F716 0x4D948638 */ + +2.53915190696716308594e-01, /* 0x3FD04025 0x80000000 */ + +1.92842471355435739091e-08, /* 0x3E54B4D0 0x40DAE27C */ + +2.65963494777679443359e-01, /* 0x3FD1058B 0xC0000000 */ + +5.37194584979797487125e-08, /* 0x3E6CD725 0x6A8C4FD0 */ + +2.77868449687957763672e-01, /* 0x3FD1C898 0xC0000000 */ + +1.31549854251447496506e-09, /* 0x3E16999F 0xAFBC68E7 */ + +2.89633274078369140625e-01, /* 0x3FD2895A 0x00000000 */ + +1.85046735362538929911e-08, /* 0x3E53DE86 0xA35EB493 */ + +3.01261305809020996094e-01, /* 0x3FD347DD 0x80000000 */ + +2.47691407849191245052e-08, /* 0x3E5A987D 0x54D64567 */ + +3.12755703926086425781e-01, /* 0x3FD40430 0x80000000 */ + +6.07781046260499658610e-09, /* 0x3E3A1A9F 0x8EF4304A */ + +3.24119448661804199219e-01, /* 0x3FD4BE5F 0x80000000 */ + +1.99924077768719198045e-08, /* 0x3E557778 0xA0DB4C99 */ + +3.35355520248413085938e-01, /* 0x3FD57677 0x00000000 */ + +2.16727247443196802771e-08, /* 0x3E57455A 0x6C549AB7 */ + +3.46466720104217529297e-01, /* 0x3FD62C82 0xC0000000 */ + +4.72419910516215900493e-08, /* 0x3E695CE3 0xCA97B7B0 */ + +3.57455849647521972656e-01, /* 0x3FD6E08E 0x80000000 */ + +3.92742818015697624778e-08, /* 0x3E6515D0 0xF1C609CA */ + +3.68325531482696533203e-01, /* 0x3FD792A5 0x40000000 */ + +2.96760111198451042238e-08, /* 0x3E5FDD47 0xA27C15DA */ + +3.79078328609466552734e-01, /* 0x3FD842D1 0xC0000000 */ + +2.43255029056564770289e-08, /* 0x3E5A1E8B 0x17493B14 */ + +3.89716744422912597656e-01, /* 0x3FD8F11E 0x80000000 */ + +6.71711261571421332726e-09, /* 0x3E3CD98B 0x1DF85DA7 */ + +4.00243163108825683594e-01, /* 0x3FD99D95 0x80000000 */ + +1.01818702333557515008e-09, /* 0x3E117E08 0xACBA92EF */ + +4.10659909248352050781e-01, /* 0x3FDA4840 0x80000000 */ + +1.57369163351530571459e-08, /* 0x3E50E5BB 0x0A2BFCA7 */ + +4.20969247817993164062e-01, /* 0x3FDAF129 
0x00000000 */ + +4.68261364720663662040e-08, /* 0x3E6923BC 0x358899C2 */ + +4.31173443794250488281e-01, /* 0x3FDB9858 0x80000000 */ + +2.10241208525779214510e-08, /* 0x3E569310 0xFB598FB1 */ + +4.41274523735046386719e-01, /* 0x3FDC3DD7 0x80000000 */ + +3.70698288427707487748e-08, /* 0x3E63E6D6 0xA6B9D9E1 */ + +4.51274633407592773438e-01, /* 0x3FDCE1AF 0x00000000 */ + +1.07318658117071930723e-08, /* 0x3E470BE7 0xD6F6FA58 */ + +4.61175680160522460938e-01, /* 0x3FDD83E7 0x00000000 */ + +3.49616477054305011286e-08, /* 0x3E62C517 0x9F2828AE */ + +4.70979690551757812500e-01, /* 0x3FDE2488 0x00000000 */ + +2.46670332000468969567e-08, /* 0x3E5A7C6C 0x261CBD8F */ + +4.80688512325286865234e-01, /* 0x3FDEC399 0xC0000000 */ + +1.70204650424422423704e-08, /* 0x3E52468C 0xC0175CEE */ + +4.90303933620452880859e-01, /* 0x3FDF6123 0xC0000000 */ + +5.44247409572909703749e-08, /* 0x3E6D3814 0x5630A2B6 */ + +4.99827861785888671875e-01, /* 0x3FDFFD2E 0x00000000 */ + +7.77056065794633071345e-09, /* 0x3E40AFE9 0x30AB2FA0 */ + +5.09261846542358398438e-01, /* 0x3FE04BDF 0x80000000 */ + +5.52474495483665749052e-08, /* 0x3E6DA926 0xD265FCC1 */ + +5.18607735633850097656e-01, /* 0x3FE0986F 0x40000000 */ + +2.85741955344967264536e-08, /* 0x3E5EAE6A 0x41723FB5 */ + +5.27867078781127929688e-01, /* 0x3FE0E449 0x80000000 */ + +1.08397144554263914271e-08, /* 0x3E474732 0x2FDBAB97 */ + +5.37041425704956054688e-01, /* 0x3FE12F71 0x80000000 */ + +4.01919275998792285777e-08, /* 0x3E6593EF 0xBC530123 */ + +5.46132385730743408203e-01, /* 0x3FE179EA 0xA0000000 */ + +5.18673922421792693237e-08, /* 0x3E6BD899 0xA0BFC60E */ + +5.55141448974609375000e-01, /* 0x3FE1C3B8 0x00000000 */ + +5.85658922177154808539e-08, /* 0x3E6F713C 0x24BC94F9 */ + +5.64070105552673339844e-01, /* 0x3FE20CDC 0xC0000000 */ + +3.27321296262276338905e-08, /* 0x3E6192AB 0x6D93503D */ + +5.72919726371765136719e-01, /* 0x3FE2555B 0xC0000000 */ + +2.71900203723740076878e-08, /* 0x3E5D31EF 0x96780876 */ + +5.81691682338714599609e-01, /* 
0x3FE29D37 0xE0000000 */ + +5.72959078829112371070e-08, /* 0x3E6EC2B0 0x8AC85CD7 */ + +5.90387403964996337891e-01, /* 0x3FE2E474 0x20000000 */ + +4.26371800367512948470e-08, /* 0x3E66E402 0x68405422 */ + +5.99008142948150634766e-01, /* 0x3FE32B13 0x20000000 */ + +4.66979327646159769249e-08, /* 0x3E69121D 0x71320557 */ + +6.07555210590362548828e-01, /* 0x3FE37117 0xA0000000 */ + +3.96341792466729582847e-08, /* 0x3E654747 0xB5C5DD02 */ + +6.16029858589172363281e-01, /* 0x3FE3B684 0x40000000 */ + +1.86263416563663175432e-08, /* 0x3E53FFF8 0x455F1DBE */ + +6.24433279037475585938e-01, /* 0x3FE3FB5B 0x80000000 */ + +8.97441791510503832111e-09, /* 0x3E4345BD 0x096D3A75 */ + +6.32766664028167724609e-01, /* 0x3FE43F9F 0xE0000000 */ + +5.54287010493641158796e-09, /* 0x3E37CE73 0x3BD393DD */ + +6.41031146049499511719e-01, /* 0x3FE48353 0xC0000000 */ + +3.33714317793368531132e-08, /* 0x3E61EA88 0xDF73D5E9 */ + +6.49227917194366455078e-01, /* 0x3FE4C679 0xA0000000 */ + +2.94307433638127158696e-08, /* 0x3E5F99DC 0x7362D1DA */ + +6.57358050346374511719e-01, /* 0x3FE50913 0xC0000000 */ + +2.23619855184231409785e-08, /* 0x3E5802D0 0xD6979675 */ + +6.65422618389129638672e-01, /* 0x3FE54B24 0x60000000 */ + +1.41559608102782173188e-08, /* 0x3E4E6652 0x5EA4550A */ + +6.73422634601593017578e-01, /* 0x3FE58CAD 0xA0000000 */ + +4.06105737027198329700e-08, /* 0x3E65CD79 0x893092F2 */ + +6.81359171867370605469e-01, /* 0x3FE5CDB1 0xC0000000 */ + +5.29405324634793230630e-08, /* 0x3E6C6C17 0x648CF6E4 */ + +6.89233243465423583984e-01, /* 0x3FE60E32 0xE0000000 */ + +3.77733853963405370102e-08, /* 0x3E644788 0xD8CA7C89 */ +}; + +/* S[j],S_trail[j] = 2**(j/32.) 
for the final computation of exp(t+w) */ +static const double S[] = { + +1.00000000000000000000e+00, /* 3FF0000000000000 */ + +1.02189714865411662714e+00, /* 3FF059B0D3158574 */ + +1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ + +1.06714040067682369717e+00, /* 3FF11301D0125B51 */ + +1.09050773266525768967e+00, /* 3FF172B83C7D517B */ + +1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ + +1.13878863475669156458e+00, /* 3FF2387A6E756238 */ + +1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ + +1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ + +1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ + +1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ + +1.26905095719173321989e+00, /* 3FF44E086061892D */ + +1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ + +1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ + +1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ + +1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ + +1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ + +1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ + +1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ + +1.50916442759342284141e+00, /* 3FF82589994CCE13 */ + +1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ + +1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ + +1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ + +1.64575547815396494578e+00, /* 3FFA5503B23E255D */ + +1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ + +1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ + +1.75625216037329945351e+00, /* 3FFC199BDD85529C */ + +1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ + +1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ + +1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ + +1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ + +1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; + +static const double S_trail[] = { + +0.00000000000000000000e+00, + +5.10922502897344389359e-17, /* 3C8D73E2A475B465 */ + +8.55188970553796365958e-17, /* 3C98A62E4ADC610A */ + 
-7.89985396684158212226e-17, /* BC96C51039449B3A */ + -3.04678207981247114697e-17, /* BC819041B9D78A76 */ + +1.04102784568455709549e-16, /* 3C9E016E00A2643C */ + +8.91281267602540777782e-17, /* 3C99B07EB6C70573 */ + +3.82920483692409349872e-17, /* 3C8612E8AFAD1255 */ + +3.98201523146564611098e-17, /* 3C86F46AD23182E4 */ + -7.71263069268148813091e-17, /* BC963AEABF42EAE2 */ + +4.65802759183693679123e-17, /* 3C8ADA0911F09EBC */ + +2.66793213134218609523e-18, /* 3C489B7A04EF80D0 */ + +2.53825027948883149593e-17, /* 3C7D4397AFEC42E2 */ + -2.85873121003886075697e-17, /* BC807ABE1DB13CAC */ + +7.70094837980298946162e-17, /* 3C96324C054647AD */ + -6.77051165879478628716e-17, /* BC9383C17E40B497 */ + -9.66729331345291345105e-17, /* BC9BDD3413B26456 */ + -3.02375813499398731940e-17, /* BC816E4786887A99 */ + -3.48399455689279579579e-17, /* BC841577EE04992F */ + -1.01645532775429503911e-16, /* BC9D4C1DD41532D8 */ + +7.94983480969762085616e-17, /* 3C96E9F156864B27 */ + -1.01369164712783039808e-17, /* BC675FC781B57EBC */ + +2.47071925697978878522e-17, /* 3C7C7C46B071F2BE */ + -1.01256799136747726038e-16, /* BC9D2F6EDB8D41E1 */ + +8.19901002058149652013e-17, /* 3C97A1CD345DCC81 */ + -1.85138041826311098821e-17, /* BC75584F7E54AC3B */ + +2.96014069544887330703e-17, /* 3C811065895048DD */ + +1.82274584279120867698e-17, /* 3C7503CBD1E949DB */ + +3.28310722424562658722e-17, /* 3C82ED02D75B3706 */ + -6.12276341300414256164e-17, /* BC91A5CD4F184B5C */ + -1.06199460561959626376e-16, /* BC9E9C23179C2893 */ + +8.96076779103666776760e-17, /* 3C99D3E12DD8A18B */ +}; + +/* Primary interval GTi() */ +static const double cr[] = { +/* p1, q1 */ + +0.70908683619977797008004927192814648151397705078125000, + +1.71987061393048558089579513384356441668351720061e-0001, + -3.19273345791990970293320316122813960527705450671e-0002, + +8.36172645419110036267169600390549973563534476989e-0003, + +1.13745336648572838333152213474277971244629758101e-0003, + +1.0, + 
+9.71980217826032937526460731778472389791321968082e-0001, + -7.43576743326756176594084137256042653497087666030e-0002, + -1.19345944932265559769719470515102012246995255372e-0001, + +1.59913445751425002620935120470781382215050284762e-0002, + +1.12601136853374984566572691306402321911547550783e-0003, +/* p2, q2 */ + +0.42848681585558601181418225678498856723308563232421875, + +6.53596762668970816023718845105667418483122103629e-0002, + -6.97280829631212931321050770925128264272768936731e-0003, + +6.46342359021981718947208605674813260166116632899e-0003, + +1.0, + +4.57572620560506047062553957454062012327519313936e-0001, + -2.52182594886075452859655003407796103083422572036e-0001, + -1.82970945407778594681348166040103197178711552827e-0002, + +2.43574726993169566475227642128830141304953840502e-0002, + -5.20390406466942525358645957564897411258667085501e-0003, + +4.79520251383279837635552431988023256031951133885e-0004, +/* p3, q3 */ + +0.382409479734567459008331979930517263710498809814453125, + +1.42876048697668161599069814043449301572928034140e-0001, + +3.42157571052250536817923866013561760785748899071e-0003, + -5.01542621710067521405087887856991700987709272937e-0004, + +8.89285814866740910123834688163838287618332122670e-0004, + +1.0, + +3.04253086629444201002215640948957897906299633168e-0001, + -2.23162407379999477282555672834881213873185520006e-0001, + -1.05060867741952065921809811933670131427552903636e-0002, + +1.70511763916186982473301861980856352005926669320e-0002, + -2.12950201683609187927899416700094630764182477464e-0003, +}; + +#define P10 cr[0] +#define P11 cr[1] +#define P12 cr[2] +#define P13 cr[3] +#define P14 cr[4] +#define Q10 cr[5] +#define Q11 cr[6] +#define Q12 cr[7] +#define Q13 cr[8] +#define Q14 cr[9] +#define Q15 cr[10] +#define P20 cr[11] +#define P21 cr[12] +#define P22 cr[13] +#define P23 cr[14] +#define Q20 cr[15] +#define Q21 cr[16] +#define Q22 cr[17] +#define Q23 cr[18] +#define Q24 cr[19] +#define Q25 cr[20] +#define Q26 cr[21] +#define P30 cr[22] 
+#define P31 cr[23] +#define P32 cr[24] +#define P33 cr[25] +#define P34 cr[26] +#define Q30 cr[27] +#define Q31 cr[28] +#define Q32 cr[29] +#define Q33 cr[30] +#define Q34 cr[31] +#define Q35 cr[32] + +static const double + GZ1_h = +0.938204627909682398190, + GZ1_l = +5.121952600248205157935e-17, + GZ2_h = +0.885603194410888749921, + GZ2_l = -4.964236872556339810692e-17, + GZ3_h = +0.936781411463652347038, + GZ3_l = -2.541923110834479415023e-17, + TZ1 = -0.3517214357852935791015625, + TZ3 = +0.280530631542205810546875; +/* INDENT ON */ + +/* compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] */ +/* assume yh got 20 significant bits */ +static struct Double +GT1(double yh, double yl) { + double t3, t4, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) / + (Q10 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15))); + t3 += (TZ1 * yl + GZ1_l); + t4 = TZ1 * yh; + r.h = (double) ((float) (t4 + GZ1_h + t3)); + t3 += (t4 - (r.h - GZ1_h)); + r.l = t3; + return (r); +} + +/* compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] */ +/* assume yh got 20 significant bits */ +static struct Double +GT2(double yh, double yl) { + double t3, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) / + (Q20 + (y * ((Q21 + Q22 * y) + z * Q23) + + (z * z) * ((Q24 + Q25 * y) + z * Q26))) + GZ2_l; + r.h = (double) ((float) (GZ2_h + t3)); + r.l = t3 - (r.h - GZ2_h); + return (r); +} + +/* compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] */ +/* assume yh got 20 significant bits */ +static struct Double +GT3(double yh, double yl) { + double t3, t4, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) / + (Q30 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35))); + t3 += (TZ3 * yl + GZ3_l); + t4 = TZ3 * yh; + r.h = (double) ((float) (t4 + GZ3_h + t3)); + t3 += (t4 - (r.h - GZ3_h)); + 
r.l = t3; + return (r); +} + +/* INDENT OFF */ +/* + * return tgamma(x) scaled by 2**-m for 8> 20) - 0x3ff; /* exponent of x, range:3-7 */ + n2 += n2; /* 2n */ + ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ + __HI(y) = ix; + __LO(y) = lx; + __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */ + __LO(z) = 0; + j2 = (ix >> 13) & 0x7e; /* 2j */ + t1 = y + z; + t2 = y - z; + r = one / t1; + t1 = (double) ((float) t1); + u = r * t2; /* u = (y-z)/(y+z) */ + t4 = T2[j2 + 1] + T1[n2 + 1]; + z2 = u * u; + k = __HI(u) & 0x7fffffff; + t3 = T2[j2] + T1[n2]; + if ((k >> 20) < 0x3ec) { /* |u|<2**-19 */ + t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3)); + } else { + t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3)); + u2 = u + u; + v = (double) ((int) (u2 * t24)) * p24; + t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z))); + t3 += v; + } + ss_h = (double) ((float) (t2 + t3)); + ss_l = t2 - (ss_h - t3); + + /* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2))) + * where ss = log(x) - 1 in already in extra precision + */ + z = one / x; + r = x - half; + r_h = (double) ((float) r); + w_h = r_h * ss_h + hln2pi_h; + z2 = z * z; + w = (r - r_h) * ss_h + r * ss_l; + z4 = z2 * z2; + t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7))); + t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6)); + t1 += t2; + w += hln2pi_l; + w_l = z * (GP0 + t1) + w; + k = (int) ((w_h + w_l) * invln2_32 + half); + + /* compute the exponential of w_h+w_l */ + j = k & 0x1f; + *m = (k >> 5); + t3 = (double) k; + + /* perform w - k*ln2_32 (represent as w_h - w_l) */ + t1 = w_h - t3 * ln2_32hi; + t2 = t3 * ln2_32lo; + w = w_l - t2; + w_h = t1 + w_l; + w_l = t2 - (w_l - (w_h - t1)); + + /* compute exp(w_h+w_l) */ + z = w_h - w_l; + z2 = z * z; + t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5)); + t2 = z2 * (Et2 + z2 * Et4); + t3 = w_h - (w_l - (t1 + z * t2)); + zz.l = S_trail[j] * (one + t3) + S[j] * t3; + zz.h = S[j]; + return (zz); +} + +/* INDENT OFF */ +/* + * 
/*
 * kpsin(x)= sin(pi*x)/pi
 *                 3        5        7        9        11       13       15
 *	= x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x +ks[5]*x +ks[6]*x
 *
 * ks[i] is the coefficient of x**(2i+3).
 */
static const double ks[] = {
	-1.64493406684822640606569,
	+8.11742425283341655883668741874008920850698590621e-0001,
	-1.90751824120862873825597279118304943994042258291e-0001,
	+2.61478477632554278317289628332654539353521911570e-0002,
	-2.34607978510202710377617190278735525354347705866e-0003,
	+1.48413292290051695897242899977121846763824221705e-0004,
	-6.87730769637543488108688726777687262485357072242e-0006,
};
/* INDENT ON */

/*
 * Return sin(pi*x)/pi as a head/tail pair: the head xx.h is x itself and
 * the tail xx.l holds every higher-order term of the series above.
 *
 * assume x is not tiny and positive
 *
 * NOTE(review): struct Double is declared earlier in the file; from the
 * way it is used here it carries two doubles, h and l -- confirm there.
 */
static struct Double
kpsin(double x) {
	double z, t1, t2, t3, t4;
	struct Double xx;

	z = x * x;
	xx.h = x;			/* leading term, no rounding */
	t1 = z * x;			/* x**3 */
	t2 = z * z;			/* x**4 */
	t4 = t1 * ks[0];		/* largest correction: ks[0]*x**3 */
	/* x**5 * (ks[1] + ks[2]*x**2 + ... + ks[6]*x**10), split in two */
	t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) *
		(ks[4] + z * ks[5] + t2 * ks[6]));
	xx.l = t4 + t3;
	return (xx);
}

/* INDENT OFF */
/*
 * kpcos(x)= cos(pi*x)/pi
 *                    2        4        6        8        10       12
 *	= 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x
 *
 * 1/pi is carried as one_pi_h + one_pi_l.  kc[0] (about -pi/2) is not
 * read by kpcos(); the function uses the split npi_2_h + npi_2_l in its
 * place so that the x**2 term can be formed with little rounding error.
 */

static const double one_pi_h = 0.318309886183790635705292970,
		one_pi_l = 3.583247455607534006714276420e-17;
static const double npi_2_h = -1.5625,
		npi_2_l = -0.00829632679489661923132169163975055099555883223;
static const double kc[] = {
	-1.57079632679489661923132169163975055099555883223e+0000,
	+1.29192819501230224953283586722575766189551966008e+0000,
	-4.25027339940149518500158850753393173519732149213e-0001,
	+7.49080625187015312373925142219429422375556727752e-0002,
	-8.21442040906099210866977352284054849051348692715e-0003,
	+6.10411356829515414575566564733632532333904115968e-0004,
};
/* INDENT ON */

/*
 * Return cos(pi*x)/pi as a head/tail pair: the head xx.h is one_pi_h and
 * the tail xx.l collects one_pi_l plus all x-dependent terms.
 *
 * The x**2 term is built from t1, x rounded to float precision, using
 *	x*x = t1*t1 + (x + t1)*(x - t1)
 * so that npi_2_h*t1*t1 is formed without rounding and is added last.
 * The statement order below is part of the error analysis; do not
 * reassociate.
 *
 * assume x is not tiny and positive
 */
static struct Double
kpcos(double x) {
	double z, t1, t2, t3, t4, x4, x8;
	struct Double xx;

	z = x * x;
	xx.h = one_pi_h;
	t1 = (double) ((float) x);	/* x chopped to float precision */
	x4 = z * z;
	/* low-order part of (npi_2_h + npi_2_l) * x**2 */
	t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
	/* one_pi_l + x**4 * (kc[1] + kc[2]*x**2 + ... + kc[5]*x**8) */
	t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z *
		kc[4] + x4 * kc[5]));
	t4 = t1 * t1;		/* 48 bits mantissa */
	x8 = t2 + t3;		/* all the small terms */
	t4 *= npi_2_h;		/* npi_2_h is a 5-bit const, so the product is exact */
	xx.l = x8 + t4;		/* adding the exact term last minimizes the rounding error in xx.l */
	return (xx);
}
x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; + if (ix >= 0x43300000) { + if (ix >= 0x43400000) + xk = -2; + else + xk = -2 + (lx & 1); + } else if (ix >= 0x3ff00000) { + k = (ix >> 20) - 0x3ff; + if (k > 20) { + j = lx >> (52 - k); + if ((j << (52 - k)) == lx) + xk = -2 + (j & 1); + else + xk = j & 1; + } else { + j = ix >> (20 - k); + if ((j << (20 - k)) == ix && lx == 0) + xk = -2 + (j & 1); + else + xk = j & 1; + } + } + if (xk < 0) + /* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */ + return ((x - x) / (x - x)); /* 0/0 = NaN */ + + + /* negative underflow thresold */ + if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) { + /* x < -183.0 - 11ulp */ + z = tiny / x; + if (xk == 1) + z = -z; + return (z * tiny); + } + + /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ + + /* + * First compute ss = -sin(pi*y)/pi , so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + y = -x; + j = (int) y; + z = y - (double) j; + if (z > 0.3183098861837906715377675) + if (z > 0.6816901138162093284622325) + ss = kpsin(one - z); + else + ss = kpcos(0.5 - z); + else + ss = kpsin(z); + if (xk == 0) { + ss.h = -ss.h; + ss.l = -ss.l; + } + + /* Then compute ww = gamma(1+y), note that result scale to 2**m */ + m = 0; + if (j < 7) { + ww = gam_n(j + 1, z); + } else { + w = y + one; + if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */ + ww = large_gam(w, &m); + } else { + t = w - one; + if (t == y) { /* y+one exact */ + ww = large_gam(w, &m); + } else { /* use y*gamma(y) */ + if (j == 7) + ww = gam_n(j, z); + else + ww = large_gam(y, &m); + t4 = ww.h + ww.l; + t1 = (double) ((float) y); + t2 = (double) ((float) t4); + /* t4 will not be too large */ + ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2; + ww.h = t1 * t2; + } + } + } + + /* compute 1/(ss*ww) */ + t3 = ss.h + ss.l; + t4 = ww.h + ww.l; + t1 = (double) ((float) t3); + t2 = (double) ((float) t4); + z1 = ss.l - (t1 - 
ss.h); /* (t1,z1) = ss */ + z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */ + t3 = t3 * t4; /* t3 = ss*ww */ + z3 = one / t3; /* z3 = 1/(ss*ww) */ + t5 = t1 * t2; + z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */ + t1 = (double) ((float) t3); /* (t1,z1) = ss*ww */ + z1 = z5 - (t1 - t5); + t2 = (double) ((float) z3); /* leading 1/(ss*ww) */ + z2 = z3 * (t2 * z1 - (one - t2 * t1)); + z = t2 - z2; + + /* check whether z*2**-m underflow */ + if (m != 0) { + hx = __HI(z); + i = hx & 0x80000000; + ix = hx ^ i; + j = ix >> 20; + if (j > m) { + ix -= m << 20; + __HI(z) = ix ^ i; + } else if ((m - j) > 52) { + /* underflow */ + if (xk == 0) + z = -tiny * tiny; + else + z = tiny * tiny; + } else { + /* subnormal */ + m -= 60; + t = one; + __HI(t) -= 60 << 20; + ix -= m << 20; + __HI(z) = ix ^ i; + z *= t; + } + } + return (z); +} diff --git a/usr/src/libm/src/m9x/tgammaf.c b/usr/src/libm/src/m9x/tgammaf.c new file mode 100644 index 0000000..538cf89 --- /dev/null +++ b/usr/src/libm/src/m9x/tgammaf.c @@ -0,0 +1,545 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * Word-level access to an IEEE 754 double.
 *
 * __HI(x) is the 32-bit word holding the sign, the exponent and the top
 * 20 mantissa bits; __LO(x) is the word holding the low 32 mantissa bits.
 * Both expand to lvalues (this file both reads and writes them).
 *
 * Only the order of the two words in memory matters, so targets other than
 * sparc/i386 are classified by the compiler's byte-order macros instead of
 * being rejected outright.
 *
 * NOTE(review): assumes int and unsigned are 32 bits wide, and relies on
 * the compiler tolerating this int/double type punning -- unchanged from
 * the original macros.
 */
#if defined(__sparc)
#define	HIWORD	0
#define	LOWORD	1
#elif defined(__i386)
#define	HIWORD	1
#define	LOWORD	0
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
	(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define	HIWORD	0
#define	LOWORD	1
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
	(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define	HIWORD	1
#define	LOWORD	0
#else
#error Unknown architecture
#endif
#define	__HI(x)	(((int *) &(x))[HIWORD])
#define	__LO(x)	(((unsigned *) &(x))[LOWORD])

/* Coefficients for primary intervals GTi() */
static const double cr[] = {
	/* p1 */
	+7.09087253435088360271451613398019280077561279443e-0001,
	-5.17229560788652108545141978238701790105241761089e-0001,
	+5.23403394528150789405825222323770647162337764327e-0001,
	-4.54586308717075010784041566069480411732634814899e-0001,
	+4.20596490915239085459964590559256913498190955233e-0001,
	-3.57307589712377520978332185838241458642142185789e-0001,

	/* p2 */
	+4.28486983980295198166056119223984284434264344578e-0001,
	-1.30704539487709138528680121627899735386650103914e-0001,
	+1.60856285038051955072861219352655851542955430871e-0001,
	-9.22285161346010583774458802067371182158937943507e-0002,
	+7.19240511767225260740890292605070595560626179357e-0002,
	-4.88158265593355093703112238534484636193260459574e-0002,

	/* p3 */
	+3.82409531118807759081121479786092134814808872880e-0001,
	+2.65309888180188647956400403013495759365167853426e-0002,
	+8.06815109775079171923561169415370309376296739835e-0002,
	-1.54821591666137613928840890835174351674007764799e-0002,
	+1.76308239242717268530498313416899188157165183405e-0002,

	/* GZi and TZi */
	+0.9382046279096824494097535615803269576988,	/* GZ1 */
	+0.8856031944108887002788159005825887332080,	/* GZ2 */
	+0.9367814114636523216188468970808378497426,	/* GZ3 */
	-0.3517214357852935791015625,			/* TZ1 */
	+0.280530631542205810546875,			/* TZ3 */
};

/* Pij is coefficient j of the polynomial used by GTi(). */
#define	P10	cr[0]
#define	P11	cr[1]
#define	P12	cr[2]
#define	P13	cr[3]
#define	P14	cr[4]
#define	P15	cr[5]
#define	P20	cr[6]
#define	P21	cr[7]
#define	P22	cr[8]
#define	P23	cr[9]
#define	P24	cr[10]
#define	P25	cr[11]
#define	P30	cr[12]
#define	P31	cr[13]
#define	P32	cr[14]
#define	P33	cr[15]
#define	P34	cr[16]
/* GZi is the value GTi() returns at y = 0; TZi is its linear coefficient. */
#define	GZ1	cr[17]
#define	GZ2	cr[18]
#define	GZ3	cr[19]
#define	TZ1	cr[20]
#define	TZ3	cr[21]

/*
 * compute gamma(y) for y in GT1 = [1.0000, 1.2845]
 *
 * The argument is an offset, not the gamma argument itself: the result is
 *	GZ1 + TZ1*y + y*y*(P10 + P11*y + ... + P15*y**5)
 * so GT1(0) is exactly GZ1.
 * NOTE(review): gam_n() in this file calls it as GT1(x - t0z1) to obtain
 * gamma(x+1); confirm the offset convention there before reusing.
 */
static double
GT1(double y) {
	double z, r;

	z = y * y;
	r = TZ1 * y + z * ((P10 + y * P11 + z * P12) + (z * y) * (P13 + y *
		P14 + z * P15));
	return (GZ1 + r);
}

/*
 * compute gamma(y) for y in GT2 = [1.2844, 1.6374]
 *
 * Same form as GT1() but with no linear term:
 *	GZ2 + y*y*(P20 + P21*y + ... + P25*y**5)
 */
static double
GT2(double y) {
	double z;

	z = y * y;
	return (GZ2 + z * ((P20 + y * P21 + z * P22) + (z * y) * (P23 + y *
		P24 + z * P25)));
}

/*
 * compute gamma(y) for y in GT3 = [1.6373, 2.0000]
 *
 * Same form as GT1() with one coefficient fewer:
 *	GZ3 + TZ3*y + y*y*(P30 + P31*y + ... + P34*y**4)
 */
static double
GT3(double y) {
	double z, r;

	z = y * y;
	r = TZ3 * y + z * ((P30 + y * P31 + z * P32) + (z * y) * (P33 + y *
		P34));
	return (GZ3 + r);
}

/* INDENT OFF */
/* Scalar constants; always referenced through the aliases defined below. */
static const double c[] = {
+1.0,
+2.0,
+0.5,
+1.0e-300,
+6.666717231848518054693623697539230e-0001,			/* A1=T3[0] */
+8.33333330959694065245736888749042811909994573178e-0002,	/* GP[0] */
-2.77765545601667179767706600890361535225507762168e-0003,	/* GP[1] */
+7.77830853479775281781085278324621033523037489883e-0004,	/* GP[2] */
+4.18938533204672741744150788368695779923320328369e-0001,	/* hln2pi */
+2.16608493924982901946e-02,					/* ln2_32 */
+4.61662413084468283841e+01,					/* invln2_32 */
+5.00004103388988968841156421415669985414073453720e-0001,	/* Et1 */
+1.66667656752800761782778277828110208108687545908e-0001,	/* Et2 */
};

#define	one		c[0]
#define	two		c[1]
#define	half		c[2]
#define	tiny		c[3]
#define	A1		c[4]
#define	GP0		c[5]
#define	GP1		c[6]
#define	GP2		c[7]
#define	hln2pi		c[8]
#define	ln2_32		c[9]
#define	invln2_32	c[10]
#define	Et1		c[11]
#define	Et2		c[12]
c[12] + +/* S[j] = 2**(j/32.) for the final computation of exp(w) */ +static const double S[] = { ++1.00000000000000000000e+00, /* 3FF0000000000000 */ ++1.02189714865411662714e+00, /* 3FF059B0D3158574 */ ++1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ ++1.06714040067682369717e+00, /* 3FF11301D0125B51 */ ++1.09050773266525768967e+00, /* 3FF172B83C7D517B */ ++1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ ++1.13878863475669156458e+00, /* 3FF2387A6E756238 */ ++1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ ++1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ ++1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ ++1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ ++1.26905095719173321989e+00, /* 3FF44E086061892D */ ++1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ ++1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ ++1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ ++1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ ++1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ ++1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ ++1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ ++1.50916442759342284141e+00, /* 3FF82589994CCE13 */ ++1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ ++1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ ++1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ ++1.64575547815396494578e+00, /* 3FFA5503B23E255D */ ++1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ ++1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ ++1.75625216037329945351e+00, /* 3FFC199BDD85529C */ ++1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ ++1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ ++1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ ++1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ ++1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * return tgammaf(x) in double for 8> 20) - 0x3ff; /* exponent of x, range:3-5 */ + ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ + __HI(y) = 
/* INDENT OFF */
/*
 * kpsin(x) = sin(pi*x)/pi, approximated by the odd polynomial
 *
 *	x + ks[0]*x**3 + ks[1]*x**5 + ks[2]*x**7 + ks[3]*x**9
 */
static const double ks[] = {
	[0] = -1.64493404985645811354476665052005342839447790544e+0000,
	[1] = +8.11740794458351064092797249069438269367389272270e-0001,
	[2] = -1.90703144603551216933075809162889536878854055202e-0001,
	[3] = +2.55742333994264563281155312271481108635575331201e-0002,
};
/* INDENT ON */

/*
 * Evaluate the kpsin polynomial at x.  The grouping of the terms is part
 * of the routine's rounding behaviour and is kept as one expression.
 */
static double
kpsin(double x)
{
	const double xsq = x * x;

	return x + (x * xsq) * ((ks[0] + xsq * ks[1]) + (xsq * xsq) * (ks[2] + xsq * ks[3]));
}

/* INDENT OFF */
/*
 * kpcos(x) = cos(pi*x)/pi, approximated by the even polynomial
 *
 *	kc[0] + kc[1]*x**2 + kc[2]*x**4 + kc[3]*x**6
 */
static const double kc[] = {
	[0] = +3.18309886183790671537767526745028724068919291480e-0001,
	[1] = -1.57079581447762568199467875065854538626594937791e+0000,
	[2] = +1.29183528092558692844073004029568674027807393862e+0000,
	[3] = -4.20232949771307685981015914425195471602739075537e-0001,
};
/* INDENT ON */

/*
 * Evaluate the kpcos polynomial at x; kc[0] is the value at x = 0.
 */
static double
kpcos(double x)
{
	const double xsq = x * x;

	return kc[0] + xsq * (kc[1] + xsq * kc[2] + (xsq * xsq) * kc[3]);
}

/* INDENT OFF */
/*
 * Offsets subtracted from the fractional argument before it is handed to
 * GT1(), GT2() and GT3() respectively.
 * The first one plus 1 is 1.134861805732790769689793935774652917006.
 */
static const double t0z1 = 0.134861805732790769689793935774652917006;
static const double t0z2 = 0.461632144968362341262659542325721328468;
static const double t0z3 = 0.819773101100500601787868704921606996312;
/* INDENT ON */
ON */ + +/* + * gamma(x+i) for 0 <= x < 1 + */ +static double +gam_n(int i, double x) { + double rr, yy; + double z1, z2; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845) { + if (x > 0.6374) + yy = GT3(x - t0z3); + else + yy = GT2(x - t0z2); + } else + yy = GT1(x - t0z1); + + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0= 0x7f800000) + return (xf * ((hx < 0)? 0.0F : xf)); /* +-Inf or NaN */ + + if (hx > 0x420C290F) /* x > 35.040096283... overflow */ + return (float)(x / tiny); + + if (hx >= 0x41000000) /* x >= 8 */ + return ((float) large_gam(x)); + + if (hx > 0) { /* x from 0 to 8 */ + i = (int) xf; + return ((float) gam_n(i, x - (double) i)); + } + + /* negative x */ + /* INDENT OFF */ + /* + * compute xk = + * -2 ... x is an even int (-inf is considered even) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; + if (ix >= 0x4b000000) { + if (ix > 0x4b000000) + xk = -2; + else + xk = -2 + (ix & 1); + } else if (ix >= 0x3f800000) { + k = (ix >> 23) - 0x7f; + j = ix >> (23 - k); + if ((j << (23 - k)) == ix) + xk = -2 + (j & 1); + else + xk = j & 1; + } + if (xk < 0) { + /* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */ + zf = xf - xf; + return (zf / zf); + } + + /* negative underflow thresold */ + if (ix > 0x4224000B) { /* x < -(41+11ulp) */ + if (xk == 0) + z = -tiny; + else + z = tiny; + return ((float)z); + } + + /* INDENT OFF */ + /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ + /* + * First compute ss = -sin(pi*y)/pi , so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + /* INDENT ON */ + y = -x; + j = (int) y; + z = y - (double) j; + if (z > 0.3183098861837906715377675) + if (z > 0.6816901138162093284622325) + ss = kpsin(one - z); + else + ss = kpcos(0.5 - z); + else + ss = kpsin(z); + if (xk == 0) + ss = -ss; + + /* Then compute ww = gamma(1+y) */ + if (j < 7) + ww = gam_n(j + 1, z); + else + ww = large_gam(y + 
one); + + /* return 1/(ss*ww) */ + return ((float) (one / (ww * ss))); +} diff --git a/usr/src/libm/src/m9x/tgammal.c b/usr/src/libm/src/m9x/tgammal.c new file mode 100644 index 0000000..b0297de --- /dev/null +++ b/usr/src/libm/src/m9x/tgammal.c @@ -0,0 +1,1166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/* INDENT OFF */
/*
 * Primary interval GT1(): numerator (P1) and denominator (Q1) of the
 * rational approximation, lowest-order coefficient first.
 *
 * P1 has ten coefficients and Q1 nine.  GT1() relies on exactly these
 * sizes: it seeds its Horner loop with P1[8] + y*P1[9] and Q1[8], then
 * walks both arrays down from index 7.
 */
static const long double P1[] = {
	+0.709086836199777919037185741507610124611513720557L,
	+4.45754781206489035827915969367354835667391606951e-0001L,
	+3.21049298735832382311662273882632210062918153852e-0002L,
	-5.71296796342106617651765245858289197369688864350e-0003L,
	+6.04666892891998977081619174969855831606965352773e-0003L,
	+8.99106186996888711939627812174765258822658645168e-0004L,
	-6.96496846144407741431207008527018441810175568949e-0005L,
	+1.52597046118984020814225409300131445070213882429e-0005L,
	+5.68521076168495673844711465407432189190681541547e-0007L,
	+3.30749673519634895220582062520286565610418952979e-0008L,
};
static const long double Q1[] = {
	/*
	 * Was written "+1.0+0000L", which the compiler reads as the sum
	 * 1.0 + 0000L (a double plus an octal long zero).  The value is
	 * the same; it is now a proper long double literal.
	 */
	+1.0e+0000L,
	+1.35806511721671070408570853537257079579490650668e+0000L,
	+2.97567810153429553405327140096063086994072952961e-0001L,
	-1.52956835982588571502954372821681851681118097870e-0001L,
	-2.88248519561420109768781615289082053597954521218e-0002L,
	+1.03475311719937405219789948456313936302378395955e-0002L,
	+4.12310203243891222368965360124391297374822742313e-0004L,
	-3.12653708152290867248931925120380729518332507388e-0004L,
	+2.36672170850409745237358105667757760527014332458e-0005L,
};
+3.81187532685392297608310837995193946591425896150e-0002L, + +3.00063075891811043820666846129131255948527925381e-0003L, + +2.47315407812279164228398470797498649142513408654e-0003L, + +3.62838199917848372586173483147214880464782938664e-0004L, + +3.43991105975492623982725644046473030098172692423e-0006L, + +4.56902151569603272237014240794257659159045432895e-0006L, + +2.13734755837595695602045100675540011352948958453e-0007L, + +9.74123440547918230781670266967882492234877125358e-0009L, +}; +static const long double Q2[] = { + +1.0L, + +9.18284118632506842664645516830761489700556179701e-0001L, + -6.41430858837830766045202076965923776189154874947e-0003L, + -1.24400885809771073213345747437964149775410921376e-0001L, + +4.69803798146251757538856567522481979624746875964e-0003L, + +7.18309447069495315914284705109868696262662082731e-0003L, + -8.75812626987894695112722600697653425786166399105e-0004L, + -1.23539972377769277995959339188431498626674835169e-0004L, + +3.10019017590151598732360097849672925448587547746e-0005L, + -1.77260223349332617658921874288026777465782364070e-0006L, +}; +static const long double P3[] = { + +0.3824094797345675048502747661075355640070439388902L, + +3.42198093076618495415854906335908427159833377774e-0001L, + +9.63828189500585568303961406863153237440702754858e-0002L, + +8.76069421042696384852462044188520252156846768667e-0003L, + +1.86477890389161491224872014149309015261897537488e-0003L, + +8.16871354540309895879974742853701311541286944191e-0004L, + +6.83783483674600322518695090864659381650125625216e-0005L, + -1.10168269719261574708565935172719209272190828456e-0006L, + +9.66243228508380420159234853278906717065629721016e-0007L, + +2.31858885579177250541163820671121664974334728142e-0008L, +}; +static const long double Q3[] = { + +1.0L, + +8.25479821168813634632437430090376252512793067339e-0001L, + -1.62251363073937769739639623669295110346015576320e-0002L, + -1.10621286905916732758745130629426559691187579852e-0001L, + 
/*
 * GZi_h + GZi_l is the head/tail split of the value GTi() returns at y = 0;
 * TZ1 and TZ3 are the linear coefficients used by GT1() and GT3().
 * The i386 branch carries shorter heads with tails near 1e-20; the other
 * branch carries tails near 1e-35.
 * NOTE(review): presumably these match the 64-bit and 113-bit long double
 * mantissas respectively -- confirm against the H0_WORD/CHOPPED selection
 * at the top of the file, which is keyed on the same macros.
 */
static const long double
#if defined(__i386)
GZ1_h = 0.938204627909682449364570100414084663498215377L,
GZ1_l = 4.518346116624229420055327632718530617227944106e-20L,
GZ2_h = 0.885603194410888700264725126309883762587560340L,
GZ2_l = 1.409077427270497062039119290776508217077297169e-20L,
GZ3_h = 0.936781411463652321613537060640553022494714241L,
GZ3_l = 5.309836440284827247897772963887219035221996813e-21L,
#else
GZ1_h = 0.938204627909682449409753561580326910854647031L,
GZ1_l = 4.684412162199460089642452580902345976446297037e-35L,
GZ2_h = 0.885603194410888700278815900582588658192658794L,
GZ2_l = 7.501529273890253789219935569758713534641074860e-35L,
GZ3_h = 0.936781411463652321618846897080837818855399840L,
GZ3_l = 3.088721217404784363585591914529361687403776917e-35L,
#endif
TZ1 = -0.3517214357852935791015625L,
TZ3 = 0.280530631542205810546875L;
/* INDENT ON */

/* INDENT OFF */
/*
 * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
 * ...assume yh got 53 or 24(i386) significant bits
 *
 * Returns the result as a head/tail pair (r.h, r.l):
 *	GZ1 + TZ1*y + y*y*P1(y)/Q1(y)
 * The head is passed through CHOPPED() and the tail absorbs whatever the
 * head could not represent.
 */
/* INDENT ON */
static struct LDouble
GT1(long double yh, long double yl) {
	long double t3, t4, y;
	int i;
	struct LDouble r;

	y = yh + yl;
	/*
	 * Horner evaluation of both polynomials in one loop.  P1 has one
	 * more coefficient than Q1, so its top term is folded into the
	 * initial value of t3.
	 */
	for (t4 = Q1[8], t3 = P1[8] + y * P1[9], i = 7; i >= 0; i--) {
		t4 = t4 * y + Q1[i];
		t3 = t3 * y + P1[i];
	}
	t3 = (y * y) * t3 / t4;		/* y*y*P1(y)/Q1(y) */
	t3 += (TZ1 * yl + GZ1_l);	/* gather the small terms */
	t4 = TZ1 * yh;			/* large part of the linear term */
	r.h = CHOPPED((t4 + GZ1_h + t3));
	t3 += (t4 - (r.h - GZ1_h));	/* what the chopped head left out */
	r.l = t3;
	return (r);
}

/* INDENT OFF */
/*
 * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
 * ...assume yh got 53 significant bits
 *
 * Returns GZ2 + y*y*P2(y)/Q2(y) as a head/tail pair.  Unlike GT1() and
 * GT3() there is no linear term.
 */
/* INDENT ON */
static struct LDouble
GT2(long double yh, long double yl) {
	long double t3, t4, y;
	int i;
	struct LDouble r;

	y = yh + yl;
	/* P2 and Q2 both have ten coefficients: plain Horner from index 9 */
	for (t4 = Q2[9], t3 = P2[9], i = 8; i >= 0; i--) {
		t4 = t4 * y + Q2[i];
		t3 = t3 * y + P2[i];
	}
	t3 = GZ2_l + (y * y) * t3 / t4;
	r.h = CHOPPED((GZ2_h + t3));
	r.l = t3 - (r.h - GZ2_h);	/* what the chopped head left out */
	return (r);
}

/* INDENT OFF */
/*
 * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
 * ...assume yh got 53 significant bits
 *
 * Returns GZ3 + TZ3*y + y*y*P3(y)/Q3(y) as a head/tail pair; same scheme
 * as GT1() except that P3 and Q3 have the same number of coefficients.
 */
/* INDENT ON */
static struct LDouble
GT3(long double yh, long double yl) {
	long double t3, t4, y;
	int i;
	struct LDouble r;

	y = yh + yl;
	for (t4 = Q3[9], t3 = P3[9], i = 8; i >= 0; i--) {
		t4 = t4 * y + Q3[i];
		t3 = t3 * y + P3[i];
	}
	t3 = (y * y) * t3 / t4;		/* y*y*P3(y)/Q3(y) */
	t3 += (TZ3 * yl + GZ3_l);	/* gather the small terms */
	t4 = TZ3 * yh;			/* large part of the linear term */
	r.h = CHOPPED((t4 + GZ3_h + t3));
	t3 += (t4 - (r.h - GZ3_h));	/* what the chopped head left out */
	r.l = t3;
	return (r);
}
+2.09263249637351298563934942349749718491071093210e+0008L, + -2.96247483183169219343745316433899599834685703457e+0009L, + +2.88984933605896033154727626086506756972327292981e+0010L, + -1.40960434146030007732838382416230610302678063984e+0011L, /* 19 */ +}; + +static const long double T3[] = { + +0.666666666666666666666666666666666634567834260213L, /* T3[0] */ + +0.400000000000000000000000000040853636176634934140L, /* T3[1] */ + +0.285714285714285714285696975252753987869020263448L, /* T3[2] */ + +0.222222222222222225593221101192317258554772129875L, /* T3[3] */ + +0.181818181817850192105847183461778186703779262916L, /* T3[4] */ + +0.153846169861348633757101285952333369222567014596L, /* T3[5] */ + +0.133033462889260193922261296772841229985047571265L, /* T3[6] */ +}; + +static const long double c[] = { +0.0L, +1.0L, +2.0L, +0.5L, +1.0e-4930L, /* tiny */ +4.18937683105468750000e-01L, /* hln2pim1_h */ +8.50099203991780329736405617639861397473637783412817152e-07L, /* hln2pim1_l */ +0.418938533204672741780329736405617639861397473637783412817152L, /* hln2pim1 */ +2.16608493865351192653179168701171875e-02L, /* ln2_32hi */ +5.96317165397058692545083025235937919875797669127130e-12L, /* ln2_32lo */ +46.16624130844682903551758979206054839765267053289554989233L, /* invln2_32 */ +#if defined(__i386) +1.7555483429044629170023839037639845628291e+03L, /* overflow */ +#else +1.7555483429044629170038892160702032034177e+03L, /* overflow */ +#endif +}; + +#define zero c[0] +#define one c[1] +#define two c[2] +#define half c[3] +#define tiny c[4] +#define hln2pim1_h c[5] +#define hln2pim1_l c[6] +#define hln2pim1 c[7] +#define ln2_32hi c[8] +#define ln2_32lo c[9] +#define invln2_32 c[10] +#define overflow c[11] + +/* + * |exp(r) - (1+r+Et0*r^2+...+Et10*r^12)| <= 2^(-128.88) for |r|<=ln2/64 + */ +static const long double Et[] = { + +5.0000000000000000000e-1L, + +1.66666666666666666666666666666828835166292152466e-0001L, + +4.16666666666666666666666666666693398646592712189e-0002L, + 
+8.33333333333333333333331748774512601775591115951e-0003L, + +1.38888888888888888888888845356011511394764753997e-0003L, + +1.98412698412698413237140350092993252684198882102e-0004L, + +2.48015873015873016080222025357442659895814371694e-0005L, + +2.75573192239028921114572986441972140933432317798e-0006L, + +2.75573192239448470555548102895526369739856219317e-0007L, + +2.50521677867683935940853997995937600214167232477e-0008L, + +2.08767928899010367374984448513685566514152147362e-0009L, +}; + +/* + * long double precision coefficients for computing log(x)-1 in tgamma. + * See "algorithm" for details + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + ... + T3[6]s^15 + * Note + * (1) the leading entries are truncated to 24 binary point. + * (2) Remez error for T3(s) is bounded by 2**(-136.54) + */ +static const long double T1[] = { +-1.000000000000000000000000000000000000000000e+00L, + +0.000000000000000000000000000000000000000000e+00L, +-3.068528175354003906250000000000000000000000e-01L, +-1.904654299957767878541823431924500011926579e-09L, + +3.862943053245544433593750000000000000000000e-01L, + +5.579533617547508924291635313615100141107647e-08L, + +1.079441487789154052734375000000000000000000e+00L, + +5.389068187551732136437452970422650211661470e-08L, + +1.772588670253753662109375000000000000000000e+00L, + +5.198602757555955348583270627230200282215294e-08L, + +2.465735852718353271484375000000000000000000e+00L, + +5.008137327560178560729088284037750352769117e-08L, + +3.158883035182952880859375000000000000000000e+00L, + +4.817671897564401772874905940845299849351090e-08L, + +3.852030217647552490234375000000000000000000e+00L, + +4.627206467568624985020723597652849919904913e-08L, + +4.545177400112152099609375000000000000000000e+00L, + 
+4.436741037572848197166541254460399990458737e-08L, + +5.238324582576751708984375000000000000000000e+00L, + +4.246275607577071409312358911267950061012560e-08L, + +5.931471765041351318359375000000000000000000e+00L, + +4.055810177581294621458176568075500131566384e-08L, +}; + +/* + * T2[2i,2i+1] = log(1+i/64+1/128) + */ +static const long double T2[] = { + +7.7821016311645507812500000000000000000000e-03L, + +3.8810890398166212900061136763678127453570e-08L, + +2.3167014122009277343750000000000000000000e-02L, + +4.5159525100885049160962289916579411752759e-08L, + +3.8318812847137451171875000000000000000000e-02L, + +5.1454999148021880325123797290345960518164e-08L, + +5.3244471549987792968750000000000000000000e-02L, + +4.2968824489897120193786528776939573415076e-08L, + +6.7950606346130371093750000000000000000000e-02L, + +5.5562377378300815277772629414034632394030e-08L, + +8.2443654537200927734375000000000000000000e-02L, + +1.4673873663533785068668307805914095366600e-08L, + +9.6729576587677001953125000000000000000000e-02L, + +4.9870874110342446056487463437015041543346e-08L, + +1.1081433296203613281250000000000000000000e-01L, + +3.3378253981382306169323211928098474801099e-08L, + +1.2470346689224243164062500000000000000000e-01L, + +1.1608714804222781515380863268491613205318e-08L, + +1.3840228319168090820312500000000000000000e-01L, + +3.9667438227482200873601649187393160823607e-08L, + +1.5191602706909179687500000000000000000000e-01L, + +1.4956750178196803424896884511327584958252e-08L, + +1.6524952650070190429687500000000000000000e-01L, + +4.6394605258578736449277240313729237989366e-08L, + +1.7840760946273803710937500000000000000000e-01L, + +4.8010080260010025241510941968354682199540e-08L, + +1.9139480590820312500000000000000000000000e-01L, + +4.7091426329609298807561308873447039132856e-08L, + +2.0421552658081054687500000000000000000000e-01L, + +1.4847880344628820386196239272213742113867e-08L, + +2.1687388420104980468750000000000000000000e-01L, + 
+5.4099564554931589525744347498478964801484e-08L, + +2.2937405109405517578125000000000000000000e-01L, + +4.9970790654210230725046139871550961365282e-08L, + +2.4171990156173706054687500000000000000000e-01L, + +3.5325408107597432515913513900103385655073e-08L, + +2.5391519069671630859375000000000000000000e-01L, + +1.9284247135543573297906606667466299224747e-08L, + +2.6596349477767944335937500000000000000000e-01L, + +5.3719458497979750926537543389268821141517e-08L, + +2.7786844968795776367187500000000000000000e-01L, + +1.3154985425144750329234012330820349974537e-09L, + +2.8963327407836914062500000000000000000000e-01L, + +1.8504673536253893055525668970003860369760e-08L, + +3.0126130580902099609375000000000000000000e-01L, + +2.4769140784919125538233755492657352680723e-08L, + +3.1275570392608642578125000000000000000000e-01L, + +6.0778104626049965596883190321597861455475e-09L, + +3.2411944866180419921875000000000000000000e-01L, + +1.9992407776871920760434987352182336158873e-08L, + +3.3535552024841308593750000000000000000000e-01L, + +2.1672724744319679579814166199074433006807e-08L, + +3.4646672010421752929687500000000000000000e-01L, + +4.7241991051621587188425772950711830538414e-08L, + +3.5745584964752197265625000000000000000000e-01L, + +3.9274281801569759490140904474434669956562e-08L, + +3.6832553148269653320312500000000000000000e-01L, + +2.9676011119845105154050398826897178765758e-08L, + +3.7907832860946655273437500000000000000000e-01L, + +2.4325502905656478345631019858881408009210e-08L, + +3.8971674442291259765625000000000000000000e-01L, + +6.7171126157142136040035208670510556529487e-09L, + +4.0024316310882568359375000000000000000000e-01L, + +1.0181870233355751019951311700799406124957e-09L, + +4.1065990924835205078125000000000000000000e-01L, + +1.5736916335153056203175822787661567534220e-08L, + +4.2096924781799316406250000000000000000000e-01L, + +4.6826136472066367161506795972449857268707e-08L, + +4.3117344379425048828125000000000000000000e-01L, + 
+2.1024120852577922478955594998480144051225e-08L, + +4.4127452373504638671875000000000000000000e-01L, + +3.7069828842770746441661301225362605528786e-08L, + +4.5127463340759277343750000000000000000000e-01L, + +1.0731865811707192383079012478685922879010e-08L, + +4.6117568016052246093750000000000000000000e-01L, + +3.4961647705430499925597855358603099030515e-08L, + +4.7097969055175781250000000000000000000000e-01L, + +2.4667033200046897856056359251373510964634e-08L, + +4.8068851232528686523437500000000000000000e-01L, + +1.7020465042442243455448011551208861216878e-08L, + +4.9030393362045288085937500000000000000000e-01L, + +5.4424740957290971159645746860530583309571e-08L, + +4.9982786178588867187500000000000000000000e-01L, + +7.7705606579463314152470441415126573566105e-09L, + +5.0926184654235839843750000000000000000000e-01L, + +5.5247449548366574919228323824878565745713e-08L, + +5.1860773563385009765625000000000000000000e-01L, + +2.8574195534496726996364798698556235730848e-08L, + +5.2786707878112792968750000000000000000000e-01L, + +1.0839714455426392217778300963558522088193e-08L, + +5.3704142570495605468750000000000000000000e-01L, + +4.0191927599879229244153832299023744345999e-08L, + +5.4613238573074340820312500000000000000000e-01L, + +5.1867392242179272209231209163864971792889e-08L, + +5.5514144897460937500000000000000000000000e-01L, + +5.8565892217715480359515904050170125743178e-08L, + +5.6407010555267333984375000000000000000000e-01L, + +3.2732129626227634290090190711817681692354e-08L, + +5.7291972637176513671875000000000000000000e-01L, + +2.7190020372374006726626261068626400393936e-08L, + +5.8169168233871459960937500000000000000000e-01L, + +5.7295907882911235753725372340709967597394e-08L, + +5.9038740396499633789062500000000000000000e-01L, + +4.2637180036751291708123598757577783615014e-08L, + +5.9900814294815063476562500000000000000000e-01L, + +4.6697932764615975024461651502060474048774e-08L, + +6.0755521059036254882812500000000000000000e-01L, + 
+3.9634179246672960152791125371893149820625e-08L, + +6.1602985858917236328125000000000000000000e-01L, + +1.8626341656366315928196700650292529688219e-08L, + +6.2443327903747558593750000000000000000000e-01L, + +8.9744179151050387440546731199093039879228e-09L, + +6.3276666402816772460937500000000000000000e-01L, + +5.5428701049364114685035797584887586099726e-09L, + +6.4103114604949951171875000000000000000000e-01L, + +3.3371431779336851334405392546708949047361e-08L, + +6.4922791719436645507812500000000000000000e-01L, + +2.9430743363812714969905311122271269100885e-08L, + +6.5735805034637451171875000000000000000000e-01L, + +2.2361985518423140023245936165514147093250e-08L, + +6.6542261838912963867187500000000000000000e-01L, + +1.4155960810278217610006660181148303091649e-08L, + +6.7342263460159301757812500000000000000000e-01L, + +4.0610573702719835388801017264750843477878e-08L, + +6.8135917186737060546875000000000000000000e-01L, + +5.2940532463479321559568089441735584156689e-08L, + +6.8923324346542358398437500000000000000000e-01L, + +3.7773385396340539337814603903232796216537e-08L, +}; + +/* + * S[j],S_trail[j] = 2**(j/32.) 
for the final computation of exp(t+w) + */ +static const long double S[] = { +#if defined(__i386) + +1.0000000000000000000000000e+00L, + +1.0218971486541166782081522e+00L, + +1.0442737824274138402382006e+00L, + +1.0671404006768236181297224e+00L, + +1.0905077326652576591003302e+00L, + +1.1143867425958925362894369e+00L, + +1.1387886347566916536971221e+00L, + +1.1637248587775775137938619e+00L, + +1.1892071150027210666875674e+00L, + +1.2152473599804688780476325e+00L, + +1.2418578120734840485256747e+00L, + +1.2690509571917332224885722e+00L, + +1.2968395546510096659215822e+00L, + +1.3252366431597412945939118e+00L, + +1.3542555469368927282668852e+00L, + +1.3839098819638319548151403e+00L, + +1.4142135623730950487637881e+00L, + +1.4451808069770466200253470e+00L, + +1.4768261459394993113155431e+00L, + +1.5091644275934227397133885e+00L, + +1.5422108254079408235859630e+00L, + +1.5759808451078864864006862e+00L, + +1.6104903319492543080837174e+00L, + +1.6457554781539648445110730e+00L, + +1.6817928305074290860378350e+00L, + +1.7186192981224779156032914e+00L, + +1.7562521603732994831094730e+00L, + +1.7947090750031071864148413e+00L, + +1.8340080864093424633989166e+00L, + +1.8741676341102999013002103e+00L, + +1.9152065613971472938202589e+00L, + +1.9571441241754002689657438e+00L, +#else + +1.00000000000000000000000000000000000e+00L, + +1.02189714865411667823448013478329942e+00L, + +1.04427378242741384032196647873992910e+00L, + +1.06714040067682361816952112099280918e+00L, + +1.09050773266525765920701065576070789e+00L, + +1.11438674259589253630881295691960313e+00L, + +1.13878863475669165370383028384151134e+00L, + +1.16372485877757751381357359909218536e+00L, + +1.18920711500272106671749997056047593e+00L, + +1.21524735998046887811652025133879836e+00L, + +1.24185781207348404859367746872659561e+00L, + +1.26905095719173322255441908103233805e+00L, + +1.29683955465100966593375411779245118e+00L, + +1.32523664315974129462953709549872168e+00L, + +1.35425554693689272829801474014070273e+00L, + 
+1.38390988196383195487265952726519287e+00L, + +1.41421356237309504880168872420969798e+00L, + +1.44518080697704662003700624147167095e+00L, + +1.47682614593949931138690748037404985e+00L, + +1.50916442759342273976601955103319352e+00L, + +1.54221082540794082361229186209073479e+00L, + +1.57598084510788648645527016018190504e+00L, + +1.61049033194925430817952066735740067e+00L, + +1.64575547815396484451875672472582254e+00L, + +1.68179283050742908606225095246642969e+00L, + +1.71861929812247791562934437645631244e+00L, + +1.75625216037329948311216061937531314e+00L, + +1.79470907500310718642770324212778174e+00L, + +1.83400808640934246348708318958828892e+00L, + +1.87416763411029990132999894995444645e+00L, + +1.91520656139714729387261127029583086e+00L, + +1.95714412417540026901832225162687149e+00L, +#endif +}; +static const long double S_trail[] = { +#if defined(__i386) + +0.0000000000000000000000000e+00L, + +2.6327965667180882569382524e-20L, + +8.3765863521895191129661899e-20L, + +3.9798705777454504249209575e-20L, + +1.0668046596651558640993042e-19L, + +1.9376009847285360448117114e-20L, + +6.7081819456112953751277576e-21L, + +1.9711680502629186462729727e-20L, + +2.9932584438449523689104569e-20L, + +6.8887754153039109411061914e-20L, + +6.8002718741225378942847820e-20L, + +6.5846917376975403439742349e-20L, + +1.2171958727511372194876001e-20L, + +3.5625253228704087115438260e-20L, + +3.1129551559077560956309179e-20L, + +5.7519192396164779846216492e-20L, + +3.7900651177865141593101239e-20L, + +1.1659262405698741798080115e-20L, + +7.1364385105284695967172478e-20L, + +5.2631003710812203588788949e-20L, + +2.6328853788732632868460580e-20L, + +5.4583950085438242788190141e-20L, + +9.5803254376938269960718656e-20L, + +7.6837733983874245823512279e-21L, + +2.4415965910835093824202087e-20L, + +2.6052966871016580981769728e-20L, + +2.6876456344632553875309579e-21L, + +1.2861930155613700201703279e-20L, + +8.8166633394037485606572294e-20L, + +2.9788615389580190940837037e-20L, + 
+5.2352341619805098677422139e-20L, + +5.2578463064010463732242363e-20L, +#else + +0.00000000000000000000000000000000000e+00L, + +1.80506787420330954745573333054573786e-35L, +-9.37452029228042742195756741973083214e-35L, +-1.59696844729275877071290963023149997e-35L, + +9.11249341012502297851168610167248666e-35L, +-6.50422820697854828723037477525938871e-35L, +-8.14846884452585113732569176748815532e-35L, +-5.06621457672180031337233074514290335e-35L, +-1.35983097468881697374987563824591912e-35L, + +9.49742763556319647030771056643324660e-35L, +-3.28317052317699860161506596533391526e-36L, +-5.01723570938719041029018653045842895e-35L, +-2.39147479768910917162283430160264014e-35L, +-8.35057135763390881529889073794408385e-36L, + +7.03675688907326504242173719067187644e-35L, +-5.18248485306464645753689301856695619e-35L, + +9.42224254862183206569211673639406488e-35L, +-3.96750082539886230916730613021641828e-35L, + +7.14352899156330061452327361509276724e-35L, + +1.15987125286798512424651783410044433e-35L, + +4.69693347835811549530973921320187447e-35L, +-3.38651317599500471079924198499981917e-35L, +-8.58731877429824706886865593510387445e-35L, +-9.60595154874935050318549936224606909e-35L, + +9.60973393212801278450755869714178581e-35L, + +6.37839792144002843924476144978084855e-35L, + +7.79243078569586424945646112516927770e-35L, + +7.36133776758845652413193083663393220e-35L, +-6.47299514791334723003521457561217053e-35L, + +8.58747441795369869427879806229522962e-35L, + +2.37181542282517483569165122830269098e-35L, +-3.02689168209611877300459737342190031e-37L, +#endif +}; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * return tgamma(x) scaled by 2**-m for 8> 16) - 0x3fff; /* exponent of x, range:3-10 */ + y = scalbnl(x, -n2); /* y = scale x to [1,2] */ + n2 += n2; /* 2n */ + j = (ix >> 10) & 0x3f; /* j */ + z = 1.0078125L + (long double) j * 0.015625L; /* z[j]=1+j/64+1/128 */ + j2 = j + j; + t1 = y + z; + t2 = y - z; + r = one / t1; + u = r * t2; /* u = (y-z)/(y+z) */ + t1 = CHOPPED(t1); 
+ t4 = T2[j2 + 1] + T1[n2 + 1]; + z2 = u * u; + k = H0_WORD(u) & 0x7fffffff; + t3 = T2[j2] + T1[n2]; + for (t5 = T3[6], i = 5; i >= 0; i--) + t5 = z2 * t5 + T3[i]; + if ((k >> 16) < 0x3fec) { /* |u|<2**-19 */ + t2 = t4 + u * (two + z2 * t5); + } else { + t5 = t4 + (u * z2) * t5; + u2 = u + u; + v = (long double) ((int) (u2 * t24)) * p24; + t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z))); + t3 += v; + } + ss_h = CHOPPED((t2 + t3)); + ss_l = t2 - (ss_h - t3); +/* INDENT OFF */ +/* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2))) + * where ss = log(x) - 1 in already in extra precision + */ + /* INDENT ON */ + z = one / x; + r = x - half; + r_h = CHOPPED((r)); + w_h = r_h * ss_h + hln2pim1_h; + z2 = z * z; + w = (r - r_h) * ss_h + r * ss_l; + t1 = GP[19]; + for (i = 18; i > 0; i--) + t1 = z2 * t1 + GP[i]; + w += hln2pim1_l; + w_l = z * (GP[0] + z2 * t1) + w; + k = (int) ((w_h + w_l) * invln2_32 + half); + + /* compute the exponential of w_h+w_l */ + + j = k & 0x1f; + *m = k >> 5; + t3 = (long double) k; + + /* perform w - k*ln2_32 (represent as w_h - w_l) */ + t1 = w_h - t3 * ln2_32hi; + t2 = t3 * ln2_32lo; + w = t2 - w_l; + w_h = t1 - w; + w_l = w - (t1 - w_h); + + /* compute exp(w_h-w_l) */ + z = w_h - w_l; + for (t1 = Et[10], i = 9; i >= 0; i--) + t1 = z * t1 + Et[i]; + t3 = w_h - (w_l - (z * z) * t1); /* t3 = expm1(z) */ + zz.l = S_trail[j] * (one + t3) + S[j] * t3; + zz.h = S[j]; + return (zz); +} + +/* INDENT OFF */ +/* + * kpsin(x)= sin(pi*x)/pi + * 3 5 7 9 11 27 + * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x + ... 
+ ks[12]*x + */ +static const long double ks[] = { + -1.64493406684822643647241516664602518705158902870e+0000L, + +8.11742425283353643637002772405874238094995726160e-0001L, + -1.90751824122084213696472111835337366232282723933e-0001L, + +2.61478478176548005046532613563241288115395517084e-0002L, + -2.34608103545582363750893072647117829448016479971e-0003L, + +1.48428793031071003684606647212534027556262040158e-0004L, + -6.97587366165638046518462722252768122615952898698e-0006L, + +2.53121740413702536928659271747187500934840057929e-0007L, + -7.30471182221385990397683641695766121301933621956e-0009L, + +1.71653847451163495739958249695549313987973589884e-0010L, + -3.34813314714560776122245796929054813458341420565e-0012L, + +5.50724992262622033449487808306969135431411753047e-0014L, + -7.67678132753577998601234393215802221104236979928e-0016L, +}; +/* INDENT ON */ + +/* + * assume x is not tiny and positive + */ +static struct LDouble +kpsin(long double x) { + long double z, t1, t2; + struct LDouble xx; + int i; + + z = x * x; + xx.h = x; + for (t2 = ks[12], i = 11; i > 0; i--) + t2 = z * t2 + ks[i]; + t1 = z * x; + t2 *= z * t1; + xx.l = t1 * ks[0] + t2; + return (xx); +} + +/* INDENT OFF */ +/* + * kpcos(x)= cos(pi*x)/pi + * 2 4 6 8 10 12 + * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x + * + * 2 4 6 8 10 22 + * = 1/pi - pi/2*x +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +...+kc[9]*x + * + * -pi/2*x*x = (npi_2_h + npi_2_l) * (x_f+x_l)*(x_f+x_l) + * = npi_2_h*(x_f+x_l)*(x_f+x_l) + npi_2_l*x*x + * = npi_2_h*x_f*x_f + npi_2_h*(x*x-x_f*x_f) + npi_2_l*x*x + * = npi_2_h*x_f*x_f + npi_2_h*(x+x_f)*(x-x_f) + npi_2_l*x*x + * Here x_f = (long double) (float)x + * Note that pi/2(in hex) = + * 1.921FB54442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 + * npi_2_h = -pi/2 chopped to 25 bits = -1.921FB50000000000000000000000000 = + * -1.570796310901641845703125000000000 and + * npi_2_l = + * -0.0000004442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 = + * 
-.0000000158932547735281966916397514420985846996875529104874722961539 = + * -1.5893254773528196691639751442098584699687552910487472296153e-8 + * 1/pi(in hex) = + * .517CC1B727220A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B + * will be splitted into: + * one_pi_h = 1/pi chopped to 48 bits = .517CC1B727220000000000... and + * one_pi_l = .0000000000000A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B + */ + +static const long double +#if defined(__i386) +one_pi_h = 0.3183098861481994390487670898437500L, /* 31 bits */ +one_pi_l = 3.559123248900043690127872406891929148e-11L, +#else +one_pi_h = 0.31830988618379052468299050815403461456298828125L, +one_pi_l = 1.46854777018590994109505931010230912897495334688117e-16L, +#endif +npi_2_h = -1.570796310901641845703125000000000L, +npi_2_l = -1.5893254773528196691639751442098584699687552910e-8L; + +static const long double kc[] = { + +1.29192819501249250731151312779548918765320728489e+0000L, + -4.25027339979557573976029596929319207009444090366e-0001L, + +7.49080661650990096109672954618317623888421628613e-0002L, + -8.21458866111282287985539464173976555436050215120e-0003L, + +6.14202578809529228503205255165761204750211603402e-0004L, + -3.33073432691149607007217330302595267179545908740e-0005L, + +1.36970959047832085796809745461530865597993680204e-0006L, + -4.41780774262583514450246512727201806217271097336e-0008L, + +1.14741409212381858820016567664488123478660705759e-0009L, + -2.44261236114707374558437500654381006300502749632e-0011L, +}; +/* INDENT ON */ + +/* + * assume x is not tiny and positive + */ +static struct LDouble +kpcos(long double x) { + long double z, t1, t2, t3, t4, x4, x8; + int i; + struct LDouble xx; + + z = x * x; + xx.h = one_pi_h; + t1 = (long double) ((float) x); + x4 = z * z; + t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1); + for (i = 8, t3 = kc[9]; i >= 0; i--) + t3 = z * t3 + kc[i]; + t3 = one_pi_l + x4 * t3; + t4 = t1 * t1 * npi_2_h; + x8 = t2 + t3; + xx.l = x8 + t4; + return (xx); +} + +/* INDENT 
OFF */ +static const long double + /* 0.13486180573279076968979393577465291700642511139552429398233 */ +#if defined(__i386) +t0z1 = 0.1348618057327907696779385054997035808810L, +t0z1_l = 1.1855430274949336125392717150257379614654e-20L, +#else +t0z1 = 0.1348618057327907696897939357746529168654L, +t0z1_l = 1.4102088588676879418739164486159514674310e-37L, +#endif + /* 0.46163214496836234126265954232572132846819620400644635129599 */ +#if defined(__i386) +t0z2 = 0.4616321449683623412538115843295472018326L, +t0z2_l = 8.84795799617412663558532305039261747030640e-21L, +#else +t0z2 = 0.46163214496836234126265954232572132343318L, +t0z2_l = 5.03501162329616380465302666480916271611101e-36L, +#endif + /* 0.81977310110050060178786870492160699631174407846245179119586 */ +#if defined(__i386) +t0z3 = 0.81977310110050060178773362329351925836817L, +t0z3_l = 1.350816280877379435658077052534574556256230e-22L +#else +t0z3 = 0.8197731011005006017878687049216069516957449L, +t0z3_l = 4.461599916947014419045492615933551648857380e-35L +#endif +; +/* INDENT ON */ + +/* + * gamma(x+i) for 0 <= x < 1 + */ +static struct LDouble +gam_n(int i, long double x) { + struct LDouble rr, yy; + long double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845L) { + if (x > 0.6374L) { + r1 = x - t0z3; + r2 = CHOPPED((r1 - t0z3_l)); + t2 = r1 - r2; + yy = GT3(r2, t2 - t0z3_l); + } else { + r1 = x - t0z2; + r2 = CHOPPED((r1 - t0z2_l)); + t2 = r1 - r2; + yy = GT2(r2, t2 - t0z2_l); + } + } else { + r1 = x - t0z1; + r2 = CHOPPED((r1 - t0z1_l)); + t2 = r1 - r2; + yy = GT1(r2, t2 - t0z1_l); + } + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0= 0x7fff0000) + return (x * ((hx < 0)? 
zero : x)); /* Inf or NaN */ + if (x > overflow) /* overflow threshold */ + return (x * 1.0e4932L); + if (hx >= 0x40020000) { /* x >= 8 */ + ww = large_gam(x, &m); + w = ww.h + ww.l; + return (scalbnl(w, m)); + } + + if (hx > 0) { /* x from 0 to 8 */ + i = (int) x; + ww = gam_n(i, x - (long double) i); + return (ww.h + ww.l); + } + /* INDENT OFF */ + /* negative x */ + /* + * compute xk = + * -2 ... x is an even int (-inf is considered an even #) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; +#if defined(__i386) + if (ix >= 0x403e0000) { /* x >= 2**63 } */ + if (ix >= 0x403f0000) + xk = -2; + else + xk = -2 + (lx & 1); +#else + if (ix >= 0x406f0000) { /* x >= 2**112 */ + if (ix >= 0x40700000) + xk = -2; + else + xk = -2 + (lx & 1); +#endif + } else if (ix >= 0x3fff0000) { + w = -x; + t1 = floorl(w); + t2 = t1 * half; + t3 = floorl(t2); + if (t1 == w) { + if (t2 == t3) + xk = -2; + else + xk = -1; + } else { + if (t2 == t3) + xk = 0; + else + xk = 1; + } + } + + if (xk < 0) { + /* return NaN. 
Ideally gamma(-n)= (-1)**(n+1) * inf */ + return (x - x) / (x - x); + } + + /* + * negative underflow thresold -(1774+9ulp) + */ + if (x < -1774.0000000000000000000000000000017749370L) { + z = tiny / x; + if (xk == 1) + z = -z; + return (z * tiny); + } + + /* INDENT OFF */ + /* + * now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x + */ + /* + * First compute ss = -sin(pi*y)/pi so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + /* INDENT ON */ + y = -x; + j = (int) y; + z = y - (long double) j; + if (z > 0.3183098861837906715377675L) + if (z > 0.6816901138162093284622325L) + ss = kpsin(one - z); + else + ss = kpcos(0.5L - z); + else + ss = kpsin(z); + if (xk == 0) { + ss.h = -ss.h; + ss.l = -ss.l; + } + + /* Then compute ww = gamma(1+y), note that result scale to 2**m */ + m = 0; + if (j < 7) { + ww = gam_n(j + 1, z); + } else { + w = y + one; + if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */ + ww = large_gam(w, &m); + } else { + t = w - one; + if (t == y) { /* y+one exact */ + ww = large_gam(w, &m); + } else { /* use y*gamma(y) */ + if (j == 7) + ww = gam_n(j, z); + else + ww = large_gam(y, &m); + t4 = ww.h + ww.l; + t1 = CHOPPED((y)); + t2 = CHOPPED((t4)); + /* t4 will not be too large */ + ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2; + ww.h = t1 * t2; + } + } + } + + /* compute 1/(ss*ww) */ + t3 = ss.h + ss.l; + t4 = ww.h + ww.l; + t1 = CHOPPED((t3)); + t2 = CHOPPED((t4)); + z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */ + z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */ + t3 = t3 * t4; /* t3 = ss*ww */ + z3 = one / t3; /* z3 = 1/(ss*ww) */ + t5 = t1 * t2; + z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */ + t1 = CHOPPED((t3)); /* (t1,z1) = ss*ww */ + z1 = z5 - (t1 - t5); + t2 = CHOPPED((z3)); /* leading 1/(ss*ww) */ + z2 = z3 * (t2 * z1 - (one - t2 * t1)); + z = t2 - z2; + + return (scalbnl(z, -m)); +} diff --git a/usr/src/libm/src/m9x/trunc.c b/usr/src/libm/src/m9x/trunc.c new file mode 100644 index 0000000..3797730 --- /dev/null +++ 
b/usr/src/libm/src/m9x/trunc.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)trunc.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak trunc = __trunc +#endif + +#include "libm.h" + +double +trunc(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) /* |x| < 1 */ + return (sx ? -0.0 : 0.0); + + /* chop x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] &= ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] &= ~(i | (i - 1)); + } + return (xx.d); + } else if (hx < 0x7ff00000) + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? 
/*
 * truncf(x) returns x rounded toward zero to an integral value.
 *
 *	inf or NaN	-> x + x (x * x for NaN when
 *			   FPADD_TRAPS_INCOMPLETE_ON_NAN is defined)
 *	|x| >= 2^23	-> x unchanged (already integral)
 *	|x| < 1		-> zero carrying the sign of x
 *	otherwise	-> x with every bit below the integer bit cleared
 */
float
truncf(float x) {
	union {
		unsigned i;
		float f;
	} bits;
	unsigned mag, neg, unit;

	bits.f = x;
	neg = bits.i & 0x80000000;
	mag = bits.i & ~0x80000000;

	if (mag >= 0x7f800000) {
		/* inf or NaN */
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
		return (mag > 0x7f800000 ? x * x : x + x);
#else
		return (x + x);
#endif
	}

	if (mag >= 0x4b000000)	/* |x| >= 2^23: no fraction bits left */
		return (x);

	if (mag < 0x3f800000)	/* |x| < 1 */
		return (neg ? -0.0F : 0.0F);

	/*
	 * unit is the lowest bit that must be cleared's neighbour: every
	 * bit from 0 up to and including unit is fractional.
	 */
	unit = 1 << (0x95 - (mag >> 23));
	bits.i &= ~((unit << 1) - 1);
	return (bits.f);
}
x * x : x + x); +#else + return (x + x); +#endif +} diff --git a/usr/src/libm/src/m9x/truncl.c b/usr/src/libm/src/m9x/truncl.c new file mode 100644 index 0000000..ba0724a --- /dev/null +++ b/usr/src/libm/src/m9x/truncl.c @@ -0,0 +1,109 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)truncl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak truncl = __truncl +#endif + +#include "libm.h" + +#if defined(__sparc) +long double +truncl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) /* |x| >= 2^112 + ... or x is nan */ + return (hx >= 0x7fff0000 ? x + x : x); + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) + return (sx ? 
-0.0L : 0.0L); + + j = 0x406f - (hx >> 16); /* 1 <= j <= 112 */ + xx.i[0] = hx; + if (j >= 96) { /* 96 <= j <= 112 */ + xx.i[0] &= ~((1 << (j - 96)) - 1); + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } else if (j >= 64) { /* 64 <= j <= 95 */ + xx.i[1] &= ~((1 << (j - 64)) - 1); + xx.i[2] = xx.i[3] = 0; + } else if (j >= 32) { /* 32 <= j <= 63 */ + xx.i[2] &= ~((1 << (j - 32)) - 1); + xx.i[3] = 0; + } else /* 1 <= j <= 31 */ + xx.i[3] &= ~((1 << j) - 1); + + /* negate result if need be */ + if (sx) + xx.i[0] |= 0x80000000; + return (xx.q); +} +#elif defined(__i386) +long double +truncl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) /* |x| < 1 */ + return (sx ? -0.0L : 0.0L); + + /* chop x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] &= ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] &= ~(i | (i - 1)); + } + return (xx.e); + } else if (ex < 0x7fff) /* x is integral */ + return (x); + else /* inf or nan */ + return (x + x); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__i386) */ diff --git a/usr/src/libm/src/mvec/__vTBL_atan1.c b/usr/src/libm/src/mvec/__vTBL_atan1.c new file mode 100644 index 0000000..b3b4b37 --- /dev/null +++ b/usr/src/libm/src/mvec/__vTBL_atan1.c @@ -0,0 +1,616 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vTBL_atan1.c 1.3 06/01/31 SMI" + +#pragma align 32 (__vlibm_TBL_atan1) +const double __vlibm_TBL_atan1[] = { + +/* i= -2 conup conlo = 0.0 */ 0.0 , 0.0 , +/* i= -1 PI/2 upper, lower */ 1.570796326794896558E+00, 6.123233995736765886e-17, + /* 3ff921fb54442d18, 3c91a62633145c07, */ + + +/* i= 0 atan(3F900000...) */ 1.56237286204768313E-02, -4.91360013656630395E-19, +/* i= 0 atan(3F900000...) 3F8FFF555BBB729B, BC2220C39D4DFF50, */ + +/* i= 1 atan(3F910000...) */ 1.66000375562312640E-02, 1.12189118956867269E-18, +/* i= 1 atan(3F910000...) 3F90FF99A9AA60D7, 3C34B1FB39D277D8, */ + +/* i= 2 atan(3F920000...) */ 1.75763148444955872E-02, 6.59519250301009539E-19, +/* i= 2 atan(3F920000...) 3F91FF8685C3E636, 3C2854FBB35044B1, */ + +/* i= 3 atan(3F930000...) */ 1.85525586258889763E-02, 1.39203477545012197E-19, +/* i= 3 atan(3F930000...) 3F92FF712238A4B8, 3C048AF56CEBE552, */ + +/* i= 4 atan(3F940000...) */ 1.95287670414137082E-02, -9.79999553454266918E-19, +/* i= 4 atan(3F940000...) 3F93FF595F18A700, BC3213EAC36CFB2C, */ + +/* i= 5 atan(3F950000...) */ 2.05049382324763683E-02, -8.40094761552091156E-19, +/* i= 5 atan(3F950000...) 3F94FF3F1C75BEE7, BC2EFE787F0F4330, */ + +/* i= 6 atan(3F960000...) */ 2.14810703409090559E-02, -4.19450646799657488E-20, +/* i= 6 atan(3F960000...) 3F95FF223A639D5C, BBE8C28F1824574A, */ + +/* i= 7 atan(3F970000...) */ 2.24571615089905717E-02, -1.30959312135654387E-18, +/* i= 7 atan(3F970000...) 
3F96FF0298F7EA3F, BC382860F0066622, */ + +/* i= 8 atan(3F980000...) */ 2.34332098794675855E-02, -1.09469246421805015E-18, +/* i= 8 atan(3F980000...) 3F97FEE0184A5C36, BC343189FC0A354B, */ + +/* i= 9 atan(3F990000...) */ 2.44092135955758099E-02, -1.47897509599299710E-18, +/* i= 9 atan(3F990000...) 3F98FEBA9874D084, BC3B48432E1BE204, */ + +/* i= 10 atan(3F9A0000...) */ 2.53851708010611396E-02, -1.34303200040391535E-18, +/* i= 10 atan(3F9A0000...) 3F99FE91F99362D6, BC38C64A0FD5DBE3, */ + +/* i= 11 atan(3F9B0000...) */ 2.63610796402007873E-02, 1.37267443271608158E-18, +/* i= 11 atan(3F9B0000...) 3F9AFE661BC4850F, 3C395245904A67C3, */ + +/* i= 12 atan(3F9C0000...) */ 2.73369382578244127E-02, -8.16108165671393861E-19, +/* i= 12 atan(3F9C0000...) 3F9BFE36DF291712, BC2E1BEC7756100E, */ + +/* i= 13 atan(3F9D0000...) */ 2.83127447993351995E-02, 8.59249306270865423E-19, +/* i= 13 atan(3F9D0000...) 3F9CFE0423E47E7D, 3C2FB36157FAFE79, */ + +/* i= 14 atan(3F9E0000...) */ 2.92884974107309737E-02, -7.76024364493026302E-19, +/* i= 14 atan(3F9E0000...) 3F9DFDCDCA1CBE70, BC2CA157C8222A15, */ + +/* i= 15 atan(3F9F0000...) */ 3.02641942386252458E-02, -1.66574467444210944E-18, +/* i= 15 atan(3F9F0000...) 3F9EFD93B1FA8F3E, BC3EBA41BEEDF844, */ + +/* i= 16 atan(3FA00000...) */ 3.12398334302682774E-02, -1.18844271158774798E-18, +/* i= 16 atan(3FA00000...) 3F9FFD55BBA97625, BC35EC431444912C, */ + +/* i= 17 atan(3FA10000...) */ 3.31909314971115949E-02, -9.42939153905567217E-19, +/* i= 17 atan(3FA10000...) 3FA0FE66DA9B94EE, BC3164E77D4EB175, */ + +/* i= 18 atan(3FA20000...) */ 3.51417768027967800E-02, 2.65885150818196357E-18, +/* i= 18 atan(3FA20000...) 3FA1FE1A5C2EC497, 3C4886091E8FC4CB, */ + +/* i= 19 atan(3FA30000...) */ 3.70923545503918164E-02, -1.94050652720581784E-18, +/* i= 19 atan(3FA30000...) 3FA2FDC4E3737DDD, BC41E5E438D0BA04, */ + +/* i= 20 atan(3FA40000...) */ 3.90426499551669928E-02, 6.27126337421308897E-19, +/* i= 20 atan(3FA40000...) 
3FA3FD65F169C9D9, 3C27230A716461B5, */ + +/* i= 21 atan(3FA50000...) */ 4.09926482452637811E-02, 2.47687641119150859E-18, +/* i= 21 atan(3FA50000...) 3FA4FCFD072DFF79, 3C46D85BEC38D078, */ + +/* i= 22 atan(3FA60000...) */ 4.29423346623621707E-02, 2.03095297887322147E-18, +/* i= 22 atan(3FA60000...) 3FA5FC89A5FA3B2D, 3C42BB73BF4E7F99, */ + +/* i= 23 atan(3FA70000...) */ 4.48916944623464972E-02, 2.31751818996581527E-19, +/* i= 23 atan(3FA70000...) 3FA6FC0B4F27D5BB, 3C1119AB07E9C009, */ + +/* i= 24 atan(3FA80000...) */ 4.68407129159696539E-02, -1.65567744225495210E-19, +/* i= 24 atan(3FA80000...) 3FA7FB818430DA2A, BC086EF8F794F105, */ + +/* i= 25 atan(3FA90000...) */ 4.87893753095156174E-02, 2.91348767453902927E-18, +/* i= 25 atan(3FA90000...) 3FA8FAEBC6B17ABA, 3C4ADF473CC8D797, */ + +/* i= 26 atan(3FAA0000...) */ 5.07376669454602178E-02, 2.07462271032410652E-18, +/* i= 26 atan(3FAA0000...) 3FA9FA49986984DF, 3C4322907AF0ABC2, */ + +/* i= 27 atan(3FAB0000...) */ 5.26855731431300420E-02, 2.86866232988833092E-18, +/* i= 27 atan(3FAB0000...) 3FAAF99A7B3DD42F, 3C4A756FFAAB786E, */ + +/* i= 28 atan(3FAC0000...) */ 5.46330792393594777E-02, -2.66980035901898370E-18, +/* i= 28 atan(3FAC0000...) 3FABF8DDF139C444, BC489FE34B2A7FA8, */ + +/* i= 29 atan(3FAD0000...) */ 5.65801705891457105E-02, 3.25489507698250449E-18, +/* i= 29 atan(3FAD0000...) 3FACF8137C90A177, 3C4E0567596F063F, */ + +/* i= 30 atan(3FAE0000...) */ 5.85268325663017702E-02, -2.48271181407783583E-19, +/* i= 30 atan(3FAE0000...) 3FADF73A9F9F1882, BC1251B5C410BCB4, */ + +/* i= 31 atan(3FAF0000...) */ 6.04730505641073168E-02, -5.66989890333967427E-19, +/* i= 31 atan(3FAF0000...) 3FAEF652DCECA4DC, BC24EB116F8EA623, */ + +/* i= 32 atan(3FB00000...) */ 6.24188099959573500E-02, -1.54907563082950458E-18, +/* i= 32 atan(3FB00000...) 3FAFF55BB72CFDEA, BC3C934D86D23F1D, */ + +/* i= 33 atan(3FB10000...) */ 6.63088949198234884E-02, -4.88592398930400059E-19, +/* i= 33 atan(3FB10000...) 
3FB0F99EA71D52A7, BC22069FEEC3624F, */ + +/* i= 34 atan(3FB20000...) */ 7.01969710718705203E-02, -1.79819216032204589E-18, +/* i= 34 atan(3FB20000...) 3FB1F86DBF082D59, BC4095DC7732EF81, */ + +/* i= 35 atan(3FB30000...) */ 7.40829225490337306E-02, 1.35448289530322996E-19, +/* i= 35 atan(3FB30000...) 3FB2F719318A4A9A, 3C03FD1779B9801F, */ + +/* i= 36 atan(3FB40000...) */ 7.79666338315423008E-02, 5.80455187314335664E-18, +/* i= 36 atan(3FB40000...) 3FB3F59F0E7C559D, 3C5AC4CE285DF847, */ + +/* i= 37 atan(3FB50000...) */ 8.18479898030765457E-02, 1.73846131383378367E-18, +/* i= 37 atan(3FB50000...) 3FB4F3FD677292FB, 3C4008D36264979E, */ + +/* i= 38 atan(3FB60000...) */ 8.57268757707448092E-02, 5.34719414350295085E-18, +/* i= 38 atan(3FB60000...) 3FB5F2324FD2D7B2, 3C58A8DA4401318E, */ + +/* i= 39 atan(3FB70000...) */ 8.96031774848717461E-02, -1.08082588355136405E-18, +/* i= 39 atan(3FB70000...) 3FB6F03BDCEA4B0D, BC33F00E512FA17D, */ + +/* i= 40 atan(3FB80000...) */ 9.34767811585894698E-02, -6.28447259954209545E-18, +/* i= 40 atan(3FB80000...) 3FB7EE182602F10F, BC5CFB654C0C3D98, */ + +/* i= 41 atan(3FB90000...) */ 9.73475734872236709E-02, 2.51506589544357698E-18, +/* i= 41 atan(3FB90000...) 3FB8EBC54478FB28, 3C4732880CAD24CC, */ + +/* i= 42 atan(3FBA0000...) */ 1.01215441667466668E-01, 5.68120255862341373E-18, +/* i= 42 atan(3FBA0000...) 3FB9E94153CFDCF1, 3C5A332E1D69C47E, */ + +/* i= 43 atan(3FBB0000...) */ 1.05080273416329528E-01, 3.03631931857741762E-18, +/* i= 43 atan(3FBB0000...) 3FBAE68A71C722B8, 3C4C014E6910B9DB, */ + +/* i= 44 atan(3FBC0000...) */ 1.08941956989865793E-01, 6.82671220724095851E-18, +/* i= 44 atan(3FBC0000...) 3FBBE39EBE6F07C3, 3C5F7B8F29A05987, */ + +/* i= 45 atan(3FBD0000...) */ 1.12800381201659389E-01, 1.86724154759436245E-18, +/* i= 45 atan(3FBD0000...) 3FBCE07C5C3CCA32, 3C4138E6425918A7, */ + +/* i= 46 atan(3FBE0000...) */ 1.16655435441069349E-01, 5.48792581210869929E-18, +/* i= 46 atan(3FBE0000...) 
3FBDDD21701EBA6E, 3C594EFFCD76FE58, */ + +/* i= 47 atan(3FBF0000...) */ 1.20507009691224562E-01, -5.32529096262256550E-19, +/* i= 47 atan(3FBF0000...) 3FBED98C2190043B, BC23A598592C7B13, */ + +/* i= 48 atan(3FC00000...) */ 1.24354994546761438E-01, -3.12532414245393831E-18, +/* i= 48 atan(3FC00000...) 3FBFD5BA9AAC2F6E, BC4CD37686760C17, */ + +/* i= 49 atan(3FC10000...) */ 1.32039761614638762E-01, -1.27692540070995953E-17, +/* i= 49 atan(3FC10000...) 3FC0E6ADCCF40882, BC6D71A31BB98D0D, */ + +/* i= 50 atan(3FC20000...) */ 1.39708874289163648E-01, -2.95798642473158131E-18, +/* i= 50 atan(3FC20000...) 3FC1E1FAFB043727, BC4B485914DACF8C, */ + +/* i= 51 atan(3FC30000...) */ 1.47361481088651630E-01, 5.40959914766629796E-18, +/* i= 51 atan(3FC30000...) 3FC2DCBDB2FBA1FF, 3C58F28705561534, */ + +/* i= 52 atan(3FC40000...) */ 1.54996741923940973E-01, 9.58541559411432383E-18, +/* i= 52 atan(3FC40000...) 3FC3D6EEE8C6626C, 3C661A3B0CE9281B, */ + +/* i= 53 atan(3FC50000...) */ 1.62613828597948568E-01, 7.78447064310625246E-18, +/* i= 53 atan(3FC50000...) 3FC4D087A9DA4F17, 3C61F323F1ADF158, */ + +/* i= 54 atan(3FC60000...) */ 1.70211925285474408E-01, -3.54116407980212514E-18, +/* i= 54 atan(3FC60000...) 3FC5C9811E3EC26A, BC5054AB2C010F3D, */ + +/* i= 55 atan(3FC70000...) */ 1.77790228992676075E-01, -4.02958210085442233E-18, +/* i= 55 atan(3FC70000...) 3FC6C1D4898933D9, BC52954A7603C427, */ + +/* i= 56 atan(3FC80000...) */ 1.85347949995694761E-01, 4.18069226884307898E-18, +/* i= 56 atan(3FC80000...) 3FC7B97B4BCE5B02, 3C5347B0B4F881CA, */ + +/* i= 57 atan(3FC90000...) */ 1.92884312257974672E-01, -7.41459017624724575E-18, +/* i= 57 atan(3FC90000...) 3FC8B06EE2879C29, BC6118CD30308C4F, */ + +/* i= 58 atan(3FCA0000...) */ 2.00398553825878512E-01, 3.13995428718444929E-18, +/* i= 58 atan(3FCA0000...) 3FC9A6A8E96C8626, 3C4CF601E7B4348E, */ + +/* i= 59 atan(3FCB0000...) */ 2.07889927202262986E-01, 7.33316066652089850E-18, +/* i= 59 atan(3FCB0000...) 
3FCA9C231B403279, 3C60E8BBE89CCA85, */ + +/* i= 60 atan(3FCC0000...) */ 2.15357699697738048E-01, 4.73816013007873192E-19, +/* i= 60 atan(3FCC0000...) 3FCB90D7529260A2, 3C217B10D2E0E5AA, */ + +/* i= 61 atan(3FCD0000...) */ 2.22801153759394521E-01, -5.49882217244684317E-18, +/* i= 61 atan(3FCD0000...) 3FCC84BF8A742E6E, BC595BDD0682EA26, */ + +/* i= 62 atan(3FCE0000...) */ 2.30219587276843718E-01, 1.23134045291427032E-17, +/* i= 62 atan(3FCE0000...) 3FCD77D5DF205736, 3C6C648D1534597E, */ + +/* i= 63 atan(3FCF0000...) */ 2.37612313865471242E-01, 1.05823143137111299E-17, +/* i= 63 atan(3FCF0000...) 3FCE6A148E96EC4D, 3C6866B22029F765, */ + +/* i= 64 atan(3FD00000...) */ 2.44978663126864143E-01, 1.06987556187344514E-17, +/* i= 64 atan(3FD00000...) 3FCF5B75F92C80DD, 3C68AB6E3CF7AFBD, */ + +/* i= 65 atan(3FD10000...) */ 2.59629629408257512E-01, 1.92387549246153041E-17, +/* i= 65 atan(3FD10000...) 3FD09DC597D86362, 3C762E47390CB865, */ + +/* i= 66 atan(3FD20000...) */ 2.74167451119658789E-01, 8.26135357516377194E-18, +/* i= 66 atan(3FD20000...) 3FD18BF5A30BF178, 3C630CA4748B1BF8, */ + +/* i= 67 atan(3FD30000...) */ 2.88587361894077410E-01, -1.42836995737725708E-17, +/* i= 67 atan(3FD30000...) 3FD278372057EF46, BC7077CDD36DFC81, */ + +/* i= 68 atan(3FD40000...) */ 3.02884868374971417E-01, -1.10108279030013690E-17, +/* i= 68 atan(3FD40000...) 3FD362773707EBCC, BC6963A544B672D8, */ + +/* i= 69 atan(3FD50000...) */ 3.17055753209147029E-01, -1.89392892429264215E-17, +/* i= 69 atan(3FD50000...) 3FD44AA436C2AF0A, BC75D5E43C55B3BA, */ + +/* i= 70 atan(3FD60000...) */ 3.31096076704132103E-01, -7.95261037579379870E-18, +/* i= 70 atan(3FD60000...) 3FD530AD9951CD4A, BC62566480884082, */ + +/* i= 71 atan(3FD70000...) */ 3.45002177207105132E-01, -2.29388047555783039E-17, +/* i= 71 atan(3FD70000...) 3FD614840309CFE2, BC7A725715711F00, */ + +/* i= 72 atan(3FD80000...) */ 3.58770670270572245E-01, -2.46238155826386349E-17, +/* i= 72 atan(3FD80000...) 
3FD6F61941E4DEF1, BC7C63AAE6F6E918, */ + +/* i= 73 atan(3FD90000...) */ 3.72398446676754202E-01, 1.96123115048456534E-17, +/* i= 73 atan(3FD90000...) 3FD7D5604B63B3F7, 3C769C885C2B249A, */ + +/* i= 74 atan(3FDA0000...) */ 3.85882669398073752E-01, 2.37882273249194087E-17, +/* i= 74 atan(3FDA0000...) 3FD8B24D394A1B25, 3C7B6D0BA3748FA8, */ + +/* i= 75 atan(3FDB0000...) */ 3.99220769575252543E-01, 2.24659810561704206E-17, +/* i= 75 atan(3FDB0000...) 3FD98CD5454D6B18, 3C79E6C988FD0A77, */ + +/* i= 76 atan(3FDC0000...) */ 4.12410441597387323E-01, -1.58765222777068909E-17, +/* i= 76 atan(3FDC0000...) 3FDA64EEC3CC23FD, BC724DEC1B50B7FF, */ + +/* i= 77 atan(3FDD0000...) */ 4.25449637370042266E-01, 2.33155307418928847E-17, +/* i= 77 atan(3FDD0000...) 3FDB3A911DA65C6C, 3C7AE187B1CA5040, */ + +/* i= 78 atan(3FDE0000...) */ 4.38336559857957830E-01, -2.49427703062654091E-17, +/* i= 78 atan(3FDE0000...) 3FDC0DB4C94EC9F0, BC7CC1CE70934C34, */ + +/* i= 79 atan(3FDF0000...) */ 4.51069655988523499E-01, -2.27037952294204745E-17, +/* i= 79 atan(3FDF0000...) 3FDCDE53432C1351, BC7A2CFA4418F1AD, */ + +/* i= 80 atan(3FE00000...) */ 4.63647609000806094E-01, 2.26987774529616871E-17, +/* i= 80 atan(3FE00000...) 3FDDAC670561BB4F, 3C7A2B7F222F65E2, */ + +/* i= 81 atan(3FE10000...) */ 4.88333951056405535E-01, -1.13732361893295846E-17, +/* i= 81 atan(3FE10000...) 3FDF40DD0B541418, BC6A3992DC382A23, */ + +/* i= 82 atan(3FE20000...) */ 5.12389460310737732E-01, -2.54627814728558035E-17, +/* i= 82 atan(3FE20000...) 3FE0657E94DB30D0, BC7D5B495F6349E6, */ + +/* i= 83 atan(3FE30000...) */ 5.35811237960463704E-01, -4.06379568348255750E-18, +/* i= 83 atan(3FE30000...) 3FE1255D9BFBD2A9, BC52BDAEE1C0EE35, */ + +/* i= 84 atan(3FE40000...) */ 5.58599315343562441E-01, -5.45563054859162639E-18, +/* i= 84 atan(3FE40000...) 3FE1E00BABDEFEB4, BC5928DF287A668F, */ + +/* i= 85 atan(3FE50000...) */ 5.80756353567670414E-01, -1.44146437819306691E-17, +/* i= 85 atan(3FE50000...) 
3FE2958E59308E31, BC709E73B0C6C087, */ + +/* i= 86 atan(3FE60000...) */ 6.02287346134964152E-01, 2.95043073722840231E-17, +/* i= 86 atan(3FE60000...) 3FE345F01CCE37BB, 3C81021137C71102, */ + +/* i= 87 atan(3FE70000...) */ 6.23199329934065904E-01, 2.67240388514009508E-17, +/* i= 87 atan(3FE70000...) 3FE3F13FB89E96F4, 3C7ECF8B492644F0, */ + +/* i= 88 atan(3FE80000...) */ 6.43501108793284371E-01, 1.58347850514442862E-17, +/* i= 88 atan(3FE80000...) 3FE4978FA3269EE1, 3C72419A87F2A458, */ + +/* i= 89 atan(3FE90000...) */ 6.63202992706093286E-01, -3.07605486442964900E-17, +/* i= 89 atan(3FE90000...) 3FE538F57B89061F, BC81BB74ABDA520C, */ + +/* i= 90 atan(3FEA0000...) */ 6.82316554874748071E-01, 6.94322367156000774E-18, +/* i= 90 atan(3FEA0000...) 3FE5D58987169B18, 3C60028E4BC5E7CA, */ + +/* i= 91 atan(3FEB0000...) */ 7.00854407884450192E-01, -1.98762623433581612E-17, +/* i= 91 atan(3FEB0000...) 3FE66D663923E087, BC76EA6FEBE8BBBA, */ + +/* i= 92 atan(3FEC0000...) */ 7.18829999621624527E-01, -2.14783884444569830E-17, +/* i= 92 atan(3FEC0000...) 3FE700A7C5784634, BC78C34D25AADEF6, */ + +/* i= 93 atan(3FED0000...) */ 7.36257428981428097E-01, 3.47393764829945672E-17, +/* i= 93 atan(3FED0000...) 3FE78F6BBD5D315E, 3C8406A089803740, */ + +/* i= 94 atan(3FEE0000...) */ 7.53151280962194414E-01, -2.42569346591820681E-17, +/* i= 94 atan(3FEE0000...) 3FE819D0B7158A4D, BC7BF76229D3B917, */ + +/* i= 95 atan(3FEF0000...) */ 7.69526480405658297E-01, -3.70499190560272129E-17, +/* i= 95 atan(3FEF0000...) 3FE89FF5FF57F1F8, BC855B9A5E177A1B, */ + +/* i= 96 atan(3FF00000...) */ 7.85398163397448279E-01, 3.06161699786838302E-17, +/* i= 96 atan(3FF00000...) 3FE921FB54442D18, 3C81A62633145C07, */ + +/* i= 97 atan(3FF10000...) */ 8.15691923316223422E-01, -1.07145656277874308E-17, +/* i= 97 atan(3FF10000...) 3FEA1A25F2C82506, BC68B4C3611182FC, */ + +/* i= 98 atan(3FF20000...) */ 8.44153986113171051E-01, -4.84133701193491676E-17, +/* i= 98 atan(3FF20000...) 
3FEB034F38649C88, BC8BE88D6936F833, */ + +/* i= 99 atan(3FF30000...) */ 8.70903457075652976E-01, -2.26982359074728705E-17, +/* i= 99 atan(3FF30000...) 3FEBDE70ED439FE7, BC7A2B56372C05EF, */ + +/* i= 100 atan(3FF40000...) */ 8.96055384571343927E-01, 2.92387628577430489E-17, +/* i= 100 atan(3FF40000...) 3FECAC7C57846F9E, 3C80DAE13AD18A6B, */ + +/* i= 101 atan(3FF50000...) */ 9.19719605350416858E-01, -4.05743941285276792E-17, +/* i= 101 atan(3FF50000...) 3FED6E57CF4F0ACA, BC8763B9456AE66E, */ + +/* i= 102 atan(3FF60000...) */ 9.42000040379463610E-01, 5.46083748584668763E-17, +/* i= 102 atan(3FF60000...) 3FEE24DD44C855D1, 3C8F7AC612AB33D8, */ + +/* i= 103 atan(3FF70000...) */ 9.62994330680936206E-01, -3.98666059521075245E-18, +/* i= 103 atan(3FF70000...) 3FEED0D97C9041C9, BC52629E3B5DA490, */ + +/* i= 104 atan(3FF80000...) */ 9.82793723247329054E-01, 1.39033110312309985E-17, +/* i= 104 atan(3FF80000...) 3FEF730BD281F69B, 3C7007887AF0CBBD, */ + +/* i= 105 atan(3FF90000...) */ 1.00148313569423464E+00, 9.43830802354539200E-17, +/* i= 105 atan(3FF90000...) 3FF006132E34D617, 3C9B343DFA868D93, */ + +/* i= 106 atan(3FFA0000...) */ 1.01914134426634972E+00, 1.00040188693667989E-17, +/* i= 106 atan(3FFA0000...) 3FF04E67277A01D7, 3C67115496C13EB6, */ + +/* i= 107 atan(3FFB0000...) */ 1.03584125300880014E+00, 3.19431398178450371E-17, +/* i= 107 atan(3FFB0000...) 3FF092CE471853CC, 3C8269F9B3E200C2, */ + +/* i= 108 atan(3FFC0000...) */ 1.05165021254837376E+00, -9.65056473146751351E-17, +/* i= 108 atan(3FFC0000...) 3FF0D38F2C5BA09F, BC9BD0DC231BFD70, */ + +/* i= 109 atan(3FFD0000...) */ 1.06663036531574362E+00, -5.95658963716037456E-17, +/* i= 109 atan(3FFD0000...) 3FF110EB007F39F7, BC912B2FF85E5500, */ + +/* i= 110 atan(3FFE0000...) */ 1.08083900054116833E+00, -1.56763225113590725E-17, +/* i= 110 atan(3FFE0000...) 3FF14B1DD5F90CE1, BC7212D570A63FA2, */ + +/* i= 111 atan(3FFF0000...) */ 1.09432890732118993E+00, -5.49067615502236423E-18, +/* i= 111 atan(3FFF0000...) 
3FF1825F074030D9, BC59523F0AF0D3B5, */ + +/* i= 112 atan(40000000...) */ 1.10714871779409041E+00, 9.40447137356637941E-17, +/* i= 112 atan(40000000...) 3FF1B6E192EBBE44, 3C9B1B466A88828E, */ + +/* i= 113 atan(40010000...) */ 1.13095374397916038E+00, 7.12383380453844630E-17, +/* i= 113 atan(40010000...) 3FF21862F3FADE36, 3C94887628D68748, */ + +/* i= 114 atan(40020000...) */ 1.15257199721566761E+00, -9.15973850890037882E-17, +/* i= 114 atan(40020000...) 3FF270EF55A53A25, BC9A66B1AF5F84FB, */ + +/* i= 115 atan(40030000...) */ 1.17227388112847630E+00, 8.38518861402867437E-17, +/* i= 115 atan(40030000...) 3FF2C1A241D66DC3, 3C982B2D58B6A8E9, */ + +/* i= 116 atan(40040000...) */ 1.19028994968253166E+00, 7.68333362984206881E-17, +/* i= 116 atan(40040000...) 3FF30B6D796A4DA8, 3C96254CB03BB199, */ + +/* i= 117 atan(40050000...) */ 1.20681737028525249E+00, 4.17246763886143912E-17, +/* i= 117 atan(40050000...) 3FF34F1FBB19EB09, 3C880D79B4CF61D5, */ + +/* i= 118 atan(40060000...) */ 1.22202532321098967E+00, -2.97916286489284927E-17, +/* i= 118 atan(40060000...) 3FF38D6A6CE13353, BC812C77E8A80F5C, */ + +/* i= 119 atan(40070000...) */ 1.23605948947808186E+00, 7.87975273945942128E-17, +/* i= 119 atan(40070000...) 3FF3C6E650B38047, 3C96B63B358E746D, */ + +/* i= 120 atan(40080000...) */ 1.24904577239825443E+00, -2.19620379961231129E-18, +/* i= 120 atan(40080000...) 3FF3FC176B7A8560, BC4441A3BD3F1084, */ + +/* i= 121 atan(40090000...) */ 1.26109338225244039E+00, 3.24213962153496050E-17, +/* i= 121 atan(40090000...) 3FF42D70411F9EC1, 3C82B08DB7F10896, */ + +/* i= 122 atan(400A0000...) */ 1.27229739520871732E+00, 2.24587501503450703E-17, +/* i= 122 atan(400A0000...) 3FF45B54837351A0, 3C79E4A72EEDACC4, */ + +/* i= 123 atan(400B0000...) */ 1.28274087974427076E+00, -9.28318875426612948E-18, +/* i= 123 atan(400B0000...) 3FF4861B4CFBE710, BC6567D3D25932D1, */ + +/* i= 124 atan(400C0000...) */ 1.29249666778978534E+00, -6.83080476892666033E-17, +/* i= 124 atan(400C0000...) 
3FF4AE10FC6589A5, BC93B03E8A27F555, */ + +/* i= 125 atan(400D0000...) */ 1.30162883400919616E+00, -1.23691849982462667E-17, +/* i= 125 atan(400D0000...) 3FF4D378C1999A0D, BC6C857A639541C8, */ + +/* i= 126 atan(400E0000...) */ 1.31019393504755555E+00, 8.74541373478027883E-17, +/* i= 126 atan(400E0000...) 3FF4F68DEA672617, 3C9934F9F2B0020E, */ + +/* i= 127 atan(400F0000...) */ 1.31824205101683711E+00, -6.31939403114467626E-17, +/* i= 127 atan(400F0000...) 3FF51784FA1544BA, BC9236E3C857C019, */ + +/* i= 128 atan(40100000...) */ 1.32581766366803255E+00, -8.82442937395113632E-17, +/* i= 128 atan(40100000...) 3FF5368C951E9CFD, BC996F47948A99F1, */ + +/* i= 129 atan(40110000...) */ 1.33970565959899957E+00, -2.59901186030413438E-17, +/* i= 129 atan(40110000...) 3FF56F6F33A3E6A7, BC7DF6EDD6F1EC3B, */ + +/* i= 130 atan(40120000...) */ 1.35212738092095464E+00, 2.14767425075115096E-17, +/* i= 130 atan(40120000...) 3FF5A25052114E60, 3C78C2D0C89DE218, */ + +/* i= 131 atan(40130000...) */ 1.36330010035969384E+00, 1.09324617152693622E-16, +/* i= 131 atan(40130000...) 3FF5D013C41ADABD, 3C9F82BBA194DD5D, */ + +/* i= 132 atan(40140000...) */ 1.37340076694501589E+00, -3.30771035576951650E-17, +/* i= 132 atan(40140000...) 3FF5F97315254857, BC831151A43B51CA, */ + +/* i= 133 atan(40150000...) */ 1.38257482149012589E+00, -3.56149043864823010E-17, +/* i= 133 atan(40150000...) 3FF61F06C6A92B89, BC8487D50BCEB1A5, */ + +/* i= 134 atan(40160000...) */ 1.39094282700241845E+00, -9.84371213348884259E-17, +/* i= 134 atan(40160000...) 3FF6414D44094C7C, BC9C5F60A65C7397, */ + +/* i= 135 atan(40170000...) */ 1.39860551227195762E+00, -2.32406118259162798E-17, +/* i= 135 atan(40170000...) 3FF660B02C736A06, BC7ACB6AFB332A0F, */ + +/* i= 136 atan(40180000...) */ 1.40564764938026987E+00, -8.92263013823449239E-17, +/* i= 136 atan(40180000...) 3FF67D8863BC99BD, BC99B7BD2E1E8C9C, */ + +/* i= 137 atan(40190000...) */ 1.41214106460849531E+00, -9.57380711055722328E-17, +/* i= 137 atan(40190000...) 
3FF698213A9D5053, BC9B9839085189E3, */ + +/* i= 138 atan(401A0000...) */ 1.41814699839963154E+00, -8.26388378251101363E-17, +/* i= 138 atan(401A0000...) 3FF6B0BAE830C070, BC97D1AB82FFB70B, */ + +/* i= 139 atan(401B0000...) */ 1.42371797140649403E+00, 8.72187092222396751E-17, +/* i= 139 atan(401B0000...) 3FF6C78C7EDEB195, 3C99239AD620FFE2, */ + +/* i= 140 atan(401C0000...) */ 1.42889927219073276E+00, -6.45713474323875439E-17, +/* i= 140 atan(401C0000...) 3FF6DCC57BB565FD, BC929C86447928E7, */ + +/* i= 141 atan(401D0000...) */ 1.43373015248470903E+00, -4.39620446676763619E-17, +/* i= 141 atan(401D0000...) 3FF6F08F07435FEC, BC8957A7170DF016, */ + +/* i= 142 atan(401E0000...) */ 1.43824479449822262E+00, -2.49301991026456555E-17, +/* i= 142 atan(401E0000...) 3FF7030CF9403197, BC7CBE1896221608, */ + +/* i= 143 atan(401F0000...) */ 1.44247309910910193E+00, -1.10511943543031571E-16, +/* i= 143 atan(401F0000...) 3FF7145EAC2088A4, BC9FDA5797B32A0B, */ + +/* i= 144 atan(40200000...) */ 1.44644133224813509E+00, 9.21132397154505156E-17, +/* i= 144 atan(40200000...) 3FF7249FAA996A21, 3C9A8CC1E7480C68, */ + +/* i= 145 atan(40210000...) */ 1.45368758222803240E+00, -6.81876925015134676E-17, +/* i= 145 atan(40210000...) 3FF7424DE90454D4, BC93A75D182E1A5F, */ + +/* i= 146 atan(40220000...) */ 1.46013910562100091E+00, 6.26097470783084416E-17, +/* i= 146 atan(40220000...) 3FF75CBAD2A40BD5, 3C920BC8AF35C4D5, */ + +/* i= 147 atan(40230000...) */ 1.46591938806466282E+00, -9.71125555407483218E-17, +/* i= 147 atan(40230000...) 3FF77467E364F601, BC9BFDA44F3537B8, */ + +/* i= 148 atan(40240000...) */ 1.47112767430373470E+00, -1.08492227620614239E-16, +/* i= 148 atan(40240000...) 3FF789BD2C160054, BC9F45503CCAD255, */ + +/* i= 149 atan(40250000...) */ 1.47584462045214027E+00, 3.38755967276631476E-17, +/* i= 149 atan(40250000...) 3FF79D0F3FAD1C92, 3C838727DC4FB7D1, */ + +/* i= 150 atan(40260000...) */ 1.48013643959415142E+00, 8.50262547607966975E-17, +/* i= 150 atan(40260000...) 
3FF7AEA38C1ACBD1, 3C9881D48AE6DE92, */ + +/* i= 151 atan(40270000...) */ 1.48405798811891154E+00, -3.44545106786359401E-17, +/* i= 151 atan(40270000...) 3FF7BEB396C5699A, BC83DC969C7E2365, */ + +/* i= 152 atan(40280000...) */ 1.48765509490645531E+00, 7.84437173946107664E-17, +/* i= 152 atan(40280000...) 3FF7CD6F6DC59DB4, 3C969C1FED612CFC, */ + +/* i= 153 atan(40290000...) */ 1.49096634108265924E+00, 6.22143476002012210E-17, +/* i= 153 atan(40290000...) 3FF7DAFF85A63058, 3C91EE9BCCA84EB2, */ + +/* i= 154 atan(402A0000...) */ 1.49402443552511865E+00, -7.47641750277645943E-17, +/* i= 154 atan(402A0000...) 3FF7E7862AA0157C, BC958C9F564B028C, */ + +/* i= 155 atan(402B0000...) */ 1.49685728913695626E+00, 1.69600762125511713E-17, +/* i= 155 atan(402B0000...) 3FF7F320A0F9F587, 3C738DBB20936502, */ + +/* i= 156 atan(402C0000...) */ 1.49948886200960629E+00, -8.69233960451104982E-19, +/* i= 156 atan(402C0000...) 3FF7FDE80870C2A0, BC3008D760C989AB, */ + +/* i= 157 atan(402D0000...) */ 1.50193983749385196E+00, 6.06189958407581368E-17, +/* i= 157 atan(402D0000...) 3FF807F2112987C7, 3C9178E474EC8C66, */ + +/* i= 158 atan(402E0000...) */ 1.50422816301907281E+00, 9.13778153422684716E-18, +/* i= 158 atan(402E0000...) 3FF811518CDE39A6, 3C6511FE80FBB230, */ + +/* i= 159 atan(402F0000...) */ 1.50636948736934317E+00, -1.05533910133197090E-16, +/* i= 159 atan(402F0000...) 3FF81A16E43F190B, BC9E6B0733383AD4, */ + +/* i= 160 atan(40300000...) */ 1.50837751679893928E+00, -6.60752345087512057E-18, +/* i= 160 atan(40300000...) 3FF82250768AC529, BC5E78C96D05AFCB, */ + +/* i= 161 atan(40310000...) */ 1.51204050407917401E+00, -8.17827248696306499E-17, +/* i= 161 atan(40310000...) 3FF831516233F561, BC97927FFEC5F9DC, */ + +/* i= 162 atan(40320000...) */ 1.51529782154917969E+00, 9.27265838320600392E-17, +/* i= 162 atan(40320000...) 3FF83EA8EDB40F72, 3C9ABA03A56FDC09, */ + +/* i= 163 atan(40330000...) */ 1.51821326518395483E+00, 7.14053211560016173E-17, +/* i= 163 atan(40330000...) 
3FF84A99FE25186B, 3C9494C8619D0BBC, */ + +/* i= 164 atan(40340000...) */ 1.52083793107295384E+00, 1.64275464789776791E-17, +/* i= 164 atan(40340000...) 3FF8555A2787981F, 3C72F08E51763131, */ + +/* i= 165 atan(40350000...) */ 1.52321322351791322E+00, 6.06514977555146142E-18, +/* i= 165 atan(40350000...) 3FF85F14D43D81BE, 3C5BF8770A76AFAF, */ + +/* i= 166 atan(40360000...) */ 1.52537304737331958E+00, 2.48298338570039438E-17, +/* i= 166 atan(40360000...) 3FF867ED918AB138, 3C7CA07933F18E43, */ + +/* i= 167 atan(40370000...) */ 1.52734543140336587E+00, -9.47004210780093541E-17, +/* i= 167 atan(40370000...) 3FF87001C35928D4, BC9B4BA860ADA728, */ + +/* i= 168 atan(40380000...) */ 1.52915374769630819E+00, 9.96025861033048094E-18, +/* i= 168 atan(40380000...) 3FF87769EB8E956B, 3C66F77FB9BAEBA6, */ + +/* i= 169 atan(40390000...) */ 1.53081763967160667E+00, -8.91334763349872231E-17, +/* i= 169 atan(40390000...) 3FF87E3AA32878AE, BC99B0E3C3BBC6CF, */ + +/* i= 170 atan(403A0000...) */ 1.53235373677370856E+00, 7.35876234111923764E-17, +/* i= 170 atan(403A0000...) 3FF884855A158B25, 3C9535CEE7C891BB, */ + +/* i= 171 atan(403B0000...) */ 1.53377621092096650E+00, 9.37735480657284383E-17, +/* i= 171 atan(403B0000...) 3FF88A58EC949D14, 3C9B07443DD06AD8, */ + +/* i= 172 atan(403C0000...) */ 1.53509721411557254E+00, 1.10616555458501787E-16, +/* i= 172 atan(403C0000...) 3FF88FC218ACE9DB, 3C9FE20FA7E1E941, */ + +/* i= 173 atan(403D0000...) */ 1.53632722579538861E+00, -1.73373217093894906E-18, +/* i= 173 atan(403D0000...) 3FF894CBDB6BEDFC, BC3FFB5195F35C00, */ + +/* i= 174 atan(403E0000...) */ 1.53747533091664934E+00, 8.11685860076124202E-17, +/* i= 174 atan(403E0000...) 3FF8997FBB8B19C0, 3C97652F3D7700A3, */ + +/* i= 175 atan(403F0000...) */ 1.53854944435964280E+00, -1.04663067143013889E-16, +/* i= 175 atan(403F0000...) 3FF89DE605ACDBB3, BC9E2AC570EAC042, */ + +/* i= 176 atan(40400000...) */ 1.53955649336462841E+00, -6.59487545533283128E-17, +/* i= 176 atan(40400000...) 
3FF8A205FD558740, BC930228C09A91B4, */ + +/* i= 177 atan(40410000...) */ 1.54139303859089161E+00, -1.02574621979876286E-16, +/* i= 177 atan(40410000...) 3FF8A98BBF307AA8, BC9D90ABD3CB737A, */ + +/* i= 178 atan(40420000...) */ 1.54302569020147562E+00, -3.65410017872781400E-17, +/* i= 178 atan(40420000...) 3FF8B03BB4C4D9C4, BC851080044823F8, */ + +/* i= 179 atan(40430000...) */ 1.54448660954197448E+00, -4.84886962896552125E-17, +/* i= 179 atan(40430000...) 3FF8B63797517BB5, BC8BF3AB273B6CE0, */ + +/* i= 180 atan(40440000...) */ 1.54580153317597646E+00, -1.28017749694693433E-18, +/* i= 180 atan(40440000...) 3FF8BB9A63718F45, BC379D77A1373742, */ + +/* i= 181 atan(40450000...) */ 1.54699130060982659E+00, 8.40387156476469915E-17, +/* i= 181 atan(40450000...) 3FF8C079F3350D26, 3C9838F674C6574D, */ + +/* i= 182 atan(40460000...) */ 1.54807296595325550E+00, 5.63378094641568198E-17, +/* i= 182 atan(40460000...) 3FF8C4E82889748C, 3C903CFF21ED4F81, */ + +/* i= 183 atan(40470000...) */ 1.54906061995310385E+00, 1.07720671947039880E-16, +/* i= 183 atan(40470000...) 3FF8C8F3C9E38564, 3C9F0C61F67DF753, */ + +/* i= 184 atan(40480000...) */ 1.54996600675867957E+00, -3.65867202631610758E-17, +/* i= 184 atan(40480000...) 3FF8CCA927CF0B3D, BC85173F363FCD3B, */ + +/* i= 185 atan(40490000...) */ 1.55079899282174605E+00, 3.88158322748794045E-17, +/* i= 185 atan(40490000...) 3FF8D0129ACD6D1C, 3C866034AEC68494, */ + +/* i= 186 atan(404A0000...) */ 1.55156792769518947E+00, -6.25939220821526366E-17, +/* i= 186 atan(404A0000...) 3FF8D338E42F92C4, BC920A9DC23967F4, */ + +/* i= 187 atan(404B0000...) */ 1.55227992472688747E+00, 1.03058038268892371E-16, +/* i= 187 atan(404B0000...) 3FF8D623796F0778, 3C9DB4574D874450, */ + +/* i= 188 atan(404C0000...) */ 1.55294108165534417E+00, -6.37987893547135838E-17, +/* i= 188 atan(404C0000...) 3FF8D8D8BF65316F, BC9263850ED82243, */ + +/* i= 189 atan(404D0000...) */ 1.55355665560036682E+00, 1.03636378617620221E-16, +/* i= 189 atan(404D0000...) 
3FF8DB5E3944965E, 3C9DDF03D7D94A94, */ + +/* i= 190 atan(404E0000...) */ 1.55413120308095598E+00, -1.10032784474653953E-16, +/* i= 190 atan(404E0000...) 3FF8DDB8AE2ED03E, BC9FB6FC889F3B9F, */ + +/* i= 191 atan(404F0000...) */ 1.55466869295126031E+00, 7.12642375326129392E-17, +/* i= 191 atan(404F0000...) 3FF8DFEC478573A0, 3C948A5F6312C3FA, */ + +/* i= 192 atan(40500000...) */ 1.55517259817441977E+00, 1.48861661196504977E-17, +/* i= 192 atan(40500000...) 3FF8E1FCA98CB633, 3C71299EE93BE016, */ + +}; diff --git a/usr/src/libm/src/mvec/__vTBL_atan2.c b/usr/src/libm/src/mvec/__vTBL_atan2.c new file mode 100644 index 0000000..c8f9066 --- /dev/null +++ b/usr/src/libm/src/mvec/__vTBL_atan2.c @@ -0,0 +1,353 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vTBL_atan2.c 1.3 06/01/31 SMI" + +const double __vlibm_TBL_atan2[] = { + 7.8539816339744827900e-01, 3.0616169978683830179e-17, + 1.0000000000000000000e+00, 0, + 7.7198905126506112140e-01, 2.6989956960083153652e-16, + 9.7353506088256835938e-01, 0, + 7.6068143954461309164e-01, -3.5178810518941914972e-16, + 9.5174932479858398438e-01, 0, + 7.4953661876353638860e-01, -3.2548100004524337476e-16, + 9.3073129653930664062e-01, 0, + 7.3854614984728339522e-01, -2.0775571023910406668e-16, + 9.1042709350585937500e-01, 0, + 7.2770146962041337702e-01, 3.8883249403168348802e-16, + 8.9078664779663085938e-01, 0, + 7.1699492488093774512e-01, -4.0468841511547224071e-16, + 8.7176513671875000000e-01, 0, + 7.0641813488653149022e-01, 5.6902424353981484031e-17, + 8.5331964492797851562e-01, 0, + 6.9596351101035658360e-01, 2.8245513321075021303e-16, + 8.3541154861450195312e-01, 0, + 6.8562363680534943455e-01, -4.2316970721658854064e-16, + 8.1800508499145507812e-01, 0, + 6.7539055666438230219e-01, 4.3535917281300047233e-16, + 8.0106592178344726562e-01, 0, + 6.6525763346931832132e-01, 1.1830431602404727977e-17, + 7.8456401824951171875e-01, 0, + 6.5521767574310185722e-01, -1.7435923100651044208e-16, + 7.6847028732299804688e-01, 0, + 6.4526390999481897381e-01, -1.4741927403093983947e-16, + 7.5275802612304687500e-01, 0, + 6.3538979894204850041e-01, 1.5734535069995660853e-16, + 7.3740243911743164062e-01, 0, + 6.2558914346942717799e-01, -2.8175588856316910960e-16, + 7.2238063812255859375e-01, 0, + 6.1585586476157949676e-01, -4.3056167357725226449e-16, + 7.0767116546630859375e-01, 0, + 6.0618408027576098362e-01, 1.5018013918429320289e-16, + 6.9325399398803710938e-01, 0, + 5.9656817827486730010e-01, 5.5271942033557644157e-17, + 6.7911052703857421875e-01, 0, + 5.8700289083426504533e-01, -8.2411369282676383293e-17, + 6.6522359848022460938e-01, 0, + 5.7748303053627658699e-01, 4.9400383775709159558e-17, + 6.5157699584960937500e-01, 0, + 5.6800353968303252117e-01, 
2.9924431103311109543e-16, + 6.3815546035766601562e-01, 0, + 5.5855953863493823519e-01, -2.0306003403868777403e-16, + 6.2494468688964843750e-01, 0, + 5.4914706708329674711e-01, 2.8255378613779667461e-17, + 6.1193227767944335938e-01, 0, + 5.3976176660618069292e-01, 1.6370248781078747995e-16, + 5.9910583496093750000e-01, 0, + 5.3039888601412332747e-01, -7.6196097360093680134e-17, + 5.8645296096801757812e-01, 0, + 5.2105543924318808990e-01, -2.2400815668154739561e-16, + 5.7396411895751953125e-01, 0, + 5.1172778873967050828e-01, -3.6888136019899681185e-16, + 5.6162929534912109375e-01, 0, + 5.0241199666452196482e-01, -2.5412891474397011281e-16, + 5.4943847656250000000e-01, 0, + 4.9310493954293743712e-01, 4.4132186128251152229e-16, + 5.3738307952880859375e-01, 0, + 4.8380436844750995817e-01, -2.7844387907776656488e-16, + 5.2545595169067382812e-01, 0, + 4.7450670361463753721e-01, -2.0494355197368286028e-16, + 5.1364850997924804688e-01, 0, + 4.6367660027976320691e-01, 3.1709878607954760668e-16, + 5.0003623962402343750e-01, 0, + 4.5304753104003925301e-01, 3.3593436122420574865e-16, + 4.8681926727294921875e-01, 0, + 4.4423658037407065535e-01, 2.1987183192008082015e-17, + 4.7596645355224609375e-01, 0, + 4.3567016972500294258e-01, 3.0118422805369552650e-16, + 4.6550178527832031250e-01, 0, + 4.2733152672544871820e-01, -3.2667693224866479909e-16, + 4.5539522171020507812e-01, 0, + 4.1920540176693954493e-01, -2.2454273841113897647e-16, + 4.4561982154846191406e-01, 0, + 4.1127722812701872357e-01, -3.1620568973494653391e-16, + 4.3615055084228515625e-01, 0, + 4.0353384063084263289e-01, -3.5932009901481421723e-16, + 4.2696499824523925781e-01, 0, + 3.9596319345246833166e-01, -4.0281533417458698585e-16, + 4.1804289817810058594e-01, 0, + 3.8855405220339722661e-01, 1.6132231486045176674e-16, + 4.0936565399169921875e-01, 0, + 3.8129566313738116889e-01, 1.7684657060650804570e-16, + 4.0091586112976074219e-01, 0, + 3.7417884791401867517e-01, 2.6897604227426977619e-16, + 
3.9267849922180175781e-01, 0, + 3.6719421967585041955e-01, -4.5886151448673745001e-17, + 3.8463878631591796875e-01, 0, + 3.6033388248727771241e-01, 1.5804115573136074946e-16, + 3.7678408622741699219e-01, 0, + 3.5358982224579182940e-01, 1.2624619863035782939e-16, + 3.6910200119018554688e-01, 0, + 3.4695498404186952968e-01, 9.3221684607372865177e-17, + 3.6158156394958496094e-01, 0, + 3.4042268308109679964e-01, 2.7697913559445449137e-16, + 3.5421252250671386719e-01, 0, + 3.3398684598563566084e-01, 3.6085337449716011085e-16, + 3.4698557853698730469e-01, 0, + 3.2764182824591436827e-01, 2.0581506352606456186e-16, + 3.3989214897155761719e-01, 0, + 3.2138200938788497041e-01, -1.9015787485430693661e-16, + 3.3292388916015625000e-01, 0, + 3.1520245348069497737e-01, 2.6961839659264087022e-16, + 3.2607340812683105469e-01, 0, + 3.0909871873117023000e-01, -1.5641891686756272625e-16, + 3.1933403015136718750e-01, 0, + 3.0306644308947827682e-01, 2.8801634211591956223e-16, + 3.1269931793212890625e-01, 0, + 2.9710135482774191473e-01, -4.3148994478973365819e-16, + 3.0616307258605957031e-01, 0, + 2.9120015759141004708e-01, -6.8539854790808585159e-17, + 2.9972028732299804688e-01, 0, + 2.8535879880370362827e-01, -1.2231638445300492682e-16, + 2.9336524009704589844e-01, 0, + 2.7957422506893880865e-01, -4.6707752931043135528e-17, + 2.8709340095520019531e-01, 0, + 2.7384352102802367313e-01, -4.1215636366229625876e-16, + 2.8090047836303710938e-01, 0, + 2.6816369484161040049e-01, -2.3700583122400495333e-16, + 2.7478218078613281250e-01, 0, + 2.6253212627627764419e-01, 2.3123213692190889610e-16, + 2.6873469352722167969e-01, 0, + 2.5694635355759309903e-01, -4.0638513814701264145e-16, + 2.6275444030761718750e-01, 0, + 2.5140385572454615470e-01, -3.4795333793554943723e-16, + 2.5683784484863281250e-01, 0, + 2.4500357070096612233e-01, 6.6542334848010259289e-17, + 2.5002646446228027344e-01, 0, + 2.3877766609573036760e-01, -2.7756633678549343650e-16, + 2.4342155456542968750e-01, 0, + 
2.3365669377188336142e-01, 3.2700803838522067998e-16, + 2.3800384998321533203e-01, 0, + 2.2870810463931334766e-01, -4.4279127662219799521e-16, + 2.3278105258941650391e-01, 0, + 2.2391820542294382790e-01, 3.7558889374284208052e-16, + 2.2773718833923339844e-01, 0, + 2.1927501815429550902e-01, -1.4829838176513811186e-16, + 2.2285830974578857422e-01, 0, + 2.1476740847367459253e-01, -2.0535381496063397578e-17, + 2.1813154220581054688e-01, 0, + 2.1038568111737454558e-01, -4.2826767738736168650e-16, + 2.1354568004608154297e-01, 0, + 2.0612057974373865221e-01, 4.2108051749502232359e-16, + 2.0909011363983154297e-01, 0, + 2.0196410359405447821e-01, 3.5157118083511092869e-16, + 2.0475566387176513672e-01, 0, + 1.9790861144712756925e-01, 3.7894950972257700994e-16, + 2.0053362846374511719e-01, 0, + 1.9394752160084305359e-01, 2.8270367403478935534e-16, + 1.9641649723052978516e-01, 0, + 1.9007440763641536563e-01, -2.0842758095683676397e-16, + 1.9239699840545654297e-01, 0, + 1.8628369629742813629e-01, 3.4710917040399448932e-16, + 1.8846881389617919922e-01, 0, + 1.8256998712939509488e-01, 1.1053834120570125251e-16, + 1.8462586402893066406e-01, 0, + 1.7892875067284830237e-01, 3.0486232913366680305e-16, + 1.8086302280426025391e-01, 0, + 1.7535529778449010507e-01, -2.3810135019970148624e-16, + 1.7717504501342773438e-01, 0, + 1.7184559192514736736e-01, 5.1432582846210893916e-17, + 1.7355740070343017578e-01, 0, + 1.6839590847744290159e-01, 3.1605623296041433586e-18, + 1.7000591754913330078e-01, 0, + 1.6500283902547518977e-01, 1.5405422268770998251e-16, + 1.6651678085327148438e-01, 0, + 1.6166306303174859949e-01, 4.0042241517254928672e-16, + 1.6308629512786865234e-01, 0, + 1.5837358268281231943e-01, -2.2786616251622967291e-16, + 1.5971112251281738281e-01, 0, + 1.5513160990288810126e-01, -3.7547723514797166336e-16, + 1.5638816356658935547e-01, 0, + 1.5193468535499299321e-01, 4.3497510505554267446e-16, + 1.5311467647552490234e-01, 0, + 1.4878033155427861089e-01, -2.3102860235324261895e-16, 
+ 1.4988791942596435547e-01, 0, + 1.4566628729590647140e-01, 9.9227592950040279415e-17, + 1.4670538902282714844e-01, 0, + 1.4259050967286590605e-01, -3.3869909683813096906e-18, + 1.4356482028961181641e-01, 0, + 1.3955105903633846509e-01, 1.5500435650773331566e-17, + 1.4046406745910644531e-01, 0, + 1.3654610022831903393e-01, 3.3965918616682805753e-16, + 1.3740110397338867188e-01, 0, + 1.3357402082462854764e-01, 2.7572431581527535421e-16, + 1.3437414169311523438e-01, 0, + 1.3063319828908959153e-01, -3.4667213797076707331e-16, + 1.3138139247894287109e-01, 0, + 1.2772200049776749609e-01, 3.1089261947725651968e-16, + 1.2842106819152832031e-01, 0, + 1.2436931430778752627e-01, -4.0654251891464630059e-16, + 1.2501454353332519531e-01, 0, + 1.2111683701666819957e-01, -3.9381654342464836012e-16, + 1.2171256542205810547e-01, 0, + 1.1844801833536511282e-01, -3.6673155595150283444e-16, + 1.1900508403778076172e-01, 0, + 1.1587365536613614125e-01, -1.5026628801318421951e-16, + 1.1639505624771118164e-01, 0, + 1.1338607085741525538e-01, 1.2886806274050538880e-16, + 1.1387449502944946289e-01, 0, + 1.1097844020819369604e-01, 2.3848343623577768044e-16, + 1.1143630743026733398e-01, 0, + 1.0864456107308662069e-01, 4.2065430313285469408e-16, + 1.0907405614852905273e-01, 0, + 1.0637891628473727934e-01, -4.6883543790348472687e-18, + 1.0678201913833618164e-01, 0, + 1.0417650062205296990e-01, 1.4774925414624453292e-16, + 1.0455501079559326172e-01, 0, + 1.0203276464730581807e-01, -1.5677032794816452332e-16, + 1.0238832235336303711e-01, 0, + 9.9943617083734892503e-02, 3.4511310907979792828e-16, + 1.0027772188186645508e-01, 0, + 9.7905249824711049200e-02, 3.4489485563461708496e-16, + 9.8219275474548339844e-02, 0, + 9.5914316649349906641e-02, -1.3214510886789011569e-17, + 9.6209526062011718750e-02, 0, + 9.3967698614664918466e-02, 1.1048427091217964090e-16, + 9.4245254993438720703e-02, 0, + 9.2062564267554769515e-02, -3.7297463814697759309e-16, + 9.2323541641235351562e-02, 0, + 
9.0196252506350660383e-02, -3.5280143043576718079e-16, + 9.0441644191741943359e-02, 0, + 8.8366391663268650802e-02, -6.1140673227541621183e-17, + 8.8597118854522705078e-02, 0, + 8.6570782100201526532e-02, -2.0998844594957629702e-16, + 8.6787700653076171875e-02, 0, + 8.4807337678923566671e-02, 3.9530981588194673068e-16, + 8.5011243820190429688e-02, 0, + 8.3074323040850828193e-02, -4.3022503210464894539e-17, + 8.3265960216522216797e-02, 0, + 8.1369880712663267275e-02, -6.3063867569127169744e-18, + 8.1549942493438720703e-02, 0, + 7.9692445771216036121e-02, -5.0787623072962671502e-17, + 7.9861581325531005859e-02, 0, + 7.8040568735575632786e-02, -3.8810063021216721741e-16, + 7.8199386596679687500e-02, 0, + 7.6412797391314235540e-02, 4.1246529500495762995e-16, + 7.6561868190765380859e-02, 0, + 7.4807854772808823896e-02, -3.7025599052186724156e-16, + 7.4947714805603027344e-02, 0, + 7.3224639528778112663e-02, 4.2209138483206712401e-17, + 7.3355793952941894531e-02, 0, + 7.1661929761571485642e-02, -3.2074473649855177622e-16, + 7.1784853935241699219e-02, 0, + 7.0118738881148168218e-02, -2.5371257235753296804e-16, + 7.0233881473541259766e-02, 0, + 6.8594137996416115755e-02, 3.3796987842548399135e-16, + 6.8701922893524169922e-02, 0, + 6.7087137393172291411e-02, 5.5061492696328852397e-17, + 6.7187964916229248047e-02, 0, + 6.5596983299946565182e-02, -2.1580863111502565280e-16, + 6.5691232681274414062e-02, 0, + 6.4122802037412718335e-02, -3.1315661827469233434e-16, + 6.4210832118988037109e-02, 0, + 6.2426231582525915087e-02, -2.5758980071296622188e-16, + 6.2507450580596923828e-02, 0, + 6.0781559928021700046e-02, 1.3736899336217710591e-16, + 6.0856521129608154297e-02, 0, + 5.9432882624005145544e-02, 2.2246097394328856474e-16, + 5.9502959251403808594e-02, 0, + 5.8132551274581167888e-02, -6.2525053236379489390e-18, + 5.8198124170303344727e-02, 0, + 5.6876611930681164608e-02, -2.6589930995607417149e-16, + 5.6938022375106811523e-02, 0, + 5.5661522654748551986e-02, 
-4.2736362859832186197e-16, + 5.5719077587127685547e-02, 0, + 5.4484124463757943602e-02, -1.6708067365310384253e-16, + 5.4538100957870483398e-02, 0, + 5.3341582449436764080e-02, 3.3271673004611311850e-17, + 5.3392231464385986328e-02, 0, + 5.2231267345892007370e-02, -3.5593396674200571616e-16, + 5.2278816699981689453e-02, 0, + 5.1150874758829623090e-02, 1.4432815841187114832e-16, + 5.1195532083511352539e-02, 0, + 5.0098306612679444072e-02, 9.4680943793589404083e-17, + 5.0140261650085449219e-02, 0, + 4.9071641675614507960e-02, 2.1131168520301896817e-16, + 4.9111068248748779297e-02, 0, + 4.8069135772851545596e-02, 1.6035336741307516296e-16, + 4.8106193542480468750e-02, 0, + 4.7089192241088539959e-02, -2.2491738698796901479e-16, + 4.7124028205871582031e-02, 0, + 4.6130362086062248750e-02, -1.5111423469578965206e-16, + 4.6163111925125122070e-02, 0, + 4.5191314382707403752e-02, 4.1989325207399786612e-16, + 4.5222103595733642578e-02, 0, + 4.4270836390474244126e-02, -4.1432635292331004454e-16, + 4.4299781322479248047e-02, 0, + 4.3367774164955186222e-02, -3.0615383054587355892e-16, + 4.3394982814788818359e-02, 0, + 4.2481121875321825598e-02, -3.6730166956273555173e-16, + 4.2506694793701171875e-02, 0, + 4.1609902899457651415e-02, -4.4226425958068821782e-16, + 4.1633933782577514648e-02, 0, + 4.0753259129372665370e-02, 1.9801161516527046872e-16, + 4.0775835514068603516e-02, 0, + 3.9910361780060910064e-02, 8.2560620036613164573e-18, + 3.9931565523147583008e-02, 0, + 3.9080441183869218946e-02, 3.9908991939242971628e-17, + 3.9100348949432373047e-02, 0, + 3.8262816593271686827e-02, 9.5182237812195590276e-17, + 3.8281500339508056641e-02, 0, + 3.7456806948784837630e-02, 1.5213508760679563439e-16, + 3.7474334239959716797e-02, 0, + 3.6661849947035918262e-02, 7.3335516005184616486e-17, + 3.6678284406661987305e-02, 0, + 3.5877353272533163420e-02, -1.3007348019891714540e-16, + 3.5892754793167114258e-02, 0, + 3.5102754135096780885e-02, -2.9903662298950558656e-16, + 
3.5117179155349731445e-02, 0, + 3.4337638360670830195e-02, 2.9656295131966114331e-16, + 3.4351140260696411133e-02, 0, + 3.3581472523789734907e-02, 3.4810947205572817820e-16, + 3.3594101667404174805e-02, 0, + 3.2833871859357266487e-02, -3.8885440174405159838e-16, + 3.2845675945281982422e-02, 0, + 3.2094421679560447558e-02, 5.8805134853032009978e-17, + 3.2105445861816406250e-02, 0, + 3.1243584858944295490e-02, 2.8737383773884313066e-17, + 3.1253755092620849609e-02, 0, + 0, 0, 0, 0 +}; diff --git a/usr/src/libm/src/mvec/__vTBL_rsqrt.c b/usr/src/libm/src/mvec/__vTBL_rsqrt.c new file mode 100644 index 0000000..979260f --- /dev/null +++ b/usr/src/libm/src/mvec/__vTBL_rsqrt.c @@ -0,0 +1,168 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vTBL_rsqrt.c 1.3 06/01/31 SMI" + +#pragma align 32 (__vlibm_TBL_rsqrt) + +/* + i = [0,128] + TBL[2*i ] = (double)(1.0 / sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 46)))); + TBL[2*i+1] = (double)(1.0 / sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 46))) - TBL[2*i]); +*/ + +const double __vlibm_TBL_rsqrt[] = { + 1.4142135623730951455e+00, -9.6672933134529134511e-17, + 1.4032928308912466786e+00, 6.4948026279769118919e-17, + 1.3926212476455828160e+00, -1.1055881989569260189e-16, + 1.3821894809301762397e+00, -6.3734410461405640301e-17, + 1.3719886811400707760e+00, -7.6980807939588139983e-17, + 1.3620104492139977204e+00, 2.8850217265224690802e-17, + 1.3522468075656264297e+00, 9.4322960168092127774e-17, + 1.3426901732747025253e+00, 4.7150841580269266495e-18, + 1.3333333333333332593e+00, 7.4014868308343765253e-17, + 1.3241694217637887121e+00, 7.7131873618846925903e-18, + 1.3151918984428583315e+00, -2.0328800352543524759e-17, + 1.3063945294843617440e+00, -9.1582083631189420602e-17, + 1.2977713690461003537e+00, -4.8412149406758561904e-17, + 1.2893167424406084542e+00, 2.3274915882478143921e-17, + 1.2810252304406970492e+00, 1.8704771066280918649e-17, + 1.2728916546811681609e+00, -8.8457926431820830415e-17, + 1.2649110640673517647e+00, -3.1906346897860143141e-17, + 1.2570787221094177344e+00, 8.6769863266554017163e-17, + 1.2493900951088485751e+00, -5.0929983362732175622e-17, + 1.2418408411301324890e+00, 8.8840637867087758165e-17, + 1.2344267996967352996e+00, -1.7516410189877601154e-17, + 1.2271439821557927896e+00, -9.0396673750943792696e-17, + 1.2199885626608373279e+00, 2.7575041782657058896e-18, + 1.2129568697262453902e+00, 5.0766000649864922701e-17, + 1.2060453783110545167e+00, -2.6141724617295359467e-17, + 1.1992507023933782762e+00, 3.5079005878814235254e-17, + 1.1925695879998878812e+00, -4.3139588510944642176e-17, + 1.1859989066577618644e+00, 2.2700827457352136295e-17, + 1.1795356492391770864e+00, -1.8736930872699025425e-17, + 
1.1731769201708264205e+00, -1.0717525135280878089e-16, + 1.1669199319831564665e+00, -1.9717488453279445066e-17, + 1.1607620001760186046e+00, 7.0604910402531185787e-17, + 1.1547005383792514621e+00, 6.6900561478712689458e-17, + 1.1487330537883810866e+00, -1.1022220198146414245e-16, + 1.1428571428571427937e+00, 6.3441315692866084503e-17, + 1.1370704872299222110e+00, 1.0524397995692614457e-16, + 1.1313708498984760276e+00, 1.1479495462389219323e-17, + 1.1257560715684669095e+00, 6.0574394710210801304e-17, + 1.1202240672224077489e+00, 9.3922898547554319150e-17, + 1.1147728228665882977e+00, -4.5491044078590048284e-17, + 1.1094003924504582947e+00, -5.0709657003823779908e-17, + 1.1041048949477667573e+00, -8.8666430365492392908e-18, + 1.0988845115895122806e+00, -8.8730050685366661178e-17, + 1.0937374832394612945e+00, -1.0139924803906119049e-16, + 1.0886621079036347126e+00, -2.3035347176474180687e-18, + 1.0836567383657542685e+00, -9.7789672372212451307e-17, + 1.0787197799411873955e+00, -5.7527821233647078927e-17, + 1.0738496883424388795e+00, 1.9216919863927710029e-17, + 1.0690449676496975862e+00, -4.7415720102268737205e-17, + 1.0643041683803828867e+00, -3.0438242811018816132e-19, + 1.0596258856520350822e+00, -3.6947737086388254690e-17, + 1.0550087574332591700e+00, 3.7548847295491266968e-17, + 1.0504514628777803509e+00, 1.0231500228552561044e-16, + 1.0459527207369814228e+00, 8.0806674896943551777e-17, + 1.0415112878465908608e+00, 7.8292411070687721348e-17, + 1.0371259576834630511e+00, -2.6664053809928624719e-17, + 1.0327955589886446131e+00, -1.1033761728824692438e-16, + 1.0285189544531601058e+00, -7.0307587734203009158e-17, + 1.0242950394631678002e+00, -1.0770393913594349379e-17, + 1.0201227409013413627e+00, -9.8717216425570547616e-17, + 1.0160010160015240377e+00, -3.5150724174046424206e-17, + 1.0119288512538813229e+00, 6.3292764451724411186e-17, + 1.0079052613579393416e+00, -6.9021193162451496902e-17, + 1.0039292882210537616e+00, -6.9245436618476016139e-17, + 
1.0000000000000000000e+00, 0.0000000000000000000e+00, + 9.9227787671366762812e-01, 2.1405178579048182592e-17, + 9.8473192783466190203e-01, -4.0158639458782051420e-17, + 9.7735555485044178781e-01, -3.4924457286878990179e-19, + 9.7014250014533187638e-01, 1.7693410507027811240e-17, + 9.6308682468615358641e-01, 1.9691102487554127121e-17, + 9.5618288746751489704e-01, 1.4935376108861049295e-17, + 9.4942532655508271588e-01, -5.3278073247766967031e-17, + 9.4280904158206335630e-01, 9.5662462186576827694e-18, + 9.3632917756904454620e-01, -3.4655680606790736102e-17, + 9.2998110995055427441e-01, -2.8820206372616569176e-17, + 9.2376043070340119190e-01, 3.1315988690467019525e-17, + 9.1766293548224708854e-01, -2.4907828666661326139e-17, + 9.1168461167710357351e-01, 1.7178891233165183242e-17, + 9.0582162731567661407e-01, -1.3578665987704751967e-17, + 9.0007032074081916306e-01, -3.9003513621620290514e-17, + 8.9442719099991585541e-01, 2.3156459848049343849e-17, + 8.8888888888888883955e-01, 4.9343245538895843502e-17, + 8.8345220859877238162e-01, -2.7808199947420238654e-17, + 8.7811407991752277180e-01, 1.2001012979479060187e-17, + 8.7287156094396955996e-01, -3.4900338036123033814e-17, + 8.6772183127462465535e-01, 3.2650033503527982608e-17, + 8.6266218562750729415e-01, 3.1665473509444755614e-17, + 8.5769002787023584933e-01, 1.6930198090043138729e-17, + 8.5280286542244176928e-01, -3.2089317494821048697e-17, + 8.4799830400508802164e-01, -3.8599776100732649845e-17, + 8.4327404271156780613e-01, 1.5736536222265119505e-17, + 8.3862786937753464045e-01, -3.8316227580533944669e-18, + 8.3405765622829908246e-01, -3.1744458177500410304e-17, + 8.2956135578434020417e-01, 1.0522097091084975821e-17, + 8.2513699700703468931e-01, 3.6488948923760358306e-17, + 8.2078268166812329287e-01, -1.6507622733959848503e-17, + 8.1649658092772603446e-01, -1.7276510382355637441e-18, + 8.1227693210689522196e-01, 1.2819865235943699943e-17, + 8.0812203564176865456e-01, -5.5241676076873786747e-17, + 
8.0403025220736967782e-01, -1.7427816411530239645e-17, + 8.0000000000000004441e-01, -4.4408920985006264082e-17, + 7.9602975216799132241e-01, -1.3876860654527447191e-17, + 7.9211803438133943089e-01, 1.6428787126265500350e-17, + 7.8826342253143455441e-01, -3.2571002717425679181e-17, + 7.8446454055273617811e-01, -5.0417296289807987128e-17, + 7.8072005835882651859e-01, 2.4898247108034524775e-17, + 7.7702868988581130782e-01, 3.6763699589769887870e-17, + 7.7338919123653082632e-01, 4.9918835031221789176e-17, + 7.6980035891950104876e-01, -2.9414493989201982553e-17, + 7.6626102817692109959e-01, 1.4524522292996552738e-17, + 7.6277007139647390321e-01, -5.0856154603265522966e-17, + 7.5932639660199918730e-01, 8.9842992531287086391e-18, + 7.5592894601845450619e-01, -5.1765894871838619595e-17, + 7.5257669470687782454e-01, 9.6579665081799721467e-18, + 7.4926864926535519107e-01, -1.8380676468162380710e-17, + 7.4600384659225105199e-01, -3.9485726539632463848e-17, + 7.4278135270820744296e-01, 9.6276948503597478238e-18, + 7.3960026163363878915e-01, 4.0208430305794580702e-17, + 7.3645969431865865307e-01, 4.0077997112003520937e-17, + 7.3335879762256905856e-01, -2.2493399096927370000e-17, + 7.3029674334022143256e-01, 5.2048227304015206987e-17, + 7.2727272727272729291e-01, -2.0185873175002846750e-17, + 7.2428596834014824513e-01, 2.3633090263928220565e-18, + 7.2133570773394584119e-01, -9.5131613777431479940e-18, + 7.1842120810709964029e-01, -3.7440154323260191964e-17, + 7.1554175279993270653e-01, -3.6792926140636546510e-18, + 7.1269664509979835376e-01, 5.3969540859927280847e-18, + 7.0988520753289097165e-01, 4.4593566535489654887e-17, + 7.0710678118654757274e-01, -4.8336466567264567255e-17, +}; + diff --git a/usr/src/libm/src/mvec/__vTBL_sincos.c b/usr/src/libm/src/mvec/__vTBL_sincos.c new file mode 100644 index 0000000..85d0030 --- /dev/null +++ b/usr/src/libm/src/mvec/__vTBL_sincos.c @@ -0,0 +1,333 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of 
the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vTBL_sincos.c 1.3 06/01/31 SMI" + +const double __vlibm_TBL_sincos_hi[] = { + 1.55614992773556032e-01, 9.87817783816471895e-01, +-1.55614992773556032e-01,-9.87817783816471895e-01, + 1.59472458931843419e-01, 9.87202377854830448e-01, +-1.59472458931843419e-01,-9.87202377854830448e-01, + 1.63327491736612845e-01, 9.86571908399497599e-01, +-1.63327491736612845e-01,-9.86571908399497599e-01, + 1.67180032364806747e-01, 9.85926385070661437e-01, +-1.67180032364806747e-01,-9.85926385070661437e-01, + 1.71030022031395029e-01, 9.85265817718213865e-01, +-1.71030022031395029e-01,-9.85265817718213865e-01, + 1.74877401990272185e-01, 9.84590216421599829e-01, +-1.74877401990272185e-01,-9.84590216421599829e-01, + 1.78722113535153659e-01, 9.83899591489663994e-01, +-1.78722113535153659e-01,-9.83899591489663994e-01, + 1.82564098000471547e-01, 9.83193953460493097e-01, +-1.82564098000471547e-01,-9.83193953460493097e-01, + 1.86403296762269882e-01, 9.82473313101255297e-01, +-1.86403296762269882e-01,-9.82473313101255297e-01, + 1.90239651239099056e-01, 9.81737681408035745e-01, 
+-1.90239651239099056e-01,-9.81737681408035745e-01, + 1.94073102892909799e-01, 9.80987069605669171e-01, +-1.94073102892909799e-01,-9.80987069605669171e-01, + 1.97903593229946273e-01, 9.80221489147568126e-01, +-1.97903593229946273e-01,-9.80221489147568126e-01, + 2.01731063801638799e-01, 9.79440951715548347e-01, +-2.01731063801638799e-01,-9.79440951715548347e-01, + 2.05555456205495507e-01, 9.78645469219650899e-01, +-2.05555456205495507e-01,-9.78645469219650899e-01, + 2.09376712085993649e-01, 9.77835053797959763e-01, +-2.09376712085993649e-01,-9.77835053797959763e-01, + 2.13194773135469889e-01, 9.77009717816417433e-01, +-2.13194773135469889e-01,-9.77009717816417433e-01, + 2.17009581095010146e-01, 9.76169473868635285e-01, +-2.17009581095010146e-01,-9.76169473868635285e-01, + 2.20821077755338491e-01, 9.75314334775702285e-01, +-2.20821077755338491e-01,-9.75314334775702285e-01, + 2.24629204957705303e-01, 9.74444313585988930e-01, +-2.24629204957705303e-01,-9.74444313585988930e-01, + 2.28433904594774750e-01, 9.73559423574948180e-01, +-2.28433904594774750e-01,-9.73559423574948180e-01, + 2.32235118611511471e-01, 9.72659678244912729e-01, +-2.32235118611511471e-01,-9.72659678244912729e-01, + 2.36032789006066335e-01, 9.71745091324889509e-01, +-2.36032789006066335e-01,-9.71745091324889509e-01, + 2.39826857830661572e-01, 9.70815676770349412e-01, +-2.39826857830661572e-01,-9.70815676770349412e-01, + 2.43617267192474896e-01, 9.69871448763015342e-01, +-2.43617267192474896e-01,-9.69871448763015342e-01, + 2.47403959254522937e-01, 9.68912421710644733e-01, +-2.47403959254522937e-01,-9.68912421710644733e-01, + 2.54965960415878490e-01, 9.66950029230677854e-01, +-2.54965960415878490e-01,-9.66950029230677854e-01, + 2.62512399769153304e-01, 9.64928619104771013e-01, +-2.62512399769153304e-01,-9.64928619104771013e-01, + 2.70042816718585044e-01, 9.62848314709379705e-01, +-2.70042816718585044e-01,-9.62848314709379705e-01, + 2.77556751646336308e-01, 9.60709243015561931e-01, 
+-2.77556751646336308e-01,-9.60709243015561931e-01, + 2.85053745940547443e-01, 9.58511534581228619e-01, +-2.85053745940547443e-01,-9.58511534581228619e-01, + 2.92533342023327536e-01, 9.56255323543175328e-01, +-2.92533342023327536e-01,-9.56255323543175328e-01, + 2.99995083378683025e-01, 9.53940747608894690e-01, +-2.99995083378683025e-01,-9.53940747608894690e-01, + 3.07438514580380851e-01, 9.51567948048172241e-01, +-3.07438514580380851e-01,-9.51567948048172241e-01, + 3.14863181319745222e-01, 9.49137069684462986e-01, +-3.14863181319745222e-01,-9.49137069684462986e-01, + 3.22268630433386605e-01, 9.46648260886053361e-01, +-3.22268630433386605e-01,-9.46648260886053361e-01, + 3.29654409930860148e-01, 9.44101673557004362e-01, +-3.29654409930860148e-01,-9.44101673557004362e-01, + 3.37020069022253066e-01, 9.41497463127881073e-01, +-3.37020069022253066e-01,-9.41497463127881073e-01, + 3.44365158145698402e-01, 9.38835788546265482e-01, +-3.44365158145698402e-01,-9.38835788546265482e-01, + 3.51689228994814085e-01, 9.36116812267055343e-01, +-3.51689228994814085e-01,-9.36116812267055343e-01, + 3.58991834546065036e-01, 9.33340700242548449e-01, +-3.58991834546065036e-01,-9.33340700242548449e-01, + 3.66272529086047571e-01, 9.30507621912314287e-01, +-3.66272529086047571e-01,-9.30507621912314287e-01, + 3.73530868238692970e-01, 9.27617750192851864e-01, +-3.73530868238692970e-01,-9.27617750192851864e-01, + 3.80766408992390171e-01, 9.24671261467036043e-01, +-3.80766408992390171e-01,-9.24671261467036043e-01, + 3.87978709727025028e-01, 9.21668335573351927e-01, +-3.87978709727025028e-01,-9.21668335573351927e-01, + 3.95167330240934256e-01, 9.18609155794918308e-01, +-3.95167330240934256e-01,-9.18609155794918308e-01, + 4.02331831777773097e-01, 9.15493908848301174e-01, +-4.02331831777773097e-01,-9.15493908848301174e-01, + 4.09471777053295072e-01, 9.12322784872117820e-01, +-4.09471777053295072e-01,-9.12322784872117820e-01, + 4.16586730282041129e-01, 9.09095977415431022e-01, 
+-4.16586730282041129e-01,-9.09095977415431022e-01, + 4.23676257203938034e-01, 9.05813683425936378e-01, +-4.23676257203938034e-01,-9.05813683425936378e-01, + 4.30739925110803223e-01, 9.02476103237941474e-01, +-4.30739925110803223e-01,-9.02476103237941474e-01, + 4.37777302872755125e-01, 8.99083440560138447e-01, +-4.37777302872755125e-01,-8.99083440560138447e-01, + 4.44787960964527218e-01, 8.95635902463170708e-01, +-4.44787960964527218e-01,-8.95635902463170708e-01, + 4.51771471491683785e-01, 8.92133699366994382e-01, +-4.51771471491683785e-01,-8.92133699366994382e-01, + 4.58727408216736576e-01, 8.88577045028035584e-01, +-4.58727408216736576e-01,-8.88577045028035584e-01, + 4.65655346585160168e-01, 8.84966156526143299e-01, +-4.65655346585160168e-01,-8.84966156526143299e-01, + 4.72554863751304455e-01, 8.81301254251340649e-01, +-4.72554863751304455e-01,-8.81301254251340649e-01, + 4.79425538604203005e-01, 8.77582561890372759e-01, +-4.79425538604203005e-01,-8.77582561890372759e-01, + 4.93078685753923052e-01, 8.69984718058417372e-01, +-4.93078685753923052e-01,-8.69984718058417372e-01, + 5.06611454814257400e-01, 8.62174479934880500e-01, +-5.06611454814257400e-01,-8.62174479934880500e-01, + 5.20020541953727045e-01, 8.54153754277385380e-01, +-5.20020541953727045e-01,-8.54153754277385380e-01, + 5.33302673536020122e-01, 8.45924499231067939e-01, +-5.33302673536020122e-01,-8.45924499231067939e-01, + 5.46454606919203556e-01, 8.37488723850523642e-01, +-5.46454606919203556e-01,-8.37488723850523642e-01, + 5.59473131247366862e-01, 8.28848487609325724e-01, +-5.59473131247366862e-01,-8.28848487609325724e-01, + 5.72355068234507214e-01, 8.20005899897234047e-01, +-5.72355068234507214e-01,-8.20005899897234047e-01, + 5.85097272940462210e-01, 8.10963119505217933e-01, +-5.85097272940462210e-01,-8.10963119505217933e-01, + 5.97696634538701477e-01, 8.01722354098418410e-01, +-5.97696634538701477e-01,-8.01722354098418410e-01, + 6.10150077075791386e-01, 7.92285859677178572e-01, 
+-6.10150077075791386e-01,-7.92285859677178572e-01, + 6.22454560222343689e-01, 7.82655940026272812e-01, +-6.22454560222343689e-01,-7.82655940026272812e-01, + 6.34607080015269331e-01, 7.72834946152471503e-01, +-6.34607080015269331e-01,-7.72834946152471503e-01, + 6.46604669591152370e-01, 7.62825275710576234e-01, +-6.46604669591152370e-01,-7.62825275710576234e-01, + 6.58444399910567579e-01, 7.52629372418066489e-01, +-6.58444399910567579e-01,-7.52629372418066489e-01, + 6.70123380473162888e-01, 7.42249725458501319e-01, +-6.70123380473162888e-01,-7.42249725458501319e-01, + 6.81638760023334123e-01, 7.31688868873820897e-01, +-6.81638760023334123e-01,-7.31688868873820897e-01, + 6.92987727246317964e-01, 7.20949380945696383e-01, +-6.92987727246317964e-01,-7.20949380945696383e-01, + 7.04167511454533712e-01, 7.10033883566079660e-01, +-7.04167511454533712e-01,-7.10033883566079660e-01 +}; + +const double __vlibm_TBL_sincos_lo[] = { + 8.88605337234228782e-18, 4.91917302237681002e-17, +-8.88605337234228782e-18,-4.91917302237681002e-17, + 5.81822082653163949e-19, 4.19401745952789211e-17, +-5.81822082653163949e-19,-4.19401745952789211e-17, + 5.48356943034715901e-18,-1.03274445882754459e-17, +-5.48356943034715901e-18, 1.03274445882754459e-17, +-1.21877614400540502e-17,-1.63494100549760754e-18, + 1.21877614400540502e-17, 1.63494100549760754e-18, +-9.95477472645292259e-18,-4.92572126294455489e-17, + 9.95477472645292259e-18, 4.92572126294455489e-17, + 4.43433505081671336e-18,-2.26634179854541132e-17, +-4.43433505081671336e-18, 2.26634179854541132e-17, +-1.62404059010738783e-20,-2.16479885316442748e-17, + 1.62404059010738783e-20, 2.16479885316442748e-17, + 7.94348727702255030e-18,-2.49458400454010874e-17, +-7.94348727702255030e-18, 2.49458400454010874e-17, + 2.34937969012815731e-18,-3.91992037542008779e-17, +-2.34937969012815731e-18, 3.91992037542008779e-17, + 6.04001694249999295e-18, 3.13336233097345808e-17, +-6.04001694249999295e-18,-3.13336233097345808e-17, +-7.83274121019861488e-18, 
1.96784118087030288e-17, + 7.83274121019861488e-18,-1.96784118087030288e-17, + 1.16502095128541978e-17,-2.95181339018270543e-17, +-1.16502095128541978e-17, 2.95181339018270543e-17, + 5.58723281546011280e-18, 1.31087695215267578e-17, +-5.58723281546011280e-18,-1.31087695215267578e-17, + 1.06518785731668444e-17,-3.07669849664887505e-17, +-1.06518785731668444e-17, 3.07669849664887505e-17, +-5.53640369317216307e-18, 2.99100284927694838e-17, + 5.53640369317216307e-18,-2.99100284927694838e-17, + 1.22477058822641605e-18,-4.86093565810892311e-17, +-1.22477058822641605e-18, 4.86093565810892311e-17, + 1.11700710733643761e-17,-7.85069060928502747e-18, +-1.11700710733643761e-17, 7.85069060928502747e-18, +-1.47298004525206156e-19, 4.12921182559656912e-17, + 1.47298004525206156e-19,-4.12921182559656912e-17, +-1.05859041643290307e-17, 4.99012883492139510e-17, + 1.05859041643290307e-17,-4.99012883492139510e-17, +-4.98254439531455880e-18,-8.05559790337166344e-18, + 4.98254439531455880e-18, 8.05559790337166344e-18, +-8.31808085268720599e-18, 2.39202645464901648e-17, + 8.31808085268720599e-18,-2.39202645464901648e-17, +-9.89486060733470012e-19,-4.18461124842153636e-17, + 9.89486060733470012e-19, 4.18461124842153636e-17, +-7.26081066097971201e-18, 5.12857925321536470e-17, + 7.26081066097971201e-18,-5.12857925321536470e-17, +-9.57516421953495973e-18, 2.52768896842457810e-18, + 9.57516421953495973e-18,-2.52768896842457810e-18, +-7.53102495590705992e-18, 5.07143666240393522e-17, + 7.53102495590705992e-18,-5.07143666240393522e-17, +-2.23100354354259536e-17,-3.23777029770769223e-17, + 2.23100354354259536e-17, 3.23777029770769223e-17, +-2.25345975279021249e-17,-3.03455426810186255e-18, + 2.25345975279021249e-17, 3.03455426810186255e-18, +-1.21032650978877771e-17,-4.64600977172424097e-18, + 1.21032650978877771e-17, 4.64600977172424097e-18, + 1.76740702627918219e-17,-2.80782706351672909e-17, +-1.76740702627918219e-17, 2.80782706351672909e-17, +-1.81620831076181184e-17, 
8.13462149294625475e-18, + 1.81620831076181184e-17,-8.13462149294625475e-18, + 7.51694493032735190e-18,-3.14845086884162891e-17, +-7.51694493032735190e-18, 3.14845086884162891e-17, + 2.60639277793073401e-17, 4.37575894717349784e-17, +-2.60639277793073401e-17,-4.37575894717349784e-17, + 1.10043664427652965e-19,-3.86148346756741172e-17, +-1.10043664427652965e-19, 3.86148346756741172e-17, + 2.85898059254855721e-17, 4.14914804609944515e-17, +-2.85898059254855721e-17,-4.14914804609944515e-17, + 2.09377335812660597e-17,-3.91168333493415196e-17, +-2.09377335812660597e-17, 3.91168333493415196e-17, + 2.35998378957031002e-17,-1.60176532845458484e-17, +-2.35998378957031002e-17, 1.60176532845458484e-17, + 1.03122798607872161e-17,-4.85238302367970955e-18, +-1.03122798607872161e-17, 4.85238302367970955e-18, + 5.88166458751798880e-18, 6.91932945992178774e-18, +-5.88166458751798880e-18,-6.91932945992178774e-18, +-2.56162087360699421e-17,-5.23503020396832165e-17, + 2.56162087360699421e-17, 5.23503020396832165e-17, + 1.74954828401588476e-17,-1.32285954777808795e-17, +-1.74954828401588476e-17, 1.32285954777808795e-17, +-9.93881456210652418e-18, 4.48876000332807380e-18, + 9.93881456210652418e-18,-4.48876000332807380e-18, +-2.37566914410618903e-17, 4.53509425735919737e-17, + 2.37566914410618903e-17,-4.53509425735919737e-17, + 2.13725286462113737e-17, 5.54441253880345633e-17, +-2.13725286462113737e-17,-5.54441253880345633e-17, + 1.75979951033595287e-17,-8.55069309786724315e-18, +-1.75979951033595287e-17, 8.55069309786724315e-18, +-1.96134878714142281e-17,-4.05641501045149965e-17, + 1.96134878714142281e-17, 4.05641501045149965e-17, + 1.44138754527020067e-17, 5.41337556683804221e-17, +-1.44138754527020067e-17,-5.41337556683804221e-17, +-5.67940300009126604e-18, 2.63490402114133324e-17, + 5.67940300009126604e-18,-2.63490402114133324e-17, +-9.61085068253371493e-18, 2.92000611384121121e-17, + 9.61085068253371493e-18,-2.92000611384121121e-17, +-2.33180070006887094e-17, 
4.28646664908052081e-17, + 2.33180070006887094e-17,-4.28646664908052081e-17, +-2.62128796074765330e-17, 3.11249067465132618e-17, + 2.62128796074765330e-17,-3.11249067465132618e-17, + 7.64345629962023030e-18, 9.07695177507561595e-18, +-7.64345629962023030e-18,-9.07695177507561595e-18, +-6.65539297734492513e-18,-8.85404388576271590e-18, + 6.65539297734492513e-18, 8.85404388576271590e-18, +-8.23407394209890257e-18, 2.31606552113801660e-17, + 8.23407394209890257e-18,-2.31606552113801660e-17, + 1.60809820962183558e-17,-4.03449199835716708e-17, +-1.60809820962183558e-17, 4.03449199835716708e-17, + 1.45987039105142601e-17,-7.69055777598735693e-18, +-1.45987039105142601e-17, 7.69055777598735693e-18, +-3.60879070379054568e-18,-4.97307318930606626e-17, + 3.60879070379054568e-18, 4.97307318930606626e-17, +-5.10396986055601290e-18,-4.26231498642799968e-17, + 5.10396986055601290e-18, 4.26231498642799968e-17, + 5.60508397387175474e-18, 1.65738511074092287e-17, +-5.60508397387175474e-18,-1.65738511074092287e-17, +-3.26941342361816774e-17, 4.41324275781058045e-18, + 3.26941342361816774e-17,-4.41324275781058045e-18, +-3.98326674569845477e-17, 5.42056510267528622e-18, + 3.98326674569845477e-17,-5.42056510267528622e-18, + 5.12931811503204399e-17, 1.54950664735032887e-17, +-5.12931811503204399e-17,-1.54950664735032887e-17, + 8.39975484092950739e-18, 4.33370260439483957e-17, +-8.39975484092950739e-18,-4.33370260439483957e-17, + 1.57556551448872803e-17, 1.11639354066174440e-17, +-1.57556551448872803e-17,-1.11639354066174440e-17, + 2.65758723572153157e-17,-3.91243174820912803e-17, +-2.65758723572153157e-17, 3.91243174820912803e-17, +-5.48839724611618050e-17,-3.09133348612217870e-17, + 5.48839724611618050e-17, 3.09133348612217870e-17, + 5.45032359305438502e-17, 4.01345333110870077e-17, +-5.45032359305438502e-17,-4.01345333110870077e-17, +-1.47982699075898800e-17,-2.90497793128345697e-17, + 1.47982699075898800e-17, 2.90497793128345697e-17, 
+-6.04903576570970714e-18,-1.47407164121148702e-17, + 6.04903576570970714e-18, 1.47407164121148702e-17, +-3.45685823926249648e-17, 4.23101492189102265e-17, + 3.45685823926249648e-17,-4.23101492189102265e-17, + 4.56764771439328899e-19, 1.66729950215466278e-17, +-4.56764771439328899e-19,-1.66729950215466278e-17, +-3.77363867003067107e-17,-1.29709930131505256e-17, + 3.77363867003067107e-17, 1.29709930131505256e-17, + 6.18353672557495936e-18,-1.23393036048695210e-17, +-6.18353672557495936e-18, 1.23393036048695210e-17, + 4.41046731319790287e-17,-1.04758243065127675e-17, +-4.41046731319790287e-17, 1.04758243065127675e-17, +-5.35432907989094549e-17, 3.49498670147881544e-17, + 5.35432907989094549e-17,-3.49498670147881544e-17, +-3.94095700584824985e-17, 1.50527221189129099e-17, + 3.94095700584824985e-17,-1.50527221189129099e-17, +}; diff --git a/usr/src/libm/src/mvec/__vTBL_sincos2.c b/usr/src/libm/src/mvec/__vTBL_sincos2.c new file mode 100644 index 0000000..d067bdd --- /dev/null +++ b/usr/src/libm/src/mvec/__vTBL_sincos2.c @@ -0,0 +1,145 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vTBL_sincos2.c 1.3 06/01/31 SMI" + +/* + * Let arg(x) denote a double precision number near x such that both + * sin(arg(x)) and cos(arg(x)) are approximated by double precision + * numbers to within a relative error less than 2^-61. + * + * Then for i = 5, ..., 101 + * + * __vlibm_TBL_sincos2[4*i] := arg(i/128), + * __vlibm_TBL_sincos2[4*i+1] := sin(arg(i/128)), and + * __vlibm_TBL_sincos2[4*i+2] := cos(arg(i/128)) + * + * (For i = 0, ..., 4, use zero instead of arg(i/128) above.) + */ +const double __vlibm_TBL_sincos2[] = { + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 0.0000000000000000000e+00, 0.0000000000000000000e+00, 1.0000000000000000000e+00, 0.0, + 3.9062500000301640657e-02, 3.9052566650723562203e-02, 9.9923715755469721955e-01, 0.0, + 4.6874999999606224710e-02, 4.6857835747740897436e-02, 9.9890156833846133200e-01, 0.0, + 5.4687499999642848192e-02, 5.4660244884709843771e-02, 9.9850501131899360718e-01, 0.0, + 6.2500000000560454461e-02, 6.2459317842939558740e-02, 9.9804751070006414437e-01, 0.0, + 7.0312499999974784060e-02, 7.0254578604834888589e-02, 9.9752909440529957674e-01, 0.0, + 7.8125000000139249723e-02, 7.8045551390106132628e-02, 9.9694979407601780341e-01, 0.0, + 8.5937500000010338952e-02, 8.5831760676889648498e-02, 9.9630964506979713402e-01, 0.0, + 9.3749999999981376009e-02, 9.3612731235494350823e-02, 9.9560868645800348897e-01, 0.0, + 1.0156249999991998068e-01, 1.0138798815545004006e-01, 9.9484696102354874814e-01, 0.0, + 1.0937499999996859457e-01, 1.0915705687529114742e-01, 9.9402451525821255984e-01, 0.0, + 1.1718749999982362719e-01, 1.1691946321080448623e-01, 9.9314139935987832963e-01, 0.0, + 1.2500000000009922618e-01, 
1.2467473338532614191e-01, 9.9219766722931668212e-01, 0.0, + 1.3281249999975877629e-01, 1.3242239405610808922e-01, 9.9119337646720018231e-01, 0.0, + 1.4062500000063443695e-01, 1.4016197234769187108e-01, 9.9012858837001815893e-01, 0.0, + 1.4843749999955710428e-01, 1.4789299587297158323e-01, 9.8900336792738841041e-01, 0.0, + 1.5624999999999389377e-01, 1.5561499277355000936e-01, 9.8781778381647289411e-01, 0.0, + 1.6406250000016783797e-01, 1.6332749173677843513e-01, 9.8657190839947017658e-01, 0.0, + 1.7187500000029506952e-01, 1.7103002203168574114e-01, 9.8526581771816335031e-01, 0.0, + 1.7968750000084471319e-01, 1.7872211353598477235e-01, 9.8389959148951300349e-01, 0.0, + 1.8749999999944111373e-01, 1.8640329676172079365e-01, 9.8247331310135943561e-01, 0.0, + 1.9531249999999666933e-01, 1.9407310289290652383e-01, 9.8098706960566983692e-01, 0.0, + 2.0312500000009747758e-01, 2.0173106380173427832e-01, 9.7944095171552869594e-01, 0.0, + 2.1093750000010619283e-01, 2.0937671208609748286e-01, 9.7783505379793755896e-01, 0.0, + 2.1875000000030794811e-01, 2.1700958109531076623e-01, 9.7616947386856844915e-01, 0.0, + 2.2656249999987468358e-01, 2.2462920495758317840e-01, 9.7444431358601713011e-01, 0.0, + 2.3437500000010527690e-01, 2.3223511861161386105e-01, 9.7265967824488830384e-01, 0.0, + 2.4218749999999975020e-01, 2.3982685783066132190e-01, 9.7081567677034952268e-01, 0.0, + 2.4999999999974262255e-01, 2.4740395925427355328e-01, 9.6891242171070846023e-01, 0.0, + 2.5781250000144378953e-01, 2.5496596041727453974e-01, 9.6695002923030970443e-01, 0.0, + 2.6562500000037131409e-01, 2.6251239976951157296e-01, 9.6492861910467353503e-01, 0.0, + 2.7343750000018046675e-01, 2.7004281671875879356e-01, 9.6284831470933096575e-01, 0.0, + 2.8125000000148109303e-01, 2.7755675164775922559e-01, 9.6070924301515081556e-01, 0.0, + 2.8906250000049193982e-01, 2.8505374594101895447e-01, 9.5851153458108839800e-01, 0.0, + 2.9687499999876038048e-01, 2.9253334202214215098e-01, 9.5625532354353792730e-01, 0.0, + 
3.0468750000020183855e-01, 2.9999508337887559328e-01, 9.5394074760883418307e-01, 0.0, + 3.1249999999968136599e-01, 3.0743851458007764865e-01, 9.5156794804827016243e-01, 0.0, + 3.2031250000105265796e-01, 3.1486318132074436749e-01, 9.4913706968413158460e-01, 0.0, + 3.2812499999976940668e-01, 3.2226863043316833490e-01, 9.4664826088612763488e-01, 0.0, + 3.3593749999946614926e-01, 3.2965440993035616257e-01, 9.4410167355718033200e-01, 0.0, + 3.4375000000042527093e-01, 3.3702006902265346788e-01, 9.4149746312773774370e-01, 0.0, + 3.5156249999849442656e-01, 3.4436515814428492188e-01, 9.3883578854678395587e-01, 0.0, + 3.5937500000102234887e-01, 3.5168922899577109709e-01, 9.3611681226669574141e-01, 0.0, + 3.6718749999811656215e-01, 3.5899183454430716456e-01, 9.3334070024322457471e-01, 0.0, + 3.7500000000009731105e-01, 3.6627252908613811000e-01, 9.3050762191227864850e-01, 0.0, + 3.8281249999980870857e-01, 3.7353086823851550102e-01, 9.2761775019292336264e-01, 0.0, + 3.9062500000029726221e-01, 3.8076640899266506191e-01, 9.2467126146692291133e-01, 0.0, + 3.9843749999969407805e-01, 3.8797870972674308732e-01, 9.2166833557347060957e-01, 0.0, + 4.0625000000035305092e-01, 3.9516733024125855200e-01, 9.1860915579477875337e-01, 0.0, + 4.1406249999977551290e-01, 4.0233183177756759452e-01, 9.1549390884839154658e-01, 0.0, + 4.2187500000064509509e-01, 4.0947177705388360103e-01, 9.1232278487185369809e-01, 0.0, + 4.2968750000090671914e-01, 4.1658673028286541395e-01, 9.0909597741505332458e-01, 0.0, + 4.3749999999977579046e-01, 4.2367625720373491838e-01, 9.0581368342603141297e-01, 0.0, + 4.4531249999998151479e-01, 4.3073992511078651457e-01, 9.0247610323794946741e-01, 0.0, + 4.5312499999986916022e-01, 4.3777730287263749709e-01, 8.9908344056019573465e-01, 0.0, + 4.6093749998776573085e-01, 4.4478796095356976092e-01, 8.9563590246861235489e-01, 0.0, + 4.6874999999894750857e-01, 4.5177147149074481369e-01, 8.9213369936746989008e-01, 0.0, + 4.7656249999993238742e-01, 4.5872740821667651323e-01, 
8.8857704502806655888e-01, 0.0, + 4.8437500000085281782e-01, 4.6565534658591489769e-01, 8.8496615652574617261e-01, 0.0, + 4.9218750000026373348e-01, 4.7255486375153687995e-01, 8.8130125425121597083e-01, 0.0, + 5.0000000000063071770e-01, 4.7942553860475650707e-01, 8.7758256189007033399e-01, 0.0, + 5.0781250000246225262e-01, 4.8626695179542711589e-01, 8.7381030641185719610e-01, 0.0, + 5.1562499999926780792e-01, 4.9307868575328606120e-01, 8.6998471805877841678e-01, 0.0, + 5.2343749999866429068e-01, 4.9986032473185659786e-01, 8.6610603032132438273e-01, 0.0, + 5.3125000000045408122e-01, 5.0661145481464886497e-01, 8.6217447993465046174e-01, 0.0, + 5.3906250000013333779e-01, 5.1333166394358564766e-01, 8.5819030686259190066e-01, 0.0, + 5.4687499999851685306e-01, 5.2002054195246016910e-01, 8.5415375427815665166e-01, 0.0, + 5.5468749999993749444e-01, 5.2667768059033359673e-01, 8.5006506854945318441e-01, 0.0, + 5.6249999999973876452e-01, 5.3330267353579918765e-01, 8.4592449923120727195e-01, 0.0, + 5.7031249999981425969e-01, 5.3989511643504806138e-01, 8.4173229904143864744e-01, 0.0, + 5.7812499995867461244e-01, 5.4645460688459401855e-01, 8.3748872387310613341e-01, 0.0, + 5.8593749999782485105e-01, 5.5298074462871504853e-01, 8.3319403266578417888e-01, 0.0, + 5.9374999999819222385e-01, 5.5947313124586850464e-01, 8.2884848761033713682e-01, 0.0, + 6.0156250000116751053e-01, 5.6593137050886854755e-01, 8.2445235391376847645e-01, 0.0, + 6.0937499999740707413e-01, 5.7235506823238102569e-01, 8.2000589989871808250e-01, 0.0, + 6.1718749999640543091e-01, 5.7874383235483894961e-01, 8.1550939694845581140e-01, 0.0, + 6.2500000000776623210e-01, 5.8509727294676028286e-01, 8.1096311950067390129e-01, 0.0, + 6.3281250000034772185e-01, 5.9141500220159670675e-01, 8.0636734505489826574e-01, 0.0, + 6.4062499999937538853e-01, 5.9769663453820076615e-01, 8.0172235409879177848e-01, 0.0, + 6.4843750000738653583e-01, 6.0394178656004393613e-01, 7.9702843013700730435e-01, 0.0, + 6.5625000000061406435e-01, 
6.1015007707627788580e-01, 7.9228585967680387192e-01, 0.0, + 6.6406249999753186319e-01, 6.1632112717960729764e-01, 7.8749493216912724858e-01, 0.0, + 6.7187500000431277236e-01, 6.2245456022571910015e-01, 7.8265594002358829240e-01, 0.0, + 6.7968749999981381560e-01, 6.2855000184488485360e-01, 7.7776917860043492947e-01, 0.0, + 6.8749999999877509094e-01, 6.3460708001432264425e-01, 7.7283494615324888066e-01, 0.0, + 6.9531250000506295006e-01, 6.4062542504411801314e-01, 7.6785354383960691127e-01, 0.0, + 7.0312499999963207209e-01, 6.4660466959087170569e-01, 7.6282527571081415463e-01, 0.0, + 7.1093749999987698729e-01, 6.5254444872567274327e-01, 7.5775044865529961324e-01, 0.0, + 7.1875000000017263968e-01, 6.5844439991069747542e-01, 7.5262937241795280219e-01, 0.0, + 7.2656250000154842805e-01, 6.6430416304410366823e-01, 7.4746235956218753937e-01, 0.0, + 7.3437500000182720505e-01, 6.7012338047451913692e-01, 7.4224972545727685436e-01, 0.0, + 7.4218750000178623782e-01, 6.7590169702749525182e-01, 7.3699178825503341983e-01, 0.0, + 7.5000000000121047616e-01, 6.8163876002421985856e-01, 7.3168886887299577904e-01, 0.0, + 7.5781249999863331546e-01, 6.8733421930288085555e-01, 7.2634129097504795958e-01, 0.0, + 7.6562500000199784633e-01, 6.9298772724775825615e-01, 7.2094938094431193498e-01, 0.0, + 7.7343750000033728575e-01, 6.9859893878992307403e-01, 7.1551346788274594601e-01, 0.0, + 7.8125000000087474472e-01, 7.0416751145515477095e-01, 7.1003388356546370819e-01, 0.0, + 7.8906249999555477803e-01, 7.0969310536076801732e-01, 7.0451096244372934940e-01, 0.0, +}; diff --git a/usr/src/libm/src/mvec/__vTBL_sqrtf.c b/usr/src/libm/src/mvec/__vTBL_sqrtf.c new file mode 100644 index 0000000..bbc7e27 --- /dev/null +++ b/usr/src/libm/src/mvec/__vTBL_sqrtf.c @@ -0,0 +1,553 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vTBL_sqrtf.c 1.3 06/01/31 SMI" + +#pragma align 32 (__vlibm_TBL_sqrtf) + +/* + i = [0,255] + TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 44))); + TBL[2*i+1] = sqrt(*(double*)&(0x3ff0000000000000LL + (i << 44)))/sqrt(2); + TBL[512+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 44))); + TBL[512+2*i+1] = sqrt(*(double*)&(0x3ff0000000000000LL + (i << 44))); +*/ + +const double __vlibm_TBL_sqrtf[] = { + 1.0000000000000000000, 0.7071067811865474617, + 0.9961089494163424263, 0.7084865030471646508, + 0.9922480620155038622, 0.7098635432250340882, + 0.9884169884169884401, 0.7112379172963151364, + 0.9846153846153846700, 0.7126096406869610878, + 0.9808429118773945854, 0.7139787286747413253, + 0.9770992366412213359, 0.7153451963912248468, + 0.9733840304182509451, 0.7167090588237321480, + 0.9696969696969697239, 0.7180703308172535770, + 0.9660377358490566113, 0.7194290270763336048, + 0.9624060150375939315, 0.7207851621669246756, + 0.9588014981273408344, 0.7221387505182088606, + 0.9552238805970149071, 0.7234898064243890925, + 0.9516728624535315539, 0.7248383440464502003, + 0.9481481481481481843, 0.7261843774138906360, + 0.9446494464944649172, 0.7275279204264260002, + 0.9411764705882352811, 0.7288689868556624818, + 
0.9377289377289377281, 0.7302075903467450946, + 0.9343065693430656626, 0.7315437444199764938, + 0.9309090909090909083, 0.7328774624724109232, + 0.9275362318840579823, 0.7342087577794206288, + 0.9241877256317689859, 0.7355376434962387355, + 0.9208633093525180335, 0.7368641326594745911, + 0.9175627240143369168, 0.7381882381886073485, + 0.9142857142857142572, 0.7395099728874520162, + 0.9110320284697508431, 0.7408293494456060779, + 0.9078014184397162900, 0.7421463804398696906, + 0.9045936395759717197, 0.7434610783356448982, + 0.9014084507042253724, 0.7447734554883115310, + 0.8982456140350877360, 0.7460835241445826771, + 0.8951048951048951041, 0.7473912964438372830, + 0.8919860627177700341, 0.7486967844194336585, + 0.8888888888888888395, 0.7499999999999998890, + 0.8858131487889273625, 0.7513009550107067058, + 0.8827586206896551602, 0.7525996611745184861, + 0.8797250859106529042, 0.7538961301134260440, + 0.8767123287671232390, 0.7551903733496606597, + 0.8737201365187713398, 0.7564824023068876802, + 0.8707482993197278587, 0.7577722283113837998, + 0.8677966101694915002, 0.7590598625931948007, + 0.8648648648648649129, 0.7603453162872774174, + 0.8619528619528619151, 0.7616286004346212168, + 0.8590604026845637398, 0.7629097259833563793, + 0.8561872909698996503, 0.7641887037898427160, + 0.8533333333333333881, 0.7654655446197431434, + 0.8504983388704319136, 0.7667402591490810604, + 0.8476821192052980125, 0.7680128579652816256, + 0.8448844884488448947, 0.7692833515681981593, + 0.8421052631578946901, 0.7705517503711221128, + 0.8393442622950819665, 0.7718180647017791607, + 0.8366013071895425091, 0.7730823048033113043, + 0.8338762214983713728, 0.7743444808352416553, + 0.8311688311688312236, 0.7756046028744285614, + 0.8284789644012945375, 0.7768626809160033009, + 0.8258064516129032251, 0.7781187248742956752, + 0.8231511254019292512, 0.7793727445837452805, + 0.8205128205128204844, 0.7806247497997997886, + 0.8178913738019168989, 0.7818747501998001281, + 0.8152866242038216971, 
0.7831227553838541189, + 0.8126984126984126977, 0.7843687748756957845, + 0.8101265822784810000, 0.7856128181235333408, + 0.8075709779179810477, 0.7868548945008857487, + 0.8050314465408805464, 0.7880950133074056119, + 0.8025078369905955800, 0.7893331837696929698, + 0.8000000000000000444, 0.7905694150420947697, + 0.7975077881619937470, 0.7918037162074953450, + 0.7950310559006210642, 0.7930360962780950151, + 0.7925696594427245056, 0.7942665641961771383, + 0.7901234567901234129, 0.7954951288348659499, + 0.7876923076923076916, 0.7967217989988725213, + 0.7852760736196319202, 0.7979465834252315037, + 0.7828746177370030646, 0.7991694907840263262, + 0.7804878048780488076, 0.8003905296791060664, + 0.7781155015197568359, 0.8016097086487912193, + 0.7757575757575757569, 0.8028270361665704735, + 0.7734138972809667667, 0.8040425206417879389, + 0.7710843373493976305, 0.8052561704203202719, + 0.7687687687687687621, 0.8064679937852462510, + 0.7664670658682635196, 0.8076779989575053609, + 0.7641791044776119479, 0.8088861940965489383, + 0.7619047619047618625, 0.8100925873009824363, + 0.7596439169139466152, 0.8112971866091980289, + 0.7573964497041419941, 0.8124999999999998890, + 0.7551622418879055942, 0.8137010353932209172, + 0.7529411764705882248, 0.8149003006503310331, + 0.7507331378299120228, 0.8160978035750371395, + 0.7485380116959063912, 0.8172935519138762039, + 0.7463556851311953233, 0.8184875533567996797, + 0.7441860465116278966, 0.8196798155377500450, + 0.7420289855072463858, 0.8208703460352310133, + 0.7398843930635837784, 0.8220591523728690841, + 0.7377521613832852543, 0.8232462420199680997, + 0.7356321839080459668, 0.8244316223920574727, + 0.7335243553008595763, 0.8256153008514316438, + 0.7314285714285714279, 0.8267972847076845433, + 0.7293447293447293811, 0.8279775812182355033, + 0.7272727272727272929, 0.8291561975888499525, + 0.7252124645892351618, 0.8303331409741513403, + 0.7231638418079096020, 0.8315084184781292853, + 0.7211267605633803202, 0.8326820371546392874, + 
0.7191011235955055980, 0.8338540040078957771, + 0.7170868347338935633, 0.8350243259929617246, + 0.7150837988826815872, 0.8361930100162282553, + 0.7130919220055710328, 0.8373600629358912695, + 0.7111111111111111382, 0.8385254915624210659, + 0.7091412742382271484, 0.8396893026590250830, + 0.7071823204419889208, 0.8408515029421067544, + 0.7052341597796143446, 0.8420120990817173690, + 0.7032967032967033516, 0.8431710977020024922, + 0.7013698630136986356, 0.8443285053816433905, + 0.6994535519125683054, 0.8454843286542926828, + 0.6975476839237056970, 0.8466385740090041079, + 0.6956521739130434590, 0.8477912478906584060, + 0.6937669376693766932, 0.8489423567003827609, + 0.6918918918918919303, 0.8500919067959651354, + 0.6900269541778976112, 0.8512399044922647207, + 0.6881720430107527431, 0.8523863560616159463, + 0.6863270777479892892, 0.8535312677342289378, + 0.6844919786096256287, 0.8546746456985838680, + 0.6826666666666666439, 0.8558164961018219774, + 0.6808510638297872175, 0.8569568250501304885, + 0.6790450928381962514, 0.8580956386091237453, + 0.6772486772486772111, 0.8592329428042199124, + 0.6754617414248020868, 0.8603687436210126771, + 0.6736842105263157743, 0.8615030470056387335, + 0.6719160104986876547, 0.8626358588651412695, + 0.6701570680628272658, 0.8637671850678283469, + 0.6684073107049608442, 0.8648970314436278395, + 0.6666666666666666297, 0.8660254037844384856, + 0.6649350649350649345, 0.8671523078444753896, + 0.6632124352331606465, 0.8682777493406126368, + 0.6614987080103359451, 0.8694017339527221333, + 0.6597938144329896781, 0.8705242673240073392, + 0.6580976863753212891, 0.8716453550613345591, + 0.6564102564102564097, 0.8727650027355586815, + 0.6547314578005115626, 0.8738832158818476969, + 0.6530612244897958663, 0.8749999999999998890, + 0.6513994910941476313, 0.8761153605547615797, + 0.6497461928934009645, 0.8772293029761374372, + 0.6481012658227848222, 0.8783418326596996728, + 0.6464646464646465196, 0.8794529549668930191, + 0.6448362720403022497, 
0.8805626752253356004, + 0.6432160804020100597, 0.8816709987291176942, + 0.6416040100250626210, 0.8827779307390958285, + 0.6400000000000000133, 0.8838834764831843271, + 0.6384039900249376398, 0.8849876411566435230, + 0.6368159203980099381, 0.8860904299223640868, + 0.6352357320099255578, 0.8871918479111493561, + 0.6336633663366336711, 0.8882919002219933358, + 0.6320987654320987525, 0.8893905919223566992, + 0.6305418719211822731, 0.8904879280484380155, + 0.6289926289926289771, 0.8915839136054440894, + 0.6274509803921568540, 0.8926785535678561923, + 0.6259168704156479190, 0.8937718528796931849, + 0.6243902439024390238, 0.8948638164547719764, + 0.6228710462287104788, 0.8959544491769656505, + 0.6213592233009708199, 0.8970437559004575956, + 0.6198547215496368334, 0.8981317414499945251, + 0.6183574879227052845, 0.8992184106211348338, + 0.6168674698795181266, 0.9003037681804957337, + 0.6153846153846154188, 0.9013878188659971702, + 0.6139088729016786150, 0.9024705673871031841, + 0.6124401913875597847, 0.9035520184250599440, + 0.6109785202863962095, 0.9046321766331330005, + 0.6095238095238095788, 0.9057110466368397672, + 0.6080760095011876754, 0.9067886330341817791, + 0.6066350710900474397, 0.9078649403958718445, + 0.6052009456264775267, 0.9089399732655616404, + 0.6037735849056603543, 0.9100137361600647568, + 0.6023529411764705355, 0.9110862335695781855, + 0.6009389671361502483, 0.9121574699579014789, + 0.5995316159250585475, 0.9132274497626535759, + 0.5981308411214952825, 0.9142961773954870752, + 0.5967365967365967361, 0.9153636572423006212, + 0.5953488372093023173, 0.9164298936634486248, + 0.5939675174013920866, 0.9174948909939498742, + 0.5925925925925925597, 0.9185586535436917055, + 0.5912240184757505679, 0.9196211855976350602, + 0.5898617511520737322, 0.9206824914160146589, + 0.5885057471264367734, 0.9217425752345390633, + 0.5871559633027523262, 0.9228014412645875186, + 0.5858123569794050356, 0.9238590936934051312, + 0.5844748858447488260, 0.9249155366842962689, + 
0.5831435079726651205, 0.9259707743768158528, + 0.5818181818181817899, 0.9270248108869577619, + 0.5804988662131519428, 0.9280776503073435713, + 0.5791855203619910020, 0.9291292967074065157, + 0.5778781038374717349, 0.9301797541335758979, + 0.5765765765765765716, 0.9312290266094586100, + 0.5752808988764045450, 0.9322771181360186565, + 0.5739910313901345207, 0.9333240326917547902, + 0.5727069351230424932, 0.9343697742328782585, + 0.5714285714285713969, 0.9354143466934853324, + 0.5701559020044543180, 0.9364577539857310562, + 0.5688888888888888884, 0.9375000000000000000, + 0.5676274944567627490, 0.9385410886050753465, + 0.5663716814159291957, 0.9395810236483067568, + 0.5651214128035320083, 0.9406198089557756825, + 0.5638766519823789070, 0.9416574483324601230, + 0.5626373626373626369, 0.9426939455623971620, + 0.5614035087719297934, 0.9437293044088436167, + 0.5601750547045951656, 0.9447635286144357991, + 0.5589519650655021543, 0.9457966219013471676, + 0.5577342047930283764, 0.9468285879714447573, + 0.5565217391304347894, 0.9478594305064437231, + 0.5553145336225596695, 0.9488891531680609948, + 0.5541125541125541121, 0.9499177595981663780, + 0.5529157667386609409, 0.9509452534189335449, + 0.5517241379310344751, 0.9519716382329884707, + 0.5505376344086021501, 0.9529969176235565387, + 0.5493562231759656633, 0.9540210951546090890, + 0.5481798715203426431, 0.9550441743710077480, + 0.5470085470085470636, 0.9560661587986472032, + 0.5458422174840085184, 0.9570870519445969782, + 0.5446808510638297962, 0.9581068572972432085, + 0.5435244161358810944, 0.9591255783264254209, + 0.5423728813559322015, 0.9601432184835759776, + 0.5412262156448203188, 0.9611597812018561893, + 0.5400843881856539630, 0.9621752698962906525, + 0.5389473684210526194, 0.9631896879639025855, + 0.5378151260504201447, 0.9642030387838443906, + 0.5366876310272536976, 0.9652153257175312140, + 0.5355648535564853097, 0.9662265521087691766, + 0.5344467640918579843, 0.9672367212838850481, + 0.5333333333333333259, 
0.9682458365518541443, + 0.5322245322245322541, 0.9692539012044263380, + 0.5311203319502074693, 0.9702609185162514027, + 0.5300207039337474502, 0.9712668917450032469, + 0.5289256198347107585, 0.9722718241315028154, + 0.5278350515463917647, 0.9732757188998396591, + 0.5267489711934156826, 0.9742785792574933934, + 0.5256673511293634693, 0.9752804083954520475, + 0.5245901639344262568, 0.9762812094883317471, + 0.5235173824130879838, 0.9772809856944930651, + 0.5224489795918367818, 0.9782797401561579287, + 0.5213849287169042279, 0.9792774759995248601, + 0.5203252032520325754, 0.9802741963348825527, + 0.5192697768762677413, 0.9812699042567237795, + 0.5182186234817813819, 0.9822646028438568599, + 0.5171717171717171713, 0.9832582951595170151, + 0.5161290322580645018, 0.9842509842514762797, + 0.5150905432595573874, 0.9852426731521528591, + 0.5140562248995983463, 0.9862333648787187101, + 0.5130260521042083743, 0.9872230624332070104, + 0.5120000000000000107, 0.9882117688026185176, + 0.5109780439121756057, 0.9891994869590258199, + 0.5099601593625497920, 0.9901862198596785847, + 0.5089463220675943811, 0.9911719704471065873, + 0.5079365079365079083, 0.9921567416492214075, + 0.5069306930693069368, 0.9931405363794189034, + 0.5059288537549406772, 0.9941233575366791309, + 0.5049309664694280331, 0.9951052080056659310, + 0.5039370078740157410, 0.9960860906568265172, + 0.5029469548133594925, 0.9970660083464885082, + 0.5019607843137254832, 0.9980449639169568510, + 0.5009784735812132794, 0.9990229601966111872, + 1.0000000000000000000, 1.0000000000000000000, + 0.9961089494163424263, 1.0019512213675874079, + 0.9922480620155038622, 1.0038986502630631303, + 0.9884169884169884401, 1.0058423087144425789, + 0.9846153846153846700, 1.0077822185373186414, + 0.9808429118773945854, 1.0097184013377193956, + 0.9770992366412213359, 1.0116508785149154193, + 0.9733840304182509451, 1.0135796712641784723, + 0.9696969696969697239, 1.0155048005794951038, + 0.9660377358490566113, 1.0174262872562316318, + 
0.9624060150375939315, 1.0193441518937556012, + 0.9588014981273408344, 1.0212584148980119458, + 0.9552238805970149071, 1.0231690964840562952, + 0.9516728624535315539, 1.0250762166785454266, + 0.9481481481481481843, 1.0269797953221864173, + 0.9446494464944649172, 1.0288798520721456065, + 0.9411764705882352811, 1.0307764064044151464, + 0.9377289377289377281, 1.0326694776161440270, + 0.9343065693430656626, 1.0345590848279280216, + 0.9309090909090909083, 1.0364452469860625516, + 0.9275362318840579823, 1.0383279828647593579, + 0.9241877256317689859, 1.0402073110683274226, + 0.9208633093525180335, 1.0420832500333165882, + 0.9175627240143369168, 1.0439558180306292012, + 0.9142857142857142572, 1.0458250331675944533, + 0.9110320284697508431, 1.0476909133900131899, + 0.9078014184397162900, 1.0495534764841665254, + 0.9045936395759717197, 1.0514127400787951494, + 0.9014084507042253724, 1.0532687216470448810, + 0.8982456140350877360, 1.0551214385083833580, + 0.8951048951048951041, 1.0569709078304851957, + 0.8919860627177700341, 1.0588171466310885016, + 0.8888888888888888395, 1.0606601717798211926, + 0.8858131487889273625, 1.0625000000000000000, + 0.8827586206896551602, 1.0643366478704001654, + 0.8797250859106529042, 1.0661701318269987127, + 0.8767123287671232390, 1.0680004681646912967, + 0.8737201365187713398, 1.0698276730389806310, + 0.8707482993197278587, 1.0716517624676404896, + 0.8677966101694915002, 1.0734727523323541742, + 0.8648648648648649129, 1.0752906583803283347, + 0.8619528619528619151, 1.0771054962258803656, + 0.8590604026845637398, 1.0789172813520042649, + 0.8561872909698996503, 1.0807260291119114015, + 0.8533333333333333881, 1.0825317547305484123, + 0.8504983388704319136, 1.0843344733060920060, + 0.8476821192052980125, 1.0861341998114228957, + 0.8448844884488448947, 1.0879309490955757500, + 0.8421052631578946901, 1.0897247358851684940, + 0.8393442622950819665, 1.0915155747858111823, + 0.8366013071895425091, 1.0933034802834937782, + 0.8338762214983713728, 
1.0950884667459519495, + 0.8311688311688312236, 1.0968705484240153236, + 0.8284789644012945375, 1.0986497394529342042, + 0.8258064516129032251, 1.1004260538536880798, + 0.8231511254019292512, 1.1021995055342748149, + 0.8205128205128204844, 1.1039701082909809671, + 0.8178913738019168989, 1.1057378758096332305, + 0.8152866242038216971, 1.1075028216668343362, + 0.8126984126984126977, 1.1092649593311780798, + 0.8101265822784810000, 1.1110243021644485850, + 0.8075709779179810477, 1.1127808634228035789, + 0.8050314465408805464, 1.1145346562579379057, + 0.8025078369905955800, 1.1162856937182343842, + 0.8000000000000000444, 1.1180339887498949025, + 0.7975077881619937470, 1.1197795541980573031, + 0.7950310559006210642, 1.1215224028078976115, + 0.7925696594427245056, 1.1232625472257142807, + 0.7901234567901234129, 1.1250000000000000000, + 0.7876923076923076916, 1.1267347735824966293, + 0.7852760736196319202, 1.1284668803292368100, + 0.7828746177370030646, 1.1301963325015702555, + 0.7804878048780488076, 1.1319231422671771625, + 0.7781155015197568359, 1.1336473217010658576, + 0.7757575757575757569, 1.1353688827865593414, + 0.7734138972809667667, 1.1370878374162658453, + 0.7710843373493976305, 1.1388041973930373985, + 0.7687687687687687621, 1.1405179744309161816, + 0.7664670658682635196, 1.1422291801560666702, + 0.7641791044776119479, 1.1439378261076953436, + 0.7619047619047618625, 1.1456439237389599572, + 0.7596439169139466152, 1.1473474844178637166, + 0.7573964497041419941, 1.1490485194281396808, + 0.7551622418879055942, 1.1507470399701229535, + 0.7529411764705882248, 1.1524430571616108843, + 0.7507331378299120228, 1.1541365820387117225, + 0.7485380116959063912, 1.1558276255566830582, + 0.7463556851311953233, 1.1575161985907584938, + 0.7441860465116278966, 1.1592023119369629924, + 0.7420289855072463858, 1.1608859763129193432, + 0.7398843930635837784, 1.1625672023586421933, + 0.7377521613832852543, 1.1642460006373223091, + 0.7356321839080459668, 1.1659223816361019566, + 
0.7335243553008595763, 1.1675963557668378456, + 0.7314285714285714279, 1.1692679333668567487, + 0.7293447293447293811, 1.1709371246996995719, + 0.7272727272727272929, 1.1726039399558574328, + 0.7252124645892351618, 1.1742683892534959700, + 0.7231638418079096020, 1.1759304826391736576, + 0.7211267605633803202, 1.1775902300885483509, + 0.7191011235955055980, 1.1792476415070753948, + 0.7170868347338935633, 1.1809027267306990705, + 0.7150837988826815872, 1.1825554955265313861, + 0.7130919220055710328, 1.1842059575935259819, + 0.7111111111111111382, 1.1858541225631422655, + 0.7091412742382271484, 1.1875000000000000000, + 0.7071823204419889208, 1.1891435994025278955, + 0.7052341597796143446, 1.1907849302036030981, + 0.7032967032967033516, 1.1924240017711820183, + 0.7013698630136986356, 1.1940608234089249429, + 0.6994535519125683054, 1.1956954043568119861, + 0.6975476839237056970, 1.1973277537917510482, + 0.6956521739130434590, 1.1989578808281797784, + 0.6937669376693766932, 1.2005857945186590996, + 0.6918918918918919303, 1.2022115038544589627, + 0.6900269541778976112, 1.2038350177661389928, + 0.6881720430107527431, 1.2054563451241193661, + 0.6863270777479892892, 1.2070754947392479117, + 0.6844919786096256287, 1.2086924753633572216, + 0.6826666666666666439, 1.2103072956898177637, + 0.6808510638297872175, 1.2119199643540823352, + 0.6790450928381962514, 1.2135304899342249652, + 0.6772486772486772111, 1.2151388809514738210, + 0.6754617414248020868, 1.2167451458707365664, + 0.6736842105263157743, 1.2183492931011203897, + 0.6719160104986876547, 1.2199513309964460372, + 0.6701570680628272658, 1.2215512678557540749, + 0.6684073107049608442, 1.2231491119238078191, + 0.6666666666666666297, 1.2247448713915889407, + 0.6649350649350649345, 1.2263385543967864066, + 0.6632124352331606465, 1.2279301690242812040, + 0.6614987080103359451, 1.2295197233066250675, + 0.6597938144329896781, 1.2311072252245129910, + 0.6580976863753212891, 1.2326926827072512971, + 0.6564102564102564097, 
1.2342761036332186020, + 0.6547314578005115626, 1.2358574958303243374, + 0.6530612244897958663, 1.2374368670764581690, + 0.6513994910941476313, 1.2390142250999380824, + 0.6497461928934009645, 1.2405895775799504754, + 0.6481012658227848222, 1.2421629321469869200, + 0.6464646464646465196, 1.2437342963832749287, + 0.6448362720403022497, 1.2453036778232047244, + 0.6432160804020100597, 1.2468710839537502366, + 0.6416040100250626210, 1.2484365222148861019, + 0.6400000000000000133, 1.2500000000000000000, + 0.6384039900249376398, 1.2515615246562992180, + 0.6368159203980099381, 1.2531211034852138830, + 0.6352357320099255578, 1.2546787437427957546, + 0.6336633663366336711, 1.2562344526401112432, + 0.6320987654320987525, 1.2577882373436317653, + 0.6305418719211822731, 1.2593401049756178800, + 0.6289926289926289771, 1.2608900626145009838, + 0.6274509803921568540, 1.2624381172952596764, + 0.6259168704156479190, 1.2639842760097927954, + 0.6243902439024390238, 1.2655285457072866784, + 0.6228710462287104788, 1.2670709332945808701, + 0.6213592233009708199, 1.2686114456365273906, + 0.6198547215496368334, 1.2701500895563484494, + 0.6183574879227052845, 1.2716868718359877199, + 0.6168674698795181266, 1.2732217992164600595, + 0.6153846153846154188, 1.2747548783981961229, + 0.6139088729016786150, 1.2762861160413836448, + 0.6124401913875597847, 1.2778155187663045034, + 0.6109785202863962095, 1.2793430931536700079, + 0.6095238095238095788, 1.2808688457449497466, + 0.6080760095011876754, 1.2823927830426995467, + 0.6066350710900474397, 1.2839149115108836607, + 0.6052009456264775267, 1.2854352375751958437, + 0.6037735849056603543, 1.2869537676233751000, + 0.6023529411764705355, 1.2884705080055189885, + 0.6009389671361502483, 1.2899854650343933749, + 0.5995316159250585475, 1.2914986449857390749, + 0.5981308411214952825, 1.2930100540985751678, + 0.5967365967365967361, 1.2945196985754987562, + 0.5953488372093023173, 1.2960275845829825059, + 0.5939675174013920866, 1.2975337182516684109, + 
0.5925925925925925597, 1.2990381056766580059, + 0.5912240184757505679, 1.3005407529178008019, + 0.5898617511520737322, 1.3020416659999787257, + 0.5885057471264367734, 1.3035408509133881161, + 0.5871559633027523262, 1.3050383136138188345, + 0.5858123569794050356, 1.3065340600229295998, + 0.5844748858447488260, 1.3080280960285217695, + 0.5831435079726651205, 1.3095204274848102344, + 0.5818181818181817899, 1.3110110602126894275, + 0.5804988662131519428, 1.3125000000000000000, + 0.5791855203619910020, 1.3139872526017899457, + 0.5778781038374717349, 1.3154728237405741709, + 0.5765765765765765716, 1.3169567191065922884, + 0.5752808988764045450, 1.3184389443580617485, + 0.5739910313901345207, 1.3199195051214296370, + 0.5727069351230424932, 1.3213984069916233643, + 0.5714285714285713969, 1.3228756555322953581, + 0.5701559020044543180, 1.3243512562760682005, + 0.5688888888888888884, 1.3258252147247766572, + 0.5676274944567627490, 1.3272975363497063750, + 0.5663716814159291957, 1.3287682265918312474, + 0.5651214128035320083, 1.3302372908620476721, + 0.5638766519823789070, 1.3317047345414072534, + 0.5626373626373626369, 1.3331705629813463965, + 0.5614035087719297934, 1.3346347815039139029, + 0.5601750547045951656, 1.3360973954019967902, + 0.5589519650655021543, 1.3375584099395434468, + 0.5577342047930283764, 1.3390178303517843439, + 0.5565217391304347894, 1.3404756618454509720, + 0.5553145336225596695, 1.3419319095989930002, + 0.5541125541125541121, 1.3433865787627923272, + 0.5529157667386609409, 1.3448396744593758001, + 0.5517241379310344751, 1.3462912017836259349, + 0.5505376344086021501, 1.3477411658029889718, + 0.5493562231759656633, 1.3491895715576813775, + 0.5481798715203426431, 1.3506364240608943472, + 0.5470085470085470636, 1.3520817282989960884, + 0.5458422174840085184, 1.3535254892317321040, + 0.5446808510638297962, 1.3549677117924250336, + 0.5435244161358810944, 1.3564084008881691634, + 0.5423728813559322015, 1.3578475614000269367, + 0.5412262156448203188, 
1.3592851981832216879, + 0.5400843881856539630, 1.3607213160673274910, + 0.5389473684210526194, 1.3621559198564605619, + 0.5378151260504201447, 1.3635890143294642218, + 0.5366876310272536976, 1.3650206042400971906, + 0.5355648535564853097, 1.3664506943172154418, + 0.5344467640918579843, 1.3678792892649556112, + 0.5333333333333333259, 1.3693063937629152971, + 0.5322245322245322541, 1.3707320124663318062, + 0.5311203319502074693, 1.3721561500062593453, + 0.5300207039337474502, 1.3735788109897444365, + 0.5289256198347107585, 1.3750000000000000000, + 0.5278350515463917647, 1.3764197215965774390, + 0.5267489711934156826, 1.3778379803155376138, + 0.5256673511293634693, 1.3792547806696193735, + 0.5245901639344262568, 1.3806701271484076443, + 0.5235173824130879838, 1.3820840242184988522, + 0.5224489795918367818, 1.3834964763236659024, + 0.5213849287169042279, 1.3849074878850211601, + 0.5203252032520325754, 1.3863170633011772104, + 0.5192697768762677413, 1.3877252069484073971, + 0.5182186234817813819, 1.3891319231808043622, + 0.5171717171717171713, 1.3905372163304368094, + 0.5161290322580645018, 1.3919410907075053796, + 0.5150905432595573874, 1.3933435506004971938, + 0.5140562248995983463, 1.3947446002763372874, + 0.5130260521042083743, 1.3961442439805422655, + 0.5120000000000000107, 1.3975424859373686282, + 0.5109780439121756057, 1.3989393303499619847, + 0.5099601593625497920, 1.4003347814005049354, + 0.5089463220675943811, 1.4017288432503627327, + 0.5079365079365079083, 1.4031215200402280541, + 0.5069306930693069368, 1.4045128158902644433, + 0.5059288537549406772, 1.4059027349002490848, + 0.5049309664694280331, 1.4072912811497126917, + 0.5039370078740157410, 1.4086784586980805045, + 0.5029469548133594925, 1.4100642715848097364, + 0.5019607843137254832, 1.4114487238295267968, + 0.5009784735812132794, 1.4128318194321642931, +}; + diff --git a/usr/src/libm/src/mvec/__vatan.c b/usr/src/libm/src/mvec/__vatan.c new file mode 100644 index 0000000..a71ea7d --- /dev/null +++ 
b/usr/src/libm/src/mvec/__vatan.c
@@ -0,0 +1,315 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__vatan.c 1.7 06/01/31 SMI"
+
+#include	/* NOTE(review): header name was lost in extraction; presumably <sys/isa_defs.h> (source of _LITTLE_ENDIAN) - confirm */
+
+#ifdef _LITTLE_ENDIAN
+#define HI(x) *(1+(int*)x)		/* upper 32 bits of the double at x */
+#define LO(x) *(unsigned*)x		/* lower 32 bits of the double at x */
+#else
+#define HI(x) *(int*)x			/* upper 32 bits of the double at x */
+#define LO(x) *(1+(unsigned*)x)		/* lower 32 bits of the double at x */
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict			/* expands to nothing */
+#endif
+/* __vatan: y[i*stridey] = atan(x[i*stridex]) for i = 0..n-1; up to three ordinary arguments are gathered (LOOP0..LOOP2) and then evaluated together at UNROLL3. */
+void
+__vatan( int n, double * restrict x, int stridex, double * restrict y, int stridey )
+{
+	double f , z, ans, ansu , ansl , tmp , poly , conup , conlo , dummy;
+	double f1, ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
+	double f2, ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
+	int index, sign, intf, intflo, intz, argcount;
+	int index1, sign1 ;
+	int index2, sign2 ;
+	double *yaddr,*yaddr1,*yaddr2;		/* where each gathered result will be stored */
+	extern const double __vlibm_TBL_atan1[];	/* (upper, lower) pairs: [0..1] = 0, [2..3] = pi/2, then one pair per breakpoint */
+	extern double fabs( double );
+
+/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
+ * Error = -3.08254E-18 On the interval |x| < 1/64 */
+
+/* define dummy names for readability. Use parray to help compiler optimize loads */
+#define p3 parray[0]
+#define p2 parray[1]
+#define p1 parray[2]
+
+	static const double parray[] = {
+		-1.428029046844299722E-01,	/* p[3] */
+		1.999999917247000615E-01,	/* p[2] */
+		-3.333333333329292858E-01,	/* p[1] */
+		1.0,				/* not used for p[0], though */
+		-1.0,				/* labelled as a sign flipper; not referenced in this function */
+	};
+
+	if( n <= 0 ) return;		/* if no. of elements is 0 or neg, do nothing */
+	do
+	{
+	LOOP0:
+
+		f = fabs(*x);			/* fetch argument */
+		intf = HI(x);			/* upper half of x, as integer */
+		intflo = LO(x);			/* lower half of x, as integer */
+		sign = intf & 0x80000000;	/* sign of argument */
+		intf = intf & ~0x80000000;	/* abs(upper argument) */
+
+		if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* special cases: |x| above ~2**55 (incl. inf/NaN) or below 2**-28 */
+		{
+			if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
+			{
+				ans = f - f;	/* return NaN if x=NaN */
+			}
+			else if( intf < 0x3e300000 )	/* tiny arg: the result is the argument itself */
+			{
+				dummy = 1.0e37 + f;	/* value unused; presumably evaluated only for its floating-point side effect - confirm */
+				dummy = dummy;
+				ans = f;
+			}
+			else if( intf > 0x43600000 )	/* huge arg (incl. infinity): the result is pi/2 */
+			{
+				index = 2;
+				ans = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
+			}
+			*y = (sign) ? -ans: ans;	/* store answer, with sign bit */
+			x += stridex;
+			y += stridey;
+			argcount = 0;		/* initialize argcount */
+			if ( --n <=0 ) break;	/* we are done */
+			goto LOOP0;		/* otherwise, examine next arg */
+		}
+
+		index = 0;			/* points to 0,0 in table */
+		if (intf > 0x40500000)		/* if |x| > 64 */
+		{	f = -1.0/f;		/* evaluated below as pi/2 + atan(-1/|x|) */
+			index = 2;		/* point to pi/2 upper, lower */
+		}
+		else if( intf >= 0x3f900000 )	/* if |x| >= (1/64)... */
+		{
+			intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper 16 bits */
+			HI(&z) = intz;		/* store as a double (z) */
+			LO(&z) = 0;		/* ...lower */
+			f = (f - z)/(1.0 + f*z);	/* get reduced argument */
+			index = (intz - 0x3f900000) >> 15;	/* low 16 bits are zero, so this is ((intz - base) >> 16) << 1 */
+			index = index + 4;	/* skip over 0,0,pi/2,pi/2 */
+		}
+		yaddr = y;			/* address to store this answer */
+		x += stridex;			/* point to next arg */
+		y += stridey;			/* point to next result */
+		argcount = 1;			/* we now have 1 good argument */
+		if ( --n <=0 )
+		{
+			f1 = 0.0;		/* put dummy values in args 1,2 */
+			f2 = 0.0;
+			index1 = 0;
+			index2 = 0;
+			goto UNROLL3;		/* finish up with 1 good arg */
+		}
+
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+	LOOP1:
+
+		f1 = fabs(*x);			/* fetch argument */
+		intf = HI(x);			/* upper half of x, as integer */
+		intflo = LO(x);			/* lower half of x, as integer */
+		sign1 = intf & 0x80000000;	/* sign of argument */
+		intf = intf & ~0x80000000;	/* abs(upper argument) */
+
+		if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* special cases, handled exactly as in LOOP0 */
+		{
+			if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
+			{
+				ans = f1 - f1;	/* return NaN if x=NaN */
+			}
+			else if( intf < 0x3e300000 )	/* tiny arg: the result is the argument itself */
+			{
+				dummy = 1.0e37 + f1;
+				dummy = dummy;
+				ans = f1;
+			}
+			else if( intf > 0x43600000 )	/* huge arg (incl. infinity): the result is pi/2 */
+			{
+				index1 = 2;
+				ans = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low */
+			}
+			*y = (sign1) ? -ans: ans;	/* store answer, with sign bit */
+			x += stridex;
+			y += stridey;
+			argcount = 1;		/* we still have 1 good arg */
+			if ( --n <=0 )
+			{
+				f1 = 0.0;	/* put dummy values in args 1,2 */
+				f2 = 0.0;
+				index1 = 0;
+				index2 = 0;
+				goto UNROLL3;	/* finish up with 1 good arg */
+			}
+			goto LOOP1;		/* otherwise, examine next arg */
+		}
+
+		index1 = 0;			/* points to 0,0 in table */
+		if (intf > 0x40500000)		/* if |x| > 64 */
+		{	f1 = -1.0/f1;
+			index1 = 2;		/* point to pi/2 upper, lower */
+		}
+		else if( intf >= 0x3f900000 )	/* if |x| >= (1/64)... */
+		{
+			intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper 16 bits */
+			HI(&z) = intz;		/* store as a double (z) */
+			LO(&z) = 0;		/* ...lower */
+			f1 = (f1 - z)/(1.0 + f1*z);	/* get reduced argument */
+			index1 = (intz - 0x3f900000) >> 15;	/* ((intz - base) >> 16) << 1 */
+			index1 = index1 + 4;	/* skip over 0,0,pi/2,pi/2 */
+		}
+		yaddr1 = y;			/* address to store this answer */
+		x += stridex;			/* point to next arg */
+		y += stridey;			/* point to next result */
+		argcount = 2;			/* we now have 2 good arguments */
+		if ( --n <=0 )
+		{
+			f2 = 0.0;		/* put dummy value in arg 2 */
+			index2 = 0;
+			goto UNROLL3;		/* finish up with 2 good args */
+		}
+
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+	LOOP2:
+
+		f2 = fabs(*x);			/* fetch argument */
+		intf = HI(x);			/* upper half of x, as integer */
+		intflo = LO(x);			/* lower half of x, as integer */
+		sign2 = intf & 0x80000000;	/* sign of argument */
+		intf = intf & ~0x80000000;	/* abs(upper argument) */
+
+		if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* special cases, handled exactly as in LOOP0 */
+		{
+			if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
+			{
+				ans = f2 - f2;	/* return NaN if x=NaN */
+			}
+			else if( intf < 0x3e300000 )	/* tiny arg: the result is the argument itself */
+			{
+				dummy = 1.0e37 + f2;
+				dummy = dummy;
+				ans = f2;
+			}
+			else if( intf > 0x43600000 )	/* huge arg (incl. infinity): the result is pi/2 */
+			{
+				index2 = 2;
+				ans = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low */
+			}
+			*y = (sign2) ? -ans: ans;	/* store answer, with sign bit */
+			x += stridex;
+			y += stridey;
+			argcount = 2;		/* we still have 2 good args */
+			if ( --n <=0 )
+			{
+				f2 = 0.0;	/* put dummy value in arg 2 */
+				index2 = 0;
+				goto UNROLL3;	/* finish up with 2 good args */
+			}
+			goto LOOP2;		/* otherwise, examine next arg */
+		}
+
+		index2 = 0;			/* points to 0,0 in table */
+		if (intf > 0x40500000)		/* if |x| > 64 */
+		{	f2 = -1.0/f2;
+			index2 = 2;		/* point to pi/2 upper, lower */
+		}
+		else if( intf >= 0x3f900000 )	/* if |x| >= (1/64)... */
+		{
+			intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper 16 bits */
+			HI(&z) = intz;		/* store as a double (z) */
+			LO(&z) = 0;		/* ...lower */
+			f2 = (f2 - z)/(1.0 + f2*z);	/* get reduced argument */
+			index2 = (intz - 0x3f900000) >> 15;	/* ((intz - base) >> 16) << 1 */
+			index2 = index2 + 4;	/* skip over 0,0,pi/2,pi/2 */
+		}
+		yaddr2 = y;			/* address to store this answer */
+		x += stridex;			/* point to next arg */
+		y += stridey;			/* point to next result */
+		argcount = 3;			/* we now have 3 good arguments */
+
+
+/* here is the 3 way unrolled section,
+ note, we may actually only have
+ 1,2, or 3 'real' arguments at this point
+*/
+
+UNROLL3:
+
+		conup = __vlibm_TBL_atan1[index ];	/* upper table */
+		conup1 = __vlibm_TBL_atan1[index1];	/* upper table */
+		conup2 = __vlibm_TBL_atan1[index2];	/* upper table */
+
+		conlo = __vlibm_TBL_atan1[index +1];	/* lower table */
+		conlo1 = __vlibm_TBL_atan1[index1+1];	/* lower table */
+		conlo2 = __vlibm_TBL_atan1[index2+1];	/* lower table */
+
+		tmp = f *f ;
+		tmp1 = f1*f1;
+		tmp2 = f2*f2;
+
+		poly = f *((p3*tmp + p2)*tmp + p1)*tmp ;	/* series terms beyond the leading f */
+		poly1 = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
+		poly2 = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;
+
+		ansu = conup + f ;		/* compute atan(f) upper */
+		ansu1 = conup1 + f1;		/* compute atan(f) upper */
+		ansu2 = conup2 + f2;		/* compute atan(f) upper */
+
+		ansl = (((conup - ansu ) + f ) + poly ) + conlo ;	/* rounding error of the sum above, plus poly and table low part */
+		ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1;
+		ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2;
+
+		ans = ansu + ansl ;
+		ans1 = ansu1 + ansl1;
+		ans2 = ansu2 + ansl2;
+
+/* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
+
+		*yaddr = sign ? -ans: ans;	/* this one is always good */
+		if(argcount < 3) break;		/* end loop and finish up */
+		*yaddr1 = sign1 ? -ans1: ans1;
+		*yaddr2 = sign2 ? -ans2: ans2;
+
+	} while (--n > 0);
+
+	if(argcount == 2)			/* second result of a partial final group is still pending */
+	{	*yaddr1 = sign1 ? -ans1: ans1;
+	}
+}
diff --git a/usr/src/libm/src/mvec/__vatan2.c b/usr/src/libm/src/mvec/__vatan2.c
new file mode 100644
index 0000000..4b54da0
--- /dev/null
+++ b/usr/src/libm/src/mvec/__vatan2.c
@@ -0,0 +1,451 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "@(#)__vatan2.c 1.7 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_atan2[]; + +static const double +zero = 0.0, +twom3 = 0.125, +one = 1.0, +two110 = 1.2980742146337069071e+33, +pio4 = 7.8539816339744827900e-01, +pio2 = 1.5707963267948965580e+00, +pio2_lo = 6.1232339957367658860e-17, +pi = 3.1415926535897931160e+00, +pi_lo = 1.2246467991473531772e-16, +p1 = -3.33333333333327571893331786354179101074860633009e-0001, +p2 = 1.99999999942671624230086497610394721817438631379e-0001, +p3 = -1.42856965565428636896183013324727205980484158356e-0001, +p4 = 1.10894981496317081405107718475040168084164825641e-0001; + +/* Don't __ the following; acomp will handle it */ +extern double fabs( double ); + +void +__vatan2( int n, double * restrict y, int stridey, double * restrict x, + int stridex, double * restrict z, int stridez ) +{ + double x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2; + double ah0, ah1, ah2, al0, al1, al2, t0, t1, t2; + double z0, z1, z2, sign0, sign1, sign2, xh; + int i, k, hx, hy, sx, sy; + + do + { +loop0: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign0 = ( sy )? 
-one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if ( hy > hx || ( hy == hx && LO(y) > LO(x) ) ) + { + i = hx; + hx = hy; + hy = i; + x0 = fabs( *y ); + y0 = fabs( *x ); + if ( sx ) + { + ah0 = pio2; + al0 = pio2_lo; + } + else + { + ah0 = -pio2; + al0 = -pio2_lo; + sign0 = -sign0; + } + } + else + { + x0 = fabs( *x ); + y0 = fabs( *y ); + if ( sx ) + { + ah0 = -pi; + al0 = -pi_lo; + sign0 = -sign0; + } + else + ah0 = al0 = zero; + } + + if ( hx >= 0x7fe00000 || hx - hy >= 0x03600000 ) + { + if ( hx >= 0x7ff00000 ) + { + if ( ( hx ^ 0x7ff00000 ) | LO(&x0) ) /* nan */ + ah0 = x0 + y0; + else if ( hy >= 0x7ff00000 ) + ah0 += pio4; + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + if ( hx - hy >= 0x03600000 ) + { + if ( (int) ah0 == 0 ) + ah0 = y0 / x0; + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + y0 *= twom3; + x0 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if ( hy < 0x00100000 ) + { + if ( ( hy | LO(&y0) ) == 0 ) + { + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + y0 *= two110; + x0 *= two110; + hy = HI(&y0); + hx = HI(&x0); + } + + k = ( ( ( hx - hy ) + 0x00004000 ) >> 13 ) & ~0x3; + if ( k > 644 ) + k = 644; + ah0 += __vlibm_TBL_atan2[k]; + al0 += __vlibm_TBL_atan2[k+1]; + t0 = __vlibm_TBL_atan2[k+2]; + + xh = x0; + LO(&xh) = 0; + z0 = ( ( y0 - t0 * xh ) - t0 * ( x0 - xh ) ) / ( x0 + y0 * t0 ); + pz0 = z; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + +loop1: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign1 = ( sy )? 
-one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if ( hy > hx || ( hy == hx && LO(y) > LO(x) ) ) + { + i = hx; + hx = hy; + hy = i; + x1 = fabs( *y ); + y1 = fabs( *x ); + if ( sx ) + { + ah1 = pio2; + al1 = pio2_lo; + } + else + { + ah1 = -pio2; + al1 = -pio2_lo; + sign1 = -sign1; + } + } + else + { + x1 = fabs( *x ); + y1 = fabs( *y ); + if ( sx ) + { + ah1 = -pi; + al1 = -pi_lo; + sign1 = -sign1; + } + else + ah1 = al1 = zero; + } + + if ( hx >= 0x7fe00000 || hx - hy >= 0x03600000 ) + { + if ( hx >= 0x7ff00000 ) + { + if ( ( hx ^ 0x7ff00000 ) | LO(&x1) ) /* nan */ + ah1 = x1 + y1; + else if ( hy >= 0x7ff00000 ) + ah1 += pio4; + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + if ( hx - hy >= 0x03600000 ) + { + if ( (int) ah1 == 0 ) + ah1 = y1 / x1; + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + y1 *= twom3; + x1 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if ( hy < 0x00100000 ) + { + if ( ( hy | LO(&y1) ) == 0 ) + { + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + y1 *= two110; + x1 *= two110; + hy = HI(&y1); + hx = HI(&x1); + } + + k = ( ( ( hx - hy ) + 0x00004000 ) >> 13 ) & ~0x3; + if ( k > 644 ) + k = 644; + ah1 += __vlibm_TBL_atan2[k]; + al1 += __vlibm_TBL_atan2[k+1]; + t1 = __vlibm_TBL_atan2[k+2]; + + xh = x1; + LO(&xh) = 0; + z1 = ( ( y1 - t1 * xh ) - t1 * ( x1 - xh ) ) / ( x1 + y1 * t1 ); + pz1 = z; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + +loop2: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign2 = ( sy )? 
-one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if ( hy > hx || ( hy == hx && LO(y) > LO(x) ) ) + { + i = hx; + hx = hy; + hy = i; + x2 = fabs( *y ); + y2 = fabs( *x ); + if ( sx ) + { + ah2 = pio2; + al2 = pio2_lo; + } + else + { + ah2 = -pio2; + al2 = -pio2_lo; + sign2 = -sign2; + } + } + else + { + x2 = fabs( *x ); + y2 = fabs( *y ); + if ( sx ) + { + ah2 = -pi; + al2 = -pi_lo; + sign2 = -sign2; + } + else + ah2 = al2 = zero; + } + + if ( hx >= 0x7fe00000 || hx - hy >= 0x03600000 ) + { + if ( hx >= 0x7ff00000 ) + { + if ( ( hx ^ 0x7ff00000 ) | LO(&x2) ) /* nan */ + ah2 = x2 + y2; + else if ( hy >= 0x7ff00000 ) + ah2 += pio4; + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + if ( hx - hy >= 0x03600000 ) + { + if ( (int) ah2 == 0 ) + ah2 = y2 / x2; + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + y2 *= twom3; + x2 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if ( hy < 0x00100000 ) + { + if ( ( hy | LO(&y2) ) == 0 ) + { + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + y2 *= two110; + x2 *= two110; + hy = HI(&y2); + hx = HI(&x2); + } + + k = ( ( ( hx - hy ) + 0x00004000 ) >> 13 ) & ~0x3; + if ( k > 644 ) + k = 644; + ah2 += __vlibm_TBL_atan2[k]; + al2 += __vlibm_TBL_atan2[k+1]; + t2 = __vlibm_TBL_atan2[k+2]; + + xh = x2; + LO(&xh) = 0; + z2 = ( ( y2 - t2 * xh ) - t2 * ( x2 - xh ) ) / ( x2 + y2 * t2 ); + pz2 = z; + + x0 = z0 * z0; + x1 = z1 * z1; + x2 = z2 * z2; + + t0 = ah0 + ( z0 + ( al0 + ( z0 * x0 ) * ( p1 + x0 * + ( p2 + x0 * ( p3 + x0 * p4 ) ) ) ) ); + t1 = ah1 + ( z1 + ( al1 + ( z1 * x1 ) * ( p1 + x1 * + ( p2 + x1 * ( p3 + x1 * p4 ) ) ) ) ); + t2 = ah2 + ( z2 + ( al2 + ( z2 * x2 ) * ( p1 + x2 * + ( p2 + x2 * ( p3 + x2 * p4 ) ) ) ) ); + + *pz0 = sign0 * t0; + *pz1 = sign1 * t1; + *pz2 = sign2 * 
t2; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while ( --n > 0 ); + + if ( i > 0 ) + { + if ( i > 1 ) + { + x1 = z1 * z1; + t1 = ah1 + ( z1 + ( al1 + ( z1 * x1 ) * ( p1 + x1 * + ( p2 + x1 * ( p3 + x1 * p4 ) ) ) ) ); + *pz1 = sign1 * t1; + } + + x0 = z0 * z0; + t0 = ah0 + ( z0 + ( al0 + ( z0 * x0 ) * ( p1 + x0 * + ( p2 + x0 * ( p3 + x0 * p4 ) ) ) ) ); + *pz0 = sign0 * t0; + } +} diff --git a/usr/src/libm/src/mvec/__vatan2f.c b/usr/src/libm/src/mvec/__vatan2f.c new file mode 100644 index 0000000..0675196 --- /dev/null +++ b/usr/src/libm/src/mvec/__vatan2f.c @@ -0,0 +1,475 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vatan2f.c 1.7 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_atan1[]; + +static const double +pio4 = 7.8539816339744827900e-01, +pio2 = 1.5707963267948965580e+00, +pi = 3.1415926535897931160e+00; + +static const float +zero = 0.0f, +one = 1.0f, +q1 = -3.3333333333296428046e-01f, +q2 = 1.9999999186853752618e-01f, +twop24 = 16777216.0f; + +void +__vatan2f( int n, float * restrict y, int stridey, float * restrict x, + int stridex, float * restrict z, int stridez ) +{ + float x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2; + double ah0, ah1, ah2; + double t0, t1, t2; + double sx0, sx1, sx2; + double sign0, sign1, sign2; + int i, j, k0, k1, k2, hx, sx, sy; + int hy0, hy1, hy2; + float base0, base1, base2; + double num0, num1, num2; + double den0, den1, den2; + double dx0, dx1, dx2; + double dy0, dy1, dy2; + double db0, db1, db2; + + do + { +loop0: + hy0 = *(int*)y; + hx = *(int*)x; + sign0 = one; + sy = hy0 & 0x80000000; + hy0 &= ~0x80000000; + + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if ( hy0 > hx ) + { + x0 = *y; + y0 = *x; + i = hx; + hx = hy0; + hy0 = i; + if ( sy ) + { + x0 = -x0; + sign0 = -sign0; + } + if ( sx ) + { + y0 = -y0; + ah0 = pio2; + } + else + { + ah0 = -pio2; + sign0 = -sign0; + } + } + else + { + y0 = *y; + x0 = *x; + if ( sy ) + { + y0 = -y0; + sign0 = -sign0; + } + if ( sx ) + { + x0 = -x0; + ah0 = -pi; + sign0 = -sign0; + } + else + ah0 = zero; + } + + if ( hx >= 0x7f800000 || hx - hy0 >= 0x0c800000 ) + { + if ( hx >= 0x7f800000 ) + { + if ( hx ^ 0x7f800000 ) /* nan */ + ah0 = x0 + y0; + else if ( hy0 >= 0x7f800000 ) + ah0 += pio4; + } + else if ( (int) ah0 == 0 ) + ah0 = y0 / x0; + *z = (sign0 == one) ? 
ah0 : -ah0; +/* sign0*ah0 would change nan behavior relative to previous release */ + x += stridex; + y += stridey; + z += stridez; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + if (hy0 < 0x00800000) { + if ( hy0 == 0 ) + { + *z = sign0 * (float) ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + y0 *= twop24; /* scale subnormal y */ + x0 *= twop24; /* scale possibly subnormal x */ + hy0 = *(int*)&y0; + hx = *(int*)&x0; + } + pz0 = z; + + k0 = ( hy0 - hx + 0x3f800000 ) & 0xfff80000; + if( k0 >= 0x3C800000 ) /* if |x| >= (1/64)... */ + { + *(int*)&base0 = k0; + k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + k0 += 4; + /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + k0 = 0; + base0 = zero; + } + + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + + +loop1: + hy1 = *(int*)y; + hx = *(int*)x; + sign1 = one; + sy = hy1 & 0x80000000; + hy1 &= ~0x80000000; + + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if ( hy1 > hx ) + { + x1 = *y; + y1 = *x; + i = hx; + hx = hy1; + hy1 = i; + if ( sy ) + { + x1 = -x1; + sign1 = -sign1; + } + if ( sx ) + { + y1 = -y1; + ah1 = pio2; + } + else + { + ah1 = -pio2; + sign1 = -sign1; + } + } + else + { + y1 = *y; + x1 = *x; + if ( sy ) + { + y1 = -y1; + sign1 = -sign1; + } + if ( sx ) + { + x1 = -x1; + ah1 = -pi; + sign1 = -sign1; + } + else + ah1 = zero; + } + + if ( hx >= 0x7f800000 || hx - hy1 >= 0x0c800000 ) + { + if ( hx >= 0x7f800000 ) + { + if ( hx ^ 0x7f800000 ) /* nan */ + ah1 = x1 + y1; + else if ( hy1 >= 0x7f800000 ) + ah1 += pio4; + } + else if ( (int) ah1 == 0 ) + ah1 = y1 / x1; + *z = (sign1 == one)? 
ah1 : -ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + if (hy1 < 0x00800000) { + if ( hy1 == 0 ) + { + *z = sign1 * (float) ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + y1 *= twop24; /* scale subnormal y */ + x1 *= twop24; /* scale possibly subnormal x */ + hy1 = *(int*)&y1; + hx = *(int*)&x1; + } + pz1 = z; + + k1 = ( hy1 - hx + 0x3f800000 ) & 0xfff80000; + if( k1 >= 0x3C800000 ) /* if |x| >= (1/64)... */ + { + *(int*)&base1 = k1; + k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + k1 += 4; + /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + k1 = 0; + base1 = zero; + } + + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + +loop2: + hy2 = *(int*)y; + hx = *(int*)x; + sign2 = one; + sy = hy2 & 0x80000000; + hy2 &= ~0x80000000; + + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if ( hy2 > hx ) + { + x2 = *y; + y2 = *x; + i = hx; + hx = hy2; + hy2 = i; + if ( sy ) + { + x2 = -x2; + sign2 = -sign2; + } + if ( sx ) + { + y2 = -y2; + ah2 = pio2; + } + else + { + ah2 = -pio2; + sign2 = -sign2; + } + } + else + { + y2 = *y; + x2 = *x; + if ( sy ) + { + y2 = -y2; + sign2 = -sign2; + } + if ( sx ) + { + x2 = -x2; + ah2 = -pi; + sign2 = -sign2; + } + else + ah2 = zero; + } + + if ( hx >= 0x7f800000 || hx - hy2 >= 0x0c800000 ) + { + if ( hx >= 0x7f800000 ) + { + if ( hx ^ 0x7f800000 ) /* nan */ + ah2 = x2 + y2; + else if ( hy2 >= 0x7f800000 ) + ah2 += pio4; + } + else if ( (int) ah2 == 0 ) + ah2 = y2 / x2; + *z = (sign2 == one)? 
ah2 : -ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + if (hy2 < 0x00800000) { + if ( hy2 == 0 ) + { + *z = sign2 * (float) ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + y2 *= twop24; /* scale subnormal y */ + x2 *= twop24; /* scale possibly subnormal x */ + hy2 = *(int*)&y2; + hx = *(int*)&x2; + } + + pz2 = z; + + k2 = ( hy2 - hx + 0x3f800000 ) & 0xfff80000; + if( k2 >= 0x3C800000 ) /* if |x| >= (1/64)... */ + { + *(int*)&base2 = k2; + k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + k2 += 4; + /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + k2 = 0; + base2 = zero; + } + + goto endloop; + +endloop: + + ah2 += __vlibm_TBL_atan1[k2]; + ah1 += __vlibm_TBL_atan1[k1]; + ah0 += __vlibm_TBL_atan1[k0]; + + db2 = base2; + db1 = base1; + db0 = base0; + dy2 = y2; + dy1 = y1; + dy0 = y0; + dx2 = x2; + dx1 = x1; + dx0 = x0; + + num2 = dy2 - dx2 * db2; + den2 = dx2 + dy2 * db2; + + num1 = dy1 - dx1 * db1; + den1 = dx1 + dy1 * db1; + + num0 = dy0 - dx0 * db0; + den0 = dx0 + dy0 * db0; + + t2 = num2 / den2; + t1 = num1 / den1; + t0 = num0 / den0; + + sx2 = t2 * t2; + sx1 = t1 * t1; + sx0 = t0 * t0; + + t2 += t2 * sx2 * ( q1 + sx2 * q2 ); + t1 += t1 * sx1 * ( q1 + sx1 * q2 ); + t0 += t0 * sx0 * ( q1 + sx0 * q2 ); + + t2 += ah2; + t1 += ah1; + t0 += ah0; + + *pz2 = sign2 * t2; + *pz1 = sign1 * t1; + *pz0 = sign0 * t0; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while ( --n > 0 ); + + if ( i > 1 ) + { + ah1 += __vlibm_TBL_atan1[k1]; + t1 = ( y1 - x1 * (double)base1 ) / + ( x1 + y1 * (double)base1 ); + sx1 = t1 * t1; + t1 += t1 * sx1 * ( q1 + sx1 * q2 ); + t1 += ah1; + *pz1 = sign1 * t1; + } + + if ( i > 0 ) + { + ah0 += __vlibm_TBL_atan1[k0]; + t0 = ( y0 - x0 * (double)base0 ) / + ( x0 + y0 * (double)base0 ); + sx0 = t0 * t0; + t0 += t0 * sx0 * ( q1 + sx0 * q2 ); + t0 += ah0; + *pz0 = sign0 * t0; + } +} diff --git 
a/usr/src/libm/src/mvec/__vatanf.c b/usr/src/libm/src/mvec/__vatanf.c new file mode 100644 index 0000000..d03bfb5 --- /dev/null +++ b/usr/src/libm/src/mvec/__vatanf.c @@ -0,0 +1,405 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vatanf.c 1.4 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +void +__vatanf( int n, float * restrict x, int stridex, float * restrict y, int stridey ) +{ + extern const double __vlibm_TBL_atan1[]; + double conup0, conup1, conup2, conup3; + float dummy, ansf; + float f0, f1, f2, f3; + float ans0, ans1, ans2, ans3; + float poly0, poly1, poly2, poly3; + float sign0, sign1, sign2, sign3; + int intf, intz, argcount; + int index0, index1, index2, index3; + float z,*yaddr0,*yaddr1,*yaddr2,*yaddr3; + int *pz = (int *) &z; + +/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 + * Error = -3.08254E-18 On the interval |x| < 1/64 */ + + static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ; + static const float pone = 1.0f; + + if( n <= 0 ) return; /* if no. 
of elements is 0 or neg, do nothing */ + do + { + LOOP0: + + intf = *(int *) x; /* upper half of x, as integer */ + f0 = *x; + sign0 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f0 = -f0; + sign0 = -sign0; + } + + if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */ + { + if( intf > 0x7f800000 ) + { + ansf = f0- f0; /* return NaN if x=NaN*/ + } + else if( intf < 0x31800000 ) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f0; + dummy = dummy; + ansf = f0; + } + else if( intf > 0x5B000000 ) /* avoid underflow for big arg */ + { + index0= 2; + ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */ + } + *y = sign0*ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 0; /* initialize argcount */ + if ( --n <=0 ) break; /* we are done */ + goto LOOP0; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if(|x| > 64 */ + { + f0 = -pone/f0; + index0 = 2; /* point to pi/2 upper, lower */ + } + else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f0 = (f0 - z)/(pone + f0*z); + index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + index0 = 0; /* points to 0,0 in table */ + } + yaddr0 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 1; /* we now have 1 good argument */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 1 good arg */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP1: + + intf = *(int *) x; /* upper half of x, as integer */ + f1 = *x; + sign1 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f1 = -f1; + sign1 = -sign1; + } + + if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */ + { + if( intf > 0x7f800000 ) + { + ansf = f1 - f1; /* return NaN if x=NaN*/ + } + else if( intf < 0x31800000 ) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f1; + dummy = dummy; + ansf = f1; + } + else if( intf > 0x5B000000 ) /* avoid underflow for big arg */ + { + index1 = 2; + ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */ + } + *y = sign1 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 1; /* we still have 1 good arg */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 1 good arg */ + } + goto LOOP1; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if(|x| > 64 */ + { + f1 = -pone/f1; + index1 = 2; /* point to pi/2 upper, lower */ + } + else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f1 = (f1 - z)/(pone + f1*z); + index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + index1 = 0; /* points to 0,0 in table */ + } + + yaddr1 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 2; /* we now have 2 good arguments */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 2 good args */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP2: + + intf = *(int *) x; /* upper half of x, as integer */ + f2 = *x; + sign2 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f2 = -f2; + sign2 = -sign2; + } + + if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */ + { + if( intf > 0x7f800000 ) + { + ansf = f2 - f2; /* return NaN if x=NaN*/ + } + else if( intf < 0x31800000 ) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f2; + dummy = dummy; + ansf = f2; + } + else if( intf > 0x5B000000 ) /* avoid underflow for big arg */ + { + index2 = 2; + ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */ + } + *y = sign2 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 2; /* we still have 2 good args */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 2 good args */ + } + goto LOOP2; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if(|x| > 64 */ + { + f2 = -pone/f2; + index2 = 2; /* point to pi/2 upper, lower */ + } + else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f2 = (f2 - z)/(pone + f2*z); + index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + index2 = 0; /* points to 0,0 in table */ + } + yaddr2 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 3; /* we now have 3 good arguments */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 2 good args */ + } + + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +#ifdef UNROLL4 + LOOP3: + + intf = *(int *) x; /* upper half of x, as integer */ + f3 = *x; + sign3 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f3 = -f3; + sign3 = -sign3; + } + + if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */ + { + if( intf > 0x7f800000 ) + { + ansf = f3 - f3; /* return NaN if x=NaN*/ + } + else if( intf < 0x31800000 ) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f3; + dummy = dummy; + ansf = f3; + } + else if( intf > 0x5B000000 ) /* avoid underflow for big arg */ + { + index3 = 2; + ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */ + } + *y = sign3 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 3; /* we still have 3 good args */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 3 good args */ + } + goto LOOP3; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if(|x| > 64 */ + { + n3 = -pone; + d3 = f3; + f3 = n3/d3; + index3 = 2; /* point to pi/2 upper, lower */ + } + else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... 
*/ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + n3 = (f3 - z); + d3 = (pone + f3*z); /* get reduced argument */ + f3 = n3/d3; + index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + n3 = f3; + d3 = pone; + index3 = 0; /* points to 0,0 in table */ + } + yaddr3 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 4; /* we now have 4 good arguments */ + if ( --n <=0 ) + { + goto UNROLL; /* finish up with 3 good args */ + } +#endif /* UNROLL4 */ + +/* here is the n-way unrolled section, + but we may actually have less than n + arguments at this point +*/ + +UNROLL: + +#ifdef UNROLL4 + if (argcount == 4) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + conup2 = __vlibm_TBL_atan1[index2]; + conup3 = __vlibm_TBL_atan1[index3]; + poly0 = p1*f0*f0*f0 + f0; + ans0 = sign0 * (float)(conup0 + poly0); + poly1 = p1*f1*f1*f1 + f1; + ans1 = sign1 * (float)(conup1 + poly1); + poly2 = p1*f2*f2*f2 + f2; + ans2 = sign2 * (float)(conup2 + poly2); + poly3 = p1*f3*f3*f3 + f3; + ans3 = sign3 * (float)(conup3 + poly3); + *yaddr0 = ans0; + *yaddr1 = ans1; + *yaddr2 = ans2; + *yaddr3 = ans3; + } + else +#endif + if (argcount == 3) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + conup2 = __vlibm_TBL_atan1[index2]; + poly0 = p1*f0*f0*f0 + f0; + poly1 = p1*f1*f1*f1 + f1; + poly2 = p1*f2*f2*f2 + f2; + ans0 = sign0 * (float)(conup0 + poly0); + ans1 = sign1 * (float)(conup1 + poly1); + ans2 = sign2 * (float)(conup2 + poly2); + *yaddr0 = ans0; + *yaddr1 = ans1; + *yaddr2 = ans2; + } + else + if (argcount == 2) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + poly0 = p1*f0*f0*f0 + f0; + poly1 = p1*f1*f1*f1 + f1; + ans0 = sign0 * (float)(conup0 + poly0); + ans1 = sign1 * 
(float)(conup1 + poly1); + *yaddr0 = ans0; + *yaddr1 = ans1; + } + else + if (argcount == 1) + { + conup0 = __vlibm_TBL_atan1[index0]; + poly0 = p1*f0*f0*f0 + f0; + ans0 = sign0 * (float)(conup0 + poly0); + *yaddr0 = ans0; + } + + } while (n > 0); + +} diff --git a/usr/src/libm/src/mvec/__vc_abs.c b/usr/src/libm/src/mvec/__vc_abs.c new file mode 100644 index 0000000..1e70ba6 --- /dev/null +++ b/usr/src/libm/src/mvec/__vc_abs.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vc_abs.c 1.4 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vhypotf( int, float *, int, float *, int, float *, int ); + +void +__vc_abs( int n, float * restrict x, int stridex, float * restrict y, + int stridey ) +{ + stridex <<= 1; + __vhypotf( n, x, stridex, x + 1, stridex, y, stridey ); +} diff --git a/usr/src/libm/src/mvec/__vc_exp.c b/usr/src/libm/src/mvec/__vc_exp.c new file mode 100644 index 0000000..852108d --- /dev/null +++ b/usr/src/libm/src/mvec/__vc_exp.c @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vc_exp.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vexpf( int, float *, int, float *, int ); +extern void __vsincosf( int, float *, int, float *, int, float *, int ); + +void +__vc_exp( int n, float * restrict x, int stridex, float * restrict y, + int stridey, float * restrict tmp ) +{ + int i, j, k; + + stridex <<= 1; + stridey <<= 1; + __vexpf( n, x, stridex, tmp, 1 ); + __vsincosf( n, x + 1, stridex, y + 1, stridey, y, stridey ); + for ( i = j = 0; i < n; i++, j += stridey ) + { + y[j] *= tmp[i]; + y[j+1] *= tmp[i]; + } +} diff --git a/usr/src/libm/src/mvec/__vc_log.c b/usr/src/libm/src/mvec/__vc_log.c new file mode 100644 index 0000000..c0b6a00 --- /dev/null +++ b/usr/src/libm/src/mvec/__vc_log.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vc_log.c 1.4 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vatan2f( int, float *, int, float *, int, float *, int ); +extern void __vhypotf( int, float *, int, float *, int, float *, int ); +extern void __vlogf( int, float *, int, float *, int ); + +void +__vc_log( int n, float * restrict x, int stridex, float * restrict y, + int stridey ) +{ + stridex <<= 1; + stridey <<= 1; + __vhypotf( n, x, stridex, x + 1, stridex, y + 1, stridey ); + __vlogf( n, y + 1, stridey, y, stridey ); + __vatan2f( n, x + 1, stridex, x, stridex, y + 1, stridey ); +} diff --git a/usr/src/libm/src/mvec/__vc_pow.c b/usr/src/libm/src/mvec/__vc_pow.c new file mode 100644 index 0000000..9236b1c --- /dev/null +++ b/usr/src/libm/src/mvec/__vc_pow.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vc_pow.c 1.4 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vc_exp( int, float *, int, float *, int, float * ); +extern void __vc_log( int, float *, int, float *, int ); + +void +__vc_pow( int n, float * restrict x, int stridex, float * restrict y, + int stridey, float * restrict z, int stridez, float * restrict tmp ) +{ + float r; + int i, j, k; + + __vc_log( n, x, stridex, tmp, 1 ); + stridey <<= 1; + for ( i = j = 0; i < n; i++, j += stridey ) + { + k = i << 1; + r = y[j] * tmp[k] - y[j+1] * tmp[k+1]; + tmp[k+1] = y[j+1] * tmp[k] + y[j] * tmp[k+1]; + tmp[k] = r; + } + __vc_exp( n, tmp, 1, z, stridez, tmp + n + n ); +} diff --git a/usr/src/libm/src/mvec/__vcos.c b/usr/src/libm/src/mvec/__vcos.c new file mode 100644 index 0000000..d1801e2 --- /dev/null +++ b/usr/src/libm/src/mvec/__vcos.c @@ -0,0 +1,1098 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)__vcos.c 1.6 06/01/31 SMI"
+
+#include
+
+#ifdef _LITTLE_ENDIAN
+#define HI(x) *(1+(int*)x) /* most significant 32-bit word of the double at x, as int */
+#define LO(x) *(unsigned*)x /* the other 32-bit word, as unsigned */
+#else
+#define HI(x) *(int*)x /* most significant 32-bit word of the double at x, as int */
+#define LO(x) *(1+(unsigned*)x) /* the other 32-bit word, as unsigned */
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+/*
+ * vcos.1.c
+ *
+ * Vector cosine function. Just slight modifications to vsin.8.c, mainly
+ * in the primary range part.
+ *
+ * Modification to primary range processing. If an argument that does not
+ * fall in the primary range is encountered, then processing is continued
+ * in the medium range.
+ *
+ * NOTE(review): the #include directive above has no header name in this
+ * copy -- presumably lost in extraction. Restore it from upstream; it is
+ * likely where _LITTLE_ENDIAN is expected to come from (confirm).
+ *
+ * NOTE(review): HI() and LO() expand their argument unparenthesized, so
+ * they are only safe with simple operands such as x or &t0.
+ *
+ */
+
+extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
+
+static const double
+	half[2] = { 0.5, -0.5 }, /* rounding offset, indexed by the sign bit of x */
+	one = 1.0,
+	invpio2 = 0.636619772367581343075535, /* 53 bits of 2/pi */
+	pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
+	pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
+	pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
+	pio2_3t = 8.478427660368899643959e-32, /* pi/2 - (pio2_1+pio2_2+pio2_3) */
+	pp1 = -1.666666666605760465276263943134982554676e-0001, /* pp1, pp2: used as x*(1 + z*(pp1 + z*pp2)), z = x*x */
+	pp2 = 8.333261209690963126718376566146180944442e-0003,
+	qq1 = -4.999999999977710986407023955908711557870e-0001, /* qq1, qq2: used as z*(qq1 + z*qq2), z = x*x */
+	qq2 = 4.166654863857219350645055881018842089580e-0002,
+	poly1[2]= { -1.666666666666629669805215138920301589656e-0001, /* poly1..poly4: polynomial coefficients in z = x*x; */
+	-4.999999999999931701464060878888294524481e-0001 }, /* the primary range uses [1], the medium range picks [n&1] */
+	poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
+	4.166666666394861917535640593963708222319e-0002 },
+	poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
+	-1.388888552656142867832756687736851681462e-0003 },
+	poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
+	2.478519423681460796618128289454530524759e-0005 };
+
+static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; /* high-word cutoffs, indexed by n&1 in the medium-range path */
+
+/* Don't __ the following; acomp will handle it */
+extern double fabs( double );
+extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
+ +/* + * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n. + * + * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06. + * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. + * + * elts < 2^-27 use the approximation 1.0 ~ cos(x). + */ +void +__vcos( int n, double * restrict x, int stridex, double * restrict y, + int stridey ) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; + unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave; + + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + + do /* MAIN LOOP */ + { + /* Gotos here so _break_ exits MAIN LOOP. */ +LOOP0: /* Find first arg in right range. */ + xsb0 = HI(x); /* get most significant word */ + hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ + if ( hx0 > 0x3fe921fb ) { + /* Too big: arg reduction needed, so leave for second part */ + biguns = 1; + goto MEDIUM; + } + if ( hx0 < 0x3e400000 ) { + /* Too small. cos x ~ 1. */ + volatile int v = *x; + *y = 1.0; + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto LOOP0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + +LOOP1: /* Get second arg, same as above. */ + xsb1 = HI(x); + hx1 = xsb1 & ~0x80000000; + if ( hx1 > 0x3fe921fb ) + { + biguns = 2; + goto MEDIUM; + } + if ( hx1 < 0x3e400000 ) + { + volatile int v = *x; + *y = 1.0; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto LOOP1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + +LOOP2: /* Get third arg, same as above. 
*/ + xsb2 = HI(x); + hx2 = xsb2 & ~0x80000000; + if ( hx2 > 0x3fe921fb ) + { + biguns = 3; + goto MEDIUM; + } + if ( hx2 < 0x3e400000 ) + { + volatile int v = *x; + *y = 1.0; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + goto LOOP2; + } + x2 = *x; + py2 = y; + + /* + * 0x3fc40000 = 5/32 ~ 0.15625 + * Get msb after subtraction. Will be 1 only if + * hx0 - 5/32 is negative. + */ + i = ( hx0 - 0x3fc40000 ) >> 31; + i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2; + i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4; + switch ( i ) + { + double a0, a1, a2, w0, w1, w2; + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* All are > 5/32 */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 -= t0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a1 = __vlibm_TBL_sincos_hi[j1+1]; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) sin_hi(t) */ + t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 ); + t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 ); + t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 ); + + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 1: + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = 
( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 ); + t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 ); + *py0 = one + t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x0 -= t0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 ); + t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 ); + *py0 = a0 + t0; + *py1 = one + t1; + *py2 = a2 + t2; + break; + + case 3: + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 
= x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + a2 = __vlibm_TBL_sincos_hi[j2+1]; + t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 ); + *py0 = one + t0; + *py1 = one + t1; + *py2 = a2 + t2; + break; + + case 4: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x0 -= t0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 ); + t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 ); + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = one + t2; + break; + + case 5: + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( 
xsb1 >> 30 ) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 ); + *py0 = one + t0; + *py1 = a1 + t1; + *py2 = one + t2; + break; + + case 6: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 ); + *py0 = a0 + t0; + *py1 = one + t1; + *py2 = one + t2; + break; + + case 7: /* All are < 5/32 */ + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + *py0 = one + t0; + *py1 = one + t1; + *py2 = one + t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while ( --n > 0 ); /* END MAIN LOOP */ + + /* + * CLEAN UP last 0, 1, or 2 elts. + */ + if ( i > 0 ) /* Clean up elts at tail. i < 3. 
*/ + { + double a0, a1, w0, w1; + double t0, t1, z0, z1; + unsigned j0, j1; + + if ( i > 1 ) + { + if ( hx1 < 0x3fc40000 ) + { + z1 = x1 * x1; + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + t1 = one + t1; + *py1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+1]; + t1 = __vlibm_TBL_sincos_lo[j1+1] + - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 ); + *py1 = a1 + t1; + } + } + if ( hx0 < 0x3fc40000 ) + { + z0 = x0 * x0; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + t0 = one + t0; + *py0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+1]; + t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 ); + *py0 = a0 + t0; + } + } /* END CLEAN UP */ + + return; + + /* + * Take care of BIGUNS. + * + * We have jumped here in the middle of processing after having + * encountered a medium range argument. Therefore things are in a + * bit of a tizzy. 
+ */ + +MEDIUM: + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + if ( biguns == 3 ) + { + biguns = 0; + xsb0 = xsb0 >> 31; + xsb1 = xsb1 >> 31; + goto loop2; + } + else if ( biguns == 2 ) + { + xsb0 = xsb0 >> 31; + biguns = 0; + goto loop1; + } + biguns = 0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + + /* + * Find 3 more to work on: Not already done, not too big. + */ + +loop0: + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */ + { + if ( hx >= 0x7ff00000 ) /* Inf or NaN */ + { + x0 = *x; + *y = x0 - x0; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x1 = *x; + *y = x1 - x1; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x2 = *x; + *y = x2 - x2; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + x2 = *x; + py2 = y; + + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + n2 = (int) ( x2 * invpio2 + half[xsb2] ); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + n1 = (n1 + 1) & 3; + n2 = 
(n2 + 1) & 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + xsb0 = HI(&x0); + i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31; + xsb1 = HI(&x1); + i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2; + xsb2 = HI(&x2); + i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4; + switch ( i ) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 
& ~( n2 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + 
x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = 
x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( 
poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + 
y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while ( --n > 0 ); + + if ( i > 0 ) + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned hx, j0, j1; + int n0, n1; + + if ( i > 1 ) + { + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + fn1 = (double) n1; + n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + xsb1 = HI(&x1); + if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] ) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + *py1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = ( x1 - t1 ) + y1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 
* t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + fn0 = (double) n0; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + xsb0 = HI(&x0); + if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] ) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + *py0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = ( x0 - t0 ) + y0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + + if ( biguns ) + __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb ); +} diff --git a/usr/src/libm/src/mvec/__vcosbig.c b/usr/src/libm/src/mvec/__vcosbig.c new file mode 100644 index 0000000..b832377 --- /dev/null +++ b/usr/src/libm/src/mvec/__vcosbig.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vcosbig.c 1.5 06/01/31 SMI" + +#include /* NOTE(review): the header name after #include is missing in this copy of the patch, presumably stripped during extraction. Upstream likely names sys/isa_defs.h in angle brackets, which would be the source of _LITTLE_ENDIAN tested below -- confirm and restore before building. */ + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m( double *, double *, int, int, int ); + +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 = -1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; + +void /* Large-argument pass of the vector cosine. Walks n elements, reading x in steps of stridex and writing y in steps of stridey (both counted in doubles). An element is handled only when the high word of |x| is strictly above thresh and below 0x7ff00000 (so x is finite); its cosine is then stored to y. Every other element is skipped and its y slot is not written. Handled elements are split into 24-bit pieces, reduced by __vlibm_rem_pio2m, and evaluated either with a short polynomial or with the __vlibm_TBL_sincos tables depending on the size of the reduced argument. */ +__vlibm_vcos_big( int n, double * restrict x, int stridex, double * restrict y, + int stridey, int thresh ) +{ + for ( ; n--; x += stridex, y += stridey ) + { + double tx, tt[3], ty[2], t, w, z,
a; + unsigned hx, xsb; + int e0, nx, i, j; /* NOTE(review): i is never used in this function */ + + hx = HI(x); + xsb = hx & 0x80000000; + hx &= ~0x80000000; + if ( hx <= thresh || hx >= 0x7ff00000 ) + continue; /* at or below thresh, or inf or NaN: nothing is stored to *y */ + e0 = ( hx >> 20 ) - 1046; + HI(&tx) = 0x41600000 | ( hx & 0xfffff ); + LO(&tx) = LO(x); /* tx now holds the mantissa bits of |x| under a fixed exponent field of 0x416; e0 is the original exponent field minus 0x416 */ + tt[0] = (double)( (int) tx ); /* peel off the integer part; the remainder is scaled by 2^24 to expose the next piece */ + tx = ( tx - tt[0] ) * two24; + if ( tx != zero ) + { + nx = 2; + tt[1] = (double)( (int) tx ); + tt[2] = ( tx - tt[1] ) * two24; + if ( tt[2] != zero ) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m( tt, ty, e0, nx, 2 ); + if ( xsb ) + { + nx = -nx; + ty[0] = -ty[0]; + ty[1] = -ty[1]; + } + nx = (nx + 1) & 3; /* Add 1 to turn sin into cos */ + + /* now nx and ty[*] are the quadrant and reduced arg */ + xsb = ( nx & 2 ) << 30; /* bit 1 of the quadrant becomes the sign bit of the result */ + hx = HI(&ty[0]); + if ( nx & 1 ) + { + if ( hx & 0x80000000 ) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if ( hx < 0x3fc40000 ) + { + z = ty[0] * ty[0]; + t = z * ( q1 + z * ( q2 + z * ( q3 + z * q4 ) ) ); + a = one + t; + } + else + { + j = ( hx + 0x4000 ) & 0x7fff8000; /* round the high word to the nearest table breakpoint */ + HI(&t) = j; + LO(&t) = 0; + ty[0] = ( ty[0] - t ) + ty[1]; + z = ty[0] * ty[0]; + t = z * ( qq1 + z * qq2 ); + w = ty[0] * ( one + z * ( pp1 + z * pp2 ) ); + j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3; + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - ( __vlibm_TBL_sincos_hi[j] * w - a * t ); + a += t; + } + } + else + { + if ( hx & 0x80000000 ) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; /* in this branch a negative reduced argument flips the sign of the result */ + } + if ( hx < 0x3fc90000 ) + { + z = ty[0] * ty[0]; + t = z * ( p1 + z * ( p2 + z * ( p3 + z * p4 ) ) ); + a = ty[0] + ( ty[1] + ty[0] * t ); + } + else + { + j = ( hx + 0x4000 ) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = ( ty[0] - t ) + ty[1]; + z = ty[0] * ty[0]; + t = z * ( qq1 + z * qq2 ); + w = ty[0] * ( one + z * ( pp1 + z * pp2 ) ); + j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = ( __vlibm_TBL_sincos_hi[j+1] * w + a * t ) +
__vlibm_TBL_sincos_lo[j]; + a += t; + } + } + if ( xsb ) a = -a; + *y = a; + } +} diff --git a/usr/src/libm/src/mvec/__vcosbig_ultra3.c b/usr/src/libm/src/mvec/__vcosbig_ultra3.c new file mode 100644 index 0000000..6b65ad7 --- /dev/null +++ b/usr/src/libm/src/mvec/__vcosbig_ultra3.c @@ -0,0 +1,652 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vcosbig_ultra3.c 1.4 06/01/31 SMI" + +#include /* NOTE(review): the header name after #include is missing in this copy of the patch, presumably stripped during extraction. Upstream likely names sys/isa_defs.h in angle brackets, which would be the source of _LITTLE_ENDIAN tested below -- confirm and restore before building. */ + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, + pio2_1 = 1.570796326734125614166, + pio2_2 = 6.077100506303965976596e-11, + pio2_3 = 2.022266248711166455796e-21, + pio2_3t = 8.478427660368899643959e-32, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +extern void __vlibm_vcos_big( int, double *, int, double *, int, int ); + +void /* Vector cosine for mid-sized arguments, processed three elements at a time. Walks n elements, reading x in steps of stridex and writing y in steps of stridey (both counted in doubles). Only elements whose |x| high word is above pthresh and at most 0x413921fb are gathered and evaluated; every other element is stepped over without writing its y slot. If any stepped-over element is finite with a high word above 0x413921fb, __vlibm_vcos_big is called at the end over the whole original vector with threshold 0x413921fb to produce those results. */ +__vlibm_vcos_big_ultra3( int n, double * restrict x, int stridex, double * restrict y, + int stridey, int pthresh ) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; + unsigned xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave; + + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; /* becomes 1 once an element has to be left for __vlibm_vcos_big */ + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] =
1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; /* slots 1 and 3 of these tables are fixed here; slots 0 and 2 are filled per element with the reduced argument and its negation, so indexing by the quadrant n picks x, 1, -x or -1 (and y, 0, -y or 0) */ + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + +loop0: /* find the next in-range element for slot 0 */ + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= pthresh || hx > 0x413921fb ) + { + if ( hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; /* i counts the elements gathered so far in this round */ + if ( --n <= 0 ) + break; + +loop1: /* same search for slot 1 */ + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= pthresh || hx > 0x413921fb ) + { + if ( hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + +loop2: /* same search for slot 2 */ + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= pthresh || hx > 0x413921fb ) + { + if ( hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + x2 = *x; + py2 = y; /* three elements gathered: pick the nearest multiple of pi/2 for each, then subtract it in three steps (pio2_1, pio2_2, pio2_3 with tail pio2_3t), leaving the reduced argument in x0..x2 and its low-order part in y0..y2 */ + + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + n2 = (int) ( x2 * invpio2 + half[xsb2] ); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2
= a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + xsb0 = HI(&x0); + i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31; + xsb1 = HI(&x1); + i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2; + xsb2 = HI(&x2); + i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4; /* bit k of i is set when the reduced argument of element k is below thresh[] and takes the polynomial path; a clear bit selects the table path. The eight cases below are the eight combinations. */ + switch ( i ) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 *
t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2
) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0
) + y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) +
__vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + (
y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while ( --n > 0 ); + + if ( i > 0 ) /* the input ran out with one or two gathered elements still pending (i is their count); finish them one at a time with the same reduction and evaluation */ + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned j0, j1; + int n0, n1; + + if ( i > 1 ) + { + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + fn1 = (double) n1; + n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + xsb1 = HI(&x1); + if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] ) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + *py1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = ( x1 - t1 ) + y1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + fn0 = (double) n0; + n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; +
xsb0 = HI(&x0); + if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] ) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + *py0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = ( x0 - t0 ) + y0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + + if ( biguns ) /* at least one finite input had a high word above 0x413921fb: rescan the original vector with the large-argument routine */ + __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb ); +} diff --git a/usr/src/libm/src/mvec/__vcosbigf.c b/usr/src/libm/src/mvec/__vcosbigf.c new file mode 100644 index 0000000..e5677fc --- /dev/null +++ b/usr/src/libm/src/mvec/__vcosbigf.c @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__vcosbigf.c 1.5 06/01/31 SMI"
+
+#include <sys/isa_defs.h>
+/* HI(p)/LO(p): high/low 32-bit word of the double that p points to */
+#ifdef _LITTLE_ENDIAN
+#define HI(x)	(*(1+(int*)(x)))
+#define LO(x)	(*(unsigned*)(x))
+#else
+#define HI(x)	(*(int*)(x))
+#define LO(x)	(*(1+(unsigned*)(x)))
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
+extern int __vlibm_rem_pio2m( double *, double *, int, int, int );
+/* p*, q*: sin/cos polynomials used alone; pp*, qq*: shorter ones used with the table */
+static const double
+	zero	= 0.0,
+	one	= 1.0,
+	two24	= 16777216.0,
+	pp1	= -1.666666666605760465276263943134982554676e-0001,
+	pp2	= 8.333261209690963126718376566146180944442e-0003,
+	p1	= -1.666666666666629669805215138920301589656e-0001,
+	p2	= 8.333333332390951295683993455280336376663e-0003,
+	p3	= -1.984126237997976692791551778230098403960e-0004,
+	p4	= 2.753403624854277237649987622848330351110e-0006,
+	qq1	= -4.999999999977710986407023955908711557870e-0001,
+	qq2	= 4.166654863857219350645055881018842089580e-0002,
+	q1	= -4.999999999999931701464060878888294524481e-0001,
+	q2	= 4.166666666394861917535640593963708222319e-0002,
+	q3	= -1.388888552656142867832756687736851681462e-0003,
+	q4	= 2.478519423681460796618128289454530524759e-0005;
+/* Store cos(*x) in *y for each of n strided elements whose |x| is huge but finite; skip the rest. */
+void
+__vlibm_vcos_bigf( int n, float * restrict x, int stridex, float * restrict y,
+	int stridey )
+{
+	for ( ; n-- > 0; x += stridex, y += stridey )	/* n <= 0 does nothing */
+	{
+		double		tx, tt[3], ty[2], t, w, z, a;
+		unsigned	hx, xsb;
+		int		e0, nx, j;
+
+		tx = *x;
+		hx = HI(&tx);
+		xsb = hx & 0x80000000;	/* sign of x */
+		hx &= ~0x80000000;
+		if ( hx <= 0x413921fb || hx >= 0x7ff00000 )	/* not above roughly 2^19*pi, or inf/NaN */
+			continue;	/* *y is left untouched for these elements */
+		e0 = ( hx >> 20 ) - 1046;	/* exponent removed by the rescaling below */
+		HI(&tx) = 0x41600000 | ( hx & 0xfffff );	/* |x| rescaled into [2^23, 2^24) */
+
+		tt[0] = (double)( (int) tx );	/* peel off 24-bit integer pieces for the reducer */
+		tx = ( tx - tt[0] ) * two24;
+		if ( tx != zero )
+		{
+			nx = 2;
+			tt[1] = (double)( (int) tx );
+			tt[2] = ( tx - tt[1] ) * two24;
+			if ( tt[2] != zero )
+				nx = 3;
+		}
+		else
+		{
+			nx = 1;
+			tt[1] = tt[2] = zero;
+		}
+		nx = __vlibm_rem_pio2m( tt, ty, e0, nx, 2 );
+		if ( xsb )	/* the reduction was done on |x|; mirror it for negative x */
+		{
+			nx = -nx;
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+		}
+		nx = (nx + 1) & 3; /* Add 1 to turn sin into cos */
+
+		/* now nx and ty[*] are the quadrant and reduced arg */
+		xsb = ( (unsigned) nx & 2 ) << 30;	/* quadrants 2 and 3 negate; unsigned avoids shifting into int's sign bit */
+		hx = HI(&ty[0]);
+		if ( nx & 1 )	/* odd quadrant: cosine kernel */
+		{
+			if ( hx & 0x80000000 )
+			{
+				ty[0] = -ty[0];
+				ty[1] = -ty[1];
+				hx &= ~0x80000000;
+			}
+			if ( hx < 0x3fc40000 )	/* small reduced arg: polynomial alone */
+			{
+				z = ty[0] * ty[0];
+				t = z * ( q1 + z * ( q2 + z * ( q3 + z * q4 ) ) );
+				a = one + t;
+			}
+			else	/* table entry at a nearby breakpoint plus a short correction */
+			{
+				j = ( hx + 0x4000 ) & 0x7fff8000;
+				HI(&t) = j;
+				LO(&t) = 0;
+				ty[0] = ( ty[0] - t ) + ty[1];
+				z = ty[0] * ty[0];
+				t = z * ( qq1 + z * qq2 );
+				w = ty[0] * ( one + z * ( pp1 + z * pp2 ) );
+				j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3;
+				a = __vlibm_TBL_sincos_hi[j+1];
+				t = __vlibm_TBL_sincos_lo[j+1] - ( __vlibm_TBL_sincos_hi[j] * w - a * t );
+				a += t;
+			}
+		}
+		else	/* even quadrant: sine kernel */
+		{
+			if ( hx & 0x80000000 )
+			{
+				ty[0] = -ty[0];
+				ty[1] = -ty[1];
+				hx &= ~0x80000000;
+				xsb ^= 0x80000000;	/* sin is odd: flip the result sign */
+			}
+			if ( hx < 0x3fc90000 )	/* small reduced arg: polynomial alone */
+			{
+				z = ty[0] * ty[0];
+				t = z * ( p1 + z * ( p2 + z * ( p3 + z * p4 ) ) );
+				a = ty[0] + ( ty[1] + ty[0] * t );
+			}
+			else	/* table entry at a nearby breakpoint plus a short correction */
+			{
+				j = ( hx + 0x4000 ) & 0x7fff8000;
+				HI(&t) = j;
+				LO(&t) = 0;
+				ty[0] = ( ty[0] - t ) + ty[1];
+				z = ty[0] * ty[0];
+				t = z * ( qq1 + z * qq2 );
+				w = ty[0] * ( one + z * ( pp1 + z * pp2 ) );
+				j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3;
+				a = __vlibm_TBL_sincos_hi[j];
+				t = ( __vlibm_TBL_sincos_hi[j+1] * w + a * t ) + __vlibm_TBL_sincos_lo[j];
+				a += t;
+			}
+		}
+		if ( xsb ) a = -a;
+		*y = a;
+	}
+}
diff --git a/usr/src/libm/src/mvec/__vcosf.c b/usr/src/libm/src/mvec/__vcosf.c
new file mode 100644
index 0000000..049786b
--- /dev/null
+++ b/usr/src/libm/src/mvec/__vcosf.c
@@ -0,0 +1,376 @@
+/*
+ * 
CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__vcosf.c 1.4 06/01/23 SMI"
+
+/*
+ * __vcosf: single precision vector cos
+ *
+ * Algorithm:
+ *
+ * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
+ * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
+ * z*C2))), where z = x*x, all evaluated in double precision.
+ *
+ * Accuracy:
+ *
+ * The largest error is less than 0.6 ulps.
+ */
+
+#include <sys/isa_defs.h>
+/* HI(x)/LO(x): high/low 32-bit word of the double lvalue x */
+#ifdef _LITTLE_ENDIAN
+#define HI(x)	*(1+(int *)&x)
+#define LO(x)	*(unsigned *)&x
+#else
+#define HI(x)	*(int *)&x
+#define LO(x)	*(1+(unsigned *)&x)
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+extern int __vlibm_rem_pio2m(double *, double *, int, int, int);
+
+static const double C[] = {
+	-1.66666552424430847168e-01,	/* 2^ -3 * -1.5555460000000 */
+	8.33219196647405624390e-03,	/* 2^ -7 * 1.11077E0000000 */
+	-1.95187909412197768688e-04,	/* 2^-13 * -1.9956B60000000 */
+	1.0,
+	-0.5,
+	4.16666455566883087158e-02,	/* 2^ -5 * 1.55554A0000000 */
+	-1.38873036485165357590e-03,	/* 2^-10 * -1.6C0C1E0000000 */
+	2.44309903791872784495e-05,	/* 2^-16 * 1.99E24E0000000 */
+	0.636619772367581343075535,	/* 2^ -1 * 1.45F306DC9C883 */
+	6755399441055744.0,	/* 2^ 52 * 1.8000000000000 */
+	1.570796326734125614166,	/* 2^ 0 * 1.921FB54400000 */
+	6.077100506506192601475e-11,	/* 2^-34 * 1.0B4611A626331 */
+};
+
+#define S0	C[0]
+#define S1	C[1]
+#define S2	C[2]
+#define one	C[3]
+#define mhalf	C[4]
+#define C0	C[5]
+#define C1	C[6]
+#define C2	C[7]
+#define invpio2	C[8]
+#define c3two51	C[9]
+#define pio2_1	C[10]
+#define pio2_t	C[11]
+/* PREPROCESS: load *x into slot N; zero, inf/nan and huge inputs are stored at y[index], then goto label */
+#define PREPROCESS(N, index, label) \
+	hx = *(int *)x; \
+	ix = hx & 0x7fffffff; \
+	t = *x; \
+	x += stridex; \
+	if (ix <= 0x3f490fdb) { /* |x| <= pi/4 */ \
+		if (ix == 0) { \
+			y[index] = one; \
+			goto label; \
+		} \
+		y##N = (double)t; \
+		n##N = 1;	/* odd n selects the cos polynomial */ \
+	} else if (ix <= 0x49c90fdb) { /* |x| <= 2^19*pi */ \
+		y##N = (double)t; \
+		medium = 1;	/* reduced later, in the main path or PROCESS */ \
+	} else { \
+		if (ix >= 0x7f800000) { /* inf or nan */ \
+			y[index] = t / t; \
+			goto label; \
+		} \
+		z##N = y##N = (double)t; \
+		hx = HI(y##N); \
+		n##N = ((hx >> 20) & 0x7ff) - 1046; \
+		HI(z##N) = (hx & 0xfffff) | 0x41600000; \
+		n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0) + 1; \
+		z##N = y##N * y##N; \
+		if (n##N & 1) { /* compute cos y */ \
+			f##N = (float)(one + z##N * (mhalf + z##N * \
+			    (C0 + z##N * (C1 + z##N * C2)))); \
+		} else { /* compute sin y */ \
+			f##N = (float)(y##N + y##N * z##N * (S0 + \
+			    z##N * (S1 + z##N * S2))); \
+		} \
+		y[index] = (n##N & 2)? -f##N : f##N; \
+		goto label; \
+	}
+/* PROCESS: finish pending slot N (pi/2 reduction first if medium), store at *y and advance y */
+#define PROCESS(N) \
+	if (medium) { \
+		z##N = y##N * invpio2 + c3two51; \
+		n##N = LO(z##N) + 1; \
+		z##N -= c3two51; \
+		y##N = (y##N - z##N * pio2_1) - z##N * pio2_t; \
+	} \
+	z##N = y##N * y##N; \
+	if (n##N & 1) { /* compute cos y */ \
+		f##N = (float)(one + z##N * (mhalf + z##N * (C0 + \
+		    z##N * (C1 + z##N * C2)))); \
+	} else { /* compute sin y */ \
+		f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + \
+		    z##N * S2))); \
+	} \
+	*y = (n##N & 2)? -f##N : f##N; \
+	y += stridey
+/* __vcosf: y[i*stridey] = cos(x[i*stridex]) for i = 0 .. n-1, taken four elements per pass */
+void
+__vcosf(int n, float *restrict x, int stridex, float *restrict y,
+	int stridey)
+{
+	double y0, y1, y2, y3;
+	double z0, z1, z2, z3;
+	float f0, f1, f2, f3, t;
+	int n0, n1, n2, n3, hx, ix, medium;
+
+	y -= stridey;	/* cancelled by the increment at "begin" */
+
+	for (;;) {
+begin:
+		y += stridey;
+
+		if (--n < 0)
+			break;
+
+		medium = 0;
+		PREPROCESS(0, 0, begin);
+
+		if (--n < 0)
+			goto process1;
+
+		PREPROCESS(1, stridey, process1);
+
+		if (--n < 0)
+			goto process2;
+
+		PREPROCESS(2, (stridey << 1), process2);
+
+		if (--n < 0)
+			goto process3;
+
+		PREPROCESS(3, (stridey << 1) + stridey, process3);
+
+		if (medium) {	/* at least one slot needs the pi/2 reduction; apply it to all four */
+			z0 = y0 * invpio2 + c3two51;
+			z1 = y1 * invpio2 + c3two51;
+			z2 = y2 * invpio2 + c3two51;
+			z3 = y3 * invpio2 + c3two51;
+
+			n0 = LO(z0) + 1;
+			n1 = LO(z1) + 1;
+			n2 = LO(z2) + 1;
+			n3 = LO(z3) + 1;
+
+			z0 -= c3two51;
+			z1 -= c3two51;
+			z2 -= c3two51;
+			z3 -= c3two51;
+
+			y0 = (y0 - z0 * pio2_1) - z0 * pio2_t;
+			y1 = (y1 - z1 * pio2_1) - z1 * pio2_t;
+			y2 = (y2 - z2 * pio2_1) - z2 * pio2_t;
+			y3 = (y3 - z3 * pio2_1) - z3 * pio2_t;
+		}
+
+		z0 = y0 * y0;
+		z1 = y1 * y1;
+		z2 = y2 * y2;
+		z3 = y3 * y3;
+
+		hx = (n0 & 1) | ((n1 & 1) << 1) | ((n2 & 1) << 2) |
+		    ((n3 & 1) << 3);	/* bit i set: slot i takes the cos polynomial */
+		switch (hx) {
+		case 0:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 1:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 2:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 3:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 4:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 5:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 6:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 7:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
+			break;
+
+		case 8:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		case 9:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		case 10:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		case 11:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		case 12:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		case 13:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		case 14:
+			f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+			break;
+
+		default:
+			f0 = (float)(one + z0 * (mhalf + z0 * (C0 +
+			    z0 * (C1 + z0 * C2))));
+			f1 = (float)(one + z1 * (mhalf + z1 * (C0 +
+			    z1 * (C1 + z1 * C2))));
+			f2 = (float)(one + z2 * (mhalf + z2 * (C0 +
+			    z2 * (C1 + z2 * C2))));
+			f3 = (float)(one + z3 * (mhalf + z3 * (C0 +
+			    z3 * (C1 + z3 * C2))));
+		}
+
+		*y = (n0 & 2)? -f0 : f0;
+		y += stridey;
+		*y = (n1 & 2)? -f1 : f1;
+		y += stridey;
+		*y = (n2 & 2)? -f2 : f2;
+		y += stridey;
+		*y = (n3 & 2)? -f3 : f3;
+		continue;
+
+process1:
+		PROCESS(0);
+		continue;
+
+process2:
+		PROCESS(0);
+		PROCESS(1);
+		continue;
+
+process3:
+		PROCESS(0);
+		PROCESS(1);
+		PROCESS(2);
+	}
+}
diff --git a/usr/src/libm/src/mvec/__vexp.c b/usr/src/libm/src/mvec/__vexp.c
new file mode 100644
index 0000000..7f2fc36
--- /dev/null
+++ b/usr/src/libm/src/mvec/__vexp.c
@@ -0,0 +1,589 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__vexp.c 1.7 06/01/23 SMI"
+
+/*
+ * __vexp: double precision vector exp
+ *
+ * Algorithm:
+ *
+ * Write x = (k + j/256)ln2 + r, where k and j are integers, j >= 0,
+ * and |r| <= ln2/512. Then exp(x) = 2^k * 2^(j/256) * exp(r).
+ * Compute exp(r) by a polynomial approximation exp(r) ~ 1 + p(r)
+ * where p(r) := r*(1+r*(B1+r*(B2+r*B3))). From a table, obtain
+ * h and l such that h ~ 2^(j/256) to double precision and h+l
+ * ~ 2^(j/256) to well more than double precision. Then exp(x)
+ * ~ 2^k * (h + (l + h * p(r))) to about double precision. Note
+ * that the multiplication by 2^k requires some finagling when
+ * the result might be subnormal.
+ *
+ * Accuracy:
+ *
+ * For normal results, the largest error observed is less than
+ * 0.6 ulps. For subnormal results, the largest error observed
+ * is 0.737 ulps.
+ */
+
+#include <sys/isa_defs.h>
+
+#ifdef _LITTLE_ENDIAN
+#define HI(x)	*(1+(int *)&x)
+#define LO(x)	*(unsigned *)&x
+#define DBLWORD(x, y)	y, x
+#else
+#define HI(x)	*(int *)&x
+#define LO(x)	*(1+(unsigned *)&x)
+#define DBLWORD(x, y)	x, y
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+static const double TBL[] = {
+	1.00000000000000000000e+00, 0.00000000000000000000e+00,
+	1.00271127505020252180e+00, -3.63661592869226394432e-17,
+	1.00542990111280272636e+00, 9.49918653545503175702e-17,
+	1.00815589811841754830e+00, -3.25205875608430806089e-17,
+	1.01088928605170047526e+00, -1.52347786033685771763e-17,
+	1.01363008495148942956e+00, 9.28359976818356758749e-18,
+	1.01637831491095309566e+00, -5.77217007319966002766e-17,
+	1.01913399607773791367e+00, 3.60190498225966110587e-17,
+	1.02189714865411662714e+00, 5.10922502897344389359e-17,
+	1.02466779289713572076e+00, -7.56160786848777820704e-17,
+	1.02744594911876374610e+00, -4.95607417464536982418e-17,
+	1.03023163768604097967e+00, 3.31983004108081294377e-17,
+	1.03302487902122841490e+00, 7.60083887402708848935e-18,
+	1.03582569360195719810e+00,
-7.80678239133763616702e-17, + 1.03863410196137873065e+00, 5.99627378885251061843e-17, + 1.04145012468831610342e+00, 3.78483048028757620966e-17, + 1.04427378242741375480e+00, 8.55188970553796365958e-17, + 1.04710509587928979336e+00, 7.27707724310431474861e-17, + 1.04994408580068721015e+00, 5.59293784812700258637e-17, + 1.05279077300462642341e+00, -9.62948289902693573942e-17, + 1.05564517836055715705e+00, 1.75932573877209198414e-18, + 1.05850732279451276163e+00, -7.15265185663778073796e-17, + 1.06137722728926209292e+00, -1.19735370853656575649e-17, + 1.06425491288446449900e+00, 5.07875419861123039357e-17, + 1.06714040067682369717e+00, -7.89985396684158212226e-17, + 1.07003371182024187291e+00, -9.93716271128891938112e-17, + 1.07293486752597555522e+00, -3.83966884335882380671e-18, + 1.07584388906279104781e+00, -1.00027161511441361125e-17, + 1.07876079775711986031e+00, -6.65666043605659260344e-17, + 1.08168561499321524977e+00, -4.78262390299708626556e-17, + 1.08461836221330920615e+00, 3.16615284581634611576e-17, + 1.08755906091776965994e+00, 5.40934930782029075923e-18, + 1.09050773266525768967e+00, -3.04678207981247114697e-17, + 1.09346439907288583981e+00, 1.44139581472692093420e-17, + 1.09642908181637688259e+00, -5.91993348444931582405e-17, + 1.09940180263022191376e+00, 7.17045959970192322483e-17, + 1.10238258330784089090e+00, 5.26603687157069438656e-17, + 1.10537144570174117320e+00, 8.23928876050021358995e-17, + 1.10836841172367872588e+00, -8.78681384518052661558e-17, + 1.11137350334481754821e+00, 5.56394502666969764311e-17, + 1.11438674259589243221e+00, 1.04102784568455709549e-16, + 1.11740815156736927882e+00, -7.97680590262822045601e-17, + 1.12043775240960674644e+00, -6.20108590655417874998e-17, + 1.12347556733301989773e+00, -9.69973758898704299544e-17, + 1.12652161860824184814e+00, 5.16585675879545612073e-17, + 1.12957592856628807887e+00, 6.71280585872625658758e-17, + 1.13263851959871919561e+00, 3.23735616673800026374e-17, + 1.13570941415780546357e+00, 
5.06659992612615524241e-17, + 1.13878863475669156458e+00, 8.91281267602540777782e-17, + 1.14187620396956157620e+00, 4.65109117753141238741e-17, + 1.14497214443180417298e+00, 4.64128989217001065651e-17, + 1.14807647884017893780e+00, 6.89774023662719177044e-17, + 1.15118922995298267331e+00, 3.25071021886382721198e-17, + 1.15431042059021593538e+00, 1.04171289462732661865e-16, + 1.15744007363375112085e+00, -9.12387123113440028710e-17, + 1.16057821202749877898e+00, -3.26104020541739310553e-17, + 1.16372485877757747552e+00, 3.82920483692409349872e-17, + 1.16688003695248165847e+00, -8.79187957999916974198e-17, + 1.17004376968325018993e+00, -1.84774420179000469438e-18, + 1.17321608016363732041e+00, -7.28756258658499447915e-17, + 1.17639699165028122074e+00, 5.55420325421807896277e-17, + 1.17958652746287584456e+00, 1.00923127751003904354e-16, + 1.18278471098434101449e+00, 1.54297543007907605845e-17, + 1.18599156566099384058e+00, -9.20950683529310590495e-18, + 1.18920711500272102690e+00, 3.98201523146564611098e-17, + 1.19243138258315117817e+00, 4.39755141560972082715e-17, + 1.19566439203982732842e+00, 4.61660367048148139743e-17, + 1.19890616707438057986e+00, -9.80919335600842311848e-17, + 1.20215673145270307565e+00, 6.64498149925230124489e-17, + 1.20541610900512385918e+00, -3.35727219326752963448e-17, + 1.20868432362658162482e+00, -4.74672594522898409739e-17, + 1.21196139927680124337e+00, -4.89061107752111835732e-17, + 1.21524735998046895524e+00, -7.71263069268148813091e-17, + 1.21854222982740845183e+00, -9.00672695836383767487e-17, + 1.22184603297275762301e+00, -1.06110212114026911612e-16, + 1.22515879363714552674e+00, -8.90353381426998342947e-17, + 1.22848053610687002468e+00, -1.89878163130252995312e-17, + 1.23181128473407586199e+00, 7.38938247161005024655e-17, + 1.23515106393693341325e+00, -1.07552443443078413783e-16, + 1.23849989819981654016e+00, 2.76770205557396742995e-17, + 1.24185781207348400201e+00, 4.65802759183693679123e-17, + 1.24522483017525797955e+00, 
-4.67724044984672750044e-17, + 1.24860097718920481924e+00, -8.26181099902196355046e-17, + 1.25198627786631622172e+00, 4.83416715246989759959e-17, + 1.25538075702469109629e+00, -6.71138982129687841853e-18, + 1.25878443954971652730e+00, -8.42178258773059935677e-17, + 1.26219735039425073886e+00, -3.08446488747384584900e-17, + 1.26561951457880628169e+00, 4.25057700345086802072e-17, + 1.26905095719173321989e+00, 2.66793213134218609523e-18, + 1.27249170338940276181e+00, -1.05779162672124210291e-17, + 1.27594177839639200123e+00, 9.91543024421429032951e-17, + 1.27940120750566932450e+00, -9.75909500835606221035e-17, + 1.28287001607877826359e+00, 1.71359491824356096814e-17, + 1.28634822954602556777e+00, -3.41695570693618197638e-17, + 1.28983587340666572274e+00, 8.94925753089759172195e-17, + 1.29333297322908946647e+00, -2.97459044313275164581e-17, + 1.29683955465100964055e+00, 2.53825027948883149593e-17, + 1.30035564337965059423e+00, 5.67872810280221742200e-17, + 1.30388126519193581210e+00, 8.64767559826787117946e-17, + 1.30741644593467731816e+00, -7.33664565287886889230e-17, + 1.31096121152476441374e+00, -7.18153613551945385697e-17, + 1.31451558794935463581e+00, 2.26754331510458564505e-17, + 1.31807960126606404927e+00, -5.45795582714915288619e-17, + 1.32165327760315753913e+00, -2.48063824591302174150e-17, + 1.32523664315974132322e+00, -2.85873121003886075697e-17, + 1.32882972420595435459e+00, 4.08908622391016005195e-17, + 1.33243254708316150037e+00, -5.10158663091674334319e-17, + 1.33604513820414583236e+00, -5.89186635638880135250e-17, + 1.33966752405330291609e+00, 8.92728259483173198426e-17, + 1.34329973118683532185e+00, -5.80258089020143775130e-17, + 1.34694178623294580355e+00, 3.22406510125467916913e-17, + 1.35059371589203447428e+00, -8.28711038146241653260e-17, + 1.35425554693689265129e+00, 7.70094837980298946162e-17, + 1.35792730621290114179e+00, -9.52963574482518886709e-17, + 1.36160902063822475405e+00, 1.53378766127066804593e-18, + 1.36530071720401191548e+00, 
-1.00053631259747639350e-16, + 1.36900242297459051599e+00, 9.59379791911884877256e-17, + 1.37271416508766841424e+00, -4.49596059523484126201e-17, + 1.37643597075453016920e+00, -6.89858893587180104162e-17, + 1.38016786726023799048e+00, 1.05103145799699839462e-16, + 1.38390988196383202258e+00, -6.77051165879478628716e-17, + 1.38766204229852907481e+00, 8.42298427487541531762e-17, + 1.39142437577192623621e+00, -4.90617486528898870821e-17, + 1.39519690996620027157e+00, -9.32933622422549531960e-17, + 1.39897967253831123635e+00, -9.61421320905132307233e-17, + 1.40277269122020475933e+00, -5.29578324940798922316e-17, + 1.40657599381901543545e+00, 7.03491481213642218800e-18, + 1.41038960821727066275e+00, 4.16654872843506164270e-17, + 1.41421356237309514547e+00, -9.66729331345291345105e-17, + 1.41804788432041517510e+00, 2.27443854218552945230e-17, + 1.42189260216916557589e+00, -1.60778289158902441338e-17, + 1.42574774410549420800e+00, 9.88069075850060728430e-17, + 1.42961333839197002327e+00, -1.20316424890536551792e-17, + 1.43348941336778890054e+00, -5.80245424392682610310e-17, + 1.43737599744898236764e+00, -4.20403401646755661225e-17, + 1.44127311912862565713e+00, 5.60250365087898567501e-18, + 1.44518080697704665027e+00, -3.02375813499398731940e-17, + 1.44909908964203504311e+00, -6.25940500081930925441e-17, + 1.45302799584905262265e+00, -5.77994860939610610226e-17, + 1.45696755440144376514e+00, 5.64867945387699814049e-17, + 1.46091779418064704466e+00, -5.60037718607521580013e-17, + 1.46487874414640573129e+00, 9.53076754358715731900e-17, + 1.46885043333698184220e+00, 8.46588275653362637570e-17, + 1.47283289086936752810e+00, 6.69177408194058937165e-17, + 1.47682614593949934623e+00, -3.48399455689279579579e-17, + 1.48083022782247186733e+00, -9.68695210263061857841e-17, + 1.48484516587275239274e+00, 1.07800867644074807559e-16, + 1.48887098952439700383e+00, 6.15536715774287133031e-17, + 1.49290772829126483501e+00, 1.41929201542840357707e-17, + 1.49695541176723545540e+00, 
-2.86166325389915821109e-17, + 1.50101406962642558440e+00, -6.41376727579023503859e-17, + 1.50508373162340647333e+00, 7.07471061358284636429e-17, + 1.50916442759342284141e+00, -1.01645532775429503911e-16, + 1.51325618745260981335e+00, 8.88449785133871209093e-17, + 1.51735904119821474190e+00, -4.30869947204334080070e-17, + 1.52147301890881458952e+00, -5.99638767594568341985e-18, + 1.52559815074453819506e+00, 1.11795187801605698722e-16, + 1.52973446694728698603e+00, 3.78579211515721903683e-17, + 1.53388199784095591305e+00, 8.87522684443844614135e-17, + 1.53804077383165682669e+00, 1.01746723511613580618e-16, + 1.54221082540794074411e+00, 7.94983480969762085616e-17, + 1.54639218314102144802e+00, 1.06839600056572198028e-16, + 1.55058487768499997372e+00, -1.46007065906893851791e-17, + 1.55478893977708865215e+00, -8.00316135011603564104e-17, + 1.55900440023783692922e+00, 3.78120705335752750188e-17, + 1.56323128997135762930e+00, 7.48477764559073438896e-17, + 1.56746963996555299659e+00, -1.03520617688497219883e-16, + 1.57171948129234140268e+00, -3.34298400468720006928e-17, + 1.57598084510788649659e+00, -1.01369164712783039808e-17, + 1.58025376265282457844e+00, -5.16340292955446806159e-17, + 1.58453826525249374946e+00, -1.93377170345857029304e-17, + 1.58883438431716395023e+00, -5.99495011882447940052e-18, + 1.59314215134226699888e+00, -1.00944065423119624890e-16, + 1.59746159790862707339e+00, 2.48683927962209992069e-17, + 1.60179275568269341434e+00, -6.05491745352778434252e-17, + 1.60613565641677102924e+00, -1.03545452880599952591e-16, + 1.61049033194925428347e+00, 2.47071925697978878522e-17, + 1.61485681420486071325e+00, -7.31666339912512326264e-17, + 1.61923513519486372836e+00, 2.09413341542290924068e-17, + 1.62362532701732886764e+00, -3.58451285141447470996e-17, + 1.62802742185734783398e+00, -6.71295508470708408630e-17, + 1.63244145198727497181e+00, 9.85281923042999296414e-17, + 1.63686744976696441078e+00, 7.69832507131987557450e-17, + 1.64130544764400632118e+00, 
-9.24756873764070550805e-17, + 1.64575547815396494578e+00, -1.01256799136747726038e-16, + 1.65021757392061774183e+00, 9.13327958872990419009e-18, + 1.65469176765619430114e+00, 9.64329430319602742879e-17, + 1.65917809216161615815e+00, -7.27554555082304942180e-17, + 1.66367658032673637614e+00, 5.89099269671309967045e-17, + 1.66818726513058246397e+00, 4.26917801957061447430e-17, + 1.67271017964159662839e+00, -5.47671596459956307616e-17, + 1.67724535701787846875e+00, 8.30394950995073155275e-17, + 1.68179283050742900407e+00, 8.19901002058149652013e-17, + 1.68635263344839336774e+00, -7.18146327835800944212e-17, + 1.69092479926930527867e+00, -9.66967147439488016590e-17, + 1.69550936148933262260e+00, 7.23841687284516664081e-17, + 1.70010635371852347753e+00, -8.02371937039770024589e-18, + 1.70471580965805125096e+00, -2.72888328479728156257e-17, + 1.70933776310046292579e+00, -9.86877945663293107628e-17, + 1.71397224792992597386e+00, 6.47397510775336706412e-17, + 1.71861929812247793414e+00, -1.85138041826311098821e-17, + 1.72327894774627399244e+00, -9.52212380039379996275e-17, + 1.72795123096183766975e+00, -1.07509818612046424459e-16, + 1.73263618202231106658e+00, -1.69805107431541549407e-18, + 1.73733383527370621735e+00, 3.16438929929295694659e-17, + 1.74204422515515644498e+00, -1.52595911895078879236e-18, + 1.74676738619916904760e+00, -1.07522904835075145042e-16, + 1.75150335303187820735e+00, -5.12445042059672465939e-17, + 1.75625216037329945351e+00, 2.96014069544887330703e-17, + 1.76101384303758390359e+00, -7.94325312503922771057e-17, + 1.76578843593327272643e+00, 9.46131501808326786660e-17, + 1.77057597406355471392e+00, 5.96179451004055584767e-17, + 1.77537649252652118825e+00, 6.42973179655657203396e-17, + 1.78019002651542446181e+00, -5.28462728909161736517e-17, + 1.78501661131893496481e+00, 1.53304001210313138184e-17, + 1.78985628232140103755e+00, -4.15435466068334977098e-17, + 1.79470907500310716820e+00, 1.82274584279120867698e-17, + 1.79957502494053511732e+00, 
-2.52688923335889795224e-17, + 1.80445416780662393208e+00, -5.17722240879331788328e-17, + 1.80934653937103195886e+00, -9.03264140245002968190e-17, + 1.81425217550039885595e+00, -9.96953153892034881983e-17, + 1.81917111215860849427e+00, 7.40267690114583888997e-17, + 1.82410338540705341259e+00, -1.01596278622770830650e-16, + 1.82904903140489727420e+00, 6.88919290883569563697e-17, + 1.83400808640934243066e+00, 3.28310722424562658722e-17, + 1.83898058677589371079e+00, 6.91896974027251194233e-18, + 1.84396656895862598446e+00, -5.93974202694996455028e-17, + 1.84896606951045083811e+00, 9.02758044626108928816e-17, + 1.85397912508338547077e+00, 9.76188749072759353840e-17, + 1.85900577242882047990e+00, -9.52870546198994068663e-17, + 1.86404604839778897940e+00, 6.54091268062057047791e-17, + 1.86909998994123860427e+00, -9.93850521425506708290e-17, + 1.87416763411029996256e+00, -6.12276341300414256164e-17, + 1.87924901805656019427e+00, -1.62263155578358447799e-17, + 1.88434417903233453195e+00, -8.22659312553371090551e-17, + 1.88945315439093919352e+00, -9.00516828505912548531e-17, + 1.89457598158696560731e+00, 3.40340353521652967060e-17, + 1.89971269817655530332e+00, -3.85973976937851370678e-17, + 1.90486334181767413831e+00, 6.53385751471827862895e-17, + 1.91002795027038985154e+00, -5.90968800674406023686e-17, + 1.91520656139714740007e+00, -1.06199460561959626376e-16, + 1.92039921316304740273e+00, 7.11668154063031418621e-17, + 1.92560594363612502811e+00, -9.91496376969374092749e-17, + 1.93082679098762710623e+00, 6.16714970616910955284e-17, + 1.93606179349229434727e+00, 1.03323859606763257448e-16, + 1.94131098952864045160e+00, -6.63802989162148798984e-17, + 1.94657441757923321823e+00, 6.81102234953387718436e-17, + 1.95185211623097831790e+00, -2.19901696997935108603e-17, + 1.95714412417540017941e+00, 8.96076779103666776760e-17, + 1.96245048020892731699e+00, 1.09768440009135469493e-16, + 1.96777122323317588126e+00, -1.03149280115311315109e-16, + 1.97310639225523432039e+00, 
-7.45161786395603748608e-18, + 1.97845602638795092787e+00, 4.03887531092781665750e-17, + 1.98382016485021939189e+00, -2.20345441239106265716e-17, + 1.98919884696726634310e+00, 8.20513263836919941553e-18, + 1.99459211217094023461e+00, 1.79097103520026450854e-17 +}; + +static const union { + unsigned i[2]; + double d; +} C[] = { + { DBLWORD(0x43380000, 0x00000000) }, + { DBLWORD(0x40771547, 0x652b82fe) }, + { DBLWORD(0x3f662e42, 0xfee00000) }, + { DBLWORD(0x3d6a39ef, 0x35793c76) }, + { DBLWORD(0x3ff00000, 0x00000000) }, + { DBLWORD(0x3fdfffff, 0xfffffff6) }, + { DBLWORD(0x3fc55555, 0x721a1d14) }, + { DBLWORD(0x3fa55555, 0x6e0896af) }, + { DBLWORD(0x01000000, 0x00000000) }, + { DBLWORD(0x7f000000, 0x00000000) }, + { DBLWORD(0x40862e42, 0xfefa39ef) }, + { DBLWORD(0xc0874910, 0xd52d3051) }, + { DBLWORD(0xfff00000, 0x00000000) }, + { DBLWORD(0x00000000, 0x00000000) } +}; + +#define round C[0].d +#define invln2_256 C[1].d +#define ln2_256h C[2].d +#define ln2_256l C[3].d +#define one C[4].d +#define B1 C[5].d +#define B2 C[6].d +#define B3 C[7].d +#define tiny C[8].d +#define huge C[9].d +#define othresh C[10].d +#define uthresh C[11].d +#define neginf C[12].d +#define zero C[13].d + +#define PROCESS(N) \ + y##N = (x##N * invln2_256) + round; \ + j##N = LO(y##N); \ + y##N -= round; \ + k##N = j##N >> 8; \ + j##N = (j##N & 0xff) << 1; \ + x##N = (x##N - y##N * ln2_256h) - y##N * ln2_256l; \ + y##N = x##N * (one + x##N * (B1 + x##N * (B2 + x##N * B3))); \ + t##N = TBL[j##N]; \ + y##N = t##N + (TBL[j##N + 1] + t##N * y##N); \ + if (k##N < -1021) { \ + HI(y##N) += (k##N + 0x3ef) << 20; \ + y##N *= tiny; \ + } else { \ + HI(y##N) += k##N << 20; \ + } \ + *y = y##N; \ + y += stridey + +#define PREPROCESS(N, index, label) \ + hx = HI(x[0]); \ + ix = hx & ~0x80000000; \ + x##N = *x; \ + x += stridex; \ + if (ix >= 0x40862e42) { \ + if (ix >= 0x7ff00000) { /* x is inf or nan */ \ + y[index] = (x##N == neginf)? 
zero : \ + x##N * x##N; \ + goto label; \ + } \ + if (x##N > othresh) { \ + y[index] = huge * huge; \ + goto label; \ + } \ + if (x##N < uthresh) { \ + y[index] = tiny * tiny; \ + goto label; \ + } \ + } else if (ix < 0x3e300000) { /* |x| < 2^-28 */ \ + y[index] = one + x##N; \ + goto label; \ + } + +void +__vexp(int n, double *restrict x, int stridex, double *restrict y, + int stridey) +{ + double x0, x1, x2, x3, x4, x5; + double y0, y1, y2, y3, y4, y5; + double t0, t1, t2, t3, t4, t5; + int k0, k1, k2, k3, k4, k5; + int j0, j1, j2, j3, j4, j5; + int hx, ix; + + y -= stridey; + + for (;;) { +begin: + if (--n < 0) + break; + y += stridey; + + PREPROCESS(0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, stridey, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, stridey << 1, process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + + if (--n < 0) + goto process4; + + PREPROCESS(4, stridey << 2, process4); + + if (--n < 0) + goto process5; + + PREPROCESS(5, (stridey << 2) + stridey, process5); + + y0 = (x0 * invln2_256) + round; + y1 = (x1 * invln2_256) + round; + y2 = (x2 * invln2_256) + round; + y3 = (x3 * invln2_256) + round; + y4 = (x4 * invln2_256) + round; + y5 = (x5 * invln2_256) + round; + + j0 = LO(y0); + j1 = LO(y1); + j2 = LO(y2); + j3 = LO(y3); + j4 = LO(y4); + j5 = LO(y5); + + y0 -= round; + y1 -= round; + y2 -= round; + y3 -= round; + y4 -= round; + y5 -= round; + + k0 = j0 >> 8; + k1 = j1 >> 8; + k2 = j2 >> 8; + k3 = j3 >> 8; + k4 = j4 >> 8; + k5 = j5 >> 8; + + j0 = (j0 & 0xff) << 1; + j1 = (j1 & 0xff) << 1; + j2 = (j2 & 0xff) << 1; + j3 = (j3 & 0xff) << 1; + j4 = (j4 & 0xff) << 1; + j5 = (j5 & 0xff) << 1; + + x0 = (x0 - y0 * ln2_256h) - y0 * ln2_256l; + x1 = (x1 - y1 * ln2_256h) - y1 * ln2_256l; + x2 = (x2 - y2 * ln2_256h) - y2 * ln2_256l; + x3 = (x3 - y3 * ln2_256h) - y3 * ln2_256l; + x4 = (x4 - y4 * ln2_256h) - y4 * ln2_256l; + x5 = (x5 - y5 * ln2_256h) - y5 * ln2_256l; + + y0 
= x0 * (one + x0 * (B1 + x0 * (B2 + x0 * B3))); + y1 = x1 * (one + x1 * (B1 + x1 * (B2 + x1 * B3))); + y2 = x2 * (one + x2 * (B1 + x2 * (B2 + x2 * B3))); + y3 = x3 * (one + x3 * (B1 + x3 * (B2 + x3 * B3))); + y4 = x4 * (one + x4 * (B1 + x4 * (B2 + x4 * B3))); + y5 = x5 * (one + x5 * (B1 + x5 * (B2 + x5 * B3))); + + t0 = TBL[j0]; + t1 = TBL[j1]; + t2 = TBL[j2]; + t3 = TBL[j3]; + t4 = TBL[j4]; + t5 = TBL[j5]; + + y0 = t0 + (TBL[j0 + 1] + t0 * y0); + y1 = t1 + (TBL[j1 + 1] + t1 * y1); + y2 = t2 + (TBL[j2 + 1] + t2 * y2); + y3 = t3 + (TBL[j3 + 1] + t3 * y3); + y4 = t4 + (TBL[j4 + 1] + t4 * y4); + y5 = t5 + (TBL[j5 + 1] + t5 * y5); + + if (k0 < -1021) { + HI(y0) += (k0 + 0x3ef) << 20; + y0 *= tiny; + } else { + HI(y0) += k0 << 20; + } + if (k1 < -1021) { + HI(y1) += (k1 + 0x3ef) << 20; + y1 *= tiny; + } else { + HI(y1) += k1 << 20; + } + if (k2 < -1021) { + HI(y2) += (k2 + 0x3ef) << 20; + y2 *= tiny; + } else { + HI(y2) += k2 << 20; + } + if (k3 < -1021) { + HI(y3) += (k3 + 0x3ef) << 20; + y3 *= tiny; + } else { + HI(y3) += k3 << 20; + } + if (k4 < -1021) { + HI(y4) += (k4 + 0x3ef) << 20; + y4 *= tiny; + } else { + HI(y4) += k4 << 20; + } + if (k5 < -1021) { + HI(y5) += (k5 + 0x3ef) << 20; + y5 *= tiny; + } else { + HI(y5) += k5 << 20; + } + + y[0] = y0; + y[stridey] = y1; + y[stridey << 1] = y2; + y[(stridey << 1) + stridey] = y3; + y[stridey << 2] = y4; + y[(stridey << 2) + stridey] = y5; + y += (stridey << 2) + stridey; + continue; + +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + continue; + +process4: + PROCESS(0); + PROCESS(1); + PROCESS(2); + PROCESS(3); + continue; + +process5: + PROCESS(0); + PROCESS(1); + PROCESS(2); + PROCESS(3); + PROCESS(4); + } +} diff --git a/usr/src/libm/src/mvec/__vexpf.c b/usr/src/libm/src/mvec/__vexpf.c new file mode 100644 index 0000000..311bed8 --- /dev/null +++ b/usr/src/libm/src/mvec/__vexpf.c @@ -0,0 +1,350 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vexpf.c 1.9 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float expf(float x) + * + * Method : + * 1. Special cases: + * for x > 88.722839355...(0x42B17218) => Inf + overflow; + * for x < -103.97207642..(0xc2CFF1B4) => 0 + underflow; + * for x = Inf => Inf; + * for x = -Inf => 0; + * for x = +-NaN => QNaN. + * 2. Computes exponential from: + * exp(x) = 2**a * 2**(k/256) * 2**(y/256) + * Where: + * a = int ( 256 * log2(e) * x ) >> 8; + * k = int ( 256 * log2(e) * x ) & 0xFF; + * y = frac ( 256 * x * log2(e)). + * Note that: + * k = 0, 1, ..., 255; + * y = (-1, 1). + * Then: + * 2**(k/256) is looked up in a table of 2**0, 2**1/256, ... + * 2**(y/256) is computed using approximation: + * 2**(y/256) = a0 + a1 * y + a2 * y**2 + * Multiplication by 2**a is done by adding "a" to + * the biased exponent. + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-29.18). All calculations are of + * double precision. 
+ * Maximum error observed: less than 0.528 ulp for the whole + * float type range. + * + * NOTE: This implementation has been modified for SPARC to deliver + * zero instead of a subnormal result whenever the argument is less + * than log(2^-126). Therefore the worst case relative error is 1. + */ + +static const double __TBL_exp2f[] = { + /* 2^(i/256) - (((i & 0xff) << 44), i = [0, 255] */ +1.000000000000000000e+00, 9.994025125251012609e-01, 9.988087005564013632e-01, +9.982185740592087742e-01, 9.976321430258502376e-01, 9.970494174757447148e-01, +9.964704074554765478e-01, 9.958951230388689568e-01, 9.953235743270583136e-01, +9.947557714485678604e-01, 9.941917245593818730e-01, 9.936314438430204898e-01, +9.930749395106142074e-01, 9.925222218009785990e-01, 9.919733009806893653e-01, +9.914281873441580517e-01, 9.908868912137068774e-01, 9.903494229396448967e-01, +9.898157929003436051e-01, 9.892860115023132117e-01, 9.887600891802785785e-01, +9.882380363972563808e-01, 9.877198636446310465e-01, 9.872055814422322495e-01, +9.866952003384118486e-01, 9.861887309101209365e-01, 9.856861837629877776e-01, +9.851875695313955239e-01, 9.846928988785599302e-01, 9.842021824966076249e-01, +9.837154311066546031e-01, 9.832326554588848300e-01, 9.827538663326288448e-01, +9.822790745364429199e-01, 9.818082909081884413e-01, 9.813415263151109569e-01, +9.808787916539204454e-01, 9.804200978508705866e-01, 9.799654558618393629e-01, +9.795148766724087741e-01, 9.790683712979462161e-01, 9.786259507836846394e-01, +9.781876262048033732e-01, 9.777534086665099489e-01, 9.773233093041209241e-01, +9.768973392831440394e-01, 9.764755097993595978e-01, 9.760578320789027318e-01, +9.756443173783457823e-01, 9.752349769847807881e-01, 9.748298222159020865e-01, +9.744288644200894689e-01, 9.740321149764913367e-01, 9.736395852951079677e-01, +9.732512868168755604e-01, 9.728672310137493895e-01, 9.724874293887887378e-01, +9.721118934762408292e-01, 9.717406348416250950e-01, 9.713736650818186602e-01, 
+9.710109958251406104e-01, 9.706526387314379223e-01, 9.702986054921705072e-01, +9.699489078304969203e-01, 9.696035575013605134e-01, 9.692625662915755891e-01, +9.689259460199136642e-01, 9.685937085371902899e-01, 9.682658657263515378e-01, +9.679424295025619296e-01, 9.676234118132908124e-01, 9.673088246384006217e-01, +9.669986799902344776e-01, 9.666929899137042259e-01, 9.663917664863788115e-01, +9.660950218185727634e-01, 9.658027680534350123e-01, 9.655150173670379310e-01, +9.652317819684667066e-01, 9.649530740999082701e-01, 9.646789060367420010e-01, +9.644092900876289898e-01, 9.641442385946024096e-01, 9.638837639331581109e-01, +9.636278785123455481e-01, 9.633765947748582636e-01, 9.631299251971253694e-01, +9.628878822894031408e-01, 9.626504785958666099e-01, 9.624177266947013809e-01, +9.621896391981960006e-01, 9.619662287528346623e-01, 9.617475080393891318e-01, +9.615334897730127839e-01, 9.613241867033328614e-01, 9.611196116145447332e-01, +9.609197773255048203e-01, 9.607246966898252971e-01, 9.605343825959679060e-01, +9.603488479673386591e-01, 9.601681057623822069e-01, 9.599921689746773179e-01, +9.598210506330320246e-01, 9.596547638015787696e-01, 9.594933215798706616e-01, +9.593367371029771773e-01, 9.591850235415807502e-01, 9.590381941020729162e-01, +9.588962620266514580e-01, 9.587592405934176609e-01, 9.586271431164729018e-01, +9.584999829460172371e-01, 9.583777734684463256e-01, 9.582605281064505709e-01, +9.581482603191123770e-01, 9.580409836020059577e-01, 9.579387114872952580e-01, +9.578414575438342071e-01, 9.577492353772650846e-01, 9.576620586301189952e-01, +9.575799409819160113e-01, 9.575028961492645374e-01, 9.574309378859631181e-01, +9.573640799831001358e-01, 9.573023362691556182e-01, 9.572457206101023797e-01, +9.571942469095077177e-01, 9.571479291086353314e-01, 9.571067811865475727e-01, +9.570708171602075875e-01, 9.570400510845827879e-01, 9.570144970527471040e-01, +9.569941691959850116e-01, 9.569790816838944503e-01, 9.569692487244911838e-01, 
+9.569646845643128286e-01, 9.569654034885233251e-01, 9.569714198210175216e-01, +9.569827479245263113e-01, 9.569994022007218826e-01, 9.570213970903235223e-01, +9.570487470732028656e-01, 9.570814666684909211e-01, 9.571195704346837640e-01, +9.571630729697496731e-01, 9.572119889112359337e-01, 9.572663329363761964e-01, +9.573261197621985019e-01, 9.573913641456324175e-01, 9.574620808836177277e-01, +9.575382848132127922e-01, 9.576199908117032367e-01, 9.577072137967114207e-01, +9.577999687263049067e-01, 9.578982705991073709e-01, 9.580021344544072948e-01, +9.581115753722692086e-01, 9.582266084736434930e-01, 9.583472489204779565e-01, +9.584735119158284133e-01, 9.586054127039703721e-01, 9.587429665705107240e-01, +9.588861888424999869e-01, 9.590350948885443261e-01, 9.591897001189184646e-01, +9.593500199856788146e-01, 9.595160699827764983e-01, 9.596878656461707013e-01, +9.598654225539432483e-01, 9.600487563264122892e-01, 9.602378826262468747e-01, +9.604328171585819751e-01, 9.606335756711334994e-01, 9.608401739543135367e-01, +9.610526278413467072e-01, 9.612709532083855146e-01, 9.614951659746271417e-01, +9.617252821024303566e-01, 9.619613175974318642e-01, 9.622032885086644338e-01, +9.624512109286739170e-01, 9.627051009936374859e-01, 9.629649748834822054e-01, +9.632308488220031606e-01, 9.635027390769824729e-01, 9.637806619603088709e-01, +9.640646338280971506e-01, 9.643546710808080791e-01, 9.646507901633681881e-01, +9.649530075652912320e-01, 9.652613398207983142e-01, 9.655758035089392344e-01, +9.658964152537145020e-01, 9.662231917241966839e-01, 9.665561496346526393e-01, +9.668953057446663113e-01, 9.672406768592617388e-01, 9.675922798290256255e-01, +9.679501315502314629e-01, 9.683142489649629869e-01, 9.686846490612389671e-01, +9.690613488731369962e-01, 9.694443654809188349e-01, 9.698337160111555333e-01, +9.702294176368531087e-01, 9.706314875775782225e-01, 9.710399430995845238e-01, +9.714548015159391037e-01, 9.718760801866497268e-01, 9.723037965187919518e-01, 
+9.727379679666363632e-01, 9.731786120317773570e-01, 9.736257462632605941e-01, +9.740793882577122309e-01, 9.745395556594674824e-01, 9.750062661607005188e-01, +9.754795375015535841e-01, 9.759593874702675587e-01, 9.764458339033119660e-01, +9.769388946855159794e-01, 9.774385877501994280e-01, 9.779449310793042471e-01, +9.784579427035267063e-01, 9.789776407024486371e-01, 9.795040432046712153e-01, +9.800371683879468554e-01, 9.805770344793129922e-01, 9.811236597552254191e-01, +9.816770625416927354e-01, 9.822372612144102400e-01, 9.828042741988944897e-01, +9.833781199706193021e-01, 9.839588170551499813e-01, 9.845463840282800971e-01, +9.851408395161672660e-01, 9.857422021954695968e-01, 9.863504907934828037e-01, +9.869657240882776517e-01, 9.875879209088370692e-01, 9.882171001351949258e-01, +9.888532806985737000e-01, 9.894964815815237014e-01, 9.901467218180625141e-01, +9.908040204938135531e-01, 9.914683967461471736e-01, 9.921398697643202258e-01, +9.928184587896166091e-01, 9.935041831154891590e-01, 9.941970620877000897e-01, +9.948971151044636585e-01, 9.956043616165879406e-01, 9.963188211276171602e-01, +9.970405131939754639e-01, 9.977694574251096959e-01, 9.985056734836331715e-01, +9.992491810854701173e-01 +}; + +static const double + K256ONLN2 = 369.3299304675746271, + KA2 = 3.66556671660783833261e-06, + KA1 = 2.70760782821392980564e-03, + KA0 = 1.0; + +static const float extreme[2] = { 1.0e30f, 1.0e-30f }; + +#define PROCESS(N) \ + x##N *= K256ONLN2; \ + k##N = (int) x##N; \ + x##N -= (double) k##N; \ + x##N = (KA2 * x##N + KA1) * x##N + KA0; \ + lres##N = ((long long *)__TBL_exp2f)[k##N & 0xff]; \ + lres##N += (long long)k##N << 44; \ + *y = (float) (x##N * *(double *)&lres##N); \ + y += stridey + +#ifdef __sparc + +#define PREPROCESS(N , index, label) \ + xi = *(int *)x; \ + ax = xi & ~0x80000000; \ + fx = *x; \ + x += stridex; \ + if ( ax >= 0x42aeac50 ) /* log(2^126) = 87.3365... 
*/ \ + { \ + sign = (unsigned)xi >> 31; \ + if ( ax >= 0x7f800000 ) /* |x| = inf or nan */ \ + { \ + if ( ax > 0x7f800000 ) /* nan */ \ + { \ + y[index] = fx * fx; \ + goto label; \ + } \ + y[index] = (sign) ? 0.0f : fx; \ + goto label; \ + } \ + if ( sign || ax > 0x42b17218 ) { \ + fx = extreme[sign]; \ + y[index] = fx * fx; \ + goto label; \ + } \ + } \ + x##N = fx + +#else + +#define PREPROCESS(N , index, label) \ + xi = *(int *)x; \ + ax = xi & ~0x80000000; \ + fx = *x; \ + x += stridex; \ + if ( ax > 0x42cff1b4 ) /* 103.972076f */ \ + { \ + sign = (unsigned)xi >> 31; \ + if ( ax >= 0x7f800000 ) /* |x| = inf or nan */ \ + { \ + if ( ax > 0x7f800000 ) /* nan */ \ + { \ + y[index] = fx * fx; \ + goto label; \ + } \ + y[index] = (sign) ? 0.0f : fx; \ + goto label; \ + } \ + fx = extreme[sign]; \ + y[index] = fx * fx; \ + goto label; \ + } \ + x##N = fx + +#endif + +void +__vexpf( int n, float * restrict x, int stridex, float * restrict y, + int stridey ) +{ + double x0, x1, x2, x3, x4; + double res0, res1, res2, res3, res4; + float fx; + long long lres0, lres1, lres2, lres3, lres4; + int k0, k1, k2, k3, k4; + int xi, ax, sign; + + y -= stridey; + + for ( ; ; ) + { +begin: + if ( --n < 0 ) + break; + y += stridey; + + PREPROCESS(0, 0, begin); + + if ( --n < 0 ) + goto process1; + + PREPROCESS(1, stridey, process1); + + if ( --n < 0 ) + goto process2; + + PREPROCESS(2, stridey << 1, process2); + + if ( --n < 0 ) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + + if ( --n < 0 ) + goto process4; + + PREPROCESS(4, (stridey << 2), process4); + + x0 *= K256ONLN2; + x1 *= K256ONLN2; + x2 *= K256ONLN2; + x3 *= K256ONLN2; + x4 *= K256ONLN2; + + k0 = (int)x0; + k1 = (int)x1; + k2 = (int)x2; + k3 = (int)x3; + k4 = (int)x4; + + x0 -= (double)k0; + x1 -= (double)k1; + x2 -= (double)k2; + x3 -= (double)k3; + x4 -= (double)k4; + + x0 = (KA2 * x0 + KA1) * x0 + KA0; + x1 = (KA2 * x1 + KA1) * x1 + KA0; + x2 = (KA2 * x2 + KA1) * x2 + KA0; + x3 = (KA2 * x3 + 
KA1) * x3 + KA0; + x4 = (KA2 * x4 + KA1) * x4 + KA0; + + lres0 = ((long long *)__TBL_exp2f)[k0 & 255]; + lres1 = ((long long *)__TBL_exp2f)[k1 & 255]; + lres2 = ((long long *)__TBL_exp2f)[k2 & 255]; + lres3 = ((long long *)__TBL_exp2f)[k3 & 255]; + lres4 = ((long long *)__TBL_exp2f)[k4 & 255]; + + lres0 += (long long)k0 << 44; + res0 = *(double *)&lres0; + lres1 += (long long)k1 << 44; + res1 = *(double *)&lres1; + lres2 += (long long)k2 << 44; + res2 = *(double *)&lres2; + lres3 += (long long)k3 << 44; + res3 = *(double *)&lres3; + lres4 += (long long)k4 << 44; + res4 = *(double *)&lres4; + + *y = (float)(res0 * x0); + y += stridey; + *y = (float)(res1 * x1); + y += stridey; + *y = (float)(res2 * x2); + y += stridey; + *y = (float)(res3 * x3); + y += stridey; + *y = (float)(res4 * x4); + continue; + +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + continue; + +process4: + PROCESS(0); + PROCESS(1); + PROCESS(2); + PROCESS(3); + } +} diff --git a/usr/src/libm/src/mvec/__vhypot.c b/usr/src/libm/src/mvec/__vhypot.c new file mode 100644 index 0000000..0cc0d47 --- /dev/null +++ b/usr/src/libm/src/mvec/__vhypot.c @@ -0,0 +1,394 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vhypot.c 1.7 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double hypot(double x, double y) + * + * Method : + * 1. Special cases: + * x or y is +Inf or -Inf => +Inf + * x or y is NaN => QNaN + * 2. Computes hypot(x,y): + * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) + * Where: + * m = max(|x|,|y|) + * xnm = x * (1/m) + * ynm = y * (1/m) + * + * Compute xnm * xnm + ynm * ynm by simulating + * multi-precision arithmetic. + * + * Accuracy: + * Maximum error observed: less than 0.872 ulp after 16.777.216.000 + * results. 
+ */ + +#define sqrt __sqrt + +extern double sqrt( double ); +extern double fabs( double ); + +static const unsigned long long LCONST[] = { +0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ +0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ +0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ +}; + +static void +__vhypot_n( int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez ); + +#pragma no_inline(__vhypot_n) + +#define RETURN(ret) \ +{ \ + *pz = (ret); \ + py += stridey; \ + pz += stridez; \ + if ( n_n == 0 ) \ + { \ + hx0 = HI(px); \ + hy0 = HI(py); \ + spx = px; spy = py; spz = pz; \ + continue; \ + } \ + n--; \ + break; \ +} + +void +__vhypot( int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez ) +{ + int hx0, hx1, hy0, j0, diff; + double x_hi, x_lo, y_hi, y_lo; + double scl = 0; + double x, y, res; + double *spx, *spy, *spz; + int n_n; + double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ + double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */ + double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */ + + while ( n > 1 ) + { + n_n = 0; + spx = px; + spy = py; + spz = pz; + hx0 = HI(px); + hy0 = HI(py); + for ( ; n > 1 ; n-- ) + { + px += stridex; + hx0 &= 0x7fffffff; + hy0 &= 0x7fffffff; + + if ( hx0 >= 0x7fe00000 ) /* |X| >= 2**1023 or Inf or NaN */ + { + diff = hy0 - hx0; + j0 = diff >> 31; + j0 = hy0 - (diff & j0); + j0 &= 0x7ff00000; + x = *(px - stridex); + y = *py; + x = fabs(x); + y = fabs(y); + if ( j0 >= 0x7ff00000 ) /* |X| or |Y| = Inf or NaN */ + { + int lx = LO((px - stridex)); + int ly = LO(py); + if ( hx0 == 0x7ff00000 && lx == 0 ) res = x == y ? y : x; + else if ( hy0 == 0x7ff00000 && ly == 0 ) res = x == y ? 
x : y; + else res = x + y; + RETURN ( res ) + } + else + { + j0 = diff >> 31; + if ( ((diff ^ j0) - j0) < 0x03600000 ) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ + { + x *= D2ONM1022; + y *= D2ONM1022; + + x_hi = ( x + D2ON28 ) - D2ON28; + x_lo = x - x_hi; + y_hi = ( y + D2ON28 ) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt ( res ); + + res = D2ONP1022 * res; + RETURN ( res ) + } + else RETURN ( x + y ) + } + } + if ( hy0 >= 0x7fe00000 ) /* |Y| >= 2**1023 or Inf or NaN */ + { + diff = hy0 - hx0; + j0 = diff >> 31; + j0 = hy0 - (diff & j0); + j0 &= 0x7ff00000; + x = *(px - stridex); + y = *py; + x = fabs(x); + y = fabs(y); + if ( j0 >= 0x7ff00000 ) /* |X| or |Y| = Inf or NaN */ + { + int lx = LO((px - stridex)); + int ly = LO(py); + if ( hx0 == 0x7ff00000 && lx == 0 ) res = x == y ? y : x; + else if ( hy0 == 0x7ff00000 && ly == 0 ) res = x == y ? x : y; + else res = x + y; + RETURN ( res ) + } + else + { + j0 = diff >> 31; + if ( ((diff ^ j0) - j0) < 0x03600000 ) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ + { + x *= D2ONM1022; + y *= D2ONM1022; + + x_hi = ( x + D2ON28 ) - D2ON28; + x_lo = x - x_hi; + y_hi = ( y + D2ON28 ) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt ( res ); + + res = D2ONP1022 * res; + RETURN ( res ) + } + else RETURN ( x + y ) + } + } + + hx1 = HI(px); + + if ( hx0 < 0x00100000 && hy0 < 0x00100000 ) /* X and Y are subnormal */ + { + x = *(px - stridex); + y = *py; + + x *= D2ONP1022; + y *= D2ONP1022; + + x_hi = ( x + D2ON28 ) - D2ON28; + x_lo = x - x_hi; + y_hi = ( y + D2ON28 ) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt(res); + + res = D2ONM1022 * res; + RETURN ( res ) + } + + hx0 = hx1; + py += stridey; + pz += stridez; + n_n++; + hy0 = HI(py); + } + if ( n_n > 0 ) + __vhypot_n ( n_n, spx, 
stridex, spy, stridey, spz, stridez ); + } + + if ( n > 0 ) + { + x = *px; + y = *py; + hx0 = HI(px); + hy0 = HI(py); + + hx0 &= 0x7fffffff; + hy0 &= 0x7fffffff; + + diff = hy0 - hx0; + j0 = diff >> 31; + j0 = hy0 - (diff & j0); + j0 &= 0x7ff00000; + + if ( j0 >= 0x7fe00000 ) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ + { + x = fabs(x); + y = fabs(y); + if ( j0 >= 0x7ff00000 ) /* |X| or |Y| = Inf or NaN */ + { + int lx = LO(px); + int ly = LO(py); + if ( hx0 == 0x7ff00000 && lx == 0 ) res = x == y ? y : x; + else if ( hy0 == 0x7ff00000 && ly == 0 ) res = x == y ? x : y; + else res = x + y; + *pz = res; + return; + } + else + { + j0 = diff >> 31; + if ( ((diff ^ j0) - j0) < 0x03600000 ) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ + { + x *= D2ONM1022; + y *= D2ONM1022; + + x_hi = ( x + D2ON28 ) - D2ON28; + x_lo = x - x_hi; + y_hi = ( y + D2ON28 ) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt ( res ); + + res = D2ONP1022 * res; + *pz = res; + return; + } + else + { + *pz = x + y; + return; + } + } + } + + if ( j0 < 0x00100000 ) /* X and Y are subnormal */ + { + x *= D2ONP1022; + y *= D2ONP1022; + + x_hi = ( x + D2ON28 ) - D2ON28; + x_lo = x - x_hi; + y_hi = ( y + D2ON28 ) - D2ON28; + y_lo = y - y_hi; + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt(res); + + res = D2ONM1022 * res; + *pz = res; + return; + } + + HI(&scl) = (0x7fe00000 - j0); + + x *= scl; + y *= scl; + + x_hi = ( x + D2ON28 ) - D2ON28; + y_hi = ( y + D2ON28 ) - D2ON28; + x_lo = x - x_hi; + y_lo = y - y_hi; + + res = (x_hi * x_hi + y_hi * y_hi); + res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); + + res = sqrt(res); + + HI(&scl) = j0; + + res = scl * res; + *pz = res; + } +} + +static void +__vhypot_n( int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez ) +{ + int hx0, hy0, j0, diff0; + double x_hi0, 
x_lo0, y_hi0, y_lo0, scl0 = 0; + double x0, y0, res0; + double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ + + for( ; n > 0 ; n-- ) + { + x0 = *px; + y0 = *py; + hx0 = HI(px); + hy0 = HI(py); + + hx0 &= 0x7fffffff; + hy0 &= 0x7fffffff; + + diff0 = hy0 - hx0; + j0 = diff0 >> 31; + j0 = hy0 - (diff0 & j0); + j0 &= 0x7ff00000; + + px += stridex; + py += stridey; + + HI(&scl0) = ( 0x7fe00000 - j0 ); + + x0 *= scl0; + y0 *= scl0; + + x_hi0 = ( x0 + D2ON28 ) - D2ON28; + y_hi0 = ( y0 + D2ON28 ) - D2ON28; + x_lo0 = x0 - x_hi0; + y_lo0 = y0 - y_hi0; + + res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); + res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); + + res0 = sqrt(res0); + + HI(&scl0) = j0; + + res0 = scl0 * res0; + *pz = res0; + + pz += stridez; + } +} + diff --git a/usr/src/libm/src/mvec/__vhypotf.c b/usr/src/libm/src/mvec/__vhypotf.c new file mode 100644 index 0000000..0187de5 --- /dev/null +++ b/usr/src/libm/src/mvec/__vhypotf.c @@ -0,0 +1,207 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vhypotf.c 1.4 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +#define sqrt __sqrt + +extern double sqrt( double ); + +void +__vhypotf( int n, float * restrict x, int stridex, float * restrict y, + int stridey, float * restrict z, int stridez ) +{ + float x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2; + unsigned hx0, hx1, hx2, hy0, hy1, hy2; + int i, j0, j1, j2; + + do + { +LOOP0: + hx0 = *(unsigned*)x & ~0x80000000; + hy0 = *(unsigned*)y & ~0x80000000; + *(unsigned*)&x0 = hx0; + *(unsigned*)&y0 = hy0; + if ( hy0 > hx0 ) + { + i = hy0 - hx0; + j0 = hy0 & 0x7f800000; + if ( hx0 == 0 ) + i = 0x7f800000; + } + else + { + i = hx0 - hy0; + j0 = hx0 & 0x7f800000; + if ( hy0 == 0 ) + i = 0x7f800000; + else if ( hx0 == 0 ) + i = 0x7f800000; + } + if ( i >= 0x0c800000 || j0 >= 0x7f800000 ) + { + z0 = x0 + y0; + if ( hx0 == 0x7f800000 ) + z0 = x0; + else if ( hy0 == 0x7f800000 ) + z0 = y0; + else if ( hx0 > 0x7f800000 || hy0 > 0x7f800000 ) + z0 = *x + *y; + *z = z0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if ( --n <= 0 ) + break; + goto LOOP0; + } + pz0 = z; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + +LOOP1: + hx1 = *(unsigned*)x & ~0x80000000; + hy1 = *(unsigned*)y & ~0x80000000; + *(unsigned*)&x1 = hx1; + *(unsigned*)&y1 = hy1; + if ( hy1 > hx1 ) + { + i = hy1 - hx1; + j1 = hy1 & 0x7f800000; + if ( hx1 == 0 ) + i = 0x7f800000; + } + else + { + i = hx1 - hy1; + j1 = hx1 & 0x7f800000; + if ( hy1 == 0 ) + i = 0x7f800000; + else if ( hx1 == 0 ) + i = 0x7f800000; + } + if ( i >= 0x0c800000 || j1 >= 0x7f800000 ) + { + z1 = x1 + y1; + if ( hx1 == 0x7f800000 ) + z1 = x1; + else if ( hy1 == 0x7f800000 ) + z1 = y1; + else if ( hx1 > 0x7f800000 || hy1 > 0x7f800000 ) + z1 = *x + *y; + *z = z1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if ( --n <= 0 ) + break; + goto LOOP1; + } + pz1 = z; + x += stridex; + y += stridey; + z 
+= stridez; + i = 2; + if ( --n <= 0 ) + break; + +LOOP2: + hx2 = *(unsigned*)x & ~0x80000000; + hy2 = *(unsigned*)y & ~0x80000000; + *(unsigned*)&x2 = hx2; + *(unsigned*)&y2 = hy2; + if ( hy2 > hx2 ) + { + i = hy2 - hx2; + j2 = hy2 & 0x7f800000; + if ( hx2 == 0 ) + i = 0x7f800000; + } + else + { + i = hx2 - hy2; + j2 = hx2 & 0x7f800000; + if ( hy2 == 0 ) + i = 0x7f800000; + else if ( hx2 == 0 ) + i = 0x7f800000; + } + if ( i >= 0x0c800000 || j2 >= 0x7f800000 ) + { + z2 = x2 + y2; + if ( hx2 == 0x7f800000 ) + z2 = x2; + else if ( hy2 == 0x7f800000 ) + z2 = y2; + else if ( hx2 > 0x7f800000 || hy2 > 0x7f800000 ) + z2 = *x + *y; + *z = z2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if ( --n <= 0 ) + break; + goto LOOP2; + } + pz2 = z; + + z0 = sqrt( x0 * (double)x0 + y0 * (double)y0 ); + z1 = sqrt( x1 * (double)x1 + y1 * (double)y1 ); + z2 = sqrt( x2 * (double)x2 + y2 * (double)y2 ); + *pz0 = z0; + *pz1 = z1; + *pz2 = z2; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while ( --n > 0 ); + + if ( i > 0 ) + { + if ( i > 1 ) + { + z1 = sqrt( x1 * (double)x1 + y1 * (double)y1 ); + *pz1 = z1; + } + z0 = sqrt( x0 * (double)x0 + y0 * (double)y0 ); + *pz0 = z0; + } +} diff --git a/usr/src/libm/src/mvec/__vlog.c b/usr/src/libm/src/mvec/__vlog.c new file mode 100644 index 0000000..39fef50 --- /dev/null +++ b/usr/src/libm/src/mvec/__vlog.c @@ -0,0 +1,786 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vlog.c 1.5 06/01/23 SMI" + +/* + * __vlog: double precision vector log + * + * Algorithm: + * + * Write x = 2^n z where 1 - 2^-10 <= z < 2 - 2^-9. Let m = z + * rounded to nine significant bits, so m = 1 + 2^-8 k, where + * 0 <= k <= 255. Let d = z - m. Then + * + * log(x) = n log(2) + log(m) + log(1+(d/m)) + * + * Let ln2hi = log(2) rounded to a multiple of 2^-42 and ln2lo + * ~ log(2) - ln2hi. From a table, obtain mh and ml such that + * mh = log(m) rounded to a multiple of 2^-42 and ml ~ log(m) - + * mh. From the same table, obtain rh and rl such that rh = 1/m + * rounded to a multiple of 2^-10 and rl ~ 1/m - rh. For |y| <= + * 2^-9, approximate log(1+y) by a polynomial y+p(y) where p(y) + * := y*y*(-1/2+y*(P3+y*(P4+y*(P5+y*P6)))). Now letting s = + * d*rh + d*rl in double precision, we can compute the sum above + * accurately as + * + * (n*ln2hi + mh) + (d*rh + (d*rl + (n*ln2lo + ml) + p(s))) + * + * When x is subnormal, we first scale it to the normal range, + * adjusting n accordingly. + * + * Accuracy: + * + * The largest error observed is less than 0.8 ulps. 
+ */ + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int *)&x) +#define LO(x) *(unsigned *)&x +#define HIWORD 1 +#define LOWORD 0 +#else +#define HI(x) *(int *)&x +#define LO(x) *(1+(unsigned *)&x) +#define HIWORD 0 +#define LOWORD 1 +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +static const double TBL[] = { + 1.00000000000000000000e+00, 0.00000000000000000000e+00, + 0.00000000000000000000e+00, 0.00000000000000000000e+00, + 9.96093750000000000000e-01, 1.51994163424124515728e-05, + 3.89864041562759666704e-03, 2.97263469009289512726e-14, + 9.92187500000000000000e-01, 6.05620155038759681518e-05, + 7.78214044203195953742e-03, 2.29894100462035112076e-14, + 9.88281250000000000000e-01, 1.35738416988416988208e-04, + 1.16506172200843138853e-02, -1.09039749717359319029e-13, + 9.84375000000000000000e-01, 2.40384615384615397959e-04, + 1.55041865359635266941e-02, 1.72745674997061065553e-15, + 9.80468750000000000000e-01, 3.74161877394636028203e-04, + 1.93429628432113531744e-02, -8.04185385052258635682e-14, + 9.77539062500000000000e-01, -4.39825858778625927714e-04, + 2.31670592816044518258e-02, -7.00735970431003565857e-14, + 9.73632812500000000000e-01, -2.48782081749049442231e-04, + 2.69765876983001362532e-02, -9.80605051684317662887e-14, + 9.69726562500000000000e-01, -2.95928030303030311244e-05, + 3.07716586667083902285e-02, 4.52981425779092882775e-14, + 9.65820312500000000000e-01, 2.17423349056603779517e-04, + 3.45523815067281248048e-02, -6.83913974232877736961e-14, + 9.62890625000000000000e-01, -4.84609962406015010693e-04, + 3.83188643020275776507e-02, 1.09021543022033016421e-13, + 9.58984375000000000000e-01, -1.82876872659176042957e-04, + 4.20712139207353175152e-02, -4.82631400055112824008e-14, + 9.55078125000000000000e-01, 1.45755597014925360189e-04, + 4.58095360313564015087e-02, -6.21983419947579227529e-14, + 9.52148437500000000000e-01, -4.75575046468401500289e-04, + 4.95339351223265111912e-02, 
-4.98803091079814255646e-14, + 9.48242187500000000000e-01, -9.40393518518518520526e-05, + 5.32445145188376045553e-02, -2.53216894311744497863e-14, + 9.44335937500000000000e-01, 3.13508994464944631443e-04, + 5.69413764001183153596e-02, 2.01093994355649575698e-14, + 9.41406250000000000000e-01, -2.29779411764705879164e-04, + 6.06246218164869787870e-02, -5.21362063913650408235e-14, + 9.37500000000000000000e-01, 2.28937728937728937530e-04, + 6.42943507054951624013e-02, -9.79051851199021608925e-14, + 9.34570312500000000000e-01, -2.63743156934306572509e-04, + 6.79506619085259444546e-02, -1.81950600301688149235e-14, + 9.30664062500000000000e-01, 2.45028409090909096626e-04, + 7.15936531869374448434e-02, 7.13730822534317801406e-14, + 9.27734375000000000000e-01, -1.98143115942028998078e-04, + 7.52234212375242350390e-02, 6.32906595872454402199e-14, + 9.23828125000000000000e-01, 3.59600631768953083074e-04, + 7.88400617077513743425e-02, 2.46501890617661192316e-14, + 9.20898437500000000000e-01, -3.51281474820143869292e-05, + 8.24436692109884461388e-02, 8.61451293608781447223e-14, + 9.17968750000000000000e-01, -4.06025985663082419983e-04, + 8.60343373417435941519e-02, 5.95592298762564263463e-14, + 9.14062500000000000000e-01, 2.23214285714285707316e-04, + 8.96121586897606903221e-02, -7.35577021943502867846e-14, + 9.11132812500000000000e-01, -1.00784030249110321056e-04, + 9.31772248541165026836e-02, 6.67870851716289831942e-14, + 9.08203125000000000000e-01, -4.01706560283687926730e-04, + 9.67296264584547316190e-02, 9.63806765855227740728e-14, + 9.04296875000000000000e-01, 2.96764575971731443208e-04, + 1.00269453163718935684e-01, -4.37863761707839790971e-14, + 9.01367187500000000000e-01, 4.12632042253521119125e-05, + 1.03796793681567578460e-01, 7.59863659719414144342e-14, + 8.98437500000000000000e-01, -1.91885964912280701945e-04, + 1.07311735789153317455e-01, -6.52667880273107116669e-14, + 8.95507812500000000000e-01, -4.02917395104895122333e-04, + 1.10814366340264314204e-01, 
2.57999912830699022513e-14, + 8.91601562500000000000e-01, 3.84500217770034828473e-04, + 1.14304771280103523168e-01, -4.48895335223869926230e-14, + 8.88671875000000000000e-01, 2.17013888888888876842e-04, + 1.17783035656430001836e-01, -4.65472974759844472568e-14, + 8.85742187500000000000e-01, 7.09612889273356431397e-05, + 1.21249243632973957574e-01, -1.04272412782730081647e-13, + 8.82812500000000000000e-01, -5.38793103448275854592e-05, + 1.24703478501032805070e-01, -7.55692068745133691756e-14, + 8.79882812500000000000e-01, -1.57726589347079046649e-04, + 1.28145822691976718488e-01, -4.66803140394579609437e-14, + 8.76953125000000000000e-01, -2.40796232876712315400e-04, + 1.31576357788617315236e-01, 1.01957352237084734958e-13, + 8.74023437500000000000e-01, -3.03300981228668954746e-04, + 1.34995164537485834444e-01, 1.89961580415787680134e-14, + 8.71093750000000000000e-01, -3.45450680272108847594e-04, + 1.38402322859064952354e-01, 5.41833313790089940464e-14, + 8.68164062500000000000e-01, -3.67452330508474583805e-04, + 1.41797911860294334474e-01, -3.69845950669709681858e-14, + 8.65234375000000000000e-01, -3.69510135135135155647e-04, + 1.45182009844575077295e-01, -7.71800133682809851086e-14, + 8.62304687500000000000e-01, -3.51825547138047162871e-04, + 1.48554694323138392065e-01, -1.24915489807515996540e-15, + 8.59375000000000000000e-01, -3.14597315436241590364e-04, + 1.51916042025732167531e-01, 1.09807540998552379211e-13, + 8.56445312500000000000e-01, -2.58021530100334438914e-04, + 1.55266128911080159014e-01, 4.37925082924060541938e-14, + 8.53515625000000000000e-01, -1.82291666666666674979e-04, + 1.58605030176659056451e-01, -2.04723578004619553937e-14, + 8.50585937500000000000e-01, -8.75986295681063168849e-05, + 1.61932820269385047141e-01, -7.17939001929567730476e-14, + 8.47656250000000000000e-01, 2.58692052980132450107e-05, + 1.65249572895390883787e-01, -8.37209109923591205585e-14, + 8.44726562500000000000e-01, 1.57925948844884475120e-04, + 1.68555361029802952544e-01, 
3.71439775417047191367e-15, + 8.41796875000000000000e-01, 3.08388157894736824986e-04, + 1.71850256926745714736e-01, -8.64923960721207091374e-14, + 8.38867187500000000000e-01, 4.77074795081967189831e-04, + 1.75134332127754532848e-01, 9.46151658066508147714e-14, + 8.36914062500000000000e-01, -3.12755310457516312941e-04, + 1.78407657472916980623e-01, -9.86835038673494943912e-14, + 8.33984375000000000000e-01, -1.08153501628664488934e-04, + 1.81670303107694053324e-01, -5.93750633338470149673e-14, + 8.31054687500000000000e-01, 1.14143668831168828529e-04, + 1.84922338494061477832e-01, -4.94851676612509959777e-14, + 8.28125000000000000000e-01, 3.53964401294498405386e-04, + 1.88163832418240417610e-01, -5.74307839320075599347e-14, + 8.26171875000000000000e-01, -3.65423387096774205090e-04, + 1.91394852999565046048e-01, 6.44085615069689207389e-14, + 8.23242187500000000000e-01, -9.10620980707395479654e-05, + 1.94615467699577493477e-01, 9.41653814571825038763e-14, + 8.20312500000000000000e-01, 2.00320512820512813563e-04, + 1.97825743329985925811e-01, -6.60454487708238395939e-14, + 8.18359375000000000000e-01, -4.68001198083067100272e-04, + 2.01025746060622623190e-01, -3.18818493754377370219e-14, + 8.15429687500000000000e-01, -1.43063296178343944383e-04, + 2.04215541428766300669e-01, -7.54091651195618882501e-14, + 8.12500000000000000000e-01, 1.98412698412698412526e-04, + 2.07395194345963318483e-01, 1.07268675772897325437e-13, + 8.10546875000000000000e-01, -4.20292721518987358927e-04, + 2.10564769107350002741e-01, -3.65071888317905767114e-16, + 8.07617187500000000000e-01, -4.62095820189274421015e-05, + 2.13724329397791734664e-01, -7.35958018644051430164e-14, + 8.04687500000000000000e-01, 3.43946540880503122493e-04, + 2.16873938300523150247e-01, 9.12093724991498410553e-14, + 8.02734375000000000000e-01, -2.26538009404388704197e-04, + 2.20013658305333592580e-01, -5.14966723414140783686e-14, + 7.99804687500000000000e-01, 1.95312500000000010842e-04, + 2.23143551314251453732e-01, 
-4.16979658452719528642e-14, + 7.97851562500000000000e-01, -3.43774338006230513552e-04, + 2.26263678650411748094e-01, 4.16412673028722634501e-14, + 7.94921875000000000000e-01, 1.09180900621118015200e-04, + 2.29374101064877322642e-01, -3.14926506519148377243e-14, + 7.92968750000000000000e-01, -3.99090557275541795833e-04, + 2.32474878743005319848e-01, 8.87450729797463158287e-14, + 7.90039062500000000000e-01, 8.43942901234567854386e-05, + 2.35566071312860003673e-01, -9.30945949519688945136e-14, + 7.88085937500000000000e-01, -3.93629807692307670790e-04, + 2.38647737850214980426e-01, -3.99705090953013414198e-14, + 7.85156250000000000000e-01, 1.19823619631901839909e-04, + 2.41719936887193398434e-01, -4.82302894299408858477e-14, + 7.83203125000000000000e-01, -3.28507262996941896190e-04, + 2.44782726417724916246e-01, -3.39998110836183310018e-14, + 7.80273437500000000000e-01, 2.14367378048780488466e-04, + 2.47836163904594286578e-01, -1.30297971733086634357e-14, + 7.78320312500000000000e-01, -2.04810980243161095543e-04, + 2.50880306285807819222e-01, 1.59736634636249040926e-15, + 7.75390625000000000000e-01, 3.66950757575757553416e-04, + 2.53915209980959843961e-01, 3.60017673263733462441e-15, + 7.73437500000000000000e-01, -2.36027190332326283783e-05, + 2.56940930897599173477e-01, -9.87480301596639169955e-14, + 7.71484375000000000000e-01, -4.00037650602409625492e-04, + 2.59957524436913445243e-01, 1.26217293988853160748e-14, + 7.68554687500000000000e-01, 2.14081268768768768606e-04, + 2.62965045500777705456e-01, 1.03646364598966627113e-13, + 7.66601562500000000000e-01, -1.34496631736526949192e-04, + 2.65963548497211377253e-01, -7.34359136986779711761e-14, + 7.64648437500000000000e-01, -4.69333022388059722691e-04, + 2.68953087345607855241e-01, -1.03896307840029875617e-13, + 7.61718750000000000000e-01, 1.86011904761904751579e-04, + 2.71933715483555715764e-01, 8.60430677280873279668e-14, + 7.59765625000000000000e-01, -1.21708086053412463954e-04, + 2.74905485872750432463e-01, 
4.88167036467699861016e-14, + 7.57812500000000000000e-01, -4.16050295857988176266e-04, + 2.77868451003541849786e-01, -8.55436000656632193091e-14, + 7.54882812500000000000e-01, 2.79429387905604702334e-04, + 2.80822662900845898548e-01, 4.18860913786370112029e-14, + 7.52929687500000000000e-01, 1.14889705882352939582e-05, + 2.83768173130738432519e-01, -9.38341722366369999987e-14, + 7.50976562500000000000e-01, -2.43424670087976540225e-04, + 2.86705032803865833557e-01, 8.84810960400682115458e-14, + 7.49023437500000000000e-01, -4.85425804093567224515e-04, + 2.89633292582948342897e-01, 9.43339818951269030846e-14, + 7.46093750000000000000e-01, 2.61935131195335281235e-04, + 2.92553002686418039957e-01, -4.05999788601512838979e-14, + 7.44140625000000000000e-01, 4.54215116279069761138e-05, + 2.95464212893875810551e-01, -3.99341638438784391272e-14, + 7.42187500000000000000e-01, -1.58514492753623176778e-04, + 2.98366972551775688771e-01, 2.15926937419734905112e-14, + 7.40234375000000000000e-01, -3.49981936416184958877e-04, + 3.01261330578199704178e-01, -3.79231648020931467980e-14, + 7.37304687500000000000e-01, 4.47473883285302582568e-04, + 3.04147335467405355303e-01, -1.08638286797079129552e-13, + 7.35351562500000000000e-01, 2.80621408045976994047e-04, + 3.07025035294827830512e-01, 8.40315630479242455758e-14, + 7.33398437500000000000e-01, 1.25917800859598846179e-04, + 3.09894477722764349892e-01, 1.00337969820392140548e-13, + 7.31445312500000000000e-01, -1.67410714285714294039e-05, + 3.12755710003784770379e-01, 1.12118007403609819830e-13, + 7.29492187500000000000e-01, -1.47458155270655270810e-04, + 3.15608778986415927648e-01, -1.12592746246808286851e-13, + 7.27539062500000000000e-01, -2.66335227272727253015e-04, + 3.18453731118552241242e-01, -1.76254313121726620573e-14, + 7.25585937500000000000e-01, -3.73472910764872500361e-04, + 3.21290612453822177486e-01, -8.78854276997154463823e-14, + 7.23632812500000000000e-01, -4.68970692090395495540e-04, + 3.24119468654316733591e-01, 
-1.04757500587765412913e-13, + 7.20703125000000000000e-01, 4.23635563380281667846e-04, + 3.26940344995819032192e-01, 3.42884001266694615699e-14, + 7.18750000000000000000e-01, 3.51123595505617967782e-04, + 3.29753286372579168528e-01, -1.11186713895593226425e-13, + 7.16796875000000000000e-01, 2.89959733893557422817e-04, + 3.32558337300042694551e-01, 3.39068613367222871432e-14, + 7.14843750000000000000e-01, 2.40048882681564236573e-04, + 3.35355541921217081835e-01, -7.92515783138655870267e-14, + 7.12890625000000000000e-01, 2.01297005571030637044e-04, + 3.38144944008718084660e-01, -1.68695012281303904492e-15, + 7.10937500000000000000e-01, 1.73611111111111117737e-04, + 3.40926586970681455568e-01, -8.82452633212564001210e-14, + 7.08984375000000000000e-01, 1.56899238227146807121e-04, + 3.43700513853264055797e-01, 5.43888832989906475149e-14, + 7.07031250000000000000e-01, 1.51070441988950269954e-04, + 3.46466767346100823488e-01, 1.07757430375726404546e-13, + 7.05078125000000000000e-01, 1.56034779614325073201e-04, + 3.49225389785260631470e-01, 2.76727112657366262202e-14, + 7.03125000000000000000e-01, 1.71703296703296716700e-04, + 3.51976423157111639739e-01, 6.65449164332479482515e-14, + 7.01171875000000000000e-01, 1.97988013698630136838e-04, + 3.54719909102868768969e-01, 6.02593863918127820941e-14, + 6.99218750000000000000e-01, 2.34801912568306000561e-04, + 3.57455888921776931966e-01, 2.68422602858563731995e-14, + 6.97265625000000000000e-01, 2.82058923705722061539e-04, + 3.60184403574976386153e-01, 3.14101284357935074430e-14, + 6.95312500000000000000e-01, 3.39673913043478251442e-04, + 3.62905493689368086052e-01, 3.67085697163493829481e-16, + 6.93359375000000000000e-01, 4.07562669376693761502e-04, + 3.65619199561024288414e-01, -5.95770946492931122703e-14, + 6.91406250000000000000e-01, 4.85641891891891918850e-04, + 3.68325561158599157352e-01, 1.08495696229679121506e-13, + 6.90429687500000000000e-01, -4.02733322102425902751e-04, + 3.71024618127876237850e-01, 
-3.57393774001043846673e-15, + 6.88476562500000000000e-01, -3.04519489247311828540e-04, + 3.73716409793587445165e-01, -3.36434401382552911606e-15, + 6.86523437500000000000e-01, -1.96359752010723855866e-04, + 3.76400975164187912014e-01, 6.51539835645912724894e-14, + 6.84570312500000000000e-01, -7.83338903743315521791e-05, + 3.79078352935039220029e-01, -6.97616377035377091917e-14, + 6.82617187500000000000e-01, 4.94791666666666654379e-05, + 3.81748581490910510183e-01, -6.21703236457339082579e-14, + 6.80664062500000000000e-01, 1.87001329787234041400e-04, + 3.84411698910298582632e-01, 3.34571026954408237380e-14, + 6.78710937500000000000e-01, 3.34155338196286447704e-04, + 3.87067742968383754487e-01, 6.45334117530848658606e-14, + 6.77734375000000000000e-01, -4.85697751322751295790e-04, + 3.89716751139985717600e-01, 3.94957702521028807100e-14, + 6.75781250000000000000e-01, -3.19508575197889187636e-04, + 3.92358760602974143694e-01, -1.10271214775306207128e-13, + 6.73828125000000000000e-01, -1.43914473684210512906e-04, + 3.94993808240769794793e-01, 9.91833135258393974771e-14, + 6.71875000000000000000e-01, 4.10104986876640414256e-05, + 3.97621930647119370406e-01, 1.91186992668509687992e-14, + 6.69921875000000000000e-01, 2.35193062827225135005e-04, + 4.00243164127005002229e-01, 7.70470078193964863175e-15, + 6.67968750000000000000e-01, 4.38560704960835531785e-04, + 4.02857544701191727654e-01, -1.08212998879547184399e-13, + 6.66992187500000000000e-01, -3.25520833333333315263e-04, + 4.05465108108273852849e-01, -1.09470871366066397592e-13, + 6.65039062500000000000e-01, -1.03997564935064929046e-04, + 4.08065889808312931564e-01, -9.11831335065229488419e-14, + 6.63085937500000000000e-01, 1.26497733160621750282e-04, + 4.10659924985338875558e-01, -7.04896239210974659112e-14, + 6.61132812500000000000e-01, 3.65895510335917330171e-04, + 4.13247248550305812387e-01, -8.64814613198628863840e-14, + 6.60156250000000000000e-01, -3.62435567010309291763e-04, + 4.15827895143820569501e-01, 
-1.09603887929539904968e-13, + 6.58203125000000000000e-01, -1.05438624678663237367e-04, + 4.18401899138871158357e-01, 1.26591539849383157019e-14, + 6.56250000000000000000e-01, 1.60256410256410256271e-04, + 4.20969294644237379543e-01, -1.07743414616095792458e-13, + 6.54296875000000000000e-01, 4.34582800511508948911e-04, + 4.23530115505855064839e-01, -5.17691206942015446275e-14, + 6.53320312500000000000e-01, -2.59088010204081649248e-04, + 4.26084395310908803367e-01, -8.74024251107295313295e-15, + 6.51367187500000000000e-01, 3.23035941475826945284e-05, + 4.28632167389650931000e-01, 4.78292070340653116123e-14, + 6.49414062500000000000e-01, 3.32130393401015248239e-04, + 4.31173464818357388140e-01, 1.39527194700992522593e-14, + 6.48437500000000000000e-01, -3.36234177215189876300e-04, + 4.33708320421601456474e-01, -4.20630377335898599132e-14, + 6.46484375000000000000e-01, -1.97285353535353552123e-05, + 4.36236766774982243078e-01, -6.41727287881571093141e-14, + 6.44531250000000000000e-01, 3.05022040302267011258e-04, + 4.38758836207625790848e-01, 2.14689717834000941735e-15, + 6.43554687500000000000e-01, -3.38607097989949751195e-04, + 4.41274560804913562606e-01, -3.83331165923754571982e-14, + 6.41601562500000000000e-01, 2.44752506265664146815e-06, + 4.43783972410301430500e-01, -4.49328344033376536063e-16, + 6.39648437500000000000e-01, 3.51562499999999986990e-04, + 4.46287102628502907464e-01, -8.33959316905439057284e-14, + 6.38671875000000000000e-01, -2.67884975062344151547e-04, + 4.48783982827080762945e-01, -7.40524322934505657145e-14, + 6.36718750000000000000e-01, 9.71703980099502536783e-05, + 4.51274644139402880683e-01, 5.57044620824077391343e-14, + 6.34765625000000000000e-01, 4.70107009925558303777e-04, + 4.53759117467143369140e-01, -2.28624953086649163255e-14, + 6.33789062500000000000e-01, -1.25696163366336636884e-04, + 4.56237433481646803557e-01, -5.92091761359114736879e-14, + 6.31835937500000000000e-01, 2.62827932098765450035e-04, + 4.58709622626884083729e-01, 
9.25811146459912121009e-14, + 6.30859375000000000000e-01, -3.17503078817733981869e-04, + 4.61175715122180918115e-01, -1.07517471912360339462e-14, + 6.28906250000000000000e-01, 8.63789926289926251633e-05, + 4.63635740963127318537e-01, -9.48054446804536471658e-14, + 6.27929687500000000000e-01, -4.78707107843137234706e-04, + 4.66089729924533457961e-01, 6.57665976858006147528e-14, + 6.25976562500000000000e-01, -5.96920843520782368088e-05, + 4.68537711563158154604e-01, 8.11157716400523519546e-14, + 6.24023437500000000000e-01, 3.66806402439024390773e-04, + 4.70979715218845740310e-01, -5.47277630185806178777e-14, + 6.23046875000000000000e-01, -1.75828771289537715006e-04, + 4.73415770016572423629e-01, 9.97077440469968501191e-14, + 6.21093750000000000000e-01, 2.65473300970873776934e-04, + 4.75845904869856894948e-01, 1.07019317621142549209e-13, + 6.20117187500000000000e-01, -2.62465950363196100312e-04, + 4.78270148481442447519e-01, 2.78328646163063623105e-14, + 6.18164062500000000000e-01, 1.93425422705314001282e-04, + 4.80688529345798087888e-01, -4.61802117788209510607e-14, + 6.17187500000000000000e-01, -3.20030120481927722077e-04, + 4.83101075751164898975e-01, -2.90762364463866399448e-14, + 6.15234375000000000000e-01, 1.50240384615384623725e-04, + 4.85507815781602403149e-01, 9.84046527823262695501e-14, + 6.14257812500000000000e-01, -3.48939598321342924619e-04, + 4.87908777319262298988e-01, -2.33257420051882497138e-14, + 6.12304687500000000000e-01, 1.35503887559808614775e-04, + 4.90303988045297955978e-01, -1.04117827384293371195e-13, + 6.11328125000000000000e-01, -3.49604713603818609800e-04, + 4.92693475442592898617e-01, -1.76429214903040463891e-14, + 6.09375000000000000000e-01, 1.48809523809523822947e-04, + 4.95077266797807169496e-01, 4.43451018828153751026e-14, + 6.08398437500000000000e-01, -3.22427998812351533642e-04, + 4.97455389202741571353e-01, 7.73708980421385689768e-14, + 6.06445312500000000000e-01, 1.89758590047393372637e-04, + 4.99827869556384030147e-01, 
6.52996738757825591006e-14, + 6.05468750000000000000e-01, -2.67804373522458635890e-04, + 5.02194734566728584468e-01, -1.30901947805436250965e-14, + 6.03515625000000000000e-01, 2.57959905660377355422e-04, + 5.04556010752367001260e-01, 2.82857986090678938760e-14, + 6.02539062500000000000e-01, -1.86121323529411759412e-04, + 5.06911724444762512576e-01, 9.18415373613231066159e-14, + 6.00585937500000000000e-01, 3.53029636150234741275e-04, + 5.09261901789841431309e-01, -3.34845053941249831574e-14, + 5.99609375000000000000e-01, -7.77590749414519956471e-05, + 5.11606568749130019569e-01, -6.79410499533039142111e-14, + 5.97656250000000000000e-01, 4.74591121495327101284e-04, + 5.13945751102255599108e-01, -2.12823065872096837292e-14, + 5.96679687500000000000e-01, 5.69092365967365941461e-05, + 5.16279474448538167053e-01, -8.36708800829965016511e-14, + 5.95703125000000000000e-01, -3.54287790697674440793e-04, + 5.18607764208127264283e-01, -8.16321296891503919914e-14, + 5.93750000000000000000e-01, 2.17517401392111359854e-04, + 5.20930645624275712180e-01, -9.03997701415351032573e-14, + 5.92773437500000000000e-01, -1.80844907407407397368e-04, + 5.23248143764476481010e-01, 7.13555066011812146304e-14, + 5.90820312500000000000e-01, 4.03705975750577367080e-04, + 5.25560283522963800351e-01, -3.64289687078304118459e-14, + 5.89843750000000000000e-01, 1.80011520737327188784e-05, + 5.27867089620940532768e-01, -9.81476542529858082436e-14, + 5.88867187500000000000e-01, -3.61440373563218372236e-04, + 5.30168586609079284244e-01, 4.23335972026522927116e-14, + 5.86914062500000000000e-01, 2.41900802752293591410e-04, + 5.32464798869568767259e-01, -9.69233849737002813365e-14, + 5.85937500000000000000e-01, -1.25143020594965678717e-04, + 5.34755750616113800788e-01, -8.61253103749572066304e-14, + 5.84960937500000000000e-01, -4.86051655251141525530e-04, + 5.37041465896891168086e-01, -7.51351912898166894415e-15, + 5.83007812500000000000e-01, 1.35695472665148063720e-04, + 5.39321968595686485060e-01, 
-7.76104042041871663206e-14, + 5.82031250000000000000e-01, -2.13068181818181807833e-04, + 5.41597282432803694974e-01, -5.93233971574446149215e-14, + 5.80078125000000000000e-01, 4.20741213151927453007e-04, + 5.43867430967338805203e-01, -5.52875399870574035452e-14, + 5.79101562500000000000e-01, 8.39578619909502261217e-05, + 5.46132437598089381936e-01, 4.62684463909612350375e-14, + 5.78125000000000000000e-01, -2.46896162528216717505e-04, + 5.48392325565600913251e-01, -2.77505026685624314655e-14, + 5.76171875000000000000e-01, 4.04701576576576562902e-04, + 5.50647117952621556469e-01, 4.07227907088846767786e-14, + 5.75195312500000000000e-01, 8.55863764044943823575e-05, + 5.52896837686603248585e-01, 7.44889957023668801898e-14, + 5.74218750000000000000e-01, -2.27718609865470858825e-04, + 5.55141507540611200966e-01, -1.09608250460592783688e-13, + 5.72265625000000000000e-01, 4.41310123042505588354e-04, + 5.57381150134006020380e-01, 3.36669632485986549666e-16, + 5.71289062500000000000e-01, 1.39508928571428563684e-04, + 5.59615787935399566777e-01, 2.31194938380053776320e-14, + 5.70312500000000000000e-01, -1.56597995545657025672e-04, + 5.61845443262654953287e-01, 3.68646286817464054051e-14, + 5.69335937500000000000e-01, -4.47048611111111116653e-04, + 5.64070138284705535625e-01, 9.74304462767037064935e-14, + 5.67382812500000000000e-01, 2.44681956762749441229e-04, + 5.66289895023146527819e-01, -3.06552284854813270707e-14, + 5.66406250000000000000e-01, -3.45685840707964596973e-05, + 5.68504735352689749561e-01, -2.10374825114449422873e-14, + 5.65429687500000000000e-01, -3.08274696467991172252e-04, + 5.70714681003437362961e-01, 3.41818930848065350178e-14, + 5.63476562500000000000e-01, 4.00089482378854644894e-04, + 5.72919753561791367247e-01, -5.85815401264202219115e-15, + 5.62500000000000000000e-01, 1.37362637362637362518e-04, + 5.75119974471363093471e-01, 2.48469505879759890764e-14, + 5.61523437500000000000e-01, -1.19928728070175431939e-04, + 5.77315365034792193910e-01, 
3.14104080050449590607e-14, + 5.60546875000000000000e-01, -3.71820295404814028101e-04, + 5.79505946414656136767e-01, -1.39129117330010386790e-14, + 5.58593750000000000000e-01, 3.58215065502183428129e-04, + 5.81691739634607074549e-01, 1.54079711890856738893e-14, + 5.57617187500000000000e-01, 1.17017293028322439969e-04, + 5.83872765580963459797e-01, 1.92193002098161738068e-14, + 5.56640625000000000000e-01, -1.18885869565217396136e-04, + 5.86049045003619539784e-01, -4.13308801481084566682e-14, + 5.55664062500000000000e-01, -3.49528877440347096866e-04, + 5.88220598517182224896e-01, -9.61818609368988642797e-14, + 5.53710937500000000000e-01, 4.01616612554112561388e-04, + 5.90387446602107957006e-01, 6.84176364159146659095e-14, + 5.52734375000000000000e-01, 1.81391738660907137675e-04, + 5.92549609606749072555e-01, -7.74738125310530505286e-14, + 5.51757812500000000000e-01, -3.36745689655172409120e-05, + 5.94707107746671681525e-01, 2.11079891578422983965e-14, + 5.50781250000000000000e-01, -2.43615591397849451990e-04, + 5.96859961107838898897e-01, -4.50623098590974831636e-14, + 5.49804687500000000000e-01, -4.48464324034334772557e-04, + 5.99008189646156097297e-01, -7.26979150253512871478e-14, + 5.47851562500000000000e-01, 3.28309020342612404610e-04, + 6.01151813189289896400e-01, 4.49397919602643900279e-14, + 5.46875000000000000000e-01, 1.33547008547008560445e-04, + 6.03290851438032404985e-01, 5.18573553063418286042e-14, + 5.45898437500000000000e-01, -5.62200159914712159731e-05, + 6.05425323966755968286e-01, -3.90788481567525388100e-14, + 5.44921875000000000000e-01, -2.41023936170212761459e-04, + 6.07555250224550036364e-01, -8.24086314983113070392e-15, + 5.43945312500000000000e-01, -4.20896364118895980992e-04, + 6.09680649536812779843e-01, 4.24936389576037736371e-14, + 5.41992187500000000000e-01, 3.80693855932203405450e-04, + 6.11801541105933210929e-01, 5.96926009653846962309e-14, + 5.41015625000000000000e-01, 2.10590644820295982628e-04, + 6.13917944012428051792e-01, 
-5.75595951560511011845e-14, + 5.40039062500000000000e-01, 4.53256856540084409344e-05, + 6.16029877215623855591e-01, -1.09835943254384298330e-13, + 5.39062500000000000000e-01, -1.15131578947368418456e-04, + 6.18137359555021248525e-01, 5.74853476805674446129e-14, + 5.38085937500000000000e-01, -2.70811449579831946440e-04, + 6.20240409751886545564e-01, -2.90167125533596631915e-14, + 5.37109375000000000000e-01, -4.21743972746331215531e-04, + 6.22339046408797003096e-01, -1.82614988669165533809e-14, + 5.35156250000000000000e-01, 4.08603556485355630390e-04, + 6.24433288011914555682e-01, -2.10546393306435734475e-14, + 5.34179687500000000000e-01, 2.67076591858037557577e-04, + 6.26523152931440563407e-01, -8.78036279744035513715e-14, + 5.33203125000000000000e-01, 1.30208333333333331526e-04, + 6.28608659422297932906e-01, 7.62048382318937090230e-14, + 5.32226562500000000000e-01, -2.03027546777546788817e-06, + 6.30689825626177480444e-01, 2.12246394140452907525e-14, + 5.31250000000000000000e-01, -1.29668049792531120444e-04, + 6.32766669571083184564e-01, -4.53550186996774688761e-14, + 5.30273437500000000000e-01, -2.52733566252587998902e-04, + 6.34839209172923801816e-01, 8.64101534252508178520e-14, + 5.29296875000000000000e-01, -3.71255165289256208652e-04, + 6.36907462237104482483e-01, -3.52508626243453241145e-14, + 5.28320312500000000000e-01, -4.85260953608247433411e-04, + 6.38971446457844649558e-01, 7.60718216684202016469e-14, + 5.26367187500000000000e-01, 3.81783693415637856959e-04, + 6.41031179420906482846e-01, 2.48082091251967673736e-14, + 5.25390625000000000000e-01, 2.76726129363449673514e-04, + 6.43086678603140171617e-01, -1.12856225215656411367e-13, + 5.24414062500000000000e-01, 1.76101434426229513973e-04, + 6.45137961373620782979e-01, -3.60813136042255739798e-14, + 5.23437500000000000000e-01, 7.98824130879345644567e-05, + 6.47185044995239877608e-01, 6.96725146472247760395e-14, + 5.22460937500000000000e-01, -1.19579081632653055071e-05, + 6.49227946625160257099e-01, 
-5.04382083563449091526e-14, + 5.21484375000000000000e-01, -9.94462830957230209915e-05, + 6.51266683315043337643e-01, -8.52342468131615437746e-14, + 5.20507812500000000000e-01, -1.82609247967479665321e-04, + 6.53301272012640765752e-01, 1.04873006903856996874e-13, + 5.19531250000000000000e-01, -2.61473123732251517670e-04, + 6.55331729563158660312e-01, -3.10282172335227455825e-14, + 5.18554687500000000000e-01, -3.36064018218623454786e-04, + 6.57358072708348117885e-01, 1.19122567102055698791e-14, + 5.17578125000000000000e-01, -4.06407828282828297722e-04, + 6.59380318089233696810e-01, -1.05870694633429062178e-13, + 5.16601562500000000000e-01, -4.72530241935483884957e-04, + 6.61398482245431296178e-01, -6.62879179039074743232e-14, + 5.14648437500000000000e-01, 4.42105759557344087183e-04, + 6.63412581616967145237e-01, 9.91058598099467920662e-14, + 5.13671875000000000000e-01, 3.84349899598393583006e-04, + 6.65422632545187298092e-01, -9.68491419671810783613e-14, + 5.12695312500000000000e-01, 3.30739604208416838882e-04, + 6.67428651271848139004e-01, 1.08050943383646665619e-13, + 5.11718750000000000000e-01, 2.81249999999999978750e-04, + 6.69430653942526987521e-01, 1.02279777907416200886e-13, + 5.10742187500000000000e-01, 2.35856412175648700539e-04, + 6.71428656605257856427e-01, 4.44668903784876907111e-14, + 5.09765625000000000000e-01, 1.94534362549800786662e-04, + 6.73422675212123067467e-01, 4.36528304869414810551e-14, + 5.08789062500000000000e-01, 1.57259567594433401650e-04, + 6.75412725620162746054e-01, 1.39850267837821649808e-14, + 5.07812500000000000000e-01, 1.24007936507936501053e-04, + 6.77398823591829568613e-01, -2.34278036379790696248e-14, + 5.06835937500000000000e-01, 9.47555693069306959140e-05, + 6.79380984795898257289e-01, -1.00907141981183426552e-13, + 5.05859375000000000000e-01, 6.94787549407114679145e-05, + 6.81359224807920327294e-01, -1.72583456150091690167e-14, + 5.04882812500000000000e-01, 4.81539694280078915244e-05, + 6.83333559111588328960e-01, 
3.23592040115024425781e-14,
+	5.03906250000000000000e-01, 3.07578740157480310692e-05,
+	6.85304003098963221419e-01, -4.38048746232309815355e-14,
+	5.02929687500000000000e-01, 1.72673133595284864178e-05,
+	6.87270572070929119946e-01, 3.11475515031130920163e-14,
+	5.01953125000000000000e-01, 7.65931372549019597214e-06,
+	6.89233281238784911693e-01, 2.40686318405286681994e-14,
+	5.00976562500000000000e-01, 1.91108121330724059841e-06,
+	6.91192145724244255689e-01, -1.02296829368141946888e-13,
+};
+
+/*
+ * Scalar constants, referenced through the aliases defined below:
+ * the two-part log(2), the polynomial coefficients of p(y), the
+ * subnormal scale factor 2^52, and zero.
+ */
+static const double C[] = {
+	6.93147180559890330187e-01,
+	5.49792301870837115524e-14,
+	-0.5,
+	3.33333333332438282293284931714682042701467889609e-0001,
+	-2.49999999998669026809069285994497705748522309858e-0001,
+	2.00000758613044543658508591796951886624273250472e-0001,
+	-1.66667492411916229281646821123333564982955309481e-0001,
+	4503599627370496.0,
+	0.0
+};
+
+#define ln2hi C[0]
+#define ln2lo C[1]
+#define mhalf C[2]
+#define P3 C[3]
+#define P4 C[4]
+#define P5 C[5]
+#define P6 C[6]
+#define two52 C[7]
+#define zero C[8]
+
+/*
+ * PROCESS(N): finish log() for the element that PREPROCESS(N, ...) left
+ * in slot N, store the result at *y and advance y by stridey.
+ *
+ * On entry i##N is the high word of z##N.d and n##N is the exponent
+ * adjustment already applied for a subnormal input (0 or -52).  The
+ * high word is rounded so that eight fraction bits remain; that value,
+ * with the exponent field e removed, becomes m in w##N, and bits 12-19
+ * become the table index 4*k.  e is also removed from z##N so that
+ * d##N = z - m.  (e >> 20) shifts a possibly negative int and therefore
+ * relies on an arithmetic right shift.
+ *
+ * Members are written z##N.d, not z##N##.d: pasting an identifier onto
+ * "." does not yield a valid preprocessing token, and compilers such as
+ * GCC reject it.
+ */
+#define PROCESS(N) \
+	i##N = (i##N + 0x800) & ~0xfff; \
+	e = (i##N & 0x7ff00000) - 0x3ff00000; \
+	z##N.i[HIWORD] -= e; \
+	w##N.i[HIWORD] = i##N - e; \
+	w##N.i[LOWORD] = 0; \
+	n##N += (e >> 20); \
+	i##N = (i##N >> 10) & 0x3fc; \
+	d##N = z##N.d - w##N.d; \
+	h##N = d##N * TBL[i##N]; \
+	l##N = d##N * TBL[i##N+1]; \
+	s##N = h##N + l##N; \
+	b##N = (s##N * s##N) * (mhalf + s##N * (P3 + s##N * (P4 + \
+	    s##N * (P5 + s##N * P6)))); \
+	*y = (n##N * ln2hi + TBL[i##N+2]) + (h##N + (l##N + \
+	    (n##N * ln2lo + TBL[i##N+3]) + b##N)); \
+	y += stridey
+
+/*
+ * PREPROCESS(N, index, label): load *x into slot N and advance x by
+ * stridex.  Exceptional arguments are answered immediately in y[index],
+ * after which control jumps to label so that the slots loaded earlier
+ * in this pass are flushed:
+ *	+Inf -> +Inf;  -Inf -> NaN;  NaN -> NaN
+ *	+-0  -> -Inf (computed as mhalf / zero)
+ *	x < 0 -> NaN (computed as zero / zero)
+ * A positive subnormal is multiplied by 2^52 and n##N is set to -52.
+ *
+ * The high word is taken from the union rather than through HI(), so no
+ * double is read through an int pointer, and the zero test shifts an
+ * unsigned value because i##N may be negative at that point.
+ */
+#define PREPROCESS(N, index, label) \
+	z##N.d = *x; \
+	i##N = z##N.i[HIWORD]; \
+	x += stridex; \
+	n##N = 0; \
+	if ((i##N & 0x7ff00000) == 0x7ff00000) { /* inf or NaN */ \
+		y[index] = z##N.d * ((i##N < 0)? zero : z##N.d); \
+		goto label; \
+	} else if (i##N < 0x00100000) { /* subnormal, negative, zero */ \
+		if ((((unsigned)i##N << 1) | z##N.i[LOWORD]) == 0) { \
+			y[index] = mhalf / zero; \
+			goto label; \
+		} else if (i##N < 0) { \
+			y[index] = zero / zero; \
+			goto label; \
+		} \
+		z##N.d *= two52; \
+		n##N = -52; \
+		i##N = z##N.i[HIWORD]; \
+	}
+
+/*
+ * __vlog: y[i * stridey] = log(x[i * stridex]) for i = 0 .. n-1.
+ *
+ *	n	number of elements; nothing is stored when n <= 0
+ *	x	input vector, read with a step of stridex doubles
+ *	stridex	distance between consecutive inputs
+ *	y	output vector, written with a step of stridey doubles
+ *	stridey	distance between consecutive outputs
+ *
+ * Up to four elements are loaded per pass and evaluated together.  When
+ * the input runs out, or PREPROCESS meets an exceptional argument, the
+ * elements already loaded are completed one at a time at the
+ * process1/2/3 labels and the loop restarts.
+ */
+void
+__vlog(int n, double *restrict x, int stridex, double *restrict y,
+    int stridey)
+{
+	union {
+		unsigned i[2];
+		double d;
+	} z0, z1, z2, z3, w0, w1, w2, w3;
+	double b0, b1, b2, b3;
+	double d0, d1, d2, d3;
+	double h0, h1, h2, h3;
+	double l0, l1, l2, l3;
+	double s0, s1, s2, s3;
+	int i0, i1, i2, i3, e;
+	int n0, n1, n2, n3;
+
+	/* the low word of every m is zero; only the high word changes */
+	w0.i[LOWORD] = 0;
+	w1.i[LOWORD] = 0;
+	w2.i[LOWORD] = 0;
+	w3.i[LOWORD] = 0;
+
+	/* compensates for the unconditional advance at "begin" */
+	y -= stridey;
+
+	for (;;) {
+begin:
+		y += stridey;
+
+		if (--n < 0)
+			break;
+
+		PREPROCESS(0, 0, begin);
+
+		if (--n < 0)
+			goto process1;
+
+		PREPROCESS(1, stridey, process1);
+
+		if (--n < 0)
+			goto process2;
+
+		/* multiply, not shift: stridey may be negative */
+		PREPROCESS(2, 2 * stridey, process2);
+
+		if (--n < 0)
+			goto process3;
+
+		PREPROCESS(3, 3 * stridey, process3);
+
+		i0 = (i0 + 0x800) & ~0xfff;
+		e = (i0 & 0x7ff00000) - 0x3ff00000;
+		z0.i[HIWORD] -= e;
+		w0.i[HIWORD] = i0 - e;
+		n0 += (e >> 20);
+		i0 = (i0 >> 10) & 0x3fc;
+
+		i1 = (i1 + 0x800) & ~0xfff;
+		e = (i1 & 0x7ff00000) - 0x3ff00000;
+		z1.i[HIWORD] -= e;
+		w1.i[HIWORD] = i1 - e;
+		n1 += (e >> 20);
+		i1 = (i1 >> 10) & 0x3fc;
+
+		i2 = (i2 + 0x800) & ~0xfff;
+		e = (i2 & 0x7ff00000) - 0x3ff00000;
+		z2.i[HIWORD] -= e;
+		w2.i[HIWORD] = i2 - e;
+		n2 += (e >> 20);
+		i2 = (i2 >> 10) & 0x3fc;
+
+		i3 = (i3 + 0x800) & ~0xfff;
+		e = (i3 & 0x7ff00000) - 0x3ff00000;
+		z3.i[HIWORD] -= e;
+		w3.i[HIWORD] = i3 - e;
+		n3 += (e >> 20);
+		i3 = (i3 >> 10) & 0x3fc;
+
+		d0 = z0.d - w0.d;
+		d1 = z1.d - w1.d;
+		d2 = z2.d - w2.d;
+		d3 = z3.d - w3.d;
+
+		h0 = d0 * TBL[i0];
+		h1 = d1 * TBL[i1];
+		h2 = d2 * TBL[i2];
+		h3 = d3 * TBL[i3];
+
+		l0 = d0 * TBL[i0+1];
+		l1 = d1 * TBL[i1+1];
+		l2 = d2 * TBL[i2+1];
+		l3 = d3 * TBL[i3+1];
+
+		s0 = h0 + l0;
+		s1 = h1 + l1;
+		s2 = h2 + l2;
+		s3 = h3 + l3;
+
+		b0 = (s0 * s0) * (mhalf + s0 * (P3 + s0 * (P4 +
+		    s0 * (P5 + s0 * P6))));
+		b1 = (s1 * s1) * (mhalf + s1 * (P3 + s1 * (P4 +
+		    s1 * (P5 + s1 * P6))));
+		b2 = (s2 * s2) * (mhalf + s2 * (P3 + s2 * (P4 +
+		    s2 * (P5 + s2 * P6))));
+		b3 = (s3 * s3) * (mhalf + s3 * (P3 + s3 * (P4 +
+		    s3 * (P5 + s3 * P6))));
+
+		*y = (n0 * ln2hi + TBL[i0+2]) + (h0 + (l0 +
+		    (n0 * ln2lo + TBL[i0+3]) + b0));
+		y += stridey;
+		*y = (n1 * ln2hi + TBL[i1+2]) + (h1 + (l1 +
+		    (n1 * ln2lo + TBL[i1+3]) + b1));
+		y += stridey;
+		*y = (n2 * ln2hi + TBL[i2+2]) + (h2 + (l2 +
+		    (n2 * ln2lo + TBL[i2+3]) + b2));
+		y += stridey;
+		*y = (n3 * ln2hi + TBL[i3+2]) + (h3 + (l3 +
+		    (n3 * ln2lo + TBL[i3+3]) + b3));
+		continue;
+
+process1:
+		PROCESS(0);
+		continue;
+
+process2:
+		PROCESS(0);
+		PROCESS(1);
+		continue;
+
+process3:
+		PROCESS(0);
+		PROCESS(1);
+		PROCESS(2);
+	}
+}
diff --git a/usr/src/libm/src/mvec/__vlogf.c b/usr/src/libm/src/mvec/__vlogf.c
new file mode 100644
index 0000000..abafac9
--- /dev/null
+++ b/usr/src/libm/src/mvec/__vlogf.c
@@ -0,0 +1,261 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vlogf.c 1.7 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float logf(float x) + * + * Method : + * 1. Special cases: + * for x is negative, -Inf => QNaN + invalid; + * for x = 0 => -Inf + divide-by-zero; + * for x = +Inf => Inf; + * for x = NaN => QNaN. + * 2. Computes logarithm from: + * x = m * 2**n => log(x) = n * log(2) + log(m), + * m = [1, 2). + * Let m = m0 + dm, where m0 = 1 + k / 32, + * k = [0, 32], + * dm = [-1/64, 1/64]. + * Then log(m) = log(m0 + dm) = log(m0) + log(1+y), + * where y = dm*(1/m0), y = [-1/66, 1/64]. + * Then + * 1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32); + * log(m0) is looked up in a table of log(1), log(1+1/32), + * ..., log(1+32/32). + * log(1+y) is computed using approximation: + * log(1+y) = ((a3*y + a2)*y + a1)*y*y + y. + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-28.41). All calculations are of + * double precision. + * Maximum error observed: less than 0.545 ulp for the + * whole float type range. 
+ */ + +static const double __TBL_logf[] = { + /* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */ + /* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */ +0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02, +1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07, +8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01, +1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07, +1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01, +9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08, +2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01, +9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08, +3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01, +8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08, +3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01, +7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08, +4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01, +7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08, +5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01, +7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08, +5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01, +6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08, +6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01, +6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08, +6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01, +6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08 +}; + +static const double + K3 = -2.49887584306188944706e-01, + K2 = 3.33368809981254554946e-01, + K1 = 
-5.00000008402474976565e-01; +/* INF is +Infinity, built from the single-precision bit pattern 0x7f800000 */ +static const union { + int i; + float f; +} inf = { 0x7f800000 }; + +#define INF inf.f +/* + * PROCESS(N): finish element N. iy is the 23-bit mantissa field; + * ival becomes that field rounded to a multiple of 0x40000, so + * i = ival >> 17 is an even index into __TBL_logf and iy - ival is + * the signed remainder. ty adds LN2 * exp to the table entry at i, + * yy scales the remainder by the entry at i + 1 and feeds it to the + * K1..K3 polynomial. Stores (float)(yy + ty) at y[0] and advances + * y by stridey. + */ +#define PROCESS(N) \ + iy##N = ival##N & 0x007fffff; \ + ival##N = (iy##N + 0x20000) & 0xfffc0000; \ + i##N = ival##N >> 17; \ + iy##N = iy##N - ival##N; \ + ty##N = LN2 * (double) exp##N + __TBL_logf[i##N]; \ + yy##N = (double) iy##N * __TBL_logf[i##N + 1]; \ + yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N; \ + y[0] = (float)(yy##N + ty##N); \ + y += stridey; +/* + * PREPROCESS(N, index, label): load element N from x both as raw + * bits (ival) and as a float (value), advance x by stridex, and set + * exp to the unbiased exponent. NaN and +-Inf store value + INF at + * y[index]; zero stores -1.0f / 0.0f and a negative operand stores + * 0.0f / 0.0f; each of those cases then jumps to label. A positive + * denormal is renormalized by converting its bit pattern to float, + * with exp rebased by 127 + 149. + */ +#define PREPROCESS(N, index, label) \ + ival##N = *(int*)x; \ + value = x[0]; \ + x += stridex; \ + exp##N = (ival##N >> 23) - 127; \ + if ( (ival##N & 0x7fffffff) >= 0x7f800000 ) /* X = NaN or Inf */ \ + { \ + y[index] = value + INF; \ + goto label; \ + } \ + if ( ival##N < 0x00800000 ) \ + { \ + if ( ival##N > 0 ) /* X = denormal */ \ + { \ + value = (float) ival##N; \ + ival##N = *(int*) &value; \ + exp##N = (ival##N >> 23) - (127 + 149); \ + } \ + else \ + { \ + value = 0.0f; \ + y[index] = ((ival##N & 0x7fffffff) == 0) ?
\ + -1.0f / value : value / value; \ + goto label; \ + } \ + } +/* + * __vlogf: for each of the n elements read from x (stride stridex) + * store a single-precision natural logarithm in y (stride stridey). + * Up to five elements are preprocessed per pass. When the input + * runs out or PREPROCESS meets a special operand, control goes to + * processK, where the K elements already accepted in this pass are + * finished with PROCESS. Otherwise the unrolled body below applies + * the PROCESS steps to all five elements. + */ +void +__vlogf( int n, float * restrict x, int stridex, float * restrict y, + int stridey ) +{ + double LN2 = __TBL_logf[64]; /* log(2) = 0.6931471805599453094 */ + double yy0, yy1, yy2, yy3, yy4; + double ty0, ty1, ty2, ty3, ty4; + float value; + int i0, i1, i2, i3, i4; + int ival0, ival1, ival2, ival3, ival4; + int exp0, exp1, exp2, exp3, exp4; + int iy0, iy1, iy2, iy3, iy4; + /* step back once so that the y += stridey at begin lands on the first output */ + y -= stridey; + + for ( ; ; ) + { +begin: + y += stridey; + + if ( --n < 0 ) + break; + + PREPROCESS(0, 0, begin) + + if ( --n < 0 ) + goto process1; + + PREPROCESS(1, stridey, process1) + + if ( --n < 0 ) + goto process2; + + PREPROCESS(2, (stridey << 1), process2) + + if ( --n < 0 ) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3) + + if ( --n < 0 ) + goto process4; + + PREPROCESS(4, (stridey << 2), process4) + + iy0 = ival0 & 0x007fffff; + iy1 = ival1 & 0x007fffff; + iy2 = ival2 & 0x007fffff; + iy3 = ival3 & 0x007fffff; + iy4 = ival4 & 0x007fffff; + + ival0 = (iy0 + 0x20000) & 0xfffc0000; + ival1 = (iy1 + 0x20000) & 0xfffc0000; + ival2 = (iy2 + 0x20000) & 0xfffc0000; + ival3 = (iy3 + 0x20000) & 0xfffc0000; + ival4 = (iy4 + 0x20000) & 0xfffc0000; + + i0 = ival0 >> 17; + i1 = ival1 >> 17; + i2 = ival2 >> 17; + i3 = ival3 >> 17; + i4 = ival4 >> 17; + + iy0 = iy0 - ival0; + iy1 = iy1 - ival1; + iy2 = iy2 - ival2; + iy3 = iy3 - ival3; + iy4 = iy4 - ival4; + + ty0 = LN2 * (double) exp0 + __TBL_logf[i0]; + ty1 = LN2 * (double) exp1 + __TBL_logf[i1]; + ty2 = LN2 * (double) exp2 + __TBL_logf[i2]; + ty3 = LN2 * (double) exp3 + __TBL_logf[i3]; + ty4 = LN2 * (double) exp4 + __TBL_logf[i4]; + + yy0 = (double) iy0 * __TBL_logf[i0 + 1]; + yy1 = (double) iy1 * __TBL_logf[i1 + 1]; + yy2 = (double) iy2 * __TBL_logf[i2 + 1]; + yy3 = (double) iy3 * __TBL_logf[i3 + 1]; + yy4 = (double) iy4 * __TBL_logf[i4 + 1]; + + yy0 = ((K3 * yy0 + K2) * yy0 + K1) * yy0 * yy0 + yy0; + yy1 = ((K3 * yy1 + K2) * yy1 + K1) * yy1 * yy1 +
yy1; + yy2 = ((K3 * yy2 + K2) * yy2 + K1) * yy2 * yy2 + yy2; + yy3 = ((K3 * yy3 + K2) * yy3 + K1) * yy3 * yy3 + yy3; + yy4 = ((K3 * yy4 + K2) * yy4 + K1) * yy4 * yy4 + yy4; + + y[0] = (float)(yy0 + ty0); + y += stridey; + y[0] = (float)(yy1 + ty1); + y += stridey; + y[0] = (float)(yy2 + ty2); + y += stridey; + y[0] = (float)(yy3 + ty3); + y += stridey; + y[0] = (float)(yy4 + ty4); + continue; + +process1: + PROCESS(0) + continue; + +process2: + PROCESS(0) + PROCESS(1) + continue; + +process3: + PROCESS(0) + PROCESS(1) + PROCESS(2) + continue; + +process4: + PROCESS(0) + PROCESS(1) + PROCESS(2) + PROCESS(3) + } +} diff --git a/usr/src/libm/src/mvec/__vpow.c b/usr/src/libm/src/mvec/__vpow.c new file mode 100644 index 0000000..fa29d31 --- /dev/null +++ b/usr/src/libm/src/mvec/__vpow.c @@ -0,0 +1,1390 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)__vpow.c 1.13 06/01/31 SMI" + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double pow(double x, double y) + * + * Method : + * 1. Special cases: + * for (anything) ** 0 => 1 + * for (anything) ** NaN => QNaN + invalid + * for NaN ** (anything) => QNaN + invalid + * for +-1 ** +-Inf => QNaN + invalid + * for +-(|x| < 1) ** +Inf => +0 + * for +-(|x| < 1) ** -Inf => +Inf + * for +-(|x| > 1) ** +Inf => +Inf + * for +-(|x| > 1) ** -Inf => +0 + * for +Inf ** (negative) => +0 + * for +Inf ** (positive) => +Inf + * for -Inf ** (negative except odd integer) => +0 + * for -Inf ** (negative odd integer) => -0 + * for -Inf ** (positive except odd integer) => +Inf + * for -Inf ** (positive odd integer) => -Inf + * for (negative) ** (non-integer) => QNaN + invalid + * for +0 ** (negative) => +Inf + overflow + * for +0 ** (positive) => +0 + * for -0 ** (negative except odd integer) => +Inf + overflow + * for -0 ** (negative odd integer) => -Inf + overflow + * for -0 ** (positive except odd integer) => +0 + * for -0 ** (positive odd integer) => -0 + * 2. Computes x**y from: + * x**y = 2**(y*log2(x)) = 2**(w/256), where w = 256*log2(|x|)*y. + * 3. Computes w = 256*log2(|x|)*y from + * |x| = m * 2**n => log2(|x|) = n + log2(m). + * Let m = m0 + dm, where m0 = 1 + k / 256, + * k = [0, 255], + * dm = [-1/512, 1/512]. + * Then 256*log2(m) = 256*log2(m0 + dm) = 256*log2(m0) + 256*log2((1+z)/(1-z)), + * where z = (m-m0)/(m+m0), z = [-1/1025, 1/1025]. + * Then + * 256*log2(m0) is looked up in a table of 256*log2(1), 256*log2(1+1/128), + * ..., 256*log2(1+128/128). + * 256*log2((1+z)/(1-z)) is computed using + * approximation: 256*log2((1+z)/(1-z)) = a0 * z + a1 * z**3 + a1 * z**5.
+ * Perform w = 256*log2(|x|)*y = w1 + w2 by simulating multi-precision arithmetic. + * 4. For w >= 262144 + * then for (negative) ** (odd integer) => -Inf + overflow + * else => +Inf + overflow + * For w <= -275200 + * then for (negative) ** (odd integer) => -0 + underflow + * else => +0 + underflow + * 5. Computes 2 ** (w/256) from: + * 2 ** (w/256) = 2**a * 2**(k/256) * 2**(r/256) + * Where: + * a = int ( w ) >> 8; + * k = int ( w ) & 0xFF; + * r = frac ( w ). + * Note that: + * k = 0, 1, ..., 255; + * r = (-1, 1). + * Then: + * 2**(k/256) is looked up in a table of 2**0, 2**(1/256), ... + * 2**(r/256) is computed using approximation: + * 2**(r/256) = ((((b5 * r + b4) * r + b3) * r + b2) * r + b1) * r + b0 + * Multiplication by 2**a is done by adding "a" to + * the biased exponent. + * Perform 2 ** (w/256) by simulating multi-precision arithmetic. + * 6. For (negative) ** (odd integer) => -(2**(w/256)) + * otherwise => 2**(w/256) + * + * Accuracy: + * Max. relative approximation error < 2**(-67.94) for 256*log2((1+z)/(1-z)). + * Max. relative approximation error < 2**(-63.15) for 2**(r/256). + * Maximum error observed: less than 0.761 ulp after 1.300.000.000 + * results.
+ */ + +static void +__vpowx( int n, double * restrict px, double * restrict py, + int stridey, double * restrict pz, int stridez ); + +static const double __TBL_exp2[] = { + /* __TBL_exp2[2*i] = high order bits 2^(i/256), i = [0, 255] */ + /* __TBL_exp2[2*i+1] = least bits 2^(i/256), i = [0, 255] */ + 1.000000000000000000e+00, 0.000000000000000000e+00, 1.002711275050202522e+00, +-3.636615928692263944e-17, 1.005429901112802726e+00, 9.499186535455031757e-17, + 1.008155898118417548e+00,-3.252058756084308061e-17, 1.010889286051700475e+00, +-1.523477860336857718e-17, 1.013630084951489430e+00, 9.283599768183567587e-18, + 1.016378314910953096e+00,-5.772170073199660028e-17, 1.019133996077737914e+00, + 3.601904982259661106e-17, 1.021897148654116627e+00, 5.109225028973443894e-17, + 1.024667792897135721e+00,-7.561607868487779440e-17, 1.027445949118763746e+00, +-4.956074174645370440e-17, 1.030231637686040980e+00, 3.319830041080812944e-17, + 1.033024879021228415e+00, 7.600838874027088489e-18, 1.035825693601957198e+00, +-7.806782391337636167e-17, 1.038634101961378731e+00, 5.996273788852510618e-17, + 1.041450124688316103e+00, 3.784830480287576210e-17, 1.044273782427413755e+00, + 8.551889705537964892e-17, 1.047105095879289793e+00, 7.277077243104314749e-17, + 1.049944085800687210e+00, 5.592937848127002586e-17, 1.052790773004626423e+00, +-9.629482899026935739e-17, 1.055645178360557157e+00, 1.759325738772091599e-18, + 1.058507322794512762e+00,-7.152651856637780738e-17, 1.061377227289262093e+00, +-1.197353708536565756e-17, 1.064254912884464499e+00, 5.078754198611230394e-17, + 1.067140400676823697e+00,-7.899853966841582122e-17, 1.070033711820241873e+00, +-9.937162711288919381e-17, 1.072934867525975555e+00,-3.839668843358823807e-18, + 1.075843889062791048e+00,-1.000271615114413611e-17, 1.078760797757119860e+00, +-6.656660436056592603e-17, 1.081685614993215250e+00,-4.782623902997086266e-17, + 1.084618362213309206e+00, 3.166152845816346116e-17, 1.087559060917769660e+00, + 
5.409349307820290759e-18, 1.090507732665257690e+00,-3.046782079812471147e-17, + 1.093464399072885840e+00, 1.441395814726920934e-17, 1.096429081816376883e+00, +-5.919933484449315824e-17, 1.099401802630221914e+00, 7.170459599701923225e-17, + 1.102382583307840891e+00, 5.266036871570694387e-17, 1.105371445701741173e+00, + 8.239288760500213590e-17, 1.108368411723678726e+00,-8.786813845180526616e-17, + 1.111373503344817548e+00, 5.563945026669697643e-17, 1.114386742595892432e+00, + 1.041027845684557095e-16, 1.117408151567369279e+00,-7.976805902628220456e-17, + 1.120437752409606746e+00,-6.201085906554178750e-17, 1.123475567333019898e+00, +-9.699737588987042995e-17, 1.126521618608241848e+00, 5.165856758795456737e-17, + 1.129575928566288079e+00, 6.712805858726256588e-17, 1.132638519598719196e+00, + 3.237356166738000264e-17, 1.135709414157805464e+00, 5.066599926126155859e-17, + 1.138788634756691565e+00, 8.912812676025407778e-17, 1.141876203969561576e+00, + 4.651091177531412387e-17, 1.144972144431804173e+00, 4.641289892170010657e-17, + 1.148076478840178938e+00, 6.897740236627191770e-17, 1.151189229952982673e+00, + 3.250710218863827212e-17, 1.154310420590215935e+00, 1.041712894627326619e-16, + 1.157440073633751121e+00,-9.123871231134400287e-17, 1.160578212027498779e+00, +-3.261040205417393722e-17, 1.163724858777577476e+00, 3.829204836924093499e-17, + 1.166880036952481658e+00,-8.791879579999169742e-17, 1.170043769683250190e+00, +-1.847744201790004694e-18, 1.173216080163637320e+00,-7.287562586584994479e-17, + 1.176396991650281221e+00, 5.554203254218078963e-17, 1.179586527462875845e+00, + 1.009231277510039044e-16, 1.182784710984341014e+00, 1.542975430079076058e-17, + 1.185991565660993841e+00,-9.209506835293105905e-18, 1.189207115002721027e+00, + 3.982015231465646111e-17, 1.192431382583151178e+00, 4.397551415609721443e-17, + 1.195664392039827328e+00, 4.616603670481481397e-17, 1.198906167074380580e+00, +-9.809193356008423118e-17, 1.202156731452703076e+00, 6.644981499252301245e-17, + 
1.205416109005123859e+00,-3.357272193267529634e-17, 1.208684323626581625e+00, +-4.746725945228984097e-17, 1.211961399276801243e+00,-4.890611077521118357e-17, + 1.215247359980468955e+00,-7.712630692681488131e-17, 1.218542229827408452e+00, +-9.006726958363837675e-17, 1.221846032972757623e+00,-1.061102121140269116e-16, + 1.225158793637145527e+00,-8.903533814269983429e-17, 1.228480536106870025e+00, +-1.898781631302529953e-17, 1.231811284734075862e+00, 7.389382471610050247e-17, + 1.235151063936933413e+00,-1.075524434430784138e-16, 1.238499898199816540e+00, + 2.767702055573967430e-17, 1.241857812073484002e+00, 4.658027591836936791e-17, + 1.245224830175257980e+00,-4.677240449846727500e-17, 1.248600977189204819e+00, +-8.261810999021963550e-17, 1.251986277866316222e+00, 4.834167152469897600e-17, + 1.255380757024691096e+00,-6.711389821296878419e-18, 1.258784439549716527e+00, +-8.421782587730599357e-17, 1.262197350394250739e+00,-3.084464887473846465e-17, + 1.265619514578806282e+00, 4.250577003450868637e-17, 1.269050957191733220e+00, + 2.667932131342186095e-18, 1.272491703389402762e+00,-1.057791626721242103e-17, + 1.275941778396392001e+00, 9.915430244214290330e-17, 1.279401207505669325e+00, +-9.759095008356062210e-17, 1.282870016078778264e+00, 1.713594918243560968e-17, + 1.286348229546025568e+00,-3.416955706936181976e-17, 1.289835873406665723e+00, + 8.949257530897591722e-17, 1.293332973229089466e+00,-2.974590443132751646e-17, + 1.296839554651009641e+00, 2.538250279488831496e-17, 1.300355643379650594e+00, + 5.678728102802217422e-17, 1.303881265191935812e+00, 8.647675598267871179e-17, + 1.307416445934677318e+00,-7.336645652878868892e-17, 1.310961211524764414e+00, +-7.181536135519453857e-17, 1.314515587949354636e+00, 2.267543315104585645e-17, + 1.318079601266064049e+00,-5.457955827149153502e-17, 1.321653277603157539e+00, +-2.480638245913021742e-17, 1.325236643159741323e+00,-2.858731210038861373e-17, + 1.328829724205954355e+00, 4.089086223910160052e-17, 1.332432547083161500e+00, 
+-5.101586630916743959e-17, 1.336045138204145832e+00,-5.891866356388801353e-17, + 1.339667524053302916e+00, 8.927282594831731984e-17, 1.343299731186835322e+00, +-5.802580890201437751e-17, 1.346941786232945804e+00, 3.224065101254679169e-17, + 1.350593715892034474e+00,-8.287110381462416533e-17, 1.354255546936892651e+00, + 7.700948379802989462e-17, 1.357927306212901142e+00,-9.529635744825188867e-17, + 1.361609020638224754e+00, 1.533787661270668046e-18, 1.365300717204011915e+00, +-1.000536312597476517e-16, 1.369002422974590516e+00, 9.593797919118848773e-17, + 1.372714165087668414e+00,-4.495960595234841262e-17, 1.376435970754530169e+00, +-6.898588935871801042e-17, 1.380167867260237990e+00, 1.051031457996998395e-16, + 1.383909881963832023e+00,-6.770511658794786287e-17, 1.387662042298529075e+00, + 8.422984274875415318e-17, 1.391424375771926236e+00,-4.906174865288989325e-17, + 1.395196909966200272e+00,-9.329336224225496552e-17, 1.398979672538311236e+00, +-9.614213209051323072e-17, 1.402772691220204759e+00,-5.295783249407989223e-17, + 1.406575993819015435e+00, 7.034914812136422188e-18, 1.410389608217270663e+00, + 4.166548728435062259e-17, 1.414213562373095145e+00,-9.667293313452913451e-17, + 1.418047884320415175e+00, 2.274438542185529452e-17, 1.421892602169165576e+00, +-1.607782891589024413e-17, 1.425747744105494208e+00, 9.880690758500607284e-17, + 1.429613338391970023e+00,-1.203164248905365518e-17, 1.433489413367788901e+00, +-5.802454243926826103e-17, 1.437375997448982368e+00,-4.204034016467556612e-17, + 1.441273119128625657e+00, 5.602503650878985675e-18, 1.445180806977046650e+00, +-3.023758134993987319e-17, 1.449099089642035043e+00,-6.259405000819309254e-17, + 1.453027995849052623e+00,-5.779948609396106102e-17, 1.456967554401443765e+00, + 5.648679453876998140e-17, 1.460917794180647045e+00,-5.600377186075215800e-17, + 1.464878744146405731e+00, 9.530767543587157319e-17, 1.468850433336981842e+00, + 8.465882756533627608e-17, 1.472832890869367528e+00, 6.691774081940589372e-17, 
+ 1.476826145939499346e+00,-3.483994556892795796e-17, 1.480830227822471867e+00, +-9.686952102630618578e-17, 1.484845165872752393e+00, 1.078008676440748076e-16, + 1.488870989524397004e+00, 6.155367157742871330e-17, 1.492907728291264835e+00, + 1.419292015428403577e-17, 1.496955411767235455e+00,-2.861663253899158211e-17, + 1.501014069626425584e+00,-6.413767275790235039e-17, 1.505083731623406473e+00, + 7.074710613582846364e-17, 1.509164427593422841e+00,-1.016455327754295039e-16, + 1.513256187452609813e+00, 8.884497851338712091e-17, 1.517359041198214742e+00, +-4.308699472043340801e-17, 1.521473018908814590e+00,-5.996387675945683420e-18, + 1.525598150744538417e+00,-1.102494171234256094e-16, 1.529734466947286986e+00, + 3.785792115157219653e-17, 1.533881997840955913e+00, 8.875226844438446141e-17, + 1.538040773831656827e+00, 1.017467235116135806e-16, 1.542210825407940744e+00, + 7.949834809697620856e-17, 1.546392183141021448e+00, 1.068396000565721980e-16, + 1.550584877684999974e+00,-1.460070659068938518e-17, 1.554788939777088652e+00, +-8.003161350116035641e-17, 1.559004400237836929e+00, 3.781207053357527502e-17, + 1.563231289971357629e+00, 7.484777645590734389e-17, 1.567469639965552997e+00, +-1.035206176884972199e-16, 1.571719481292341403e+00,-3.342984004687200069e-17, + 1.575980845107886497e+00,-1.013691647127830398e-17, 1.580253762652824578e+00, +-5.163402929554468062e-17, 1.584538265252493749e+00,-1.933771703458570293e-17, + 1.588834384317163950e+00,-5.994950118824479401e-18, 1.593142151342266999e+00, +-1.009440654231196372e-16, 1.597461597908627073e+00, 2.486839279622099613e-17, + 1.601792755682693414e+00,-6.054917453527784343e-17, 1.606135656416771029e+00, +-1.035454528805999526e-16, 1.610490331949254283e+00, 2.470719256979788785e-17, + 1.614856814204860713e+00,-7.316663399125123263e-17, 1.619235135194863728e+00, + 2.094133415422909241e-17, 1.623625327017328868e+00,-3.584512851414474710e-17, + 1.628027421857347834e+00,-6.712955084707084086e-17, 1.632441451987274972e+00, 
+ 9.852819230429992964e-17, 1.636867449766964411e+00, 7.698325071319875575e-17, + 1.641305447644006321e+00,-9.247568737640705508e-17, 1.645755478153964946e+00, +-1.012567991367477260e-16, 1.650217573920617742e+00, 9.133279588729904190e-18, + 1.654691767656194301e+00, 9.643294303196028661e-17, 1.659178092161616158e+00, +-7.275545550823050654e-17, 1.663676580326736376e+00, 5.890992696713099670e-17, + 1.668187265130582464e+00, 4.269178019570615091e-17, 1.672710179641596628e+00, +-5.476715964599563076e-17, 1.677245357017878469e+00, 8.303949509950732785e-17, + 1.681792830507429004e+00, 8.199010020581496520e-17, 1.686352633448393368e+00, +-7.181463278358010675e-17, 1.690924799269305279e+00,-9.669671474394880166e-17, + 1.695509361489332623e+00, 7.238416872845166641e-17, 1.700106353718523478e+00, +-8.023719370397700246e-18, 1.704715809658051251e+00,-2.728883284797281563e-17, + 1.709337763100462926e+00,-9.868779456632931076e-17, 1.713972247929925974e+00, + 6.473975107753367064e-17, 1.718619298122477934e+00,-1.851380418263110988e-17, + 1.723278947746273992e+00,-9.522123800393799963e-17, 1.727951230961837670e+00, +-1.075098186120464245e-16, 1.732636182022311067e+00,-1.698051074315415494e-18, + 1.737333835273706217e+00, 3.164389299292956947e-17, 1.742044225155156445e+00, +-1.525959118950788792e-18, 1.746767386199169048e+00,-1.075229048350751450e-16, + 1.751503353031878207e+00,-5.124450420596724659e-17, 1.756252160373299454e+00, + 2.960140695448873307e-17, 1.761013843037583904e+00,-7.943253125039227711e-17, + 1.765788435933272726e+00, 9.461315018083267867e-17, 1.770575974063554714e+00, + 5.961794510040555848e-17, 1.775376492526521188e+00, 6.429731796556572034e-17, + 1.780190026515424462e+00,-5.284627289091617365e-17, 1.785016611318934965e+00, + 1.533040012103131382e-17, 1.789856282321401038e+00,-4.154354660683350387e-17, + 1.794709075003107168e+00, 1.822745842791208677e-17, 1.799575024940535117e+00, +-2.526889233358897644e-17, 1.804454167806623932e+00,-5.177222408793317883e-17, 
+ 1.809346539371031959e+00,-9.032641402450029682e-17, 1.814252175500398856e+00, +-9.969531538920348820e-17, 1.819171112158608494e+00, 7.402676901145838890e-17, + 1.824103385407053413e+00,-1.015962786227708306e-16, 1.829049031404897274e+00, + 6.889192908835695637e-17, 1.834008086409342431e+00, 3.283107224245627204e-17, + 1.838980586775893711e+00, 6.918969740272511942e-18, 1.843966568958625984e+00, +-5.939742026949964550e-17, 1.848966069510450838e+00, 9.027580446261089288e-17, + 1.853979125083385471e+00, 9.761887490727593538e-17, 1.859005772428820480e+00, +-9.528705461989940687e-17, 1.864046048397788979e+00, 6.540912680620571711e-17, + 1.869099989941238604e+00,-9.938505214255067083e-17, 1.874167634110299963e+00, +-6.122763413004142562e-17, 1.879249018056560194e+00,-1.622631555783584478e-17, + 1.884344179032334532e+00,-8.226593125533710906e-17, 1.889453154390939194e+00, +-9.005168285059126718e-17, 1.894575981586965607e+00, 3.403403535216529671e-17, + 1.899712698176555303e+00,-3.859739769378514323e-17, 1.904863341817674138e+00, + 6.533857514718278629e-17, 1.910027950270389852e+00,-5.909688006744060237e-17, + 1.915206561397147400e+00,-1.061994605619596264e-16, 1.920399213163047403e+00, + 7.116681540630314186e-17, 1.925605943636125028e+00,-9.914963769693740927e-17, + 1.930826790987627106e+00, 6.167149706169109553e-17, 1.936061793492294347e+00, + 1.033238596067632574e-16, 1.941310989528640452e+00,-6.638029891621487990e-17, + 1.946574417579233218e+00, 6.811022349533877184e-17, 1.951852116230978318e+00, +-2.199016969979351086e-17, 1.957144124175400179e+00, 8.960767791036667768e-17, + 1.962450480208927317e+00, 1.097684400091354695e-16, 1.967771223233175881e+00, +-1.031492801153113151e-16, 1.973106392255234320e+00,-7.451617863956037486e-18, + 1.978456026387950928e+00, 4.038875310927816657e-17, 1.983820164850219392e+00, +-2.203454412391062657e-17, 1.989198846967266343e+00, 8.205132638369199416e-18, + 1.994592112170940235e+00, 1.790971035200264509e-17 +}; + +static const double 
__TBL_log2[] = { + /* __TBL_log2[2*i] = high order rounded 32 bits log2(1+i/256)*256, i = [0, 255] */ + /* __TBL_log2[2*i+1] = low order least bits log2(1+i/256)*256, i = [0, 255] */ + 0.000000000000000000e+00, 0.000000000000000000e+00, 1.439884185791015625e+00, + 4.078417797464839152e-07, 2.874177932739257812e+00,-5.443862030060025621e-07, + 4.302921295166015625e+00, 3.525917800357419922e-07, 5.726161956787109375e+00, +-1.821502755258614180e-06, 7.143936157226562500e+00,-1.035336134691423741e-06, + 8.556289672851562500e+00,-1.279264291071495652e-06, 9.963264465332031250e+00, +-3.206502629414843101e-06, 1.136489105224609375e+01, 3.503517986289194222e-06, + 1.276123046875000000e+01,-1.809406249049319022e-06, 1.415230560302734375e+01, +-2.114722805833714926e-06, 1.553816223144531250e+01,-3.719431504776986979e-06, + 1.691883850097656250e+01,-5.743786819670105240e-06, 1.829435729980468750e+01, + 7.514691093524705578e-06, 1.966479492187500000e+01,-2.076862291588726520e-06, + 2.103015136718750000e+01, 3.219403619538604258e-06, 2.239048767089843750e+01, +-3.108115489869591032e-07, 2.374583435058593750e+01,-6.275103710481114264e-06, + 2.509620666503906250e+01, 6.572855776743687178e-06, 2.644168090820312500e+01, +-1.954725505303359537e-06, 2.778225708007812500e+01, 3.855133152759458770e-06, + 2.911799621582031250e+01,-1.707228100041815487e-06, 3.044891357421875000e+01, + 1.042999152333371737e-06, 3.177505493164062500e+01, 8.966313933586820042e-07, + 3.309646606445312500e+01,-1.372654171244005427e-05, 3.441314697265625000e+01, +-8.996099168734074844e-06, 3.572515869140625000e+01,-1.247731510027211536e-05, + 3.703250122070312500e+01, 8.944258749129049106e-06, 3.833526611328125000e+01, +-3.520082642279872716e-06, 3.963342285156250000e+01, 1.306577612991810031e-05, + 4.092706298828125000e+01,-7.730135593513790229e-07, 4.221618652343750000e+01, +-1.329446142304436745e-05, 4.350079345703125000e+01, 6.912200714904314733e-06, + 4.478097534179687500e+01,-6.216230979739182064e-07, 
4.605673217773437500e+01, +-5.133911151040936670e-06, 4.732809448242187500e+01,-6.697901206512330627e-06, + 4.859509277343750000e+01,-5.700153089154811841e-06, 4.985775756835937500e+01, +-2.836263919120346801e-06, 5.111611938476562500e+01, 8.933436604624454391e-07, + 5.237020874023437500e+01, 4.187561748309498307e-06, 5.362005615234375000e+01, + 5.448667394155597532e-06, 5.486569213867187500e+01, 2.786324169943508531e-06, + 5.610714721679687500e+01,-5.978483512667373796e-06, 5.734442138671875000e+01, + 7.207996138368885843e-06, 5.857757568359375000e+01, 9.083351754561760127e-06, + 5.980664062500000000e+01,-3.374516276140515786e-06, 6.103161621093750000e+01, +-2.943717299925017200e-06, 6.225253295898437500e+01, 6.810091060168101732e-06, + 6.346945190429687500e+01,-8.462738988588859704e-06, 6.468237304687500000e+01, +-2.233961135216831566e-05, 6.589129638671875000e+01,-8.657399896582645111e-06, + 6.709625244140625000e+01, 2.797335967336006296e-05, 6.829736328125000000e+01, +-8.863355250907819214e-06, 6.949450683593750000e+01, 2.830758238800374038e-05, + 7.068786621093750000e+01,-1.846073268549083018e-05, 7.187731933593750000e+01, +-2.182503249464459606e-06, 7.306298828125000000e+01,-2.025251442448625989e-05, + 7.424481201171875000e+01, 1.280303154355201204e-05, 7.542291259765625000e+01, +-8.813997363590295654e-07, 7.659722900390625000e+01, 2.370323712746426047e-05, + 7.776788330078125000e+01,-1.176744290134661421e-05, 7.893481445312500000e+01, +-2.273743674288609119e-05, 8.009802246093750000e+01, 1.409185747234803696e-05, + 8.125762939453125000e+01,-2.707246895087010889e-07, 8.241357421875000000e+01, + 1.807241476105480180e-05, 8.356597900390625000e+01,-3.030059664889450720e-05, + 8.471472167968750000e+01,-8.823455531875539245e-07, 8.585992431640625000e+01, + 6.485238524924182146e-06, 8.700158691406250000e+01, 1.382440142980862947e-05, + 8.813977050781250000e+01,-1.808136338482881111e-05, 8.927441406250000000e+01, +-6.579344146543672011e-06, 9.040557861328125000e+01, 
8.714227880222726313e-06, + 9.153332519531250000e+01,-1.201308307454951138e-05, 9.265759277343750000e+01, + 1.330278431878087205e-05, 9.377850341796875000e+01,-1.657103990890600482e-05, + 9.489599609375000000e+01,-1.995110226941163424e-05, 9.601007080078125000e+01, + 2.362403148762806632e-05, 9.712084960937500000e+01, 1.236086810905991142e-05, + 9.822827148437500000e+01, 2.738898236946465744e-05, 9.933239746093750000e+01, + 2.758741700388469572e-05, 1.004332885742187500e+02,-2.834285611604269955e-05, + 1.015308227539062500e+02, 1.228649517068771375e-06, 1.026251220703125000e+02, + 1.361792668612316888e-05, 1.037161865234375000e+02, 2.803946653578170389e-05, + 1.048040771484375000e+02, 2.502814149567842806e-06, 1.058887329101562500e+02, + 1.692003190104140317e-05, 1.069702148437500000e+02, 2.896703985131545672e-05, + 1.080485839843750000e+02,-3.844135045484567362e-06, 1.091237792968750000e+02, +-2.093137927645659717e-06, 1.101958618164062500e+02,-8.590030211185738579e-06, + 1.112648315429687500e+02,-5.267967244023324300e-06, 1.123306884765625000e+02, + 2.578347229232600646e-05, 1.133935546875000000e+02,-1.975022555464358195e-05, + 1.144533081054687500e+02,-2.195797778964440179e-06, 1.155100708007812500e+02, +-2.617170507638525077e-05, 1.165637817382812500e+02,-1.334031370958194516e-05, + 1.176145019531250000e+02,-7.581976902412963145e-06, 1.186622314453125000e+02, + 8.112109654298731037e-06, 1.197070312500000000e+02,-1.042875265529314613e-05, + 1.207488403320312500e+02, 1.455233211877492951e-05, 1.217877807617187500e+02, +-2.243432092472914265e-05, 1.228237304687500000e+02, 1.712269952247034061e-05, + 1.238568115234375000e+02, 2.745621214456745937e-05, 1.248870239257812500e+02, + 2.473291989440979066e-05, 1.259143676757812500e+02, 2.498461547595911484e-05, + 1.269389038085937500e+02,-1.692547797717771941e-05, 1.279605712890625000e+02, +-2.419576192770340594e-05, 1.289793701171875000e+02, 1.880972467762623192e-05, + 1.299954833984375000e+02,-5.550757125543327248e-05, 
1.310086669921875000e+02, + 1.237226167189998996e-05, 1.320191650390625000e+02,-6.438347630770959254e-06, + 1.330268554687500000e+02, 2.525911246920619613e-05, 1.340318603515625000e+02, + 3.990327953073019333e-07, 1.350340576171875000e+02, 5.593427389035480335e-05, + 1.360336914062500000e+02,-3.751407409478960320e-05, 1.370305175781250000e+02, +-2.116319935859897563e-05, 1.380246582031250000e+02,-2.559468964093475045e-06, + 1.390161132812500000e+02, 3.270409087092109593e-05, 1.400050048828125000e+02, +-2.315157751389992129e-05, 1.409912109375000000e+02,-3.387938973438343638e-05, + 1.419747314453125000e+02, 1.458416266727572812e-05, 1.429556884765625000e+02, + 1.412021555596584681e-05, 1.439340820312500000e+02,-2.143065540113838312e-05, + 1.449097900390625000e+02, 4.373273697503468317e-05, 1.458830566406250000e+02, +-2.090790235253405790e-05, 1.468536376953125000e+02, 4.230297794089183646e-05, + 1.478217773437500000e+02, 2.633401664450247309e-06, 1.487873535156250000e+02, +-4.542835986281740771e-06, 1.497503662109375000e+02, 3.397367848245215483e-05, + 1.507109375000000000e+02, 9.209059510146982590e-06, 1.516689453125000000e+02, + 5.622812858742714859e-05, 1.526246337890625000e+02,-5.621609346274134244e-05, + 1.535776367187500000e+02, 5.088115468603551539e-05, 1.545283203125000000e+02, + 2.400396513473623342e-05, 1.554765625000000000e+02,-2.180099663431456814e-06, + 1.564223632812500000e+02,-1.517056781617965675e-05, 1.573657226562500000e+02, +-2.562756696989711716e-06, 1.583066406250000000e+02, 4.795320325388065854e-05, + 1.592452392578125000e+02, 2.652301982429665372e-05, 1.601815185546875000e+02, +-5.473018439029181240e-05, 1.611152343750000000e+02, 6.036538006249134820e-05, + 1.620467529296875000e+02, 1.753890969321481711e-05, 1.629759521484375000e+02, +-4.928926339732922490e-05, 1.639027099609375000e+02,-6.288016979631557560e-06, + 1.648271484375000000e+02, 3.614482952210960361e-05, 1.657493896484375000e+02, +-3.247597790375142114e-05, 1.666691894531250000e+02, 
4.348868072528205213e-05, + 1.675867919921875000e+02, 3.131097214651595330e-05, 1.685021972656250000e+02, +-5.768116554728405733e-05, 1.694151611328125000e+02, 3.189681619086343127e-05, + 1.703260498046875000e+02,-5.500528238559059116e-05, 1.712344970703125000e+02, + 5.890184674174263693e-05, 1.721408691406250000e+02, 1.840407787096519837e-05, + 1.730450439453125000e+02,-4.351222480150346831e-05, 1.739468994140625000e+02, + 6.059331686505290421e-06, 1.748465576171875000e+02, 5.580532332169584454e-05, + 1.757441406250000000e+02,-5.666096094448416139e-06, 1.766395263671875000e+02, +-4.568380948624016041e-05, 1.775327148437500000e+02,-5.372392273978838048e-05, + 1.784237060546875000e+02,-1.933871000131713187e-05, 1.793126220703125000e+02, +-5.422619290693841471e-05, 1.801993408203125000e+02,-2.601847861521447132e-05, + 1.810839843750000000e+02,-4.656229401600182454e-05, 1.819664306640625000e+02, + 1.636297150881445295e-05, 1.828468017578125000e+02, 5.076471489501210225e-05, + 1.837252197265625000e+02,-5.542156510357154555e-05, 1.846014404296875000e+02, +-4.812064810565531807e-05, 1.854755859375000000e+02,-3.953879286781995545e-05, + 1.863476562500000000e+02,-1.988182101010412125e-05, 1.872176513671875000e+02, + 2.057522891062264376e-05, 1.880856933593750000e+02,-3.058156040982771239e-05, + 1.889516601562500000e+02,-4.169340446171797184e-05, 1.898155517578125000e+02, +-3.239118881346662872e-06, 1.906774902343750000e+02,-2.783449132689922134e-05, + 1.915373535156250000e+02, 1.597927683340914293e-05, 1.923952636718750000e+02, + 1.545493412281261116e-05, 1.932512207031250000e+02,-2.014927705264352875e-05, + 1.941051025390625000e+02, 4.043097907577914080e-05, 1.949571533203125000e+02, +-3.781452579504048975e-05, 1.958071289062500000e+02,-1.677810793588779092e-06, + 1.966551513671875000e+02, 3.577570564777057149e-05, 1.975013427734375000e+02, +-3.858128431828155999e-05, 1.983454589843750000e+02, 2.827352539329734468e-05, + 1.991877441406250000e+02, 1.020426695132691908e-06, 
2.000280761718750000e+02, + 1.049043785864183866e-05, 2.008665771484375000e+02,-5.668571223208539910e-05, + 2.017030029296875000e+02, 5.227451898157462205e-05, 2.025377197265625000e+02, +-2.025647781341857894e-05, 2.033704833984375000e+02,-2.161281037339224341e-05, + 2.042012939453125000e+02, 5.667325008632565576e-05, 2.050303955078125000e+02, +-2.112821448834358837e-05, 2.058575439453125000e+02,-2.522383155215216853e-06, + 2.066828613281250000e+02,-1.281378348494855858e-06, 2.075063476562500000e+02, +-9.162516382743561384e-06, 2.083280029296875000e+02,-1.797812601298608335e-05, + 2.091478271484375000e+02,-1.959505997696247453e-05, 2.099658203125000000e+02, +-5.934211946670452627e-06, 2.107819824218750000e+02, 3.102996118252714271e-05, + 2.115964355468750000e+02,-2.280040076415178584e-05, 2.124090576171875000e+02, +-3.743515649437846729e-05, 2.132198486328125000e+02,-5.006638631136701490e-06, + 2.140289306640625000e+02,-3.976919665668718942e-05, 2.148361816406250000e+02, +-1.188780735169185652e-05, 2.156417236328125000e+02,-3.571887766413048520e-05, + 2.164454345703125000e+02, 1.847144755636210490e-05, 2.172474365234375000e+02, + 3.622647302213163157e-05, 2.180477294921875000e+02, 2.511032323154433900e-05, + 2.188463134765625000e+02,-7.361941985081681848e-06, 2.196431884765625000e+02, +-5.372390403709574017e-05, 2.204382324218750000e+02, 1.551294579696132803e-05, + 2.212316894531250000e+02,-3.642162925932327343e-05, 2.220233154296875000e+02, + 4.193598594979618241e-05, 2.228133544921875000e+02, 1.372116405796589833e-05, + 2.236016845703125000e+02, 8.233623894335039537e-06, 2.243883056640625000e+02, + 3.265657742833052654e-05, 2.251733398437500000e+02,-2.794287750390687326e-05, + 2.259566650390625000e+02,-4.440243113774530265e-05, 2.267382812500000000e+02, +-9.675114830058622014e-06, 2.275183105468750000e+02,-3.882892066889445600e-05, + 2.282966308593750000e+02,-2.835487591479255673e-06, 2.290733642578125000e+02, +-1.685097895998181422e-05, 2.298483886718750000e+02, 
4.806553595480019518e-05, + 2.306219482421875000e+02,-4.539911586906436716e-05, 2.313937988281250000e+02, +-4.631966285757620260e-05, 2.321639404296875000e+02, 5.204609324350696002e-05, + 2.329326171875000000e+02, 1.225763073721718197e-05, 2.336997070312500000e+02, +-3.695637982554016382e-05, 2.344650878906250000e+02, 3.309133292926460016e-05, + 2.352290039062500000e+02,-1.516395380482592629e-05, 2.359913330078125000e+02, +-5.311674305290968619e-05, 2.367519531250000000e+02, 4.779807991226078768e-05, + 2.375111083984375000e+02, 4.989464209345647548e-05, 2.382687988281250000e+02, +-4.041202611322311408e-05, 2.390247802734375000e+02, 2.739433433590848536e-05, + 2.397792968750000000e+02, 1.550965806406508966e-05, 2.405322265625000000e+02, + 5.230206142425020257e-05, 2.412836914062500000e+02, 2.196059540790264514e-05, + 2.420335693359375000e+02, 5.277680785141730338e-05, 2.427819824218750000e+02, + 2.886380247947272558e-05, 2.435289306640625000e+02,-4.363251767645384661e-05, + 2.442742919921875000e+02,-3.653314744654563199e-05, 2.450180664062500000e+02, + 5.623369525922526825e-05, 2.457604980468750000e+02,-3.437446279919778004e-06, + 2.465013427734375000e+02, 3.459290119679066472e-05, 2.472407226562500000e+02, + 5.421724428316440202e-05, 2.479787597656250000e+02,-6.070765164808318435e-05, + 2.487152099609375000e+02,-6.014953987030989107e-05, 2.494501953125000000e+02, +-6.032228506450037554e-05, 2.501837158203125000e+02,-5.540433388359054134e-05, + 2.509157714843750000e+02,-3.960875078622925214e-05, 2.516463623046875000e+02, +-7.182944107105660894e-06, 2.523754882812500000e+02, 4.759160516857532540e-05, + 2.531032714843750000e+02, 8.329299458439681639e-06, 2.538295898437500000e+02, + 2.751627995643241118e-06, 2.545544433593750000e+02, 3.647649263201999678e-05, + 2.552779541015625000e+02,-6.981531437649667064e-06 +}; + +static const unsigned long long LCONST[] = { +0x3c90000000000000ULL, /* 2**(-54) = 5.551115123125782702e-17 */ +0x3ff0000000000000ULL, /* DONE = 1.0 */ 
+0x4330000000000000ULL, /* DVAIN52 = 2**52 = 4.503599627370496e15 */ +0xffffffff00000000ULL, /* 0xffffffff00000000 */ +0x000fffffffffffffULL, /* 0x000fffffffffffff */ +0x0000080000000000ULL, /* 0x0000080000000000 */ +0xfffff00000000000ULL, /* 0xfffff00000000000 */ +0x0000000000000000ULL, /* DZERO = 0.0 */ +0x4062776d8ce329bdULL, /* KA5 = 5.77078604860893737986e-01*256 */ +0x406ec709dc39fc99ULL, /* KA3 = 9.61796693925765549423e-01*256 */ +0x3f6d94ae0bf85de6ULL, /* KA1_LO = 1.41052154268147309568e-05*256 */ +0x4087154000000000ULL, /* KA1_HI = 2.8853759765625e+00*256 */ +0x40871547652b82feULL, /* KA1 = 2.885390081777926774e+00*256 */ +0x4110000000000000ULL, /* HTHRESH = 262144.0 */ +0xc110cc0000000000ULL, /* LTHRESH = -275200.0 */ +0x3cd5d52893bc7fecULL, /* KB5 = 1.21195555854068860923e-15 */ +0x3d83b2abc07c93d0ULL, /* KB4 = 2.23939573811855104311e-12 */ +0x3e2c6b08d71f5d1eULL, /* KB3 = 3.30830268126604677436e-09 */ +0x3ecebfbdff82c4edULL, /* KB2 = 3.66556559691003767877e-06 */ +0x3f662e42fefa39efULL, /* KB1 = 2.70760617406228636578e-03 */ +0x01a56e1fc2f8f359ULL, /* _TINY = 1.0e-300 */ +0x7e37e43c8800759cULL /* _HUGE = 1.0e+300 */ +}; + +#define SCALE_ARR ((double*)LCONST + 1) +#define _TINY ((double*)LCONST)[20] /* 1.0e-300 */ +#define _HUGE ((double*)LCONST)[21] /* 1.0e+300 */ + +#define RET_SC(I) \ + px += stridex; \ + py += stridey; \ + pz += stridez; \ + if ( --n <= 0 ) \ + break; \ + goto start##I; + +#define RETURN(I, ret) \ +{ \ + pz[0] = (ret); \ + RET_SC(I) \ +} + +#define PREP(I) \ +hx = HI(px); \ +lx = LO(px); \ +hy = HI(py); \ +ly = LO(py); \ +sx = hx >> 31; \ +sy = hy >> 31; \ +hx &= 0x7fffffff; \ +hy &= 0x7fffffff; \ +ull_y0 = *(unsigned long long*)px; \ + \ +if ( hy < 0x3bf00000 ) /* |Y| < 2^(-64) */ \ +{ \ + y0 = *px; \ + if ( (hy | ly) == 0 ) /* pow(X,0) */ \ + RETURN (I, DONE) \ + if ( hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) ) /* |X| = Nan */ \ + *pz = y0 + y0; \ + else if ( (hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0) ) /* X = 0 or Inf 
*/ \ + { \ + HI(pz) = hx; \ + LO(pz) = lx; \ + if ( sy ) \ + *pz = DONE / *pz; \ + } \ + else \ + *pz = (sx) ? DZERO / DZERO : DONE; \ + RET_SC(I) \ +} \ +yisint##I = 0; /* Y - non-integer */ \ +exp = hy >> 20; /* Y exponent */ \ +ull_y0 &= LMMANT; \ +ull_x##I = ull_y0 | LDONE; \ +x##I = *(double*)&ull_x##I; \ +ull_ax##I = (ull_x##I + LMROUND) & LMHI20; \ +ax##I = *(double*)&ull_ax##I; \ +if ( hx >= 0x7ff00000 || exp >= 0x43e ) /* X=Inf,Nan or |Y|>2^63,Inf,Nan */ \ +{ \ + y0 = *px; \ + if ( hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) || \ + hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0) ) /* |X| or |Y| = Nan */ \ + RETURN (I, y0 + *py) \ + if ( hy == 0x7ff00000 && (ly == 0) ) /* |Y| = Inf */ \ + { \ + if ( hx == 0x3ff00000 && (lx == 0) ) /* +-1 ** +-Inf */ \ + *pz = *py - *py; \ + else if ( (hx < 0x3ff00000) != sy ) \ + *pz = DZERO; \ + else \ + { \ + HI(pz) = hy; \ + LO(pz) = ly; \ + } \ + RET_SC(I) \ + } \ + if ( exp < 0x43e ) /* |Y| < 2^63 */ \ + { \ + if ( sx ) /* X = -Inf */ \ + { \ + if ( exp >= 0x434 ) /* |Y| >= 2^53 */ \ + yisint##I = 2; /* Y - even */ \ + else \ + { \ + if ( exp >= 0x3ff ) /* |Y| >= 1 */ \ + { \ + if ( exp > (20 + 0x3ff) ) \ + { \ + i0 = ly >> (52 - (exp - 0x3ff)); \ + if ( (i0 << (52 - (exp - 0x3ff))) == ly ) \ + yisint##I = 2 - (i0 & 1); \ + } \ + else if ( ly == 0 ) \ + { \ + i0 = hy >> (20 - (exp - 0x3ff)); \ + if ( (i0 << (20 - (exp - 0x3ff))) == hy ) \ + yisint##I = 2 - (i0 & 1); \ + } \ + } \ + } \ + } \ + if ( sy ) \ + hx = lx = 0; \ + hx += (unsigned) yisint##I << 31; /* odd Y (1) sets the sign bit, even Y (2) wraps to 0; unsigned shift avoids signed-overflow UB */ \ + HI(pz) = hx; \ + LO(pz) = lx; \ + RET_SC(I) \ + } \ + else /* |Y| >= 2^63 */ \ + { \ + /* |X| = 0, 1, Inf */ \ + if ( lx == 0 && (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000) ) \ + { \ + HI(pz) = hx; \ + LO(pz) = lx; \ + if ( sy ) \ + *pz = DONE / *pz; \ + } \ + else \ + { \ + y0 = ( (hx < 0x3ff00000) != sy ) ? 
_TINY : _HUGE; \ + *pz = y0 * y0; \ + } \ + RET_SC(I) \ + } \ +} \ +if ( sx || (hx | lx) == 0 ) /* X <= 0 */ \ +{ \ + if ( exp >= 0x434 ) /* |Y| >= 2^53 */ \ + yisint##I = 2; /* Y - even */ \ + else \ + { \ + if ( exp >= 0x3ff ) /* |Y| >= 1 */ \ + { \ + if ( exp > (20 + 0x3ff) ) \ + { \ + i0 = ly >> (52 - (exp - 0x3ff)); \ + if ( (i0 << (52 - (exp - 0x3ff))) == ly ) \ + yisint##I = 2 - (i0 & 1); \ + } \ + else if ( ly == 0 ) \ + { \ + i0 = hy >> (20 - (exp - 0x3ff)); \ + if ( (i0 << (20 - (exp - 0x3ff))) == hy ) \ + yisint##I = 2 - (i0 & 1); \ + } \ + } \ + } \ + if ( (hx | lx) == 0 ) /* X == 0 */ \ + { \ + y0 = DZERO; \ + if ( sy ) \ + y0 = DONE / y0; \ + if ( sx & yisint##I ) \ + y0 = -y0; \ + RETURN (I, y0) \ + } \ + if ( yisint##I == 0 ) /* pow(neg,non-integer) */ \ + RETURN (I, DZERO / DZERO) /* NaN */ \ +} \ +exp = (hx >> 20); \ +exp##I = exp - 2046; \ +py##I = py; \ +pz##I = pz; \ +ux##I = x##I + ax##I; \ +if ( !exp ) \ +{ \ + ax##I = (double) ull_y0; \ + ull_ax##I = *(unsigned long long*)&ax##I; \ + ull_x##I = ull_ax##I & LMMANT | LDONE; \ + x##I = *(double*)&ull_x##I; \ + exp##I = HI(&ull_ax##I); /* high word of ax, read via HI() like hx##I below instead of a hard-coded word index */ \ + exp##I = (exp##I >> 20) - (2046 + 1023 + 51); \ + ull_ax##I = ull_x##I + LMROUND & LMHI20; \ + ax##I = *(double*)&ull_ax##I; \ + ux##I = x##I + ax##I; \ +} \ +ull_x##I = *(unsigned long long *)&ux##I; \ +hx##I = HI(&ull_ax##I); \ +yd##I = DONE / ux##I; + +void +__vpow( int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez ) +{ + double *py0, *py1, *py2; + double *pz0, *pz1, *pz2; + double y0, yd0, u0, s0, s_l0, m_h0; + double y1, yd1, u1, s1, s_l1, m_h1; + double y2, yd2, u2, s2, s_l2, m_h2; + double ax0, x0, s_h0, ux0; + double ax1, x1, s_h1, ux1; + double ax2, x2, s_h2, ux2; + int eflag0, gflag0, ind0, i0; + int eflag1, gflag1, ind1, i1; + int eflag2, gflag2, ind2, i2; + int hx0, yisint0, exp0; + int hx1, yisint1, exp1; + int hx2, yisint2, exp2; + int exp, i = 0; + unsigned hx, lx, 
sx, hy, ly, sy; + unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0, ull_ax1, ull_ax2; + unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */ + unsigned long long LMMANT = ((unsigned long long*)LCONST)[4]; /* 0x000fffffffffffff */ + unsigned long long LMROUND = ((unsigned long long*)LCONST)[5]; /* 0x0000080000000000 */ + unsigned long long LMHI20 = ((unsigned long long*)LCONST)[6]; /* 0xfffff00000000000 */ + double DONE = ((double*)LCONST)[1]; /* 1.0 */ + double DZERO = ((double*)LCONST)[7]; /* 0.0 */ + double KA5 = ((double*)LCONST)[8]; /* 5.77078604860893737986e-01*256 */ + double KA3 = ((double*)LCONST)[9]; /* 9.61796693925765549423e-01*256 */ + double KA1_LO = ((double*)LCONST)[10]; /* 1.41052154268147309568e-05*256 */ + double KA1_HI = ((double*)LCONST)[11]; /* 2.8853759765625e+00*256 */ + double KA1 = ((double*)LCONST)[12]; /* 2.885390081777926774e+00*256 */ + double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */ + double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */ + double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */ + double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */ + double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */ + double KB2 = ((double*)LCONST)[18]; /* 3.66556559691003767877e-06 */ + double KB1 = ((double*)LCONST)[19]; /* 2.70760617406228636578e-03 */ + + if (stridex == 0) + { + unsigned hx = HI(px); + unsigned lx = LO(px); + + /* if x is a positive normal number not equal to one, + call __vpowx */ + if (hx >= 0x00100000 && hx < 0x7ff00000 && + (hx != 0x3ff00000 || lx != 0)) + { + __vpowx( n, px, py, stridey, pz, stridez ); + return; + } + } + + do + { + /* perform si + ydi = 256*log2(xi)*yi */ +start0: + PREP(0) + px += stridex; + py += stridey; + pz += stridez; + i = 1; + if ( --n <= 0 ) + break; + +start1: + PREP(1) + px += stridex; + py += stridey; + pz += stridez; + i = 2; + if ( --n <= 0 ) + break; + +start2: + PREP(2) + + u0 = x0 - ax0; + u1 = x1 - ax1; + u2 
= x2 - ax2; + + s0 = u0 * yd0; + LO(&ux0) = 0; + s1 = u1 * yd1; + LO(&ux1) = 0; + s2 = u2 * yd2; + LO(&ux2) = 0; + + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + y1 = s1 * s1; + s_h1 = s1; + LO(&s_h1) = 0; + y2 = s2 * s2; + s_h2 = s2; + LO(&s_h2) = 0; + + s0 = (KA5 * y0 + KA3) * y0 * s0; + s1 = (KA5 * y1 + KA3) * y1 * s1; + s2 = (KA5 * y2 + KA3) * y2 * s2; + + s_l0 = (x0 - (ux0 - ax0)); + s_l1 = (x1 - (ux1 - ax1)); + s_l2 = (x2 - (ux2 - ax2)); + + s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0; + s_l1 = u1 - s_h1 * ux1 - s_h1 * s_l1; + s_l2 = u2 - s_h2 * ux2 - s_h2 * s_l2; + + s_l0 = KA1 * yd0 * s_l0; + i0 = (hx0 >> 8) & 0xff0; + exp0 += (hx0 >> 20); + + s_l1 = KA1 * yd1 * s_l1; + i1 = (hx1 >> 8) & 0xff0; + exp1 += (hx1 >> 20); + + s_l2 = KA1 * yd2 * s_l2; + i2 = (hx2 >> 8) & 0xff0; + exp2 += (hx2 >> 20); + + yd0 = KA1_HI * s_h0; + yd1 = KA1_HI * s_h1; + yd2 = KA1_HI * s_h2; + + y0 = *(double *)((char*)__TBL_log2 + i0); + y1 = *(double *)((char*)__TBL_log2 + i1); + y2 = *(double *)((char*)__TBL_log2 + i2); + + y0 += (double)(exp0 << 8); + y1 += (double)(exp1 << 8); + y2 += (double)(exp2 << 8); + + m_h0 = y0 + yd0; + m_h1 = y1 + yd1; + m_h2 = y2 + yd2; + + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y1 = s1 - ((m_h1 - y1 - yd1) - s_l1); + y2 = s2 - ((m_h2 - y2 - yd2) - s_l2); + + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + y1 += *(double *)((char*)__TBL_log2 + i1 + 8) + KA1_LO * s_h1; + y2 += *(double *)((char*)__TBL_log2 + i2 + 8) + KA1_LO * s_h2; + + s_h0 = y0 + m_h0; + s_h1 = y1 + m_h1; + s_h2 = y2 + m_h2; + + LO(&s_h0) = 0; + LO(&s_h1) = 0; + LO(&s_h2) = 0; + + yd0 = *py0; + yd1 = *py1; + yd2 = *py2; + s0 = yd0; + s1 = yd1; + s2 = yd2; + LO(&s0) = 0; + LO(&s1) = 0; + LO(&s2) = 0; + + y0 = y0 - (s_h0 - m_h0); + y1 = y1 - (s_h1 - m_h1); + y2 = y2 - (s_h2 - m_h2); + + yd0 = (yd0 - s0) * s_h0 + yd0 * y0; + yd1 = (yd1 - s1) * s_h1 + yd1 * y1; + yd2 = (yd2 - s2) * s_h2 + yd2 * y2; + + s0 = s_h0 * s0; + s1 = s_h1 * s1; + s2 = s_h2 * s2; + + /* perform 2 ** 
((si+ydi)/256) */ + if ( s0 > HTHRESH ) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if ( s1 > HTHRESH ) + { + s1 = HTHRESH; + yd1 = DZERO; + } + if ( s2 > HTHRESH ) + { + s2 = HTHRESH; + yd2 = DZERO; + } + + if ( s0 < LTHRESH ) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + if ( s1 < LTHRESH ) + { + s1 = LTHRESH; + yd1 = DZERO; + } + ind1 = (int) (s1 + yd1); + if ( s2 < LTHRESH ) + { + s2 = LTHRESH; + yd2 = DZERO; + } + ind2 = (int) (s2 + yd2); + + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + + i1 = (ind1 & 0xff) << 4; + u1 = (double)ind1; + ind1 >>= 8; + + i2 = (ind2 & 0xff) << 4; + u2 = (double) ind2; + ind2 >>= 8; + + y0 = s0 - u0 + yd0; + y1 = s1 - u1 + yd1; + y2 = s2 - u2 + yd2; + + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + u1 = *(double*)((char*)__TBL_exp2 + i1); + y1 = ((((KB5 * y1 + KB4) * y1 + KB3) * y1 + KB2) * y1 + KB1) * y1; + u2 = *(double*)((char*)__TBL_exp2 + i2); + y2 = ((((KB5 * y2 + KB4) * y2 + KB3) * y2 + KB2) * y2 + KB1) * y2; + + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + eflag1 = (ind1 + 1021) >> 31; + gflag1 = (1022 - ind1) >> 31; + eflag2 = (ind2 + 1021) >> 31; + gflag2 = (1022 - ind2) >> 31; + + ind0 = (yisint0 << 11) + ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ind1 = (yisint1 << 11) + ind1 + (54 & eflag1) - (52 & gflag1); + ind1 <<= 20; + ind2 = (yisint2 << 11) + ind2 + (54 & eflag2) - (52 & gflag2); + ind2 <<= 20; + + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + u1 = *(double*)((char*)__TBL_exp2 + i1 + 8) + u1 * y1 + u1; + u2 = *(double*)((char*)__TBL_exp2 + i2 + 8) + u2 * y2 + u2; + + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + + ull_x1 = *(unsigned long long*)&u1; + HI(&ull_x1) += ind1; + u1 = *(double*)&ull_x1; + + ull_x2 = *(unsigned long long*)&u2; + HI(&ull_x2) += ind2; + u2 = *(double*)&ull_x2; + + *pz0 = u0 * SCALE_ARR[eflag0 - 
gflag0]; + *pz1 = u1 * SCALE_ARR[eflag1 - gflag1]; + *pz2 = u2 * SCALE_ARR[eflag2 - gflag2]; + + px += stridex; + py += stridey; + pz += stridez; + i = 0; + + } while ( --n > 0 ); + + if ( i > 0 ) + { + /* perform si + ydi = 256*log2(xi)*yi */ + u0 = x0 - ax0; + s0 = u0 * yd0; + LO(&ux0) = 0; + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + s0 = (KA5 * y0 + KA3) * y0 * s0; + s_l0 = (x0 - (ux0 - ax0)); + s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0; + s_l0 = KA1 * yd0 * s_l0; + i0 = (hx0 >> 8) & 0xff0; + exp0 += (hx0 >> 20); + yd0 = KA1_HI * s_h0; + y0 = *(double *)((char*)__TBL_log2 + i0); + y0 += (double)(exp0 << 8); + m_h0 = y0 + yd0; + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + s_h0 = y0 + m_h0; + LO(&s_h0) = 0; + y0 = y0 - (s_h0 - m_h0); + s0 = yd0 = *py0; + LO(&s0) = 0; + yd0 = (yd0 - s0) * s_h0 + yd0 * y0; + s0 = s_h0 * s0; + + /* perform 2 ** ((si+ydi)/256) */ + if ( s0 > HTHRESH ) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if ( s0 < LTHRESH ) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = (yisint0 << 11) + ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + + if ( i > 1 ) + { + /* perform si + ydi = 256*log2(xi)*yi */ + u0 = x1 - ax1; + s0 = u0 * yd1; + LO(&ux1) = 0; + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + s0 = (KA5 * y0 + KA3) * y0 * s0; + s_l0 = (x1 - (ux1 - ax1)); + s_l0 = u0 - s_h0 * ux1 - s_h0 * s_l0; + s_l0 = KA1 * yd1 * s_l0; + i0 = (hx1 >> 8) & 0xff0; + exp1 += (hx1 >> 20); + yd0 = KA1_HI * s_h0; + y0 = *(double 
*)((char*)__TBL_log2 + i0); + y0 += (double)(exp1 << 8); + m_h0 = y0 + yd0; + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + s_h0 = y0 + m_h0; + LO(&s_h0) = 0; + y0 = y0 - (s_h0 - m_h0); + s0 = yd0 = *py1; + LO(&s0) = 0; + yd0 = (yd0 - s0) * s_h0 + yd0 * y0; + s0 = s_h0 * s0; + /* perform 2 ** ((si+ydi)/256) */ + if ( s0 > HTHRESH ) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if ( s0 < LTHRESH ) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = (yisint1 << 11) + ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + *pz1 = u0 * SCALE_ARR[eflag0 - gflag0]; + } + } +} + +#undef RET_SC +#define RET_SC(I) \ + py += stridey; \ + pz += stridez; \ + if ( --n <= 0 ) \ + break; \ + goto start##I; + +#define PREP_X(I) \ +hy = HI(py); \ +ly = LO(py); \ +sy = hy >> 31; \ +hy &= 0x7fffffff; \ +py##I = py; \ + \ +if ( hy < 0x3bf00000 ) /* |Y| < 2^(-64) */ \ + RETURN (I, DONE) \ +pz##I = pz; \ +if ( hy >= 0x43e00000 ) /* |Y|>2^63,Inf,Nan */ \ +{ \ + if ( hy >= 0x7ff00000 ) /* |Y|=Inf,Nan */ \ + { \ + if ( hy == 0x7ff00000 && ly == 0 ) /* |Y|=Inf */ \ + { \ + if ( (hx < 0x3ff00000) != sy ) \ + *pz = DZERO; \ + else \ + { \ + HI(pz) = hy; \ + LO(pz) = ly; \ + } \ + } \ + else \ + *pz = *px + *py; /* |Y|=Nan */ \ + } \ + else /* |Y|>2^63 */ \ + { \ + y0 = ( (hx < 0x3ff00000) != sy ) ? 
_TINY : _HUGE; \ + *pz = y0 * y0; \ + } \ + RET_SC(I) \ +} \ + +#define LMMANT ((unsigned long long*)LCONST)[4] /* 0x000fffffffffffff */ +#define LMROUND ((unsigned long long*)LCONST)[5] /* 0x0000080000000000 */ +#define LMHI20 ((unsigned long long*)LCONST)[6] /* 0xfffff00000000000 */ +#define MMANT ((double*)LCONST)[4] /* 0x000fffffffffffff */ +#define MROUND ((double*)LCONST)[5] /* 0x0000080000000000 */ +#define MHI20 ((double*)LCONST)[6] /* 0xfffff00000000000 */ +#define KA5 ((double*)LCONST)[8] /* 5.77078604860893737986e-01*256 */ +#define KA3 ((double*)LCONST)[9] /* 9.61796693925765549423e-01*256 */ +#define KA1_LO ((double*)LCONST)[10] /* 1.41052154268147309568e-05*256 */ +#define KA1_HI ((double*)LCONST)[11] /* 2.8853759765625e+00*256 */ +#define KA1 ((double*)LCONST)[12] /* 2.885390081777926774e+00*256 */ + + +static void +__vpowx( int n, double * restrict px, double * restrict py, + int stridey, double * restrict pz, int stridez ) +{ + double *py0, *py1, *py2; + double *pz0, *pz1, *pz2; + double ux0, y0, yd0, u0, s0; + double y1, yd1, u1, s1; + double y2, yd2, u2, s2; + double yr, s_h0, s_l0, m_h0, x0, ax0; + unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0; + int eflag0, gflag0, ind0, i0, exp0; + int eflag1, gflag1, ind1, i1; + int eflag2, gflag2, ind2, i2; + int i = 0; + unsigned hx, hx0, hy, ly, sy; + double DONE = ((double*)LCONST)[1]; /* 1.0 */ + unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */ + double DZERO = ((double*)LCONST)[7]; /* 0.0 */ + double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */ + double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */ + double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */ + double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */ + double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */ + double KB2 = ((double*)LCONST)[18]; /* 3.66556559691003767877e-06 */ + double KB1 = ((double*)LCONST)[19]; /* 2.70760617406228636578e-03 */ + + /* perform 
s_h + yr = 256*log2(x) */ + ull_y0 = *(unsigned long long*)px; + hx = HI(px); + ull_x0 = ull_y0 & LMMANT | LDONE; + x0 = *(double*)&ull_x0; + exp0 = (hx >> 20) - 2046; + ull_ax0 = ull_x0 + LMROUND & LMHI20; + ax0 = *(double*)&ull_ax0; + hx0 = HI(&ax0); + ux0 = x0 + ax0; + yd0 = DONE / ux0; + u0 = x0 - ax0; + s0 = u0 * yd0; + LO(&ux0) = 0; + y0 = s0 * s0; + s_h0 = s0; + LO(&s_h0) = 0; + s0 = (KA5 * y0 + KA3) * y0 * s0; + s_l0 = (x0 - (ux0 - ax0)); + s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0; + s_l0 = KA1 * yd0 * s_l0; + i0 = (hx0 >> 8) & 0xff0; + exp0 += (hx0 >> 20); + yd0 = KA1_HI * s_h0; + y0 = *(double *)((char*)__TBL_log2 + i0); + y0 += (double)(exp0 << 8); + m_h0 = y0 + yd0; + y0 = s0 - ((m_h0 - y0 - yd0) - s_l0); + y0 += *(double *)((char*)__TBL_log2 + i0 + 8) + KA1_LO * s_h0; + s_h0 = y0 + m_h0; + LO(&s_h0) = 0; + yr = y0 - (s_h0 - m_h0); + + do + { + /* perform 2 ** ((s_h0+yr)*yi/256) */ +start0: + PREP_X(0) + py += stridey; + pz += stridez; + i = 1; + if ( --n <= 0 ) + break; + +start1: + PREP_X(1) + py += stridey; + pz += stridez; + i = 2; + if ( --n <= 0 ) + break; + +start2: + PREP_X(2) + + s0 = yd0 = *py0; + s1 = yd1 = *py1; + s2 = yd2 = *py2; + + LO(&s0) = 0; + LO(&s1) = 0; + LO(&s2) = 0; + + yd0 = (yd0 - s0) * s_h0 + yd0 * yr; + yd1 = (yd1 - s1) * s_h0 + yd1 * yr; + yd2 = (yd2 - s2) * s_h0 + yd2 * yr; + + s0 = s_h0 * s0; + s1 = s_h0 * s1; + s2 = s_h0 * s2; + + if ( s0 > HTHRESH ) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if ( s1 > HTHRESH ) + { + s1 = HTHRESH; + yd1 = DZERO; + } + if ( s2 > HTHRESH ) + { + s2 = HTHRESH; + yd2 = DZERO; + } + + if ( s0 < LTHRESH ) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + if ( s1 < LTHRESH ) + { + s1 = LTHRESH; + yd1 = DZERO; + } + ind1 = (int) (s1 + yd1); + if ( s2 < LTHRESH ) + { + s2 = LTHRESH; + yd2 = DZERO; + } + ind2 = (int) (s2 + yd2); + + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + + i1 = (ind1 & 0xff) << 4; + u1 = (double) ind1; + ind1 >>= 8; + + i2 = (ind2 & 0xff) << 4; + 
u2 = (double) ind2; + ind2 >>= 8; + + y0 = s0 - u0 + yd0; + y1 = s1 - u1 + yd1; + y2 = s2 - u2 + yd2; + + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + u1 = *(double*)((char*)__TBL_exp2 + i1); + y1 = ((((KB5 * y1 + KB4) * y1 + KB3) * y1 + KB2) * y1 + KB1) * y1; + u2 = *(double*)((char*)__TBL_exp2 + i2); + y2 = ((((KB5 * y2 + KB4) * y2 + KB3) * y2 + KB2) * y2 + KB1) * y2; + + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + eflag1 = (ind1 + 1021) >> 31; + gflag1 = (1022 - ind1) >> 31; + eflag2 = (ind2 + 1021) >> 31; + gflag2 = (1022 - ind2) >> 31; + + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + + u1 = *(double*)((char*)__TBL_exp2 + i1 + 8) + u1 * y1 + u1; + ind1 = ind1 + (54 & eflag1) - (52 & gflag1); + ind1 <<= 20; + ull_x1 = *(unsigned long long*)&u1; + HI(&ull_x1) += ind1; + u1 = *(double*)&ull_x1; + + u2 = *(double*)((char*)__TBL_exp2 + i2 + 8) + u2 * y2 + u2; + ind2 = ind2 + (54 & eflag2) - (52 & gflag2); + ind2 <<= 20; + ull_x2 = *(unsigned long long*)&u2; + HI(&ull_x2) += ind2; + u2 = *(double*)&ull_x2; + + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + *pz1 = u1 * SCALE_ARR[eflag1 - gflag1]; + *pz2 = u2 * SCALE_ARR[eflag2 - gflag2]; + + py += stridey; + pz += stridez; + i = 0; + + } while ( --n > 0 ); + + if ( i > 0 ) + { + /* perform 2 ** ((s_h0+yr)*yi/256) */ + s0 = y0 = *py0; + LO(&s0) = 0; + yd0 = (y0 - s0) * s_h0 + y0 * yr; + s0 = s_h0 * s0; + if ( s0 > HTHRESH ) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if ( s0 < LTHRESH ) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + 
gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + *pz0 = u0 * SCALE_ARR[eflag0 - gflag0]; + + if ( i > 1 ) + { + /* perform 2 ** ((s_h0+yr)*yi/256) */ + s0 = y0 = *py1; + LO(&s0) = 0; + yd0 = (y0 - s0) * s_h0 + y0 * yr; + s0 = s_h0 * s0; + if ( s0 > HTHRESH ) + { + s0 = HTHRESH; + yd0 = DZERO; + } + if ( s0 < LTHRESH ) + { + s0 = LTHRESH; + yd0 = DZERO; + } + ind0 = (int) (s0 + yd0); + i0 = (ind0 & 0xff) << 4; + u0 = (double) ind0; + ind0 >>= 8; + y0 = s0 - u0 + yd0; + u0 = *(double*)((char*)__TBL_exp2 + i0); + y0 = ((((KB5 * y0 + KB4) * y0 + KB3) * y0 + KB2) * y0 + KB1) * y0; + eflag0 = (ind0 + 1021) >> 31; + gflag0 = (1022 - ind0) >> 31; + u0 = *(double*)((char*)__TBL_exp2 + i0 + 8) + u0 * y0 + u0; + ind0 = ind0 + (54 & eflag0) - (52 & gflag0); + ind0 <<= 20; + ull_x0 = *(unsigned long long*)&u0; + HI(&ull_x0) += ind0; + u0 = *(double*)&ull_x0; + *pz1 = u0 * SCALE_ARR[eflag0 - gflag0]; + } + } +} diff --git a/usr/src/libm/src/mvec/__vpowf.c b/usr/src/libm/src/mvec/__vpowf.c new file mode 100644 index 0000000..a8710a6 --- /dev/null +++ b/usr/src/libm/src/mvec/__vpowf.c @@ -0,0 +1,823 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vpowf.c 1.12 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float powf(float x, float y) + * + * Method : + * 1. Special cases: + * for (anything) ** 0 => 1 + * for (anything) ** NaN => QNaN + invalid + * for NaN ** (anything) => QNaN + invalid + * for +-1 ** +-Inf => QNaN + invalid + * for +-(|x| < 1) ** +Inf => +0 + * for +-(|x| < 1) ** -Inf => +Inf + * for +-(|x| > 1) ** +Inf => +Inf + * for +-(|x| > 1) ** -Inf => +0 + * for +Inf ** (negative) => +0 + * for +Inf ** (positive) => +Inf + * for -Inf ** (negative except odd integer) => +0 + * for -Inf ** (negative odd integer) => -0 + * for -Inf ** (positive except odd integer) => +Inf + * for -Inf ** (positive odd integer) => -Inf + * for (negative) ** (non-integer) => QNaN + invalid + * for +0 ** (negative) => +Inf + overflow + * for +0 ** (positive) => +0 + * for -0 ** (negative except odd integer) => +Inf + overflow + * for -0 ** (negative odd integer) => -Inf + overflow + * for -0 ** (positive except odd integer) => +0 + * for -0 ** (positive odd integer) => -0 + * 2. Computes x**y from: + * x**y = 2**(y*log2(x)) = 2**(w/256), where w = 256*log2(|x|)*y. + * 3. Computes w = 256 * log2(|x|) * y from + * |x| = m * 2**n => log2(|x|) = n + log2(m). + * Let m = m0 + dm, where m0 = 1 + k / 128, + * k = [0, 128], + * dm = [-1/256, 1/256]. + * Then 256*log2(m) = 256*log2(m0 + dm) = 256*log2(m0) + 256*log2(1+z), + * where z = dm*(1/m0), z = [-1/258, 1/256]. + * Then + * 1/m0 is looked up in a table of 1, 1/(1+1/128), ..., 1/(1+128/128). 
+ * 256*log2(m0) is looked up in a table of 256*log2(1), 256*log2(1+1/128), + * ..., 256*log2(1+128/128). + * 256*log2(1+z) is computed using approximation: + * 256*log2(1+z) = (((a3*z + a2)*z + a1)*z + a0)*z. + * 4. For w >= 32768 + * then for (negative) ** (odd integer) => -Inf + overflow + * else => +Inf + overflow + * For w <= -38400 + * then for (negative) ** (odd integer) => -0 + underflow + * else => +0 + underflow + * 5. Computes 2 ** (w/256) from: + * 2 ** (w/256) = 2**a * 2**(k/256) * 2**(r/256) + * Where: + * a = int ( w ) >> 8; + * k = int ( w ) & 0xFF; + * r = frac ( w ). + * Note that: + * k = 0, 1, ..., 255; + * r = (-1, 1). + * Then: + * 2**(k/256) is looked up in a table of 2**0, 2**(1/256), ... + * 2**(r/256) is computed using approximation: + * 2**(r/256) = a0 + a1 * r + a2 * r**2 + * Multiplication by 2**a is done by adding "a" to + * the biased exponent. + * 6. For (negative) ** (odd integer) => -(2**(w/256)) + * otherwise => 2**(w/256) + * + * Accuracy: + * Max. relative approximation error < 2**(-37.35) for 256*log2(1+z). + * Max. relative approximation error < 2**(-29.18) for 2**(r/256). + * All calculations are done in double precision. + * Maximum error observed: less than 0.528 ulp after 700.000.000 + * results. 
+ */ + +static void __vpowfx( int n, float * restrict px, float * restrict py, + int stridey, float * restrict pz, int stridez ); + +static void __vpowf_n( int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez ); + +static void __vpowfx_n( int n, double yy, float * restrict py, + int stridey, float * restrict pz, int stridez ); + +#pragma no_inline(__vpowfx) +#pragma no_inline(__vpowf_n) +#pragma no_inline(__vpowfx_n) + +static const double __TBL_exp2f[] = { + /* 2^(i/256), i = [0, 255] */ +1.000000000000000000e+00, 1.002711275050202522e+00, 1.005429901112802726e+00, +1.008155898118417548e+00, 1.010889286051700475e+00, 1.013630084951489430e+00, +1.016378314910953096e+00, 1.019133996077737914e+00, 1.021897148654116627e+00, +1.024667792897135721e+00, 1.027445949118763746e+00, 1.030231637686040980e+00, +1.033024879021228415e+00, 1.035825693601957198e+00, 1.038634101961378731e+00, +1.041450124688316103e+00, 1.044273782427413755e+00, 1.047105095879289793e+00, +1.049944085800687210e+00, 1.052790773004626423e+00, 1.055645178360557157e+00, +1.058507322794512762e+00, 1.061377227289262093e+00, 1.064254912884464499e+00, +1.067140400676823697e+00, 1.070033711820241873e+00, 1.072934867525975555e+00, +1.075843889062791048e+00, 1.078760797757119860e+00, 1.081685614993215250e+00, +1.084618362213309206e+00, 1.087559060917769660e+00, 1.090507732665257690e+00, +1.093464399072885840e+00, 1.096429081816376883e+00, 1.099401802630221914e+00, +1.102382583307840891e+00, 1.105371445701741173e+00, 1.108368411723678726e+00, +1.111373503344817548e+00, 1.114386742595892432e+00, 1.117408151567369279e+00, +1.120437752409606746e+00, 1.123475567333019898e+00, 1.126521618608241848e+00, +1.129575928566288079e+00, 1.132638519598719196e+00, 1.135709414157805464e+00, +1.138788634756691565e+00, 1.141876203969561576e+00, 1.144972144431804173e+00, +1.148076478840178938e+00, 1.151189229952982673e+00, 1.154310420590215935e+00, +1.157440073633751121e+00, 
1.160578212027498779e+00, 1.163724858777577476e+00, +1.166880036952481658e+00, 1.170043769683250190e+00, 1.173216080163637320e+00, +1.176396991650281221e+00, 1.179586527462875845e+00, 1.182784710984341014e+00, +1.185991565660993841e+00, 1.189207115002721027e+00, 1.192431382583151178e+00, +1.195664392039827328e+00, 1.198906167074380580e+00, 1.202156731452703076e+00, +1.205416109005123859e+00, 1.208684323626581625e+00, 1.211961399276801243e+00, +1.215247359980468955e+00, 1.218542229827408452e+00, 1.221846032972757623e+00, +1.225158793637145527e+00, 1.228480536106870025e+00, 1.231811284734075862e+00, +1.235151063936933413e+00, 1.238499898199816540e+00, 1.241857812073484002e+00, +1.245224830175257980e+00, 1.248600977189204819e+00, 1.251986277866316222e+00, +1.255380757024691096e+00, 1.258784439549716527e+00, 1.262197350394250739e+00, +1.265619514578806282e+00, 1.269050957191733220e+00, 1.272491703389402762e+00, +1.275941778396392001e+00, 1.279401207505669325e+00, 1.282870016078778264e+00, +1.286348229546025568e+00, 1.289835873406665723e+00, 1.293332973229089466e+00, +1.296839554651009641e+00, 1.300355643379650594e+00, 1.303881265191935812e+00, +1.307416445934677318e+00, 1.310961211524764414e+00, 1.314515587949354636e+00, +1.318079601266064049e+00, 1.321653277603157539e+00, 1.325236643159741323e+00, +1.328829724205954355e+00, 1.332432547083161500e+00, 1.336045138204145832e+00, +1.339667524053302916e+00, 1.343299731186835322e+00, 1.346941786232945804e+00, +1.350593715892034474e+00, 1.354255546936892651e+00, 1.357927306212901142e+00, +1.361609020638224754e+00, 1.365300717204011915e+00, 1.369002422974590516e+00, +1.372714165087668414e+00, 1.376435970754530169e+00, 1.380167867260237990e+00, +1.383909881963832023e+00, 1.387662042298529075e+00, 1.391424375771926236e+00, +1.395196909966200272e+00, 1.398979672538311236e+00, 1.402772691220204759e+00, +1.406575993819015435e+00, 1.410389608217270663e+00, 1.414213562373095145e+00, +1.418047884320415175e+00, 
1.421892602169165576e+00, 1.425747744105494208e+00, +1.429613338391970023e+00, 1.433489413367788901e+00, 1.437375997448982368e+00, +1.441273119128625657e+00, 1.445180806977046650e+00, 1.449099089642035043e+00, +1.453027995849052623e+00, 1.456967554401443765e+00, 1.460917794180647045e+00, +1.464878744146405731e+00, 1.468850433336981842e+00, 1.472832890869367528e+00, +1.476826145939499346e+00, 1.480830227822471867e+00, 1.484845165872752393e+00, +1.488870989524397004e+00, 1.492907728291264835e+00, 1.496955411767235455e+00, +1.501014069626425584e+00, 1.505083731623406473e+00, 1.509164427593422841e+00, +1.513256187452609813e+00, 1.517359041198214742e+00, 1.521473018908814590e+00, +1.525598150744538417e+00, 1.529734466947286986e+00, 1.533881997840955913e+00, +1.538040773831656827e+00, 1.542210825407940744e+00, 1.546392183141021448e+00, +1.550584877684999974e+00, 1.554788939777088652e+00, 1.559004400237836929e+00, +1.563231289971357629e+00, 1.567469639965552997e+00, 1.571719481292341403e+00, +1.575980845107886497e+00, 1.580253762652824578e+00, 1.584538265252493749e+00, +1.588834384317163950e+00, 1.593142151342266999e+00, 1.597461597908627073e+00, +1.601792755682693414e+00, 1.606135656416771029e+00, 1.610490331949254283e+00, +1.614856814204860713e+00, 1.619235135194863728e+00, 1.623625327017328868e+00, +1.628027421857347834e+00, 1.632441451987274972e+00, 1.636867449766964411e+00, +1.641305447644006321e+00, 1.645755478153964946e+00, 1.650217573920617742e+00, +1.654691767656194301e+00, 1.659178092161616158e+00, 1.663676580326736376e+00, +1.668187265130582464e+00, 1.672710179641596628e+00, 1.677245357017878469e+00, +1.681792830507429004e+00, 1.686352633448393368e+00, 1.690924799269305279e+00, +1.695509361489332623e+00, 1.700106353718523478e+00, 1.704715809658051251e+00, +1.709337763100462926e+00, 1.713972247929925974e+00, 1.718619298122477934e+00, +1.723278947746273992e+00, 1.727951230961837670e+00, 1.732636182022311067e+00, +1.737333835273706217e+00, 
1.742044225155156445e+00, 1.746767386199169048e+00, +1.751503353031878207e+00, 1.756252160373299454e+00, 1.761013843037583904e+00, +1.765788435933272726e+00, 1.770575974063554714e+00, 1.775376492526521188e+00, +1.780190026515424462e+00, 1.785016611318934965e+00, 1.789856282321401038e+00, +1.794709075003107168e+00, 1.799575024940535117e+00, 1.804454167806623932e+00, +1.809346539371031959e+00, 1.814252175500398856e+00, 1.819171112158608494e+00, +1.824103385407053413e+00, 1.829049031404897274e+00, 1.834008086409342431e+00, +1.838980586775893711e+00, 1.843966568958625984e+00, 1.848966069510450838e+00, +1.853979125083385471e+00, 1.859005772428820480e+00, 1.864046048397788979e+00, +1.869099989941238604e+00, 1.874167634110299963e+00, 1.879249018056560194e+00, +1.884344179032334532e+00, 1.889453154390939194e+00, 1.894575981586965607e+00, +1.899712698176555303e+00, 1.904863341817674138e+00, 1.910027950270389852e+00, +1.915206561397147400e+00, 1.920399213163047403e+00, 1.925605943636125028e+00, +1.930826790987627106e+00, 1.936061793492294347e+00, 1.941310989528640452e+00, +1.946574417579233218e+00, 1.951852116230978318e+00, 1.957144124175400179e+00, +1.962450480208927317e+00, 1.967771223233175881e+00, 1.973106392255234320e+00, +1.978456026387950928e+00, 1.983820164850219392e+00, 1.989198846967266343e+00, +1.994592112170940235e+00 +}; + +static const double __TBL_log2f[] = { + /* __TBL_log2f[2*i] = 256*log2(1+i/128), i = [0, 128] */ + /* __TBL_log2f[2*i+1] = 2**(-23)/(1+i/128), i = [0, 128] */ +0.000000000000000000e+00, 1.192092895507812500e-07, 2.874177388353054585e+00, +1.182851865310077503e-07, 5.726160135284354524e+00, 1.173753004807692373e-07, +8.556288393587271557e+00, 1.164793058206106825e-07, 1.136489455576407970e+01, +1.155968868371212153e-07, 1.415230348830453799e+01, 1.147277373120300688e-07, +1.691883275718974389e+01, 1.138715601679104456e-07, 1.966479284501270897e+01, +1.130280671296296339e-07, 2.239048736008688678e+01, 1.121969784007352926e-07, 
+2.509621323789484038e+01, 1.113780223540145949e-07, 2.778226093521127638e+01, +1.105709352355072477e-07, 3.044891461721790193e+01, 1.097754608812949697e-07, +3.309645233791141550e+01, 1.089913504464285680e-07, 3.572514621409114710e+01, +1.082183621453900683e-07, 3.833526259319860685e+01, 1.074562610035211292e-07, +4.092706221526768928e+01, 1.067048186188811188e-07, 4.350080036923196758e+01, +1.059638129340277719e-07, 4.605672704382322280e+01, 1.052330280172413778e-07, +4.859508707328441091e+01, 1.045122538527397202e-07, 5.111612027810928538e+01, +1.038012861394557784e-07, 5.362006160101114460e+01, 1.030999260979729787e-07, +5.610714123831336053e+01, 1.024079802852348971e-07, 5.857758476694550609e+01, +1.017252604166666732e-07, 6.103161326722020164e+01, 1.010515831953642383e-07, +6.346944344155788542e+01, 1.003867701480263102e-07, 6.589128772931884725e+01, +9.973064746732026447e-08, 6.829735441789475203e+01, 9.908304586038961692e-08, +7.068784775020480993e+01, 9.844380040322580637e-08, 7.306296802873558249e+01, +9.781275040064102225e-08, 7.542291171625650748e+01, 9.718973925159236158e-08, +7.776787153333835079e+01, 9.657461431962025166e-08, 8.009803655279496581e+01, +9.596722680817610579e-08, 8.241359229116476115e+01, 9.536743164062500529e-08, +8.471472079734193983e+01, 9.477508734472049048e-08, 8.700160073846393516e+01, +9.419005594135801946e-08, 8.927440748315585495e+01, 9.361220283742331508e-08, +9.153331318222942059e+01, 9.304139672256097884e-08, 9.377848684692884262e+01, +9.247750946969696962e-08, 9.601009442481273481e+01, 9.192041603915663129e-08, +9.822829887335737453e+01, 9.136999438622755046e-08, 1.004332602313626381e+02, +9.082612537202380448e-08, 1.026251356882391832e+02, 9.028869267751479078e-08, +1.048040796512516550e+02, 8.975758272058823405e-08, 1.069702438107898530e+02, +8.923268457602338686e-08, 1.091237772037370775e+02, 8.871388989825581272e-08, +1.112648262750015107e+02, 8.820109284682080489e-08, 1.133935349372744383e+02, 
+8.769419001436781487e-08, 1.155100446290761766e+02, 8.719308035714285707e-08, +1.176144943711480977e+02, 8.669766512784091150e-08, 1.197070208212473403e+02, +8.620784781073446298e-08, 1.217877583273978246e+02, 8.572353405898876167e-08, +1.238568389796496376e+02, 8.524463163407821503e-08, 1.259143926603967287e+02, +8.477105034722222546e-08, 1.279605470933005762e+02, 8.430270200276242743e-08, +1.299954278908662388e+02, 8.383950034340659995e-08, 1.320191586007148601e+02, +8.338136099726775949e-08, 1.340318607505952855e+02, 8.292820142663043248e-08, +1.360336538921758915e+02, 8.247994087837838296e-08, 1.380246556436560468e+02, +8.203650033602151192e-08, 1.400049817312349774e+02, 8.159780247326202734e-08, +1.419747460294751704e+02, 8.116377160904255122e-08, 1.439340606005945915e+02, +8.073433366402115954e-08, 1.458830357327226466e+02, 8.030941611842105082e-08, +1.478217799771516638e+02, 7.988894797120419333e-08, 1.497504001846159838e+02, +7.947285970052082892e-08, 1.516690015406285852e+02, 7.906108322538860398e-08, +1.535776875999046922e+02, 7.865355186855669953e-08, 1.554765603199003294e+02, +7.825020032051282044e-08, 1.573657200934933087e+02, 7.785096460459183052e-08, +1.592452657808323124e+02, 7.745578204314720208e-08, 1.611152947403800511e+02, +7.706459122474748130e-08, 1.629759028591741128e+02, 7.667733197236181018e-08, +1.648271845823295223e+02, 7.629394531250000159e-08, 1.666692329418057170e+02, +7.591437344527363039e-08, 1.685021395844594565e+02, 7.553855971534653557e-08, +1.703259947994051231e+02, 7.516644858374384321e-08, 1.721408875447028777e+02, +7.479798560049019504e-08, 1.739469054733941960e+02, 7.443311737804878042e-08, +1.757441349589039135e+02, 7.407179156553397416e-08, 1.775326611198272531e+02, +7.371395682367149407e-08, 1.793125678441195987e+02, 7.335956280048077330e-08, +1.810839378127059831e+02, 7.300856010765549954e-08, 1.828468525225273993e+02, +7.266090029761905417e-08, 1.846013923090393973e+02, 7.231653584123223301e-08, 
+1.863476363681789962e+02, 7.197542010613207272e-08, 1.880856627778145764e+02, +7.163750733568075279e-08, 1.898155485186936176e+02, 7.130275262850466758e-08, +1.915373694949018386e+02, 7.097111191860465018e-08, 1.932512005538479514e+02, +7.064254195601851460e-08, 1.949571155057867031e+02, 7.031700028801843312e-08, +1.966551871428931406e+02, 6.999444524082569196e-08, 1.983454872579004018e+02, +6.967483590182648015e-08, 2.000280866623128588e+02, 6.935813210227272390e-08, +2.017030552042064926e+02, 6.904429440045249486e-08, 2.033704617856271284e+02, +6.873328406531531472e-08, 2.050303743795980154e+02, 6.842506306053811558e-08, +2.066828600467466401e+02, 6.811959402901785336e-08, 2.083279849515614899e+02, +6.781684027777777772e-08, 2.099658143782880586e+02, 6.751676576327433535e-08, +2.115964127464742432e+02, 6.721933507709251725e-08, 2.132198436261738550e+02, +6.692451343201754014e-08, 2.148361697528176535e+02, 6.663226664847161225e-08, +2.164454530417600608e+02, 6.634256114130434863e-08, 2.180477546025107358e+02, +6.605536390692640687e-08, 2.196431347526584545e+02, 6.577064251077586116e-08, +2.212316530314957390e+02, 6.548836507510729591e-08, 2.228133682133515663e+02, +6.520850026709402365e-08, 2.243883383206399174e+02, 6.493101728723404362e-08, +2.259566206366313565e+02, 6.465588585805084723e-08, 2.275182717179543204e+02, +6.438307621308016336e-08, 2.290733474068335340e+02, 6.411255908613445100e-08, +2.306219028430716378e+02, 6.384430570083681460e-08, 2.321639924757807307e+02, +6.357828776041666578e-08, 2.336996700748701699e+02, 6.331447743775933615e-08, +2.352289887422961954e+02, 6.305284736570248109e-08, 2.367520009230799189e+02, +6.279337062757202180e-08, 2.382687584160988763e+02, 6.253602074795082293e-08, +2.397793123846580556e+02, 6.228077168367347501e-08, 2.412837133668454044e+02, +6.202759781504065697e-08, 2.427820112856774699e+02, 6.177647393724696421e-08, +2.442742554590400630e+02, 6.152737525201612732e-08, 2.457604946094287186e+02, 
+6.128027735943774537e-08, 2.472407768734942692e+02, 6.103515625000000127e-08, +2.487151498113976231e+02, 6.079198829681274795e-08, 2.501836604159786077e+02, +6.055075024801586965e-08, 2.516463551217433974e+02, 6.031141921936758485e-08, +2.531032798136744475e+02, 6.007397268700787318e-08, 2.545544798358676246e+02, +5.983838848039215603e-08, 2.560000000000000000e+02, 5.960464477539062500e-08 +}; + +static const double __TBL_expfb[] = { /* 2^i, i = [-150, 128]; read as (__TBL_expfb + 150)[i] so that index 0 is 2^0 */ +7.006492321624085355e-46, 1.401298464324817071e-45, 2.802596928649634142e-45, +5.605193857299268284e-45, 1.121038771459853657e-44, 2.242077542919707313e-44, +4.484155085839414627e-44, 8.968310171678829254e-44, 1.793662034335765851e-43, +3.587324068671531702e-43, 7.174648137343063403e-43, 1.434929627468612681e-42, +2.869859254937225361e-42, 5.739718509874450723e-42, 1.147943701974890145e-41, +2.295887403949780289e-41, 4.591774807899560578e-41, 9.183549615799121156e-41, +1.836709923159824231e-40, 3.673419846319648462e-40, 7.346839692639296925e-40, +1.469367938527859385e-39, 2.938735877055718770e-39, 5.877471754111437540e-39, +1.175494350822287508e-38, 2.350988701644575016e-38, 4.701977403289150032e-38, +9.403954806578300064e-38, 1.880790961315660013e-37, 3.761581922631320025e-37, +7.523163845262640051e-37, 1.504632769052528010e-36, 3.009265538105056020e-36, +6.018531076210112041e-36, 1.203706215242022408e-35, 2.407412430484044816e-35, +4.814824860968089633e-35, 9.629649721936179265e-35, 1.925929944387235853e-34, +3.851859888774471706e-34, 7.703719777548943412e-34, 1.540743955509788682e-33, +3.081487911019577365e-33, 6.162975822039154730e-33, 1.232595164407830946e-32, +2.465190328815661892e-32, 4.930380657631323784e-32, 9.860761315262647568e-32, +1.972152263052529514e-31, 3.944304526105059027e-31, 7.888609052210118054e-31, +1.577721810442023611e-30, 3.155443620884047222e-30, 6.310887241768094443e-30, +1.262177448353618889e-29, 2.524354896707237777e-29, 5.048709793414475555e-29, +1.009741958682895111e-28, 2.019483917365790222e-28, 
4.038967834731580444e-28, +8.077935669463160887e-28, 1.615587133892632177e-27, 3.231174267785264355e-27, +6.462348535570528710e-27, 1.292469707114105742e-26, 2.584939414228211484e-26, +5.169878828456422968e-26, 1.033975765691284594e-25, 2.067951531382569187e-25, +4.135903062765138374e-25, 8.271806125530276749e-25, 1.654361225106055350e-24, +3.308722450212110699e-24, 6.617444900424221399e-24, 1.323488980084844280e-23, +2.646977960169688560e-23, 5.293955920339377119e-23, 1.058791184067875424e-22, +2.117582368135750848e-22, 4.235164736271501695e-22, 8.470329472543003391e-22, +1.694065894508600678e-21, 3.388131789017201356e-21, 6.776263578034402713e-21, +1.355252715606880543e-20, 2.710505431213761085e-20, 5.421010862427522170e-20, +1.084202172485504434e-19, 2.168404344971008868e-19, 4.336808689942017736e-19, +8.673617379884035472e-19, 1.734723475976807094e-18, 3.469446951953614189e-18, +6.938893903907228378e-18, 1.387778780781445676e-17, 2.775557561562891351e-17, +5.551115123125782702e-17, 1.110223024625156540e-16, 2.220446049250313081e-16, +4.440892098500626162e-16, 8.881784197001252323e-16, 1.776356839400250465e-15, +3.552713678800500929e-15, 7.105427357601001859e-15, 1.421085471520200372e-14, +2.842170943040400743e-14, 5.684341886080801487e-14, 1.136868377216160297e-13, +2.273736754432320595e-13, 4.547473508864641190e-13, 9.094947017729282379e-13, +1.818989403545856476e-12, 3.637978807091712952e-12, 7.275957614183425903e-12, +1.455191522836685181e-11, 2.910383045673370361e-11, 5.820766091346740723e-11, +1.164153218269348145e-10, 2.328306436538696289e-10, 4.656612873077392578e-10, +9.313225746154785156e-10, 1.862645149230957031e-09, 3.725290298461914062e-09, +7.450580596923828125e-09, 1.490116119384765625e-08, 2.980232238769531250e-08, +5.960464477539062500e-08, 1.192092895507812500e-07, 2.384185791015625000e-07, +4.768371582031250000e-07, 9.536743164062500000e-07, 1.907348632812500000e-06, +3.814697265625000000e-06, 7.629394531250000000e-06, 
1.525878906250000000e-05, +3.051757812500000000e-05, 6.103515625000000000e-05, 1.220703125000000000e-04, +2.441406250000000000e-04, 4.882812500000000000e-04, 9.765625000000000000e-04, +1.953125000000000000e-03, 3.906250000000000000e-03, 7.812500000000000000e-03, +1.562500000000000000e-02, 3.125000000000000000e-02, 6.250000000000000000e-02, +1.250000000000000000e-01, 2.500000000000000000e-01, 5.000000000000000000e-01, +1.000000000000000000e+00, 2.000000000000000000e+00, 4.000000000000000000e+00, +8.000000000000000000e+00, 1.600000000000000000e+01, 3.200000000000000000e+01, +6.400000000000000000e+01, 1.280000000000000000e+02, 2.560000000000000000e+02, +5.120000000000000000e+02, 1.024000000000000000e+03, 2.048000000000000000e+03, +4.096000000000000000e+03, 8.192000000000000000e+03, 1.638400000000000000e+04, +3.276800000000000000e+04, 6.553600000000000000e+04, 1.310720000000000000e+05, +2.621440000000000000e+05, 5.242880000000000000e+05, 1.048576000000000000e+06, +2.097152000000000000e+06, 4.194304000000000000e+06, 8.388608000000000000e+06, +1.677721600000000000e+07, 3.355443200000000000e+07, 6.710886400000000000e+07, +1.342177280000000000e+08, 2.684354560000000000e+08, 5.368709120000000000e+08, +1.073741824000000000e+09, 2.147483648000000000e+09, 4.294967296000000000e+09, +8.589934592000000000e+09, 1.717986918400000000e+10, 3.435973836800000000e+10, +6.871947673600000000e+10, 1.374389534720000000e+11, 2.748779069440000000e+11, +5.497558138880000000e+11, 1.099511627776000000e+12, 2.199023255552000000e+12, +4.398046511104000000e+12, 8.796093022208000000e+12, 1.759218604441600000e+13, +3.518437208883200000e+13, 7.036874417766400000e+13, 1.407374883553280000e+14, +2.814749767106560000e+14, 5.629499534213120000e+14, 1.125899906842624000e+15, +2.251799813685248000e+15, 4.503599627370496000e+15, 9.007199254740992000e+15, +1.801439850948198400e+16, 3.602879701896396800e+16, 7.205759403792793600e+16, +1.441151880758558720e+17, 2.882303761517117440e+17, 
5.764607523034234880e+17, +1.152921504606846976e+18, 2.305843009213693952e+18, 4.611686018427387904e+18, +9.223372036854775808e+18, 1.844674407370955162e+19, 3.689348814741910323e+19, +7.378697629483820646e+19, 1.475739525896764129e+20, 2.951479051793528259e+20, +5.902958103587056517e+20, 1.180591620717411303e+21, 2.361183241434822607e+21, +4.722366482869645214e+21, 9.444732965739290427e+21, 1.888946593147858085e+22, +3.777893186295716171e+22, 7.555786372591432342e+22, 1.511157274518286468e+23, +3.022314549036572937e+23, 6.044629098073145874e+23, 1.208925819614629175e+24, +2.417851639229258349e+24, 4.835703278458516699e+24, 9.671406556917033398e+24, +1.934281311383406680e+25, 3.868562622766813359e+25, 7.737125245533626718e+25, +1.547425049106725344e+26, 3.094850098213450687e+26, 6.189700196426901374e+26, +1.237940039285380275e+27, 2.475880078570760550e+27, 4.951760157141521100e+27, +9.903520314283042199e+27, 1.980704062856608440e+28, 3.961408125713216880e+28, +7.922816251426433759e+28, 1.584563250285286752e+29, 3.169126500570573504e+29, +6.338253001141147007e+29, 1.267650600228229401e+30, 2.535301200456458803e+30, +5.070602400912917606e+30, 1.014120480182583521e+31, 2.028240960365167042e+31, +4.056481920730334085e+31, 8.112963841460668170e+31, 1.622592768292133634e+32, +3.245185536584267268e+32, 6.490371073168534536e+32, 1.298074214633706907e+33, +2.596148429267413814e+33, 5.192296858534827629e+33, 1.038459371706965526e+34, +2.076918743413931051e+34, 4.153837486827862103e+34, 8.307674973655724206e+34, +1.661534994731144841e+35, 3.323069989462289682e+35, 6.646139978924579365e+35, +1.329227995784915873e+36, 2.658455991569831746e+36, 5.316911983139663492e+36, +1.063382396627932698e+37, 2.126764793255865397e+37, 4.253529586511730793e+37, +8.507059173023461587e+37, 1.701411834604692317e+38, 3.402823669209384635e+38 +}; + +static const double + KA3 = -3.60659926599003171364e-01*256.0, + KA2 = 4.80902715189356683026e-01*256.0, + KA1 = -7.21347520569871841065e-01*256.0, + 
KA0 = 1.44269504088069658645e+00*256.0, + KB2 = 3.66556671660783833261e-06, + KB1 = 2.70760782821392980564e-03, + DONE = 1.0, + HTHRESH = 32768.0, + LTHRESH = -38400.0; +/* RETURN(ret): store ret in *pz and step px, py, pz past the element. If no ordinary elements are pending (n_n == 0) the run start spx/spy/spz is moved to the next element and the scan continues; otherwise n is decremented here and the scan loop is left, so that __vpowf flushes the pending run through __vpowf_n. Only usable inside the scan loop of __vpowf. */ +#define RETURN(ret) \ +{ \ + *pz = (ret); \ + px += stridex; \ + py += stridey; \ + pz += stridez; \ + if ( n_n == 0 ) \ + { \ + spx = px; spy = py; spz = pz; \ + continue; \ + } \ + n--; \ + break; \ +} +/* __vpowf: for i = 0 .. n-1 store powf(px[i*stridex], py[i*stridey]) in pz[i*stridez]. Special operands (Inf, NaN, zero, denormal or negative x) are resolved one at a time inside the scan loop; runs of the remaining elements are counted in n_n and evaluated by __vpowf_n. */ +void +__vpowf( int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez ) +{ + float *spx, *spy, *spz; + double y0, yy0; + long long di0; + unsigned ux, sx, uy, ay, ax0; + int exp, i0, ind0, exp0, yisint0, n_n; +/* Constant x (stridex == 0): when x is a positive normal number other than 1, __vpowfx evaluates the log2(x) term once for the whole vector. */ +#ifndef NOPOWFIX + if ( stridex == 0 ) + { + unsigned hx = *(unsigned*)px; + + if ( (hx >= 0x00800000) && /* x not zero or subnormal */ + (hx < 0x7f800000) && /* x not inf, nan or negative sign bit */ + (hx != 0x3f800000) ) /* x not 1 */ + { + __vpowfx( n, px, py, stridey, pz, stridez ); + return; + } + } +#endif +/* Each pass of the outer loop scans forward from spx/spy/spz, counting ordinary elements in n_n until a special operand or the end of the input is reached. */ + while ( n > 0 ) + { + n_n = 0; + spx = px; + spy = py; + spz = pz; + for ( ; n > 0 ; n-- ) + { + uy = *(unsigned int*)py; + ux = *(unsigned int*)px; + ay = uy & 0x7fffffff; + ax0 = ux & 0x7fffffff; + sx = ux >> 31; + yisint0 = 0; /* Y - non-integer; 1 = odd integer, 2 = even integer */ + + /* |X| or |Y| = Inf,Nan */ + if ( ax0 >= 0x7f800000 || ay >= 0x7f800000 ) + { + if ( ay == 0 ) + RETURN( 1.0f ) /* pow(X,0) */ + /* |X| or |Y| = Nan */ + if ( ax0 > 0x7f800000 || ay > 0x7f800000 ) + RETURN ( *px + *py ) + if ( ay == 0x7f800000 ) /* |Y| = Inf */ + { + float fy; + if ( ax0 == 0x3f800000 ) + fy = *py - *py; /* +-1 ** +-Inf = NaN */ + else + fy = ( (ax0 < 0x3f800000) != (uy >> 31) ) ? 
0.0f : *(float*) &ay; + RETURN( fy ) + } + if ( sx ) /* X = -Inf */ + { + exp = ay >> 23; + if ( exp >= 0x97 ) /* |Y| >= 2^24 */ + yisint0 = 2; /* Y - even */ + else if ( exp >= 0x7f ) /* |Y| >= 1 */ + { + i0 = ay >> ((0x7f + 23) - exp); + if ( (i0 << ((0x7f + 23) - exp)) == ay ) + yisint0 = 2 - (i0 & 1); + } + } + if ( uy >> 31 ) + ax0 = 0; + ax0 += yisint0 << 31; /* yisint0 == 1 (odd integer Y) sets the sign bit of the Inf or zero result */ + RETURN( *(float*)&ax0 ) + } + + if ( (int)ux < 0x00800000 ) /* X = denormal or negative */ + { + if ( ay == 0 ) + RETURN( 1.0f ) /* pow(X,0) */ + exp0 = (ax0 >> 23) - 127; + + if ( (int)ax0 < 0x00800000 ) /* X = denormal */ + { + *((float*) &ax0) = (float) (int)ax0; + exp0 = (ax0 >> 23) - (127 + 149); /* the (float)(int) conversion renormalizes X; a denormal is its bit pattern times 2**-149 */ + } + + if ( (int)ux <= 0 ) /* X <= 0 */ + { + exp = ay >> 23; + if ( exp >= 0x97 ) /* |Y| >= 2^24 */ + yisint0 = 2; /* Y - even */ + else if ( exp >= 0x7f ) /* |Y| >= 1 */ + { + i0 = ay >> ((0x7f + 23) - exp); + if ( (i0 << ((0x7f + 23) - exp)) == ay ) + yisint0 = 2 - (i0 & 1); + } + + if ( ax0 == 0 ) /* pow(0,Y) */ + { + float fy; + fy = (uy >> 31) ? 
1.0f / 0.0f : 0.0f; + if ( sx & yisint0 ) + fy = -fy; + RETURN( fy ) + } + + if ( yisint0 == 0 ) /* pow(neg,non-integer) */ + RETURN( 0.0f / 0.0f ) /* NaN */ + } + + /* perform yy0 = 256*log2(xi)*yi */ + ax0 &= 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; /* 2*k, k = mantissa rounded to 7 bits: __TBL_log2f interleaves 256*log2(1+k/128) and 2**(-23)/(1+k/128) */ + i0 = ax0 - i0; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + yy0 = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy0 += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + yy0 = (double)py[0] * yy0; + + /* perform 2 ** (yy0/256) */ + if ( yy0 >= HTHRESH ) + yy0 = HTHRESH; + if ( yy0 <= LTHRESH ) + yy0 = LTHRESH; + ind0 = (int) yy0; + y0 = yy0 - (double)ind0; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + di0 = ((long long)((ind0 >> 8) + (yisint0 << 11))) << 52; /* exponent increment for the double bit pattern; bit 11 (yisint0 == 1, odd integer Y with negative X) lands on the sign bit. NOTE(review): yisint0 == 2 presumably relies on its bit being shifted out - confirm */ + di0 += ((long long*)__TBL_exp2f)[ind0 & 255]; + RETURN( (float) (yy0 * *(double*)&di0) ) + } + px += stridex; + py += stridey; + pz += stridez; + n_n++; + } + if ( n_n > 0 ) + __vpowf_n( n_n, spx, stridex, spy, stridey, spz, stridez ); + } +} + +/* __vpowf_n: evaluate n elements with no special-case checks (__vpowf passes only elements that skipped every special branch): w = 256*log2(x)*y via __TBL_log2f and the KA polynomial, clamped to [LTHRESH, HTHRESH], then 2**(w/256) via __TBL_expfb, __TBL_exp2f and the KB polynomial. Three elements are processed per iteration, followed by a remainder loop. */ +static void +__vpowf_n( int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict pz, int stridez ) +{ + double y0, yy0; + double di0; + int ind0, i0, exp0; + unsigned ax0; + double y1, yy1; + double di1; + int ind1, i1, exp1; + unsigned ax1; + double y2, yy2; + double di2; + int ind2, i2, exp2; + unsigned ax2; + + for ( ; n > 2 ; n -= 3 ) + { + /* perform yy0 = 256*log2(xi)*yi */ + ax0 = ((int*)px)[0]; + px += stridex; + ax1 = ((int*)px)[0]; + px += stridex; + ax2 = ((int*)px)[0]; + px += stridex; + exp0 = ((ax0 & 0x7fffffff) >> 23) - 127; + exp1 = ((ax1 & 0x7fffffff) >> 23) - 127; + exp2 = ((ax2 & 0x7fffffff) >> 23) - 127; + ax0 &= 0x007fffff; + ax1 &= 0x007fffff; + ax2 &= 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + i1 = (ax1 + 0x8000) & 0xffff0000; + i2 = (ax2 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + ind1 = i1 >> 15; + ind2 = i2 >> 15; + i0 = ax0 - i0; + i1 = ax1 - i1; + i2 = ax2 - i2; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + y1 = (double) i1 * 
__TBL_log2f[ind1 + 1]; + y2 = (double) i2 * __TBL_log2f[ind2 + 1]; + yy0 = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy1 = __TBL_log2f[ind1] + (double) (exp1 << 8); + yy2 = __TBL_log2f[ind2] + (double) (exp2 << 8); + yy0 += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + yy1 += (((KA3 * y1 + KA2) * y1 + KA1) * y1 + KA0) * y1; + yy2 += (((KA3 * y2 + KA2) * y2 + KA1) * y2 + KA0) * y2; + yy0 = (double)py[0] * yy0; + py += stridey; + yy1 = (double)py[0] * yy1; + py += stridey; + yy2 = (double)py[0] * yy2; + py += stridey; + + /* perform 2 ** (yy0/256) */ + if ( yy0 >= HTHRESH ) + yy0 = HTHRESH; + if ( yy0 <= LTHRESH ) + yy0 = LTHRESH; + if ( yy1 >= HTHRESH ) + yy1 = HTHRESH; + if ( yy1 <= LTHRESH ) + yy1 = LTHRESH; + if ( yy2 >= HTHRESH ) + yy2 = HTHRESH; + if ( yy2 <= LTHRESH ) + yy2 = LTHRESH; + + ind0 = (int) yy0; + ind1 = (int) yy1; + ind2 = (int) yy2; + y0 = yy0 - (double)ind0; + y1 = yy1 - (double)ind1; + y2 = yy2 - (double)ind2; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + yy1 = (KB2 * y1 + KB1) * y1 + DONE; + yy2 = (KB2 * y2 + KB1) * y2 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di1 = (__TBL_expfb + 150)[ind1 >> 8]; + di2 = (__TBL_expfb + 150)[ind2 >> 8]; /* 2**(ind >> 8): the table starts at 2**-150, hence the bias of 150 */ + di0 *= __TBL_exp2f[ind0 & 255]; + di1 *= __TBL_exp2f[ind1 & 255]; + di2 *= __TBL_exp2f[ind2 & 255]; + pz[0] = (float) (yy0 * di0); + pz += stridez; + pz[0] = (float) (yy1 * di1); + pz += stridez; + pz[0] = (float) (yy2 * di2); + pz += stridez; + } +/* remaining 0-2 elements, one at a time */ + for ( ; n > 0 ; n-- ) + { + /* perform yy0 = 256*log2(xi)*yi */ + ax0 = ((int*)px)[0]; + exp0 = ((ax0 & 0x7fffffff) >> 23) - 127; + ax0 &= 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + i0 = ax0 - i0; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + yy0 = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy0 += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + yy0 = (double)py[0] * yy0; + + /* perform 2 ** (yy0/256) */ + if ( yy0 >= HTHRESH ) + yy0 = HTHRESH; + if ( yy0 <= LTHRESH ) + yy0 = LTHRESH; + ind0 = (int) yy0; + y0 = yy0 - 
(double)ind0; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + pz[0] = (float) (yy0 * di0); + px += stridex; + py += stridey; + pz += stridez; + } +} + +/* __vpowfx: variant of __vpowf for a single x shared by all elements (called from __vpowf when stridex == 0 and x is a positive normal number other than 1). 256*log2(x) is computed once; y values that are Inf or NaN are resolved here, and runs of finite y are handed to __vpowfx_n. */ +static void +__vpowfx( int n, float * restrict px, float * restrict py, + int stridey, float * restrict pz, int stridez ) +{ + float *spy, *spz; + double yy, y0; + int ind0, exp0, i0, n_n; + unsigned ux, ax, ax0, uy, ay; + + /* perform yy = 256*log2(x); the multiplication by each yi is done in __vpowfx_n */ + ux = *(unsigned int*)px; + ax = ux & 0x7fffffff; + exp0 = (ax >> 23) - 127; + ax0 = ux & 0x007fffff; + i0 = (ax0 + 0x8000) & 0xffff0000; + ind0 = i0 >> 15; + i0 = ax0 - i0; + y0 = (double) i0 * __TBL_log2f[ind0 + 1]; + yy = __TBL_log2f[ind0] + (double) (exp0 << 8); + yy += (((KA3 * y0 + KA2) * y0 + KA1) * y0 + KA0) * y0; + + while ( n > 0 ) + { + n_n = 0; + spy = py; + spz = pz; + for ( ; n > 0 ; n-- ) + { + uy = *(unsigned int*)py; + ay = uy & 0x7fffffff; + + if ( ay >= 0x7f800000 ) /* |Y| = Inf or Nan */ + { + float fy; + if ( ay > 0x7f800000 ) + fy = *py + *py; /* |Y| = Nan */ + else + fy = ( (ax < 0x3f800000) != (uy >> 31) ) ? 
0.0f : *(float*)&ay; /* |Y| = Inf: +0 or +Inf, chosen by whether |x| < 1 and by the sign of Y */ + *pz = fy; + py += stridey; + pz += stridez; + if ( n_n == 0 ) + { + spy = py; + spz = pz; + continue; + } + n--; + break; + } + py += stridey; + pz += stridez; + n_n++; + } + if ( n_n > 0 ) + __vpowfx_n( n_n, yy, spy, stridey, spz, stridez ); + } +} + +/* __vpowfx_n: for n elements store 2**(y*yy/256) in pz, where yy is the 256*log2(x) value precomputed by __vpowfx; no special-case checks are made. Three elements are processed per iteration, followed by a remainder loop. */ +static void +__vpowfx_n( int n, double yy, float * restrict py, + int stridey, float * restrict pz, int stridez ) +{ + double y0, yy0, di0; + double y1, yy1, di1; + double y2, yy2, di2; + int ind0, ind1, ind2; + + for ( ; n > 2 ; n-= 3 ) + { + /* perform 2 ** (yy/256) */ + yy0 = (double)py[0] * yy; + py += stridey; + yy1 = (double)py[0] * yy; + py += stridey; + yy2 = (double)py[0] * yy; + py += stridey; + if ( yy0 >= HTHRESH ) + yy0 = HTHRESH; + if ( yy0 <= LTHRESH ) + yy0 = LTHRESH; + if ( yy1 >= HTHRESH ) + yy1 = HTHRESH; + if ( yy1 <= LTHRESH ) + yy1 = LTHRESH; + if ( yy2 >= HTHRESH ) + yy2 = HTHRESH; + if ( yy2 <= LTHRESH ) + yy2 = LTHRESH; + ind0 = (int) yy0; + ind1 = (int) yy1; + ind2 = (int) yy2; + y0 = yy0 - (double)ind0; + y1 = yy1 - (double)ind1; + y2 = yy2 - (double)ind2; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + yy1 = (KB2 * y1 + KB1) * y1 + DONE; + yy2 = (KB2 * y2 + KB1) * y2 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di1 = (__TBL_expfb + 150)[ind1 >> 8]; + di2 = (__TBL_expfb + 150)[ind2 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + di1 *= __TBL_exp2f[ind1 & 255]; + di2 *= __TBL_exp2f[ind2 & 255]; + pz[0] = (float) (yy0 * di0); + pz += stridez; + pz[0] = (float) (yy1 * di1); + pz += stridez; + pz[0] = (float) (yy2 * di2); + pz += stridez; + } + for ( ; n > 0 ; n-- ) + { + /* perform 2 ** (yy/256) */ + yy0 = (double)py[0] * yy; + if ( yy0 >= HTHRESH ) + yy0 = HTHRESH; + if ( yy0 <= LTHRESH ) + yy0 = LTHRESH; + ind0 = (int) yy0; + y0 = yy0 - (double)ind0; + yy0 = (KB2 * y0 + KB1) * y0 + DONE; + di0 = (__TBL_expfb + 150)[ind0 >> 8]; + di0 *= __TBL_exp2f[ind0 & 255]; + pz[0] = (float) (yy0 * di0); + py += stridey; + pz += stridez; + } +} diff --git 
a/usr/src/libm/src/mvec/__vrem_pio2m.c b/usr/src/libm/src/mvec/__vrem_pio2m.c new file mode 100644 index 0000000..449f693 --- /dev/null +++ b/usr/src/libm/src/mvec/__vrem_pio2m.c @@ -0,0 +1,308 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vrem_pio2m.c 1.8 06/01/31 SMI" + +/* + * Given X, __vlibm_rem_pio2m finds Y and an integer n such that + * Y = X - n*pi/2 and |Y| < pi/2. + * + * On entry, X is represented by x, an array of nx 24-bit integers + * stored in double precision format, and e: + * + * X = sum (x[i] * 2^(e - 24*i)) + * + * nx must be 1, 2, or 3, and e must be >= -24. For example, a + * suitable representation for the double precision number z can + * be computed as follows: + * + * e = ilogb(z)-23 + * z = scalbn(z,-e) + * for i = 0,1,2 + * x[i] = floor(z) + * z = (z-x[i])*2**24 + * + * On exit, Y is approximated by y[0] if prec is 0 and by the un- + * evaluated sum y[0] + y[1] if prec != 0. The approximation is + * accurate to 53 bits in the former case and to at least 72 bits + * in the latter. + * + * __vlibm_rem_pio2m returns n mod 8. 
+ * + * Notes: + * + * As n is the integer nearest X * 2/pi, we approximate the latter + * product to a precision that is determined dynamically so as to + * ensure that the final value Y is approximated accurately enough. + * We don't bother to compute terms in the product that are multiples + * of 8, so the cost of this multiplication is independent of the + * magnitude of X. The variable ip determines the offset into the + * array ipio2 of the first term we need to use. The variable eq0 + * is the corresponding exponent of the first partial product. + * + * The partial products are scaled, summed, and split into an array + * of non-overlapping 24-bit terms (not necessarily having the same + * signs). Each partial product overlaps three elements of the + * resulting array: + * + * q[i] xxxxxxxxxxxxxx + * q[i+1] xxxxxxxxxxxxxx + * q[i+2] xxxxxxxxxxxxxx + * ... ... + * + * + * r[i] xxxxxx + * r[i+1] xxxxxx + * r[i+2] xxxxxx + * ... ... + * + * In order that the last element of the r array have some correct + * bits, we compute an extra term in the q array, but we don't bother + * to split this last term into 24-bit chunks; thus, the final term + * of the r array could have more than 24 bits, but this doesn't + * matter. + * + * After we subtract the nearest integer to the product, we multiply + * the remaining part of r by pi/2 to obtain Y. Before we compute + * this last product, however, we make sure that the remaining part + * of r has at least five nonzero terms, computing more if need be. + * This ensures that even if the first nonzero term is only a single + * bit and the last term is wrong in several trailing bits, we still + * have enough accuracy to obtain 72 bits of Y. + * + * IMPORTANT: This code assumes that the rounding mode is round-to- + * nearest in several key places. First, after we compute X * 2/pi, + * we round to the nearest integer by adding and subtracting a power + * of two. 
This step must be done in round-to-nearest mode to ensure + * that the remainder is less than 1/2 in absolute value. (Because + * we only take two adjacent terms of r into account when we perform + * this rounding, in very rare cases the remainder could be just + * barely greater than 1/2, but this shouldn't matter in practice.) + * + * Second, we also split the partial products of X * 2/pi into 24-bit + * pieces by adding and subtracting a power of two. In this step, + * round-to-nearest mode is important in order to guarantee that + * the index of the first nonzero term in the remainder gives an + * accurate indication of the number of significant terms. For + * example, suppose eq0 = -1, so that r[1] is a multiple of 1/2 and + * |r[2]| < 1/2. After we subtract the nearest integer, r[1] could + * be -1/2, and r[2] could be very nearly 1/2, so that r[1] != 0, + * yet the remainder is much smaller than the least significant bit + * corresponding to r[1]. As long as we use round-to-nearest mode, + * this can't happen; instead, the absolute value of each r[j] will + * be less than 1/2 the least significant bit corresponding to r[j-1], + * so that the entire remainder must be at least half as large as + * the first nonzero term (or perhaps just barely smaller than this). 
+ */ + +#include + +#ifdef _LITTLE_ENDIAN +#define HIWORD 1 +#define LOWORD 0 +#else +#define HIWORD 0 +#define LOWORD 1 +#endif + +/* 396 hex digits of 2/pi, with two leading zeroes to make life easier */ +static const double ipio2[] = { + 0, 0, + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, +}; + +/* pi/2 in 24-bit pieces */ +static const double pio2[] = { + 1.57079625129699707031e+00, + 7.54978941586159635335e-08, + 5.39030252995776476554e-15, + 3.28200341580791294123e-22, + 1.27065575308067607349e-29, +}; + +/* miscellaneous constants */ +static const double + zero = 0.0, + two24 = 16777216.0, + round1 = 6755399441055744.0, /* 3 * 2^51 */ + round24 = 113336795588871485128704.0, /* 3 * 2^75 */ + twon24 = 5.960464477539062500E-8; + +int +__vlibm_rem_pio2m(double *x, double *y, int e, int nx, int prec) +{ + union { + double d; + int i[2]; + } s; + double z, t, p, q[20], r[21], *pr; + int nq, ip, n, i, j, k, eq0, eqnqm1; + + /* determine ip and eq0; note that -48 <= eq0 <= 2 */ + ip = (e - 3) / 24; + if (ip < 0) + ip = 0; + eq0 = e - 24 * (ip + 1); + + /* compute q[0,...,5] = x * ipio2 and initialize nq and eqnqm1 */ + if (nx == 3) { + q[0] = x[0] * ipio2[ip+2] + x[1] * ipio2[ip+1] + x[2] * ipio2[ip]; + q[1] = x[0] * ipio2[ip+3] + x[1] * ipio2[ip+2] + x[2] * ipio2[ip+1]; + q[2] = x[0] * ipio2[ip+4] + x[1] * ipio2[ip+3] + x[2] * ipio2[ip+2]; + 
q[3] = x[0] * ipio2[ip+5] + x[1] * ipio2[ip+4] + x[2] * ipio2[ip+3]; + q[4] = x[0] * ipio2[ip+6] + x[1] * ipio2[ip+5] + x[2] * ipio2[ip+4]; + q[5] = x[0] * ipio2[ip+7] + x[1] * ipio2[ip+6] + x[2] * ipio2[ip+5]; + } else if (nx == 2) { + q[0] = x[0] * ipio2[ip+2] + x[1] * ipio2[ip+1]; + q[1] = x[0] * ipio2[ip+3] + x[1] * ipio2[ip+2]; + q[2] = x[0] * ipio2[ip+4] + x[1] * ipio2[ip+3]; + q[3] = x[0] * ipio2[ip+5] + x[1] * ipio2[ip+4]; + q[4] = x[0] * ipio2[ip+6] + x[1] * ipio2[ip+5]; + q[5] = x[0] * ipio2[ip+7] + x[1] * ipio2[ip+6]; + } else { + q[0] = x[0] * ipio2[ip+2]; + q[1] = x[0] * ipio2[ip+3]; + q[2] = x[0] * ipio2[ip+4]; + q[3] = x[0] * ipio2[ip+5]; + q[4] = x[0] * ipio2[ip+6]; + q[5] = x[0] * ipio2[ip+7]; + } + nq = 5; + eqnqm1 = eq0 - 96; + +recompute: + /* propagate carries and incorporate powers of two */ + s.i[HIWORD] = (0x3ff + eqnqm1) << 20; + s.i[LOWORD] = 0; + p = s.d; + z = q[nq] * twon24; + for (j = nq-1; j >= 1; j--) { + z += q[j]; + t = (z + round24) - round24; /* must be rounded to nearest */ + r[j+1] = (z - t) * p; + z = t * twon24; + p *= two24; + } + z += q[0]; + t = (z + round24) - round24; /* must be rounded to nearest */ + r[1] = (z - t) * p; + r[0] = t * p; + + /* form n = [r] mod 8 and leave the fractional part of r */ + if (eq0 > 0) { + /* binary point lies within r[2] */ + z = r[2] + r[3]; + t = (z + round1) - round1; /* must be rounded to nearest */ + r[2] -= t; + n = (int)(r[1] + t); + r[0] = r[1] = zero; + } else if (eq0 > -24) { + /* binary point lies within or just to the right of r[1] */ + z = r[1] + r[2]; + t = (z + round1) - round1; /* must be rounded to nearest */ + r[1] -= t; + z = r[0] + t; + /* cut off high part of z so conversion to int doesn't + overflow */ + t = (z + round24) - round24; + n = (int)(z - t); + r[0] = zero; + } else { + /* binary point lies within or just to the right of r[0] */ + z = r[0] + r[1]; + t = (z + round1) - round1; /* must be rounded to nearest */ + r[0] -= t; + n = (int)t; + } + + /* count the 
number of leading zeroes in r */ + for (j = 0; j <= nq; j++) { + if (r[j] != zero) + break; + } + + /* if fewer than 5 terms remain, add more */ + if (nq - j < 4) { + k = 4 - (nq - j); + /* + * compute q[nq+1] to q[nq+k] + * + * For some reason, writing out the nx loop explicitly + * for each of the three possible values (as above) seems + * to run a little slower, so we'll leave this code as is. + */ + for (i = nq + 1; i <= nq + k; i++) { + t = x[0] * ipio2[ip+2+i]; + for (j = 1; j < nx; j++) + t += x[j] * ipio2[ip+2+i-j]; + q[i] = t; + eqnqm1 -= 24; + } + nq += k; + goto recompute; + } + + /* set pr and nq so that pr[0,...,nq] is the part of r remaining */ + pr = &r[j]; + nq = nq - j; + + /* compute pio2 * pr[0,...,nq]; note that nq >= 4 here */ + q[0] = pio2[0] * pr[0]; + q[1] = pio2[0] * pr[1] + pio2[1] * pr[0]; + q[2] = pio2[0] * pr[2] + pio2[1] * pr[1] + pio2[2] * pr[0]; + q[3] = pio2[0] * pr[3] + pio2[1] * pr[2] + pio2[2] * pr[1] + + pio2[3] * pr[0]; + for (i = 4; i <= nq; i++) { + q[i] = pio2[0] * pr[i] + pio2[1] * pr[i-1] + pio2[2] * pr[i-2] + + pio2[3] * pr[i-3] + pio2[4] * pr[i-4]; + } + + /* sum q in increasing order to obtain the first term of y */ + t = q[nq]; + for (i = nq - 1; i >= 0; i--) + t += q[i]; + y[0] = t; + if (prec) { + /* subtract and sum again in decreasing order + to obtain the second term */ + t = q[0] - t; + for (i = 1; i <= nq; i++) + t += q[i]; + y[1] = t; + } + + return (n & 7); +} diff --git a/usr/src/libm/src/mvec/__vrhypot.c b/usr/src/libm/src/mvec/__vrhypot.c new file mode 100644 index 0000000..80e816b --- /dev/null +++ b/usr/src/libm/src/mvec/__vrhypot.c @@ -0,0 +1,428 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vrhypot.c 1.5 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double rhypot(double x, double y) + * + * Method : + * 1. Special cases: + * x or y = Inf => 0 + * x or y = NaN => QNaN + * x and y = 0 => Inf + divide-by-zero + * 2. Computes rhypot(x,y): + * rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm)) + * Where: + * m = 1/max(|x|,|y|) + * xnm = x * m + * ynm = y * m + * + * Compute 1/(xnm * xnm + ynm * ynm) by simulating + * muti-precision arithmetic. + * + * Accuracy: + * Maximum error observed: less than 0.869 ulp after 1.000.000.000 + * results. 
+ */ + +#define sqrt __sqrt + +extern double sqrt( double ); + +extern double fabs( double ); + +static const int __vlibm_TBL_rhypot[] = { +/* i = [0,127] + * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */ + 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, + 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, + 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, + 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, + 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, + 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, + 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, + 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, + 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, + 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, + 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, + 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, + 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, + 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, + 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, + 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, + 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, + 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539, + 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, + 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, + 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, + 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, + 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, + 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, + 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, + 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, + 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, + 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, + 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, + 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, + 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, + 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, +}; + +static const unsigned long long LCONST[] = { +0x3ff0000000000000ULL, /* DONE = 1.0 */ +0x4000000000000000ULL, /* DTWO = 2.0 */ 
+0x4230000000000000ULL, /* D2ON36 = 2**36 */ +0x7fd0000000000000ULL, /* D2ON1022 = 2**1022 */ +0x3cb0000000000000ULL, /* D2ONM52 = 2**-52 */ +}; + +#define RET_SC(I) \ + px += stridex; \ + py += stridey; \ + pz += stridez; \ + if ( --n <= 0 ) \ + break; \ + goto start##I; + +#define RETURN(I, ret) \ +{ \ + pz[0] = (ret); \ + RET_SC(I) \ +} + +#define PREP(I) \ +hx##I = HI(px); \ +hy##I = HI(py); \ +hx##I &= 0x7fffffff; \ +hy##I &= 0x7fffffff; \ +pz##I = pz; \ +if ( hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000 ) /* |X| or |Y| = Inf,NaN */ \ +{ \ + lx = LO(px); \ + ly = LO(py); \ + x = *px; \ + y = *py; \ + if ( hx##I == 0x7ff00000 && lx == 0 ) res0 = 0.0; /* |X| = Inf */ \ + else if ( hy##I == 0x7ff00000 && ly == 0 ) res0 = 0.0; /* |Y| = Inf */ \ + else res0 = fabs(x) + fabs(y); \ + \ + RETURN (I, res0) \ +} \ +x##I = *px; \ +y##I = *py; \ +diff0 = hy##I - hx##I; \ +j0 = diff0 >> 31; \ +if ( hx##I < 0x00100000 && hy##I < 0x00100000 ) /* |X| and |Y| = subnormal or zero */ \ +{ \ + lx = LO(px); \ + ly = LO(py); \ + x = x##I; \ + y = y##I; \ + \ + if ( (hx##I | hy##I | lx | ly) == 0 ) /* |X| and |Y| = 0 */ \ + RETURN (I, DONE / 0.0) \ + \ + x = fabs(x); \ + y = fabs(y); \ + \ + x = *(long long*)&x; \ + y = *(long long*)&y; \ + \ + x *= D2ONM52; \ + y *= D2ONM52; \ + \ + x_hi0 = ( x + D2ON36 ) - D2ON36; \ + y_hi0 = ( y + D2ON36 ) - D2ON36; \ + x_lo0 = x - x_hi0; \ + y_lo0 = y - y_hi0; \ + res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); \ + res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0); \ + \ + dres0 = res0_hi + res0_lo; \ + \ + iarr0 = HI(&dres0); \ + iexp0 = iarr0 & 0xfff00000; \ + \ + iarr0 = (iarr0 >> 11) & 0x1fc; \ + itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; \ + itbl0 -= iexp0; \ + HI(&dd0) = itbl0; \ + LO(&dd0) = 0; \ + \ + dd0 = dd0 * (DTWO - dd0 * dres0); \ + dd0 = dd0 * (DTWO - dd0 * dres0); \ + dres0 = dd0 * (DTWO - dd0 * dres0); \ + \ + HI(&res0) = HI(&dres0) & 0xffffff00; \ + LO(&res0) = 0; \ + res0 += (DONE - res0_hi * res0 - res0_lo * res0) * 
dres0; \ + res0 = sqrt ( res0 ); \ + \ + res0 = D2ON1022 * res0; \ + RETURN (I, res0) \ +} \ +j0 = hy##I - (diff0 & j0); \ +j0 &= 0x7ff00000; \ +HI(&scl##I) = 0x7ff00000 - j0; + +void +__vrhypot( int n, double * restrict px, int stridex, double * restrict py, + int stridey, double * restrict pz, int stridez ) +{ + int i = 0; + double x, y; + double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; + double x0, y0, res0, dd0; + double res0_hi,res0_lo, dres0; + double x_hi1, x_lo1, y_hi1, y_lo1, scl1 = 0; + double x1, y1, res1, dd1; + double res1_hi,res1_lo, dres1; + double x_hi2, x_lo2, y_hi2, y_lo2, scl2 = 0; + double x2, y2, res2, dd2; + double res2_hi,res2_lo, dres2; + + int hx0, hy0, j0, diff0; + int iarr0, iexp0, itbl0; + int hx1, hy1; + int iarr1, iexp1, itbl1; + int hx2, hy2; + int iarr2, iexp2, itbl2; + + int lx, ly; + + double DONE = ((double*)LCONST)[0]; + double DTWO = ((double*)LCONST)[1]; + double D2ON36 = ((double*)LCONST)[2]; + double D2ON1022 = ((double*)LCONST)[3]; + double D2ONM52 = ((double*)LCONST)[4]; + + double *pz0, *pz1, *pz2; + + do + { +start0: + PREP(0) + px += stridex; + py += stridey; + pz += stridez; + i = 1; + if ( --n <= 0 ) + break; + +start1: + PREP(1) + px += stridex; + py += stridey; + pz += stridez; + i = 2; + if ( --n <= 0 ) + break; + +start2: + PREP(2) + + x0 *= scl0; + y0 *= scl0; + x1 *= scl1; + y1 *= scl1; + x2 *= scl2; + y2 *= scl2; + + x_hi0 = ( x0 + D2ON36 ) - D2ON36; + y_hi0 = ( y0 + D2ON36 ) - D2ON36; + x_hi1 = ( x1 + D2ON36 ) - D2ON36; + y_hi1 = ( y1 + D2ON36 ) - D2ON36; + x_hi2 = ( x2 + D2ON36 ) - D2ON36; + y_hi2 = ( y2 + D2ON36 ) - D2ON36; + x_lo0 = x0 - x_hi0; + y_lo0 = y0 - y_hi0; + x_lo1 = x1 - x_hi1; + y_lo1 = y1 - y_hi1; + x_lo2 = x2 - x_hi2; + y_lo2 = y2 - y_hi2; + res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); + res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1); + res2_hi = (x_hi2 * x_hi2 + y_hi2 * y_hi2); + res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); + res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1); + res2_lo 
= ((x2 + x_hi2) * x_lo2 + (y2 + y_hi2) * y_lo2); + + dres0 = res0_hi + res0_lo; + dres1 = res1_hi + res1_lo; + dres2 = res2_hi + res2_lo; + + iarr0 = HI(&dres0); + iarr1 = HI(&dres1); + iarr2 = HI(&dres2); + iexp0 = iarr0 & 0xfff00000; + iexp1 = iarr1 & 0xfff00000; + iexp2 = iarr2 & 0xfff00000; + + iarr0 = (iarr0 >> 11) & 0x1fc; + iarr1 = (iarr1 >> 11) & 0x1fc; + iarr2 = (iarr2 >> 11) & 0x1fc; + itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; + itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0]; + itbl2 = ((int*)((char*)__vlibm_TBL_rhypot + iarr2))[0]; + itbl0 -= iexp0; + itbl1 -= iexp1; + itbl2 -= iexp2; + HI(&dd0) = itbl0; + HI(&dd1) = itbl1; + HI(&dd2) = itbl2; + LO(&dd0) = 0; + LO(&dd1) = 0; + LO(&dd2) = 0; + + dd0 = dd0 * (DTWO - dd0 * dres0); + dd1 = dd1 * (DTWO - dd1 * dres1); + dd2 = dd2 * (DTWO - dd2 * dres2); + dd0 = dd0 * (DTWO - dd0 * dres0); + dd1 = dd1 * (DTWO - dd1 * dres1); + dd2 = dd2 * (DTWO - dd2 * dres2); + dres0 = dd0 * (DTWO - dd0 * dres0); + dres1 = dd1 * (DTWO - dd1 * dres1); + dres2 = dd2 * (DTWO - dd2 * dres2); + + HI(&res0) = HI(&dres0) & 0xffffff00; + HI(&res1) = HI(&dres1) & 0xffffff00; + HI(&res2) = HI(&dres2) & 0xffffff00; + LO(&res0) = 0; + LO(&res1) = 0; + LO(&res2) = 0; + res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; + res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1; + res2 += (DONE - res2_hi * res2 - res2_lo * res2) * dres2; + res0 = sqrt ( res0 ); + res1 = sqrt ( res1 ); + res2 = sqrt ( res2 ); + + res0 = scl0 * res0; + res1 = scl1 * res1; + res2 = scl2 * res2; + + *pz0 = res0; + *pz1 = res1; + *pz2 = res2; + + px += stridex; + py += stridey; + pz += stridez; + i = 0; + + } while ( --n > 0 ); + + if ( i > 0 ) + { + x0 *= scl0; + y0 *= scl0; + + x_hi0 = ( x0 + D2ON36 ) - D2ON36; + y_hi0 = ( y0 + D2ON36 ) - D2ON36; + x_lo0 = x0 - x_hi0; + y_lo0 = y0 - y_hi0; + res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); + res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); + + dres0 = res0_hi + res0_lo; + + 
iarr0 = HI(&dres0); + iexp0 = iarr0 & 0xfff00000; + + iarr0 = (iarr0 >> 11) & 0x1fc; + itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; + itbl0 -= iexp0; + HI(&dd0) = itbl0; + LO(&dd0) = 0; + + dd0 = dd0 * (DTWO - dd0 * dres0); + dd0 = dd0 * (DTWO - dd0 * dres0); + dres0 = dd0 * (DTWO - dd0 * dres0); + + HI(&res0) = HI(&dres0) & 0xffffff00; + LO(&res0) = 0; + res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; + res0 = sqrt ( res0 ); + + res0 = scl0 * res0; + + *pz0 = res0; + + if ( i > 1 ) + { + x1 *= scl1; + y1 *= scl1; + + x_hi1 = ( x1 + D2ON36 ) - D2ON36; + y_hi1 = ( y1 + D2ON36 ) - D2ON36; + x_lo1 = x1 - x_hi1; + y_lo1 = y1 - y_hi1; + res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1); + res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1); + + dres1 = res1_hi + res1_lo; + + iarr1 = HI(&dres1); + iexp1 = iarr1 & 0xfff00000; + + iarr1 = (iarr1 >> 11) & 0x1fc; + itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0]; + itbl1 -= iexp1; + HI(&dd1) = itbl1; + LO(&dd1) = 0; + + dd1 = dd1 * (DTWO - dd1 * dres1); + dd1 = dd1 * (DTWO - dd1 * dres1); + dres1 = dd1 * (DTWO - dd1 * dres1); + + HI(&res1) = HI(&dres1) & 0xffffff00; + LO(&res1) = 0; + res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1; + res1 = sqrt ( res1 ); + + res1 = scl1 * res1; + + *pz1 = res1; + } + } +} + diff --git a/usr/src/libm/src/mvec/__vrhypotf.c b/usr/src/libm/src/mvec/__vrhypotf.c new file mode 100644 index 0000000..199a39c --- /dev/null +++ b/usr/src/libm/src/mvec/__vrhypotf.c @@ -0,0 +1,462 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vrhypotf.c 1.5 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float rhypotf(float x, float y) + * + * Method : + * 1. Special cases: + * for x or y = Inf => 0; + * for x or y = NaN => QNaN; + * for x and y = 0 => +Inf + divide-by-zero; + * 2. Computes d = x * x + y * y; + * 3. Computes reciprocal square root from: + * d = m * 2**n + * Where: + * m = [0.5, 2), + * n = ((exponent + 1) & ~1). + * Then: + * rsqrtf(d) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m)) + * 4. Computes 1/sqrt(m) from: + * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm)) + * Where: + * m = m0 + dm, + * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63]; + * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127]; + * Then: + * 1/sqrt(m0), 1/m0 are looked up in a table, + * 1/sqrt(1 + (1/m0)*dm) is computed using approximation: + * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0 + * where z = [-1/64, 1/64]. + * + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-27.87). + * Maximum error observed: less than 0.535 ulp after 3.000.000.000 + * results. 
+ */ + +#pragma align 32 (__vlibm_TBL_rhypotf) + +static const double __vlibm_TBL_rhypotf[] = { +/* + i = [0,63] + TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); + TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); + TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); + TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); +*/ + 1.0000000000000000000e+00, 3.5355339059327378637e-01, + 9.8461538461538467004e-01, 3.5082320772281166965e-01, + 9.6969696969696972388e-01, 3.4815531191139570399e-01, + 9.5522388059701490715e-01, 3.4554737023254405992e-01, + 9.4117647058823528106e-01, 3.4299717028501769400e-01, + 9.2753623188405798228e-01, 3.4050261230349943009e-01, + 9.1428571428571425717e-01, 3.3806170189140660742e-01, + 9.0140845070422537244e-01, 3.3567254331867563133e-01, + 8.8888888888888883955e-01, 3.3333333333333331483e-01, + 8.7671232876712323900e-01, 3.3104235544094717802e-01, + 8.6486486486486491287e-01, 3.2879797461071458287e-01, + 8.5333333333333338810e-01, 3.2659863237109043599e-01, + 8.4210526315789469010e-01, 3.2444284226152508843e-01, + 8.3116883116883122362e-01, 3.2232918561015211356e-01, + 8.2051282051282048435e-01, 3.2025630761017426229e-01, + 8.1012658227848100001e-01, 3.1822291367029204023e-01, + 8.0000000000000004441e-01, 3.1622776601683794118e-01, + 7.9012345679012341293e-01, 3.1426968052735443360e-01, + 7.8048780487804880757e-01, 3.1234752377721214378e-01, + 7.7108433734939763049e-01, 3.1046021028253312224e-01, + 7.6190476190476186247e-01, 3.0860669992418382490e-01, + 7.5294117647058822484e-01, 3.0678599553894819740e-01, + 7.4418604651162789665e-01, 3.0499714066520933198e-01, + 7.3563218390804596680e-01, 3.0323921743156134756e-01, + 7.2727272727272729291e-01, 3.0151134457776362918e-01, + 7.1910112359550559802e-01, 2.9981267559834456904e-01, + 7.1111111111111113825e-01, 2.9814239699997197031e-01, + 7.0329670329670335160e-01, 
2.9649972666444046610e-01, + 6.9565217391304345895e-01, 2.9488391230979427160e-01, + 6.8817204301075274309e-01, 2.9329423004270660513e-01, + 6.8085106382978721751e-01, 2.9172998299578911663e-01, + 6.7368421052631577428e-01, 2.9019050004400465115e-01, + 6.6666666666666662966e-01, 2.8867513459481286553e-01, + 6.5979381443298967813e-01, 2.8718326344709527165e-01, + 6.5306122448979586625e-01, 2.8571428571428569843e-01, + 6.4646464646464651960e-01, 2.8426762180748055275e-01, + 6.4000000000000001332e-01, 2.8284271247461900689e-01, + 6.3366336633663367106e-01, 2.8143901789211672737e-01, + 6.2745098039215685404e-01, 2.8005601680560193723e-01, + 6.2135922330097081989e-01, 2.7869320571664707442e-01, + 6.1538461538461541878e-01, 2.7735009811261457369e-01, + 6.0952380952380957879e-01, 2.7602622373694168934e-01, + 6.0377358490566035432e-01, 2.7472112789737807015e-01, + 5.9813084112149528249e-01, 2.7343437080986532361e-01, + 5.9259259259259255970e-01, 2.7216552697590867815e-01, + 5.8715596330275232617e-01, 2.7091418459143856712e-01, + 5.8181818181818178992e-01, 2.6967994498529684888e-01, + 5.7657657657657657158e-01, 2.6846242208560971987e-01, + 5.7142857142857139685e-01, 2.6726124191242439654e-01, + 5.6637168141592919568e-01, 2.6607604209509572168e-01, + 5.6140350877192979340e-01, 2.6490647141300877054e-01, + 5.5652173913043478937e-01, 2.6375218935831479250e-01, + 5.5172413793103447510e-01, 2.6261286571944508772e-01, + 5.4700854700854706358e-01, 2.6148818018424535570e-01, + 5.4237288135593220151e-01, 2.6037782196164771520e-01, + 5.3781512605042014474e-01, 2.5928148942086576278e-01, + 5.3333333333333332593e-01, 2.5819888974716115326e-01, + 5.2892561983471075848e-01, 2.5712973861329002645e-01, + 5.2459016393442625681e-01, 2.5607375986579195004e-01, + 5.2032520325203257539e-01, 2.5503068522533534068e-01, + 5.1612903225806450180e-01, 2.5400025400038100942e-01, + 5.1200000000000001066e-01, 2.5298221281347033074e-01, + 5.0793650793650790831e-01, 2.5197631533948483540e-01, + 
5.0393700787401574104e-01, 2.5098232205526344041e-01, + 1.0000000000000000000e+00, 2.5000000000000000000e-01, + 9.8461538461538467004e-01, 2.4806946917841690703e-01, + 9.6969696969696972388e-01, 2.4618298195866547551e-01, + 9.5522388059701490715e-01, 2.4433888871261044695e-01, + 9.4117647058823528106e-01, 2.4253562503633296910e-01, + 9.2753623188405798228e-01, 2.4077170617153839660e-01, + 9.1428571428571425717e-01, 2.3904572186687872426e-01, + 9.0140845070422537244e-01, 2.3735633163877067897e-01, + 8.8888888888888883955e-01, 2.3570226039551583908e-01, + 8.7671232876712323900e-01, 2.3408229439226113655e-01, + 8.6486486486486491287e-01, 2.3249527748763856860e-01, + 8.5333333333333338810e-01, 2.3094010767585029797e-01, + 8.4210526315789469010e-01, 2.2941573387056177213e-01, + 8.3116883116883122362e-01, 2.2792115291927589338e-01, + 8.2051282051282048435e-01, 2.2645540682891915352e-01, + 8.1012658227848100001e-01, 2.2501758018520479077e-01, + 8.0000000000000004441e-01, 2.2360679774997896385e-01, + 7.9012345679012341293e-01, 2.2222222222222220989e-01, + 7.8048780487804880757e-01, 2.2086305214969309541e-01, + 7.7108433734939763049e-01, 2.1952851997938069295e-01, + 7.6190476190476186247e-01, 2.1821789023599238999e-01, + 7.5294117647058822484e-01, 2.1693045781865616384e-01, + 7.4418604651162789665e-01, 2.1566554640687682354e-01, + 7.3563218390804596680e-01, 2.1442250696755896233e-01, + 7.2727272727272729291e-01, 2.1320071635561044232e-01, + 7.1910112359550559802e-01, 2.1199957600127200541e-01, + 7.1111111111111113825e-01, 2.1081851067789195153e-01, + 7.0329670329670335160e-01, 2.0965696734438366011e-01, + 6.9565217391304345895e-01, 2.0851441405707477061e-01, + 6.8817204301075274309e-01, 2.0739033894608505104e-01, + 6.8085106382978721751e-01, 2.0628424925175867233e-01, + 6.7368421052631577428e-01, 2.0519567041703082322e-01, + 6.6666666666666662966e-01, 2.0412414523193150862e-01, + 6.5979381443298967813e-01, 2.0306923302672380549e-01, + 6.5306122448979586625e-01, 
2.0203050891044216364e-01, + 6.4646464646464651960e-01, 2.0100756305184241945e-01, + 6.4000000000000001332e-01, 2.0000000000000001110e-01, + 6.3366336633663367106e-01, 1.9900743804199783060e-01, + 6.2745098039215685404e-01, 1.9802950859533485772e-01, + 6.2135922330097081989e-01, 1.9706585563285863860e-01, + 6.1538461538461541878e-01, 1.9611613513818404453e-01, + 6.0952380952380957879e-01, 1.9518001458970662965e-01, + 6.0377358490566035432e-01, 1.9425717247145282696e-01, + 5.9813084112149528249e-01, 1.9334729780913270658e-01, + 5.9259259259259255970e-01, 1.9245008972987526219e-01, + 5.8715596330275232617e-01, 1.9156525704423027490e-01, + 5.8181818181818178992e-01, 1.9069251784911847580e-01, + 5.7657657657657657158e-01, 1.8983159915049979682e-01, + 5.7142857142857139685e-01, 1.8898223650461362655e-01, + 5.6637168141592919568e-01, 1.8814417367671945613e-01, + 5.6140350877192979340e-01, 1.8731716231633879777e-01, + 5.5652173913043478937e-01, 1.8650096164806276300e-01, + 5.5172413793103447510e-01, 1.8569533817705186074e-01, + 5.4700854700854706358e-01, 1.8490006540840969729e-01, + 5.4237288135593220151e-01, 1.8411492357966466327e-01, + 5.3781512605042014474e-01, 1.8333969940564226464e-01, + 5.3333333333333332593e-01, 1.8257418583505535814e-01, + 5.2892561983471075848e-01, 1.8181818181818182323e-01, + 5.2459016393442625681e-01, 1.8107149208503706128e-01, + 5.2032520325203257539e-01, 1.8033392693348646030e-01, + 5.1612903225806450180e-01, 1.7960530202677491007e-01, + 5.1200000000000001066e-01, 1.7888543819998317663e-01, + 5.0793650793650790831e-01, 1.7817416127494958844e-01, + 5.0393700787401574104e-01, 1.7747130188322274291e-01, +}; + +#define fabsf __fabsf + +extern float fabsf( float ); + +static const double + A0 = 9.99999997962321453275e-01, + A1 =-4.99999998166077580600e-01, + A2 = 3.75066768969515586277e-01, + A3 =-3.12560092408808548438e-01; + +static void +__vrhypotf_n( int n, float * restrict px, int stridex, float * restrict py, + int stridey, float * restrict 
pz, int stridez );
+
+#pragma no_inline(__vrhypotf_n)
+
+/*
+ * RETURN(ret): store the result of a special-case element and advance
+ * pz.  Used only inside the scanning for-loop of __vrhypotf:
+ *  - if no ordinary elements are pending (n_n == 0), restart the batch
+ *    at the next element and "continue" the scan;
+ *  - otherwise count this element as consumed (n--) and "break" out of
+ *    the scan so the pending batch is handed to __vrhypotf_n.
+ */
+#define RETURN(ret) \
+{ \
+	*pz = (ret); \
+	pz += stridez; \
+	if ( n_n == 0 ) \
+	{ \
+		spx = px; spy = py; spz = pz; \
+		ay0 = *(int*)py; \
+		continue; \
+	} \
+	n--; \
+	break; \
+}
+
+
+/*
+ * Vector single-precision reciprocal hypotenuse (see the method comment
+ * above): stores one result through pz for each of the n elements of
+ * px and py.  Strides are in elements.
+ *
+ * All elements but the last are scanned here: Inf/NaN inputs and the
+ * x == y == 0 case are stored directly, while runs of ordinary elements
+ * are counted in n_n and passed to __vrhypotf_n starting at
+ * spx/spy/spz.  The last element is handled by the scalar code that
+ * follows the while loop.  Nothing is read or stored when n <= 0.
+ */
+void
+__vrhypotf( int n, float * restrict px, int stridex, float * restrict py,
+	int stridey, float * restrict pz, int stridez )
+{
+	float *spx, *spy, *spz;
+	int ax0, ay0, n_n;
+	float res, x0, y0;
+
+	while ( n > 1 )
+	{
+		n_n = 0;
+		spx = px;
+		spy = py;
+		spz = pz;
+		ax0 = *(int*)px;
+		ay0 = *(int*)py;
+		for ( ; n > 1 ; n-- )
+		{
+			ax0 &= 0x7fffffff;
+			ay0 &= 0x7fffffff;
+
+			px += stridex;
+
+			if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 ) /* X or Y = NaN or Inf */
+			{
+				x0 = *(px - stridex);
+				y0 = *py;
+				res = fabsf(x0) + fabsf(y0);
+				if( ax0 == 0x7f800000 ) res = 0.0f;
+				else if( ay0 == 0x7f800000 ) res = 0.0f;
+				ax0 = *(int*)px; /* preload the bits of the next x */
+				py += stridey;
+				RETURN ( res )
+			}
+			ax0 = *(int*)px; /* preload the bits of the next x */
+			py += stridey;
+			if ( ay0 == 0 ) /* Y = 0 */
+			{
+				int tx = *(int*)(px - stridex) & 0x7fffffff;
+				if ( tx == 0 ) /* X = 0 */
+				{
+					RETURN ( 1.0f / 0.0f )
+				}
+			}
+			pz += stridez;
+			n_n++;
+			ay0 = *(int*)py;
+		}
+		if ( n_n > 0 )
+			__vrhypotf_n( n_n, spx, stridex, spy, stridey, spz, stridez );
+	}
+	/* last element: same special cases, then the scalar computation */
+	if ( n > 0 )
+	{
+		ax0 = *(int*)px;
+		ay0 = *(int*)py;
+		x0 = *px;
+		y0 = *py;
+
+		ax0 &= 0x7fffffff;
+		ay0 &= 0x7fffffff;
+
+		if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 ) /* X or Y = NaN or Inf */
+		{
+			res = fabsf(x0) + fabsf(y0);
+			if( ax0 == 0x7f800000 ) res = 0.0f;
+			else if( ay0 == 0x7f800000 ) res = 0.0f;
+			*pz = res;
+		}
+		else if ( ax0 == 0 && ay0 == 0 ) /* X and Y = 0 */
+		{
+			*pz = 1.0f / 0.0f;
+		}
+		else
+		{
+			double xx0, res0, hyp0, h_hi0 = 0, dbase0 = 0;
+			int ibase0, si0, hyp0h;
+
+			/* x*x + y*y, formed in double precision */
+			hyp0 = x0 * (double)x0 + y0 * (double)y0;
+
+			ibase0 = HI(&hyp0);
+
+			/* scale factor whose high word is derived from the exponent bits of hyp0 */
+			HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1));
+
+			hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000;
+			HI(&hyp0) = hyp0h;
+			HI(&h_hi0) = hyp0h & 0x7fffc000;
+
+			ibase0 >>= 10;
+			si0 = ibase0 & 0x7f0; /* byte offset of a pair of doubles in the table */
+			xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0];
+
+			xx0 = (hyp0 - h_hi0) * xx0;
+			res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1];
+			res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
+			res0 *= dbase0;
+			*pz = res0;
+		}
+	}
+}
+
+static void
+__vrhypotf_n( int n, float * restrict px, int stridex, float * restrict py,
+	int stridey, float * restrict pz, int stridez )
+{
+	double xx0, res0, hyp0, h_hi0 = 0, dbase0 = 0;
+	double xx1, res1, hyp1, h_hi1 = 0, dbase1 = 0;
+	double xx2, res2, hyp2, h_hi2 = 0, dbase2 = 0;
+	float x0, y0;
+	float x1, y1;
+	float x2, y2;
+	int ibase0, si0, hyp0h;
+	int ibase1, si1, hyp1h;
+	int ibase2, si2, hyp2h;
+
+	for ( ; n > 2 ; n -= 3 )
+	{
+		x0 = *px;
+		px += stridex;
+		x1 = *px;
+		px += stridex;
+		x2 = *px;
+		px += stridex;
+
+		y0 = *py;
+		py += stridey;
+		y1 = *py;
+		py += stridey;
+		y2 = *py;
+		py += stridey;
+
+		hyp0 = x0 * (double)x0 + y0 * (double)y0;
+		hyp1 = x1 * (double)x1 + y1 * (double)y1;
+		hyp2 = x2 * (double)x2 + y2 * (double)y2;
+
+		ibase0 = HI(&hyp0);
+		ibase1 = HI(&hyp1);
+		ibase2 = HI(&hyp2);
+
+		HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1));
+		HI(&dbase1) = (0x60000000 - ((ibase1 & 0x7fe00000) >> 1));
+		HI(&dbase2) = (0x60000000 - ((ibase2 & 0x7fe00000) >> 1));
+
+		hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000;
+		hyp1h = (ibase1 & 0x000fffff) | 0x3ff00000;
+		hyp2h = (ibase2 & 0x000fffff) | 0x3ff00000;
+		HI(&hyp0) = hyp0h;
+		HI(&hyp1) = hyp1h;
+		HI(&hyp2) = hyp2h;
+		HI(&h_hi0) = hyp0h & 0x7fffc000;
+		HI(&h_hi1) = hyp1h & 0x7fffc000;
+		HI(&h_hi2) = hyp2h & 0x7fffc000;
+
+		ibase0 >>= 10;
+		ibase1 >>= 10;
+		ibase2 >>= 10;
+		si0 = ibase0 & 0x7f0;
+		si1 = ibase1 & 0x7f0;
+		si2 = ibase2 & 0x7f0;
+		xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0];
+		xx1 = ((double*)((char*)__vlibm_TBL_rhypotf + si1))[0];
+		xx2 = ((double*)((char*)__vlibm_TBL_rhypotf + si2))[0];
+
+		xx0 = (hyp0 - h_hi0) * xx0;
+		xx1 = (hyp1 - h_hi1) * xx1;
+		xx2 = (hyp2 - h_hi2) * xx2;
+ 
res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1]; + res1 = ((double*)((char*)__vlibm_TBL_rhypotf + si1))[1]; + res2 = ((double*)((char*)__vlibm_TBL_rhypotf + si2))[1]; + res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res1 *= (((A3 * xx1 + A2) * xx1 + A1) * xx1 + A0); + res2 *= (((A3 * xx2 + A2) * xx2 + A1) * xx2 + A0); + res0 *= dbase0; + res1 *= dbase1; + res2 *= dbase2; + *pz = res0; + pz += stridez; + *pz = res1; + pz += stridez; + *pz = res2; + pz += stridez; + } + + for ( ; n > 0 ; n-- ) + { + x0 = *px; + px += stridex; + + y0 = *py; + py += stridey; + + hyp0 = x0 * (double)x0 + y0 * (double)y0; + + ibase0 = HI(&hyp0); + + HI(&dbase0) = (0x60000000 - ((ibase0 & 0x7fe00000) >> 1)); + + hyp0h = (ibase0 & 0x000fffff) | 0x3ff00000; + HI(&hyp0) = hyp0h; + HI(&h_hi0) = hyp0h & 0x7fffc000; + + ibase0 >>= 10; + si0 = ibase0 & 0x7f0; + xx0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[0]; + + xx0 = (hyp0 - h_hi0) * xx0; + res0 = ((double*)((char*)__vlibm_TBL_rhypotf + si0))[1]; + res0 *= (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res0 *= dbase0; + *pz = res0; + pz += stridez; + } +} + diff --git a/usr/src/libm/src/mvec/__vrsqrt.c b/usr/src/libm/src/mvec/__vrsqrt.c new file mode 100644 index 0000000..80a4cbd --- /dev/null +++ b/usr/src/libm/src/mvec/__vrsqrt.c @@ -0,0 +1,412 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vrsqrt.c 1.4 06/01/31 SMI" + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* double rsqrt(double x) + * + * Method : + * 1. Special cases: + * for x = NaN => QNaN; + * for x = +Inf => 0; + * for x is negative, -Inf => QNaN + invalid; + * for x = +0 => +Inf + divide-by-zero; + * for x = -0 => -Inf + divide-by-zero. + * 2. Computes reciprocal square root from: + * x = m * 2**n + * Where: + * m = [0.5, 2), + * n = ((exponent + 1) & ~1). + * Then: + * rsqrt(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m)) + * 3. Computes 1/sqrt(m) from: + * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm)) + * Where: + * m = m0 + dm, + * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63]; + * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127]; + * m0 = 2.0 for m = [1.0+127/128, 2.0), k = 128. + * Then: + * 1/sqrt(m0) is looked up in a table, + * 1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)). + * 1/sqrt(1 + (1/m0)*dm) is computed using approximation: + * 1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3) + * * z + a2) * z + a1) * z + a0 + * where z = [-1/128, 1/128]. + * + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-56.26). + * Maximum error observed: less than 0.563 ulp after 1.500.000.000 + * results.
+ */ + +#define sqrt __sqrt + +extern double sqrt ( double ); +extern const double __vlibm_TBL_rsqrt[]; + +static void +__vrsqrt_n( int n, double * restrict px, int stridex, double * restrict py, int stridey ); + +#pragma no_inline(__vrsqrt_n) + +#define RETURN(ret) \ +{ \ + *py = (ret); \ + py += stridey; \ + if ( n_n == 0 ) \ + { \ + spx = px; spy = py; \ + hx = HI(px); \ + continue; \ + } \ + n--; \ + break; \ +} + +static const double + DONE = 1.0, + K1 = -5.00000000000005209867e-01, + K2 = 3.75000000000004884257e-01, + K3 = -3.12499999317136886551e-01, + K4 = 2.73437499359815081532e-01, + K5 = -2.46116125605037803130e-01, + K6 = 2.25606914648617522896e-01; + +void +__vrsqrt( int n, double * restrict px, int stridex, double * restrict py, int stridey ) +{ + double *spx, *spy; + int ax, lx, hx, n_n; + double res; + + while ( n > 1 ) + { + n_n = 0; + spx = px; + spy = py; + hx = HI(px); + for ( ; n > 1 ; n--) + { + px += stridex; + if ( hx >= 0x7ff00000 ) /* X = NaN or Inf */ + { + res = *(px - stridex); + RETURN ( DONE / res ) + } + + py += stridey; + + if ( hx < 0x00100000 ) /* X = denormal, zero or negative */ + { + py -= stridey; + ax = hx & 0x7fffffff; + lx = LO((px - stridex)); + res = *(px - stridex); + + if ( (ax | lx) == 0 ) /* |X| = zero */ + { + RETURN ( DONE / res ) + } + else if ( hx >= 0 ) /* X = denormal */ + { + double res_c0, dsqrt_exp0; + int ind0, sqrt_exp0; + double xx0, dexp_hi0, dexp_lo0; + int hx0, resh0, res_ch0; + + res = *(long long*)&res; + + hx0 = HI(&res); + sqrt_exp0 = (0x817 - (hx0 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res) = resh0; + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res - res_c0) * xx0; + res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + 
K2) * xx0 + K1) * xx0; + + res = dexp_hi0 * res + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res *= dsqrt_exp0; + + RETURN ( res ) + } + else /* X = negative */ + { + RETURN ( sqrt(res) ) + } + } + n_n++; + hx = HI(px); + } + if ( n_n > 0 ) + __vrsqrt_n( n_n, spx, stridex, spy, stridey ); + } + if ( n > 0 ) + { + hx = HI(px); + + if ( hx >= 0x7ff00000 ) /* X = NaN or Inf */ + { + res = *px; + *py = DONE / res; + } + else if ( hx < 0x00100000 ) /* X = denormal, zero or negative */ + { + ax = hx & 0x7fffffff; + lx = LO(px); + res = *px; + + if ( (ax | lx) == 0 ) /* |X| = zero */ + { + *py = DONE / res; + } + else if ( hx >= 0 ) /* X = denormal */ + { + double res_c0, dsqrt_exp0; + int ind0, sqrt_exp0; + double xx0, dexp_hi0, dexp_lo0; + int hx0, resh0, res_ch0; + + res = *(long long*)&res; + + hx0 = HI(&res); + sqrt_exp0 = (0x817 - (hx0 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res) = resh0; + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res - res_c0) * xx0; + res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + + res = dexp_hi0 * res + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res *= dsqrt_exp0; + + *py = res; + } + else /* X = negative */ + { + *py = sqrt(res); + } + } + else + { + double res_c0, dsqrt_exp0; + int ind0, sqrt_exp0; + double xx0, dexp_hi0, dexp_lo0; + int resh0, res_ch0; + + sqrt_exp0 = (0x5fe - (hx >> 21)) << 20; + ind0 = (((hx >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res) = resh0; + LO(&res) = LO(px); + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt 
+ ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res - res_c0) * xx0; + res = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + + res = dexp_hi0 * res + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res *= dsqrt_exp0; + + *py = res; + } + } +} + +static void +__vrsqrt_n( int n, double * restrict px, int stridex, double * restrict py, int stridey ) +{ + double res0, res_c0, dsqrt_exp0; + double res1, res_c1, dsqrt_exp1; + double res2, res_c2, dsqrt_exp2; + int ind0, sqrt_exp0; + int ind1, sqrt_exp1; + int ind2, sqrt_exp2; + double xx0, dexp_hi0, dexp_lo0; + double xx1, dexp_hi1, dexp_lo1; + double xx2, dexp_hi2, dexp_lo2; + int hx0, resh0, res_ch0; + int hx1, resh1, res_ch1; + int hx2, resh2, res_ch2; + + LO(&dsqrt_exp0) = 0; + LO(&dsqrt_exp1) = 0; + LO(&dsqrt_exp2) = 0; + LO(&res_c0) = 0; + LO(&res_c1) = 0; + LO(&res_c2) = 0; + + for( ; n > 2 ; n -= 3 ) + { + hx0 = HI(px); + LO(&res0) = LO(px); + px += stridex; + + hx1 = HI(px); + LO(&res1) = LO(px); + px += stridex; + + hx2 = HI(px); + LO(&res2) = LO(px); + px += stridex; + + sqrt_exp0 = (0x5fe - (hx0 >> 21)) << 20; + sqrt_exp1 = (0x5fe - (hx1 >> 21)) << 20; + sqrt_exp2 = (0x5fe - (hx2 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + ind1 = (((hx1 >> 10) & 0x7f8) + 8) & -16; + ind2 = (((hx2 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + resh1 = (hx1 & 0x001fffff) | 0x3fe00000; + resh2 = (hx2 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + res_ch1 = (resh1 + 0x00002000) & 0x7fffc000; + res_ch2 = (resh2 + 0x00002000) & 0x7fffc000; + HI(&res0) = resh0; + HI(&res1) = resh1; + HI(&res2) = resh2; + HI(&res_c0) = res_ch0; + HI(&res_c1) = res_ch1; + HI(&res_c2) = res_ch2; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_hi1 = ((double*)((char*)__vlibm_TBL_rsqrt + ind1))[0]; + dexp_hi2 = 
((double*)((char*)__vlibm_TBL_rsqrt + ind2))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + dexp_lo1 = ((double*)((char*)__vlibm_TBL_rsqrt + ind1))[1]; + dexp_lo2 = ((double*)((char*)__vlibm_TBL_rsqrt + ind2))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx1 = dexp_hi1 * dexp_hi1; + xx2 = dexp_hi2 * dexp_hi2; + xx0 = (res0 - res_c0) * xx0; + xx1 = (res1 - res_c1) * xx1; + xx2 = (res2 - res_c2) * xx2; + res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + res1 = (((((K6 * xx1 + K5) * xx1 + K4) * xx1 + K3) * xx1 + K2) * xx1 + K1) * xx1; + res2 = (((((K6 * xx2 + K5) * xx2 + K4) * xx2 + K3) * xx2 + K2) * xx2 + K1) * xx2; + + res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0; + res1 = dexp_hi1 * res1 + dexp_lo1 + dexp_hi1; + res2 = dexp_hi2 * res2 + dexp_lo2 + dexp_hi2; + + HI(&dsqrt_exp0) = sqrt_exp0; + HI(&dsqrt_exp1) = sqrt_exp1; + HI(&dsqrt_exp2) = sqrt_exp2; + res0 *= dsqrt_exp0; + res1 *= dsqrt_exp1; + res2 *= dsqrt_exp2; + + *py = res0; + py += stridey; + + *py = res1; + py += stridey; + + *py = res2; + py += stridey; + } + + for( ; n > 0 ; n-- ) + { + hx0 = HI(px); + + sqrt_exp0 = (0x5fe - (hx0 >> 21)) << 20; + ind0 = (((hx0 >> 10) & 0x7f8) + 8) & -16; + + resh0 = (hx0 & 0x001fffff) | 0x3fe00000; + res_ch0 = (resh0 + 0x00002000) & 0x7fffc000; + HI(&res0) = resh0; + LO(&res0) = LO(px); + HI(&res_c0) = res_ch0; + LO(&res_c0) = 0; + + px += stridex; + + dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0]; + dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1]; + xx0 = dexp_hi0 * dexp_hi0; + xx0 = (res0 - res_c0) * xx0; + res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0; + + res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0; + + HI(&dsqrt_exp0) = sqrt_exp0; + LO(&dsqrt_exp0) = 0; + res0 *= dsqrt_exp0; + + *py = res0; + py += stridey; + } +} + diff --git a/usr/src/libm/src/mvec/__vrsqrtf.c b/usr/src/libm/src/mvec/__vrsqrtf.c new file mode 100644 index 0000000..2ce7b39 --- /dev/null +++ 
b/usr/src/libm/src/mvec/__vrsqrtf.c @@ -0,0 +1,500 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vrsqrtf.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* float rsqrtf(float x) + * + * Method : + * 1. Special cases: + * for x = NaN => QNaN; + * for x = +Inf => 0; + * for x is negative, -Inf => QNaN + invalid; + * for x = +0 => +Inf + divide-by-zero; + * for x = -0 => -Inf + divide-by-zero. + * 2. Computes reciprocal square root from: + * x = m * 2**n + * Where: + * m = [0.5, 2), + * n = ((exponent + 1) & ~1). + * Then: + * rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m)) + * 3.
Computes 1/sqrt(m) from: + * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm)) + * Where: + * m = m0 + dm, + * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63]; + * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127]; + * Then: + * 1/sqrt(m0), 1/m0 are looked up in a table, + * 1/sqrt(1 + (1/m0)*dm) is computed using approximation: + * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0 + * where z = [-1/64, 1/64]. + * + * Accuracy: + * The maximum relative error for the approximating + * polynomial is 2**(-27.87). + * Maximum error observed: less than 0.534 ulp for the + * whole float type range. + */ + +#define sqrtf __sqrtf + +extern float sqrtf( float ); + +static const double __TBL_rsqrtf[] = { +/* +i = [0,63] + TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24; + TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); +i = [64,127] + TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23; + TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); +*/ + 1.1920928955078125000e-07, 1.4142135623730951455e+00, + 1.1737530048076923728e-07, 1.4032928308912466786e+00, + 1.1559688683712121533e-07, 1.3926212476455828160e+00, + 1.1387156016791044559e-07, 1.3821894809301762397e+00, + 1.1219697840073529256e-07, 1.3719886811400707760e+00, + 1.1057093523550724772e-07, 1.3620104492139977204e+00, + 1.0899135044642856803e-07, 1.3522468075656264297e+00, + 1.0745626100352112918e-07, 1.3426901732747025253e+00, + 1.0596381293402777190e-07, 1.3333333333333332593e+00, + 1.0451225385273972023e-07, 1.3241694217637887121e+00, + 1.0309992609797297870e-07, 1.3151918984428583315e+00, + 1.0172526041666667320e-07, 1.3063945294843617440e+00, + 1.0038677014802631022e-07, 1.2977713690461003537e+00, + 9.9083045860389616921e-08, 1.2893167424406084542e+00, + 9.7812750400641022247e-08, 1.2810252304406970492e+00, + 9.6574614319620251657e-08, 1.2728916546811681609e+00, + 
9.5367431640625005294e-08, 1.2649110640673517647e+00, + 9.4190055941358019463e-08, 1.2570787221094177344e+00, + 9.3041396722560978838e-08, 1.2493900951088485751e+00, + 9.1920416039156631290e-08, 1.2418408411301324890e+00, + 9.0826125372023804482e-08, 1.2344267996967352996e+00, + 8.9757582720588234048e-08, 1.2271439821557927896e+00, + 8.8713889898255812722e-08, 1.2199885626608373279e+00, + 8.7694190014367814875e-08, 1.2129568697262453902e+00, + 8.6697665127840911497e-08, 1.2060453783110545167e+00, + 8.5723534058988761666e-08, 1.1992507023933782762e+00, + 8.4771050347222225457e-08, 1.1925695879998878812e+00, + 8.3839500343406599951e-08, 1.1859989066577618644e+00, + 8.2928201426630432481e-08, 1.1795356492391770864e+00, + 8.2036500336021511923e-08, 1.1731769201708264205e+00, + 8.1163771609042551220e-08, 1.1669199319831564665e+00, + 8.0309416118421050820e-08, 1.1607620001760186046e+00, + 7.9472859700520828922e-08, 1.1547005383792514621e+00, + 7.8653551868556699530e-08, 1.1487330537883810866e+00, + 7.7850964604591830522e-08, 1.1428571428571427937e+00, + 7.7064591224747481298e-08, 1.1370704872299222110e+00, + 7.6293945312500001588e-08, 1.1313708498984760276e+00, + 7.5538559715346535571e-08, 1.1257560715684669095e+00, + 7.4797985600490195040e-08, 1.1202240672224077489e+00, + 7.4071791565533974158e-08, 1.1147728228665882977e+00, + 7.3359562800480773303e-08, 1.1094003924504582947e+00, + 7.2660900297619054173e-08, 1.1041048949477667573e+00, + 7.1975420106132072725e-08, 1.0988845115895122806e+00, + 7.1302752628504667579e-08, 1.0937374832394612945e+00, + 7.0642541956018514597e-08, 1.0886621079036347126e+00, + 6.9994445240825691959e-08, 1.0836567383657542685e+00, + 6.9358132102272723904e-08, 1.0787197799411873955e+00, + 6.8733284065315314719e-08, 1.0738496883424388795e+00, + 6.8119594029017853361e-08, 1.0690449676496975862e+00, + 6.7516765763274335346e-08, 1.0643041683803828867e+00, + 6.6924513432017540145e-08, 1.0596258856520350822e+00, + 6.6342561141304348632e-08, 
1.0550087574332591700e+00, + 6.5770642510775861156e-08, 1.0504514628777803509e+00, + 6.5208500267094023655e-08, 1.0459527207369814228e+00, + 6.4655885858050847233e-08, 1.0415112878465908608e+00, + 6.4112559086134451001e-08, 1.0371259576834630511e+00, + 6.3578287760416665784e-08, 1.0327955589886446131e+00, + 6.3052847365702481089e-08, 1.0285189544531601058e+00, + 6.2536020747950822927e-08, 1.0242950394631678002e+00, + 6.2027597815040656970e-08, 1.0201227409013413627e+00, + 6.1527375252016127325e-08, 1.0160010160015240377e+00, + 6.1035156250000001271e-08, 1.0119288512538813229e+00, + 6.0550750248015869655e-08, 1.0079052613579393416e+00, + 6.0073972687007873182e-08, 1.0039292882210537616e+00, + 1.1920928955078125000e-07, 1.0000000000000000000e+00, + 1.1737530048076923728e-07, 9.9227787671366762812e-01, + 1.1559688683712121533e-07, 9.8473192783466190203e-01, + 1.1387156016791044559e-07, 9.7735555485044178781e-01, + 1.1219697840073529256e-07, 9.7014250014533187638e-01, + 1.1057093523550724772e-07, 9.6308682468615358641e-01, + 1.0899135044642856803e-07, 9.5618288746751489704e-01, + 1.0745626100352112918e-07, 9.4942532655508271588e-01, + 1.0596381293402777190e-07, 9.4280904158206335630e-01, + 1.0451225385273972023e-07, 9.3632917756904454620e-01, + 1.0309992609797297870e-07, 9.2998110995055427441e-01, + 1.0172526041666667320e-07, 9.2376043070340119190e-01, + 1.0038677014802631022e-07, 9.1766293548224708854e-01, + 9.9083045860389616921e-08, 9.1168461167710357351e-01, + 9.7812750400641022247e-08, 9.0582162731567661407e-01, + 9.6574614319620251657e-08, 9.0007032074081916306e-01, + 9.5367431640625005294e-08, 8.9442719099991585541e-01, + 9.4190055941358019463e-08, 8.8888888888888883955e-01, + 9.3041396722560978838e-08, 8.8345220859877238162e-01, + 9.1920416039156631290e-08, 8.7811407991752277180e-01, + 9.0826125372023804482e-08, 8.7287156094396955996e-01, + 8.9757582720588234048e-08, 8.6772183127462465535e-01, + 8.8713889898255812722e-08, 8.6266218562750729415e-01, + 
8.7694190014367814875e-08, 8.5769002787023584933e-01, + 8.6697665127840911497e-08, 8.5280286542244176928e-01, + 8.5723534058988761666e-08, 8.4799830400508802164e-01, + 8.4771050347222225457e-08, 8.4327404271156780613e-01, + 8.3839500343406599951e-08, 8.3862786937753464045e-01, + 8.2928201426630432481e-08, 8.3405765622829908246e-01, + 8.2036500336021511923e-08, 8.2956135578434020417e-01, + 8.1163771609042551220e-08, 8.2513699700703468931e-01, + 8.0309416118421050820e-08, 8.2078268166812329287e-01, + 7.9472859700520828922e-08, 8.1649658092772603446e-01, + 7.8653551868556699530e-08, 8.1227693210689522196e-01, + 7.7850964604591830522e-08, 8.0812203564176865456e-01, + 7.7064591224747481298e-08, 8.0403025220736967782e-01, + 7.6293945312500001588e-08, 8.0000000000000004441e-01, + 7.5538559715346535571e-08, 7.9602975216799132241e-01, + 7.4797985600490195040e-08, 7.9211803438133943089e-01, + 7.4071791565533974158e-08, 7.8826342253143455441e-01, + 7.3359562800480773303e-08, 7.8446454055273617811e-01, + 7.2660900297619054173e-08, 7.8072005835882651859e-01, + 7.1975420106132072725e-08, 7.7702868988581130782e-01, + 7.1302752628504667579e-08, 7.7338919123653082632e-01, + 7.0642541956018514597e-08, 7.6980035891950104876e-01, + 6.9994445240825691959e-08, 7.6626102817692109959e-01, + 6.9358132102272723904e-08, 7.6277007139647390321e-01, + 6.8733284065315314719e-08, 7.5932639660199918730e-01, + 6.8119594029017853361e-08, 7.5592894601845450619e-01, + 6.7516765763274335346e-08, 7.5257669470687782454e-01, + 6.6924513432017540145e-08, 7.4926864926535519107e-01, + 6.6342561141304348632e-08, 7.4600384659225105199e-01, + 6.5770642510775861156e-08, 7.4278135270820744296e-01, + 6.5208500267094023655e-08, 7.3960026163363878915e-01, + 6.4655885858050847233e-08, 7.3645969431865865307e-01, + 6.4112559086134451001e-08, 7.3335879762256905856e-01, + 6.3578287760416665784e-08, 7.3029674334022143256e-01, + 6.3052847365702481089e-08, 7.2727272727272729291e-01, + 6.2536020747950822927e-08, 
7.2428596834014824513e-01, + 6.2027597815040656970e-08, 7.2133570773394584119e-01, + 6.1527375252016127325e-08, 7.1842120810709964029e-01, + 6.1035156250000001271e-08, 7.1554175279993270653e-01, + 6.0550750248015869655e-08, 7.1269664509979835376e-01, + 6.0073972687007873182e-08, 7.0988520753289097165e-01, +}; + +static const unsigned long long LCONST[] = { +0x3feffffffee7f18fULL, /* A0 = 9.99999997962321453275e-01 */ +0xbfdffffffe07e52fULL, /* A1 =-4.99999998166077580600e-01 */ +0x3fd801180ca296d9ULL, /* A2 = 3.75066768969515586277e-01 */ +0xbfd400fc0bbb8e78ULL, /* A3 =-3.12560092408808548438e-01 */ +}; + +static void +__vrsqrtf_n( int n, float * restrict px, int stridex, float * restrict py, int stridey ); + +#pragma no_inline(__vrsqrtf_n) + +#define RETURN(ret) \ +{ \ + *py = (ret); \ + py += stridey; \ + if ( n_n == 0 ) \ + { \ + spx = px; spy = py; \ + ax0 = *(int*)px; \ + continue; \ + } \ + n--; \ + break; \ +} + +void +__vrsqrtf( int n, float * restrict px, int stridex, float * restrict py, int stridey ) +{ + float *spx, *spy; + int ax0, n_n; + float res; + float FONE = 1.0f, FTWO = 2.0f; + + while ( n > 1 ) + { + n_n = 0; + spx = px; + spy = py; + ax0 = *(int*)px; + for ( ; n > 1 ; n-- ) + { + px += stridex; + if ( ax0 >= 0x7f800000 ) /* X = NaN or Inf */ + { + res = *(px - stridex); + RETURN ( FONE / res ) + } + + py += stridey; + + if ( ax0 < 0x00800000 ) /* X = denormal, zero or negative */ + { + py -= stridey; + res = *(px - stridex); + + if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */ + { + RETURN ( FONE / res ) + } + else if ( ax0 >= 0 ) /* X = denormal */ + { + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, si0, iexp0; + + res = *(int*)&res; + res *= FTWO; 
+ ax0 = *(int*)&res; + iexp0 = ax0 >> 24; + iexp0 = 0x3f + 0x4b - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + RETURN(*(float*)&iexp0) + } + else /* X = negative */ + { + RETURN ( sqrtf(res) ) + } + } + n_n++; + ax0 = *(int*)px; + } + if ( n_n > 0 ) + __vrsqrtf_n( n_n, spx, stridex, spy, stridey ); + } + + if ( n > 0 ) + { + ax0 = *(int*)px; + + if ( ax0 >= 0x7f800000 ) /* X = NaN or Inf */ + { + res = *px; + *py = FONE / res; + } + else if ( ax0 < 0x00800000 ) /* X = denormal, zero or negative */ + { + res = *px; + + if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */ + { + *py = FONE / res; + } + else if ( ax0 >= 0 ) /* X = denormal */ + { + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, si0, iexp0; + + res = *(int*)&res; + res *= FTWO; + ax0 = *(int*)&res; + iexp0 = ax0 >> 24; + iexp0 = 0x3f + 0x4b - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + + *(int*)py = iexp0; + } + else /* X = negative */ + { + *py = sqrtf(res); + } + } + else + { + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* 
-4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + double res0, xx0, tbl_div0, tbl_sqrt0; + float fres0; + int iax0, si0, iexp0; + + iexp0 = ax0 >> 24; + iexp0 = 0x3f - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + + *(int*)py = iexp0; + } + } +} + +void +__vrsqrtf_n( int n, float * restrict px, int stridex, float * restrict py, int stridey ) +{ + double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */ + double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */ + double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */ + double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */ + double res0, xx0, tbl_div0, tbl_sqrt0; + double res1, xx1, tbl_div1, tbl_sqrt1; + double res2, xx2, tbl_div2, tbl_sqrt2; + float fres0, fres1, fres2; + int iax0, ax0, si0, iexp0; + int iax1, ax1, si1, iexp1; + int iax2, ax2, si2, iexp2; + +#if defined(ARCH_v7) || defined(ARCH_v8) + for( ; n > 2 ; n -= 3 ) + { + ax0 = *(int*)px; + px += stridex; + + ax1 = *(int*)px; + px += stridex; + + ax2 = *(int*)px; + px += stridex; + + iexp0 = ax0 >> 24; + iexp1 = ax1 >> 24; + iexp2 = ax2 >> 24; + iexp0 = 0x3f - iexp0; + iexp1 = 0x3f - iexp1; + iexp2 = 0x3f - iexp2; + + iexp0 = iexp0 << 23; + iexp1 = iexp1 << 23; + iexp2 = iexp2 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + si1 = (ax1 >> 13) & 0x7f0; + si2 = (ax2 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_div1 = ((double*)((char*)__TBL_rsqrtf + si1))[0]; + tbl_div2 = ((double*)((char*)__TBL_rsqrtf + si2))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + 
si0))[1]; + tbl_sqrt1 = ((double*)((char*)__TBL_rsqrtf + si1))[1]; + tbl_sqrt2 = ((double*)((char*)__TBL_rsqrtf + si2))[1]; + iax0 = ax0 & 0x7ffe0000; + iax1 = ax1 & 0x7ffe0000; + iax2 = ax2 & 0x7ffe0000; + iax0 = ax0 - iax0; + iax1 = ax1 - iax1; + iax2 = ax2 - iax2; + xx0 = iax0 * tbl_div0; + xx1 = iax1 * tbl_div1; + xx2 = iax2 * tbl_div2; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + res1 = tbl_sqrt1 * (((A3 * xx1 + A2) * xx1 + A1) * xx1 + A0); + res2 = tbl_sqrt2 * (((A3 * xx2 + A2) * xx2 + A1) * xx2 + A0); + + fres0 = res0; + fres1 = res1; + fres2 = res2; + + iexp0 += *(int*)&fres0; + iexp1 += *(int*)&fres1; + iexp2 += *(int*)&fres2; + *(int*)py = iexp0; + py += stridey; + *(int*)py = iexp1; + py += stridey; + *(int*)py = iexp2; + py += stridey; + } +#endif + for( ; n > 0 ; n-- ) + { + ax0 = *(int*)px; + px += stridex; + + iexp0 = ax0 >> 24; + iexp0 = 0x3f - iexp0; + iexp0 = iexp0 << 23; + + si0 = (ax0 >> 13) & 0x7f0; + + tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; + tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; + iax0 = ax0 & 0x7ffe0000; + iax0 = ax0 - iax0; + xx0 = iax0 * tbl_div0; + res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); + + fres0 = res0; + iexp0 += *(int*)&fres0; + *(int*)py = iexp0; + py += stridey; + } +} + diff --git a/usr/src/libm/src/mvec/__vsin.c b/usr/src/libm/src/mvec/__vsin.c new file mode 100644 index 0000000..0562061 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsin.c @@ -0,0 +1,1106 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vsin.c 1.8 06/01/31 SMI" + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, + pio2_1 = 1.570796326734125614166, + pio2_2 = 6.077100506303965976596e-11, + pio2_3 = 2.022266248711166455796e-21, + pio2_3t = 8.478427660368899643959e-32, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +/* Don't __ the following; acomp will handle it */ +extern double fabs( double ); +extern void
__vlibm_vsin_big( int, double *, int, double *, int, int ); + /* __vsin: for each of n strided elements, store a sine result for *x into *y, working on groups of three. The first loop serves inputs whose high word (sign cleared) is at most 0x3fe921fb; the first larger input diverts to MEDIUM, which first reduces the argument by multiples of pio2_1 through pio2_3t. Finite inputs with high word above 0x413921fb are left for __vlibm_vsin_big at the end. NOTE(review): both do-while bodies run once before n is tested, so n <= 0 still reads x[0] and writes y[0] -- confirm callers guarantee n >= 1. */ +void +__vsin( int n, double * restrict x, int stridex, double * restrict y, + int stridey ) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; + unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave; + + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + + do + { +LOOP0: + /* high word above 0x3fe921fb: leave for MEDIUM, with biguns recording which slot (1, 2 or 3) was being filled; below 0x3e400000: y = x, and the volatile int conversion is presumably there to raise inexact */ xsb0 = HI(x); + hx0 = xsb0 & ~0x80000000; + if ( hx0 > 0x3fe921fb ) + { + biguns = 1; + goto MEDIUM; + } + if ( hx0 < 0x3e400000 ) + { + volatile int v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto LOOP0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + +LOOP1: + xsb1 = HI(x); + hx1 = xsb1 & ~0x80000000; + if ( hx1 > 0x3fe921fb ) + { + biguns = 2; + goto MEDIUM; + } + if ( hx1 < 0x3e400000 ) + { + volatile int v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto LOOP1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + +LOOP2: + xsb2 = HI(x); + hx2 = xsb2 & ~0x80000000; + if ( hx2 > 0x3fe921fb ) + { + biguns = 3; + goto MEDIUM; + } + if ( hx2 < 0x3e400000 ) + { + volatile int v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + goto LOOP2; + } + x2 = *x; + py2 = y; + + i = ( hx0 - 0x3fc90000 ) >> 31; + i |= ( ( hx1 - 0x3fc90000 ) >> 30 ) & 2; + i |= ( ( hx2 - 0x3fc90000 ) >> 29 ) & 4; + /* bit k of i is set when element k has a high word below 0x3fc90000 (the unsigned subtraction wraps); those elements use the poly1..poly4 polynomial, the others round x to a table point and use __vlibm_TBL_sincos_hi and _lo */ switch ( i ) + { + double a0, a1, a2, w0, w1, w2; + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 -= t0; + x1 -= t1; + x2 -= t2; +
z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + case 1: + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = x0 + x0 * t0; + t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = t0; + *py1 = a1 + t1; + *py2 = a2 + t2; + break; + + 
case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x0 -= t0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = x1 + x1 * t1; + t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = a0 + t0; + *py1 = t1; + *py2 = a2 + t2; + break; + + case 3: + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + a2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + t0 = x0 + x0 * t0; + t1 = x1 + x1 * t1; + t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2]; + *py0 = t0; + *py1 = t1; + *py2 = a2 + t2; + break; + + case 4: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x0 -= t0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + 
t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = x2 + x2 * t2; + *py0 = a0 + t0; + *py1 = a1 + t1; + *py2 = t2; + break; + + case 5: + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + t0 = x0 + x0 * t0; + t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = x2 + x2 * t2; + *py0 = t0; + *py1 = a1 + t1; + *py2 = t2; + break; + + case 6: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+1] * 
w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0]; + t1 = x1 + x1 * t1; + t2 = x2 + x2 * t2; + *py0 = a0 + t0; + *py1 = t1; + *py2 = t2; + break; + + case 7: + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + t0 = x0 + x0 * t0; + t1 = x1 + x1 * t1; + t2 = x2 + x2 * t2; + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while ( --n > 0 ); + + /* the input ran out part-way through a group: finish the one (i == 1) or two (i == 2) elements already loaded */ if ( i > 0 ) + { + double a0, a1, w0, w1; + double t0, t1, z0, z1; + unsigned j0, j1; + + if ( i > 1 ) + { + if ( hx1 < 0x3fc90000 ) + { + z1 = x1 * x1; + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t1 = x1 + x1 * t1; + *py1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + a1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1]; + *py1 = a1 + t1; + } + } + if ( hx0 < 0x3fc90000 ) + { + z0 = x0 * x0; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t0 = x0 + x0 * t0; + *py0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + a0 = __vlibm_TBL_sincos_hi[j0+xsb0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0]; + *py0 = a0 + t0; + } + } + + 
return; + + /* + * MEDIUM RANGE PROCESSING + * Jump here at first sign of medium range argument. We are a bit + * confused due to the jump.. fix up several variables and jump into + * the nth loop, same as was being processed above. + */ + +MEDIUM: + /* entries 1 and 3 of these tables are fixed at 1, -1 and 0; entries 0 and 2 are filled per element with x, -x, y and -y, so indexing by the quadrant n selects which form the polynomial result takes */ + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + /* resume in the slot that was being filled; slots already loaded keep x0 and x1, and their saved high words are shifted down to the sign bit that indexes half[] */ if ( biguns == 3 ) + { + biguns = 0; + xsb0 = xsb0 >> 31; + xsb1 = xsb1 >> 31; + goto loop2; + } + else if ( biguns == 2 ) + { + xsb0 = xsb0 >> 31; + biguns = 0; + goto loop1; + } + biguns = 0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + +loop0: + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if ( hx < 0x3e400000 ) + { + volatile int v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + /* too large for the reduction below: inf and NaN store x - x now, finite values only set biguns so that __vlibm_vsin_big fills them in afterwards */ if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x0 = *x; + *y = x0 - x0; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if ( hx < 0x3e400000 ) + { + volatile int v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x1 = *x; + *y = x1 - x1; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if ( hx < 0x3e400000 ) + { + volatile int v = *x; + *y = *x; + x += stridex; + y += stridey; + i = 2; + if 
( --n <= 0 ) + break; + goto loop2; + } + if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x2 = *x; + *y = x2 - x2; + } + else + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + x2 = *x; + py2 = y; + + /* n is x * invpio2 rounded to the nearest integer (half[] carries the sign of x); n times pio2_1, pio2_2, pio2_3 and pio2_3t is then subtracted step by step, leaving a head in x and a tail in y */ n0 = (int) ( x0 * invpio2 + half[xsb0] ); + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + n2 = (int) ( x2 * invpio2 + half[xsb2] ); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 &= 3; + n1 &= 3; + n2 &= 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + /* as in the first loop, bit k of i selects the polynomial path for element k; here the cutoff is thresh[n & 1] */ xsb0 = HI(&x0); + i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31; + xsb1 = HI(&x1); + i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2; + xsb2 = HI(&x2); + i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4; + switch ( i ) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 
* ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 1: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( 
y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 
* ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: + j0 = n0 & 1; + j1 = ( 
xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( 
y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while ( --n > 0 ); + + /* one or two elements were loaded when the input ran out: apply the same reduction and evaluation to them one at a time */ if ( i > 0 ) + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned hx, j0, j1; + int n0, n1; + + if ( i > 1 ) + { + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + fn1 = (double) n1; + n1 &= 3; + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + xsb1 = HI(&x1); + if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] ) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + *py1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) 
& 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = ( x1 - t1 ) + y1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + fn0 = (double) n0; + n0 &= 3; + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + xsb0 = HI(&x0); + if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] ) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + *py0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = ( x0 - t0 ) + y0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + + /* a finite input exceeded 0x413921fb somewhere: hand the whole original vector to __vlibm_vsin_big, passing that same value as its threshold */ if ( biguns ) + __vlibm_vsin_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb ); +} diff --git a/usr/src/libm/src/mvec/__vsinbig.c b/usr/src/libm/src/mvec/__vsinbig.c new file mode 100644 index 0000000..2bf4bb8 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsinbig.c @@ -0,0 +1,171 @@ +/* + * CDDL HEADER START + * + * The contents of
this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vsinbig.c 1.5 06/01/31 SMI" + /* NOTE(review): the header name was missing from this directive; sys/isa_defs.h is presumably the intended header since it is where Solaris defines _LITTLE_ENDIAN -- confirm against upstream */ +#include <sys/isa_defs.h> + /* HI(x) and LO(x) access the two 32-bit words of the double at address x; which word comes first depends on _LITTLE_ENDIAN */ +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m( double *, double *, int, int, int ); + /* two24 scales the remainder between successive pieces of the significand; p1..p4 and q1..q4 feed the two polynomial paths, pp1, pp2, qq1 and qq2 the table paths */ +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 =
-1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; + /* __vlibm_vsin_big: walk n strided elements of x. An element whose high word (sign cleared) is at most thresh, or is 0x7ff00000 or more, is skipped and its y is left untouched. Every other element is reduced with __vlibm_rem_pio2m and evaluated by a polynomial or by lookup in __vlibm_TBL_sincos_hi and _lo, and the result is stored in *y. */ +void +__vlibm_vsin_big( int n, double * restrict x, int stridex, double * restrict y, + int stridey, int thresh ) +{ + for ( ; n--; x += stridex, y += stridey ) + { + double tx, tt[3], ty[2], t, w, z, a; + unsigned hx, xsb; + int e0, nx, i, j; + + hx = HI(x); + xsb = hx & 0x80000000; + hx &= ~0x80000000; + if ( hx <= thresh || hx >= 0x7ff00000 ) + continue; + /* e0 is the biased exponent minus 1046; tx is the significand rebuilt with exponent field 0x416 and then peeled into up to three integer-valued pieces tt[0..2], scaling the remainder by two24 each time; nx counts the pieces in use */ e0 = ( hx >> 20 ) - 1046; + HI(&tx) = 0x41600000 | ( hx & 0xfffff ); + LO(&tx) = LO(x); + tt[0] = (double)( (int) tx ); + tx = ( tx - tt[0] ) * two24; + if ( tx != zero ) + { + nx = 2; + tt[1] = (double)( (int) tx ); + tt[2] = ( tx - tt[1] ) * two24; + if ( tt[2] != zero ) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m( tt, ty, e0, nx, 2 ); + if ( xsb ) + { + nx = -nx; + ty[0] = -ty[0]; + ty[1] = -ty[1]; + } + + /* now nx and ty[*] are the quadrant and reduced arg */ + xsb = ( nx & 2 ) << 30; + hx = HI(&ty[0]); + /* odd quadrant: result is one plus a q1..q4 polynomial, or built around table entry j+1; even quadrant: result is ty[0] plus a p1..p4 correction, or built around table entry j */ if ( nx & 1 ) + { + if ( hx & 0x80000000 ) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if ( hx < 0x3fc40000 ) + { + z = ty[0] * ty[0]; + t = z * ( q1 + z * ( q2 + z * ( q3 + z * q4 ) ) ); + a = one + t; + } + else + { + j = ( hx + 0x4000 ) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = ( ty[0] - t ) + ty[1]; + z = ty[0] * ty[0]; + t = z * ( qq1 + z * qq2 ); + w = ty[0] * ( one + z * ( pp1 + z * pp2 ) ); + j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3; + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - ( __vlibm_TBL_sincos_hi[j] * w - a * t ); + a += t; + } + } + else + { + if ( hx & 0x80000000 ) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; + } + if ( hx < 0x3fc90000 ) + { + z = ty[0] * ty[0]; + t = z * ( p1 + z * ( p2 + z * ( p3 + z * p4 ) ) ); + a = ty[0] + ( ty[1] + ty[0] * t ); + } + else + { + j = ( hx + 0x4000 ) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = ( ty[0] - t
) + ty[1]; + z = ty[0] * ty[0]; + t = z * ( qq1 + z * qq2 ); + w = ty[0] * ( one + z * ( pp1 + z * pp2 ) ); + j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = ( __vlibm_TBL_sincos_hi[j+1] * w + a * t ) + __vlibm_TBL_sincos_lo[j]; + a += t; + } + } + /* xsb carries the result sign: bit 1 of the quadrant, flipped in the even-quadrant branch when the reduced argument was negative */ if ( xsb ) a = -a; + *y = a; + } +} diff --git a/usr/src/libm/src/mvec/__vsinbig_ultra3.c b/usr/src/libm/src/mvec/__vsinbig_ultra3.c new file mode 100644 index 0000000..ad05aa5 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsinbig_ultra3.c @@ -0,0 +1,652 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#pragma ident "@(#)__vsinbig_ultra3.c 1.4 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, /* rounding bias added before truncation to int; indexed by the sign bit of x */ + one = 1.0, + invpio2 = 0.636619772367581343075535, /* approximately 2/pi */ + pio2_1 = 1.570796326734125614166, /* pio2_1, pio2_2, pio2_3, pio2_3t: pi/2 split into pieces of decreasing magnitude, subtracted in turn below */ + pio2_2 = 6.077100506303965976596e-11, + pio2_3 = 2.022266248711166455796e-21, + pio2_3t = 8.478427660368899643959e-32, + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, /* pp1,pp2 (sine-like) and qq1,qq2 (cosine-like): short series used on the table path */ + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, /* poly1..poly4: element [0] holds the sine-series coefficient, [1] the cosine-series coefficient */ + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, + -1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; /* high-word cutoff, indexed by n&1: a reduced |x| below it takes the polynomial path, otherwise the table path */ + +extern void __vlibm_vsin_big( int, double *, int, double *, int, int ); + /* __vlibm_vsin_big_ultra3: for each of the n elements x[i*stridex] whose |x| high word hx satisfies pthresh < hx <= 0x413921fb, reduce the argument by a multiple of pi/2 and store the sine in y[i*stridey]. Elements with hx <= pthresh, and infinities/NaNs, are skipped without writing y. If any finite element has hx > 0x413921fb, __vlibm_vsin_big is called on the original vector at the end. Elements are gathered three at a time (lanes 0..2); one or two leftovers are handled after the main loop. NOTE(review): the do-loop body reads x once even when n <= 0 -- callers presumably pass n >= 1; confirm. */ +void +__vlibm_vsin_big_ultra3( int n, double * restrict x, int stridex, double * restrict y, + int stridey, int pthresh ) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; + unsigned xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave; + + nsave = n; /* remember the original arguments for the optional __vlibm_vsin_big call at the end */ + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + biguns = 0; + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 
1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; /* slots 1 and 3 stay constant (+1.0/-1.0 and 0.0); slots 0 and 2 are filled per element with +x/-x and +y/-y, so indexing by n (0..3) picks the operand for the polynomial path */ + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + +loop0: /* find the next in-range element for lane 0 */ + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= pthresh || hx > 0x413921fb ) + { + if ( hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; /* finite but above the range handled here: defer to __vlibm_vsin_big */ + x += stridex; + y += stridey; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + x0 = *x; + py0 = y; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + +loop1: /* same search for lane 1 */ + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= pthresh || hx > 0x413921fb ) + { + if ( hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + x1 = *x; + py1 = y; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + +loop2: /* same search for lane 2 */ + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= pthresh || hx > 0x413921fb ) + { + if ( hx > 0x413921fb && hx < 0x7ff00000) + biguns = 1; + x += stridex; + y += stridey; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + x2 = *x; + py2 = y; + /* n = x*invpio2 rounded to nearest (bias from half[]); then x - n*(pi/2) is formed in three steps, each leaving a head in x and a tail in y */ + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + n2 = (int) ( x2 * invpio2 + half[xsb2] ); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 &= 3; + n1 &= 3; + n2 &= 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) 
- w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + xsb0 = HI(&x0); + i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31; + xsb1 = HI(&x1); + i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2; + xsb2 = HI(&x2); + i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4; /* bit k of i is set when lane k's reduced |x| high word is below thresh[nk&1], i.e. lane k takes the polynomial path */ + switch ( i ) + { + double t0, t1, t2, z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* all three lanes use the table path */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; /* t = double whose high word is x's high word rounded to a multiple of 0x8000 and whose low word is 0 */ + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; /* j is now a multiple of 4: base index of this breakpoint's group in the sincos tables */ + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); 
+ *py2 = ( a2 + t2 ); + break; + + case 1: /* lane 0 polynomial; lanes 1 and 2 table */ + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb1 |= 1; + xsb2 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = ( a2 + t2 ); + break; + + case 2: /* lane 1 polynomial; lanes 0 and 2 table */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( 
j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb2 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 3: /* lanes 0 and 1 polynomial; lane 2 table */ + j0 = n0 & 1; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb2 |= 1; + a2 = __vlibm_TBL_sincos_hi[j2+n2]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; + *py0 = t0; + *py1 = t1; + *py2 = ( a2 + t2 ); + break; + + case 4: /* lane 2 polynomial; lanes 0 and 1 table */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + 
+ z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 5: /* lanes 0 and 2 polynomial; lane 1 table */ + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + 
x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = ( a1 + t1 ); + *py2 = t2; + break; + + case 6: /* lanes 1 and 2 polynomial; lane 0 table */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = ( a0 + t0 ); + *py1 = t1; + *py2 = t2; + break; + + case 7: /* all three lanes use the polynomial path */ + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] 
+ x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + i = 0; + } while ( --n > 0 ); + /* i is the number of lanes already loaded (x0, and x1 when i > 1) but not yet computed when the input ran out; finish them one at a time with the same reduction and path selection */ + if ( i > 0 ) + { + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double t0, t1, z0, z1; + unsigned j0, j1; + int n0, n1; + + if ( i > 1 ) + { + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + fn1 = (double) n1; + n1 &= 3; + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + xsb1 = HI(&x1); + if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] ) + { /* polynomial path */ + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + *py1 = t1; + } + else + { /* table path */ + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = ( x1 - t1 ) + y1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1 = __vlibm_TBL_sincos_hi[j1+n1]; + t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = ( a1 + t1 ); + } + } + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + fn0 = (double) n0; + n0 &= 3; + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + xsb0 = HI(&x0); + if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] ) + { /* polynomial path */ + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * ( 
poly3[j0] + z0 * poly4[j0] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + *py0 = t0; + } + else + { /* table path */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = ( x0 - t0 ) + y0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a0 = __vlibm_TBL_sincos_hi[j0+n0]; + t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = ( a0 + t0 ); + } + } + /* at least one finite element had a high word above 0x413921fb: hand the original vector to __vlibm_vsin_big with that threshold (presumably it fills in only the elements above it -- confirm against its definition) */ + if ( biguns ) + __vlibm_vsin_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb ); +} diff --git a/usr/src/libm/src/mvec/__vsinbigf.c b/usr/src/libm/src/mvec/__vsinbigf.c new file mode 100644 index 0000000..259b547 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsinbigf.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vsinbigf.c 1.5 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; +extern int __vlibm_rem_pio2m( double *, double *, int, int, int ); + +static const double + zero = 0.0, + one = 1.0, + two24 = 16777216.0, /* 2^24: scale between successive pieces of the split argument */ + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + p1 = -1.666666666666629669805215138920301589656e-0001, + p2 = 8.333333332390951295683993455280336376663e-0003, + p3 = -1.984126237997976692791551778230098403960e-0004, + p4 = 2.753403624854277237649987622848330351110e-0006, /* p1..p4: sine-series coefficients for the polynomial path; pp1,pp2: short sine-like series for the table path */ + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + q1 = -4.999999999999931701464060878888294524481e-0001, + q2 = 4.166666666394861917535640593963708222319e-0002, + q3 = -1.388888552656142867832756687736851681462e-0003, + q4 = 2.478519423681460796618128289454530524759e-0005; /* q1..q4: cosine-series coefficients for the polynomial path; qq1,qq2: short cosine-like series for the table path */ + /* __vlibm_vsin_bigf: for each of the n float elements x[i*stridex] whose value, widened to double, has a |x| high word hx with 0x413921fb < hx < 0x7ff00000, reduce the argument with __vlibm_rem_pio2m and store the sine (converted to float) in y[i*stridey]. All other elements -- smaller magnitudes, infinities and NaNs -- are skipped and y is not written for them. NOTE(review): the loop condition is n--, so n is assumed non-negative; confirm with callers. */ +void +__vlibm_vsin_bigf( int n, float * restrict x, int stridex, float * restrict y, + int stridey ) +{ + for ( ; n--; x += stridex, y += stridey ) + { + double tx, tt[3], ty[2], t, w, z, a; + unsigned hx, xsb; + int e0, nx, i, j; + + tx = *x; + hx = HI(&tx); + xsb = hx & 0x80000000; + hx &= ~0x80000000; + if ( hx <= 0x413921fb || hx >= 0x7ff00000 ) + continue; /* not handled by this routine; *y is left untouched */ + e0 = ( hx >> 20 ) - 1046; /* biased exponent minus 1046 (0x416): the scaling removed from tx on the next line */ + HI(&tx) = 0x41600000 | ( hx & 0xfffff ); /* force the exponent field to 0x416 and clear the sign, keeping the mantissa bits */ + /* split tx into up to three integer-valued pieces tt[0..2], each obtained from the remainder scaled by two24; nx counts the pieces in use */ + tt[0] = (double)( (int) tx ); + tx = ( tx - tt[0] ) * two24; + if ( tx != zero ) + { + nx = 2; + tt[1] = (double)( (int) tx ); + tt[2] = ( tx - tt[1] ) * two24; + if ( tt[2] != zero ) + nx = 3; + } + else + { + nx = 1; + tt[1] = tt[2] = zero; + } + nx = __vlibm_rem_pio2m( tt, ty, e0, nx, 2 ); + if ( xsb ) + { /* original x was negative: negate both the quadrant and the reduced argument */ + nx = -nx; + ty[0] = -ty[0]; + ty[1] = 
-ty[1]; + } + + /* now nx and ty[*] are the quadrant and reduced arg */ + xsb = ( nx & 2 ) << 30; /* from here on xsb is the sign to apply to the result: set when bit 1 of the quadrant is set */ + hx = HI(&ty[0]); + if ( nx & 1 ) + { /* odd quadrant: evaluate the cosine of the reduced argument (even function, so its sign is simply dropped) */ + if ( hx & 0x80000000 ) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + } + if ( hx < 0x3fc40000 ) + { /* small reduced argument: polynomial only */ + z = ty[0] * ty[0]; + t = z * ( q1 + z * ( q2 + z * ( q3 + z * q4 ) ) ); + a = one + t; + } + else + { /* table path: t = ty[0] with its high word rounded to a multiple of 0x8000 and low word 0 */ + j = ( hx + 0x4000 ) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = ( ty[0] - t ) + ty[1]; + z = ty[0] * ty[0]; + t = z * ( qq1 + z * qq2 ); + w = ty[0] * ( one + z * ( pp1 + z * pp2 ) ); + j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3; + a = __vlibm_TBL_sincos_hi[j+1]; + t = __vlibm_TBL_sincos_lo[j+1] - ( __vlibm_TBL_sincos_hi[j] * w - a * t ); + a += t; + } + } + else + { /* even quadrant: evaluate the sine of the reduced argument (odd function, so a negative argument flips the result sign) */ + if ( hx & 0x80000000 ) + { + ty[0] = -ty[0]; + ty[1] = -ty[1]; + hx &= ~0x80000000; + xsb ^= 0x80000000; + } + if ( hx < 0x3fc90000 ) + { /* small reduced argument: polynomial only, with the tail ty[1] folded in */ + z = ty[0] * ty[0]; + t = z * ( p1 + z * ( p2 + z * ( p3 + z * p4 ) ) ); + a = ty[0] + ( ty[1] + ty[0] * t ); + } + else + { /* table path, as above but combining the entries for the sine */ + j = ( hx + 0x4000 ) & 0x7fff8000; + HI(&t) = j; + LO(&t) = 0; + ty[0] = ( ty[0] - t ) + ty[1]; + z = ty[0] * ty[0]; + t = z * ( qq1 + z * qq2 ); + w = ty[0] * ( one + z * ( pp1 + z * pp2 ) ); + j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3; + a = __vlibm_TBL_sincos_hi[j]; + t = ( __vlibm_TBL_sincos_hi[j+1] * w + a * t ) + __vlibm_TBL_sincos_lo[j]; + a += t; + } + } + if ( xsb ) a = -a; + *y = a; /* the double result is converted to float by this store */ + } +} diff --git a/usr/src/libm/src/mvec/__vsincos.c b/usr/src/libm/src/mvec/__vsincos.c new file mode 100644 index 0000000..4f42422 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsincos.c @@ -0,0 +1,1545 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vsincos.c 1.6 06/01/31 SMI" + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* + * vsincos.c + * + * Vector sine and cosine function. Just slight modifications to vcos.c. + */ + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double + half[2] = { 0.5, -0.5 }, + one = 1.0, + invpio2 = 0.636619772367581343075535, /* 53 bits of 2/pi */ + pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */ + pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ + pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ + pio2_3t = 8.478427660368899643959e-32, /* pi/2 - (pio2_1+pio2_2+pio2_3) */ + pp1 = -1.666666666605760465276263943134982554676e-0001, + pp2 = 8.333261209690963126718376566146180944442e-0003, + qq1 = -4.999999999977710986407023955908711557870e-0001, + qq2 = 4.166654863857219350645055881018842089580e-0002, + poly1[2]= { -1.666666666666629669805215138920301589656e-0001, + -4.999999999999931701464060878888294524481e-0001 }, /* poly1..poly4: element [0] is the sine-series coefficient, [1] the cosine-series coefficient */ + poly2[2]= { 8.333333332390951295683993455280336376663e-0003, + 4.166666666394861917535640593963708222319e-0002 }, + poly3[2]= { -1.984126237997976692791551778230098403960e-0004, 
-1.388888552656142867832756687736851681462e-0003 }, + poly4[2]= { 2.753403624854277237649987622848330351110e-0006, + 2.478519423681460796618128289454530524759e-0005 }; + +/* Don't __ the following; acomp will handle it */ +extern double fabs( double ); +extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int ); + +/* + * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n. + * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n. + * + * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06. + * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. + * + * elts < 2^-27 use the approximation 1.0 ~ cos(x). + */ +void +__vsincos( int n, double * restrict x, int stridex, + double * restrict y, int stridey, + double * restrict c, int stridec ) +{ + double x0_or_one[4], x1_or_one[4], x2_or_one[4]; + double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; + double x0, x1, x2, + *py0, *py1, *py2, + *pc0, *pc1, *pc2, + *xsave, *ysave, *csave; + unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2; + int i, biguns, nsave, sxsave, sysave, scsave; + + nsave = n; + xsave = x; + sxsave = stridex; + ysave = y; + sysave = stridey; + csave = c; + scsave = stridec; + biguns = 0; + + do /* MAIN LOOP */ + { + + /* Gotos here so _break_ exits MAIN LOOP. */ +LOOP0: /* Find first arg in right range. */ + xsb0 = HI(x); /* get most significant word */ + hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ + if ( hx0 > 0x3fe921fb ) { + /* Too big: arg reduction needed, so leave for second part */ + biguns = 1; + x += stridex; + y += stridey; + c += stridec; + i = 0; + if ( --n <= 0 ) + break; + goto LOOP0; + } + if ( hx0 < 0x3e400000 ) { + /* Too small. cos x ~ 1, sin x ~ x. 
*/ + volatile int v = *x; + *c = 1.0; + *y = *x; + x += stridex; + y += stridey; + c += stridec; + i = 0; + if ( --n <= 0 ) + break; + goto LOOP0; + } + x0 = *x; + py0 = y; + pc0 = c; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if ( --n <= 0 ) + break; + +LOOP1: /* Get second arg, same as above. */ + xsb1 = HI(x); + hx1 = xsb1 & ~0x80000000; + if ( hx1 > 0x3fe921fb ) + { + biguns = 1; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if ( --n <= 0 ) + break; + goto LOOP1; + } + if ( hx1 < 0x3e400000 ) + { + volatile int v = *x; + *c = 1.0; + *y = *x; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if ( --n <= 0 ) + break; + goto LOOP1; + } + x1 = *x; + py1 = y; + pc1 = c; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if ( --n <= 0 ) + break; + +LOOP2: /* Get third arg, same as above. */ + xsb2 = HI(x); + hx2 = xsb2 & ~0x80000000; + if ( hx2 > 0x3fe921fb ) + { + biguns = 1; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if ( --n <= 0 ) + break; + goto LOOP2; + } + if ( hx2 < 0x3e400000 ) + { + volatile int v = *x; + *c = 1.0; + *y = *x; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if ( --n <= 0 ) + break; + goto LOOP2; + } + x2 = *x; + py2 = y; + pc2 = c; + + /* + * 0x3fc40000 = 5/32 ~ 0.15625 + * Get msb after subtraction. Will be 1 only if + * hx0 - 5/32 is negative. 
+ */ + i = ( hx2 - 0x3fc40000 ) >> 31; + i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2; + i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4; + switch ( i ) + { + double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2; + double w0, w1, w2; + double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2; + double z0, z1, z2; + unsigned j0, j1, j2; + + case 0: /* All are > 5/32 */ + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + + x0 -= t0; + x1 -= t1; + x2 -= t2; + + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 ); + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 ); + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1_1 = a2_1*w1 + a1_1*t1; + t1_2 = a2_2*w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + *py2 = a1_2 
+ t1_2; + + break; + + case 1: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x0 -= t0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + /* cos_lo(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 ); + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 ); + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *pc2 = one + t2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + t2 = x2 + x2 * t2; + *py2 = t2; + + break; + + case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x0 -= t0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 
0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 ); + + *pc0 = a2_0 + t2_0; + *pc1 = one + t1; + *pc2 = a2_2 + t2_2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t1_2 = a2_2*w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + *py0 = a1_0 + t1_0; + t1 = x1 + x1 * t1; + *py1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 3: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 ); + + *pc0 = a2_0 + t2_0; + *pc1 = one + t1; + *pc2 = one + t2; + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + + *py0 = a1_0 + t1_0; + t1 = x1 + x1 * t1; + *py1 = t1; + t2 = x2 + x2 * t2; + *py2 = t2; + + break; + + case 4: + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; 
+ j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x1 -= t1; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + /* cos_lo(t) */ + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 ); + + *pc0 = one + t0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1_1 = a2_1*w1 + a1_1*t1; + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + t0 = x0 + x0 * t0; + *py0 = t0; + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + + break; + + case 5: + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 ); + + *pc0 = one + t0; + *pc1 = a2_1 + t2_1; + 
*pc2 = one + t2; + + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + + t0 = x0 + x0 * t0; + *py0 = t0; + *py1 = a1_1 + t1_1; + t2 = x2 + x2 * t2; + *py2 = t2; + + break; + + case 6: + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x2 -= t2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2]; + + a2_2 = __vlibm_TBL_sincos_hi[j2+1]; + + t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 ); + + *pc0 = one + t0; + *pc1 = one + t1; + *pc2 = a2_2 + t2_2; + + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2]; + + t0 = x0 + x0 * t0; + *py0 = t0; + t1 = x1 + x1 * t1; + *py1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 7: /* All are < 5/32 */ + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t2 = z2 * ( poly3[1] + z2 * poly4[1] ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) ); + *pc0 = one + t0; + *pc1 = one + t1; + *pc2 = one + t2; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t2 = z2 * ( poly3[0] + z2 * poly4[0] ); + t0 = 
z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) ); + t0 = x0 + x0 * t0; + t1 = x1 + x1 * t1; + t2 = x2 + x2 * t2; + *py0 = t0; + *py1 = t1; + *py2 = t2; + break; + } + + x += stridex; + y += stridey; + c += stridec; + i = 0; + } while ( --n > 0 ); /* END MAIN LOOP */ + + /* + * CLEAN UP last 0, 1, or 2 elts. + */ + if ( i > 0 ) /* Clean up elts at tail. i < 3. */ + { + double a1_0, a1_1, a2_0, a2_1; + double w0, w1; + double t0, t1, t1_0, t1_1, t2_0, t2_1; + double z0, z1; + unsigned j0, j1; + + if ( i > 1 ) + { + if ( hx1 < 0x3fc40000 ) + { + z1 = x1 * x1; + t1 = z1 * ( poly3[1] + z1 * poly4[1] ); + t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) ); + t1 = one + t1; + *pc1 = t1; + t1 = z1 * ( poly3[0] + z1 * poly4[0] ); + t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) ); + t1 = x1 + x1 * t1; + *py1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 -= t1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1]; + a2_1 = __vlibm_TBL_sincos_hi[j1+1]; + t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 ); + *pc1 = a2_1 + t2_1; + t1_1 = a2_1*w1 + a1_1*t1; + t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1]; + *py1 = a1_1 + t1_1; + } + } + if ( hx0 < 0x3fc40000 ) + { + z0 = x0 * x0; + t0 = z0 * ( poly3[1] + z0 * poly4[1] ); + t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) ); + t0 = one + t0; + *pc0 = t0; + t0 = z0 * ( poly3[0] + z0 * poly4[0] ); + t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) ); + t0 = x0 + x0 * t0; + *py0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 -= t0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) 
& ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */ + a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ + t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 ); + *pc0 = a2_0 + t2_0; + t1_0 = a2_0*w0 + a1_0*t0; + t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */ + *py0 = a1_0 + t1_0; + } + } /* END CLEAN UP */ + + if ( !biguns ) + return; + + /* + * Take care of BIGUNS. + */ + n = nsave; + x = xsave; + stridex = sxsave; + y = ysave; + stridey = sysave; + c = csave; + stridec = scsave; + biguns = 0; + + x0_or_one[1] = 1.0; + x1_or_one[1] = 1.0; + x2_or_one[1] = 1.0; + x0_or_one[3] = -1.0; + x1_or_one[3] = -1.0; + x2_or_one[3] = -1.0; + y0_or_zero[1] = 0.0; + y1_or_zero[1] = 0.0; + y2_or_zero[1] = 0.0; + y0_or_zero[3] = 0.0; + y1_or_zero[3] = 0.0; + y2_or_zero[3] = 0.0; + + do + { + double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; + unsigned hx; + int n0, n1, n2; + + /* + * Find 3 more to work on: Not already done, not too big. + */ +loop0: + hx = HI(x); + xsb0 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= 0x3fe921fb ) /* Done above. */ + { + x += stridex; + y += stridey; + c += stridec; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. 
*/ + { + if ( hx >= 0x7ff00000 ) /* Inf or NaN */ + { + x0 = *x; + *y = x0 - x0; + *c = x0 - x0; + } + else { + biguns = 1; + } + x += stridex; + y += stridey; + c += stridec; + i = 0; + if ( --n <= 0 ) + break; + goto loop0; + } + x0 = *x; + py0 = y; + pc0 = c; + x += stridex; + y += stridey; + c += stridec; + i = 1; + if ( --n <= 0 ) + break; + +loop1: + hx = HI(x); + xsb1 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= 0x3fe921fb ) + { + x += stridex; + y += stridey; + c += stridec; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x1 = *x; + *y = x1 - x1; + *c = x1 - x1; + } + else { + biguns = 1; + } + x += stridex; + y += stridey; + c += stridec; + i = 1; + if ( --n <= 0 ) + break; + goto loop1; + } + x1 = *x; + py1 = y; + pc1 = c; + x += stridex; + y += stridey; + c += stridec; + i = 2; + if ( --n <= 0 ) + break; + +loop2: + hx = HI(x); + xsb2 = hx >> 31; + hx &= ~0x80000000; + if ( hx <= 0x3fe921fb ) + { + x += stridex; + y += stridey; + c += stridec; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + if ( hx > 0x413921fb ) + { + if ( hx >= 0x7ff00000 ) + { + x2 = *x; + *y = x2 - x2; + *c = x2 - x2; + } + else { + biguns = 1; + } + x += stridex; + y += stridey; + c += stridec; + i = 2; + if ( --n <= 0 ) + break; + goto loop2; + } + x2 = *x; + py2 = y; + pc2 = c; + + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + n2 = (int) ( x2 * invpio2 + half[xsb2] ); + fn0 = (double) n0; + fn1 = (double) n1; + fn2 = (double) n2; + n0 &= 3; + n1 &= 3; + n2 &= 3; + a0 = x0 - fn0 * pio2_1; + a1 = x1 - fn1 * pio2_1; + a2 = x2 - fn2 * pio2_1; + w0 = fn0 * pio2_2; + w1 = fn1 * pio2_2; + w2 = fn2 * pio2_2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3 - y0; + w1 = fn1 * pio2_3 - y1; + w2 = fn2 * pio2_3 - y2; + x0 = a0 - w0; + x1 = a1 - w1; + 
x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + a0 = x0; + a1 = x1; + a2 = x2; + w0 = fn0 * pio2_3t - y0; + w1 = fn1 * pio2_3t - y1; + w2 = fn2 * pio2_3t - y2; + x0 = a0 - w0; + x1 = a1 - w1; + x2 = a2 - w2; + y0 = ( a0 - x0 ) - w0; + y1 = ( a1 - x1 ) - w1; + y2 = ( a2 - x2 ) - w2; + xsb2 = HI(&x2); + i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31; + xsb1 = HI(&x1); + i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2; + xsb0 = HI(&x0); + i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4; + switch ( i ) + { + double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2; + double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2; + double z0, z1, z2; + unsigned j0, j1, j2; + + case 0: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t1) = 0; + LO(&t2) = 0; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + xsb2 |= 1; + + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t2_0 = 
__vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 ); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 ); + + w0 *= a2_0; + w1 *= a2_1; + w2 *= a2_2; + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + t1_0 = w0 + a1_0*t0; + t1_1 = w1 + a1_1*t1; + t1_2 = w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + + break; + + case 1: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t0) = j0; + HI(&t1) = j1; + LO(&t0) = 0; + LO(&t1) = 0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb1 = ( xsb1 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb0 |= 1; + xsb1 |= 1; + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 ); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + + *pc0 = a2_0 + t2_0; + *pc1 = a2_1 + t2_1; + *py2 = t2; + + n2 = (n2 + 1) & 3; + j2 = (j2 + 1) & 1; + t2 = z2 * ( 
poly3[j2] + z2 * poly4[j2] ); + + t1_0 = a2_0*w0 + a1_0*t0; + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + + *py0 = a1_0 + t1_0; + *py1 = a1_1 + t1_1; + *pc2 = t2; + + break; + + case 2: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + HI(&t2) = j2; + LO(&t0) = 0; + LO(&t2) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb0 |= 1; + xsb2 |= 1; + + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 ); + + *pc0 = a2_0 + t2_0; + *py1 = t1; + *pc2 = a2_2 + t2_2; + + n1 = (n1 + 1) & 3; + j1 = (j1 + 1) & 1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1_2 = a2_2*w2 + a1_2*t2; + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 
); + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + *py0 = a1_0 + t1_0; + *pc1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 3: + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + j1 = n1 & 1; + j2 = n2 & 1; + HI(&t0) = j0; + LO(&t0) = 0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + x0 = ( x0 - t0 ) + y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( qq1 + z0 * qq2 ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + + *pc0 = a2_0 + t2_0; + *py1 = t1; + *py2 = t2; + + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + j1 = (j1 + 1) & 1; + j2 = (j2 + 1) & 1; + + t1_0 = a2_0*w0 + a1_0*t0; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + + *py0 = a1_0 + t1_0; + *pc1 = t1; + *pc2 = t2; + + break; + + case 4: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + HI(&t2) = j2; + LO(&t1) = 0; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] 
= y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + xsb2 = ( xsb2 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb1 |= 1; + xsb2 |= 1; + + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a1_2 = __vlibm_TBL_sincos_hi[j2+n2]; + + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 ); + + *py0 = t0; + *pc1 = a2_1 + t2_1; + *pc2 = a2_2 + t2_2; + + n0 = (n0 + 1) & 3; + j0 = (j0 + 1) & 1; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1_1 = a2_1*w1 + a1_1*t1; + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + *py1 = a1_1 + t1_1; + *py2 = a1_2 + t1_2; + *pc0 = t0; + + break; + + case 5: + j0 = n0 & 1; + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + j2 = n2 & 1; + HI(&t1) = j1; + LO(&t1) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + x1 = ( x1 - t1 ) + y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( qq1 + z1 * qq2 ); + t2 = z2 * ( poly3[j2] + z2 * 
poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + + a1_1 = __vlibm_TBL_sincos_hi[j1+n1]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + + *py0 = t0; + *pc1 = a2_1 + t2_1; + *py2 = t2; + + n0 = (n0 + 1) & 3; + n2 = (n2 + 1) & 3; + j0 = (j0 + 1) & 1; + j2 = (j2 + 1) & 1; + + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1_1 = a2_1*w1 + a1_1*t1; + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + + *pc0 = t0; + *py1 = a1_1 + t1_1; + *pc2 = t2; + + break; + + case 6: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = ( xsb2 + 0x4000 ) & 0xffff8000; + HI(&t2) = j2; + LO(&t2) = 0; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + x2 = ( x2 - t2 ) + y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( qq1 + z2 * qq2 ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); + j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb2 = ( xsb2 >> 30 ) & 2; + n2 ^= ( xsb2 & ~( n2 << 1 ) ); + xsb2 |= 1; + + a1_2 = 
__vlibm_TBL_sincos_hi[j2+n2]; + a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)]; + + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 ); + + *py0 = t0; + *py1 = t1; + *pc2 = a2_2 + t2_2; + + n0 = (n0 + 1) & 3; + n1 = (n1 + 1) & 3; + j0 = (j0 + 1) & 1; + j1 = (j1 + 1) & 1; + + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1_2 = a2_2*w2 + a1_2*t2; + + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1_2 += __vlibm_TBL_sincos_lo[j2+n2]; + + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + + *pc0 = t0; + *pc1 = t1; + *py2 = a1_2 + t1_2; + + break; + + case 7: + j0 = n0 & 1; + j1 = n1 & 1; + j2 = n2 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + x2_or_one[0] = x2; + x2_or_one[2] = -x2; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + y2_or_zero[0] = y2; + y2_or_zero[2] = -y2; + z0 = x0 * x0; + z1 = x1 * x1; + z2 = x2 * x2; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *py0 = t0; + *py1 = t1; + *py2 = t2; + + n0 = (n0 + 1) & 3; + n1 = (n1 + 1) & 3; + n2 = (n2 + 1) & 3; + j0 = (j0 + 1) & 1; + j1 = (j1 + 1) & 1; + j2 = (j2 + 1) & 1; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t2 = z2 * ( 
poly3[j2] + z2 * poly4[j2] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); + *pc0 = t0; + *pc1 = t1; + *pc2 = t2; + break; + } + + x += stridex; + y += stridey; + c += stridec; + i = 0; + } while ( --n > 0 ); + + if ( i > 0 ) + { + double a1_0, a1_1, a2_0, a2_1; + double t0, t1, t1_0, t1_1, t2_0, t2_1; + double fn0, fn1, a0, a1, w0, w1, y0, y1; + double z0, z1; + unsigned hx, j0, j1; + int n0, n1; + + if ( i > 1 ) + { + n1 = (int) ( x1 * invpio2 + half[xsb1] ); + fn1 = (double) n1; + n1 &= 3; + a1 = x1 - fn1 * pio2_1; + w1 = fn1 * pio2_2; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3 - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + a1 = x1; + w1 = fn1 * pio2_3t - y1; + x1 = a1 - w1; + y1 = ( a1 - x1 ) - w1; + xsb1 = HI(&x1); + if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 ) + { + j1 = n1 & 1; + x1_or_one[0] = x1; + x1_or_one[2] = -x1; + y1_or_zero[0] = y1; + y1_or_zero[2] = -y1; + z1 = x1 * x1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + *py1 = t1; + n1 = (n1 + 1) & 3; + j1 = (j1 + 1) & 1; + t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); + t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); + t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); + *pc1 = t1; + } + else + { + j1 = ( xsb1 + 0x4000 ) & 0xffff8000; + HI(&t1) = j1; + LO(&t1) = 0; + x1 = ( x1 - t1 ) + y1; + z1 = x1 * x1; + t1 = z1 * ( qq1 + z1 * qq2 ); + w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); + j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb1 = ( xsb1 >> 30 ) & 2; + n1 ^= ( xsb1 & ~( n1 << 1 ) ); + xsb1 |= 1; + a1_1 = 
__vlibm_TBL_sincos_hi[j1+n1]; + a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)]; + t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 ); + *pc1 = a2_1 + t2_1; + t1_1 = a2_1*w1 + a1_1*t1; + t1_1 += __vlibm_TBL_sincos_lo[j1+n1]; + *py1 = a1_1 + t1_1; + } + } + n0 = (int) ( x0 * invpio2 + half[xsb0] ); + fn0 = (double) n0; + n0 &= 3; + a0 = x0 - fn0 * pio2_1; + w0 = fn0 * pio2_2; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3 - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + a0 = x0; + w0 = fn0 * pio2_3t - y0; + x0 = a0 - w0; + y0 = ( a0 - x0 ) - w0; + xsb0 = HI(&x0); + if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 ) + { + j0 = n0 & 1; + x0_or_one[0] = x0; + x0_or_one[2] = -x0; + y0_or_zero[0] = y0; + y0_or_zero[2] = -y0; + z0 = x0 * x0; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + *py0 = t0; + n0 = (n0 + 1) & 3; + j0 = (j0 + 1) & 1; + t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); + t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); + t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); + *pc0 = t0; + } + else + { + j0 = ( xsb0 + 0x4000 ) & 0xffff8000; + HI(&t0) = j0; + LO(&t0) = 0; + x0 = ( x0 - t0 ) + y0; + z0 = x0 * x0; + t0 = z0 * ( qq1 + z0 * qq2 ); + w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); + j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; + xsb0 = ( xsb0 >> 30 ) & 2; + n0 ^= ( xsb0 & ~( n0 << 1 ) ); + xsb0 |= 1; + a1_0 = __vlibm_TBL_sincos_hi[j0+n0]; + a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)]; + t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 ); + *pc0 = a2_0 + t2_0; + t1_0 = a2_0*w0 + a1_0*t0; + t1_0 += __vlibm_TBL_sincos_lo[j0+n0]; + *py0 = a1_0 + t1_0; + } + } + + if ( biguns ) { + __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb ); + } +} diff --git a/usr/src/libm/src/mvec/__vsincosbig.c 
b/usr/src/libm/src/mvec/__vsincosbig.c new file mode 100644 index 0000000..dffd5b7 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsincosbig.c @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)__vsincosbig.c 1.5 06/01/31 SMI"
+
+#include <sys/isa_defs.h>
+
+/*
+ * HI(x) / LO(x): the high / low 32-bit word of the double that x points
+ * to, selected by byte order.
+ * NOTE(review): these access a double through int pointers; confirm the
+ * build tolerates that kind of type punning.
+ */
+#ifdef _LITTLE_ENDIAN
+#define HI(x) *(1+(int*)x)
+#define LO(x) *(unsigned*)x
+#else
+#define HI(x) *(int*)x
+#define LO(x) *(1+(unsigned*)x)
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
+extern int __vlibm_rem_pio2m( double *, double *, int, int, int );
+
+/*
+ * pp1..pp2 / qq1..qq2: short sine / cosine polynomials used together
+ * with the table lookup; p1..p4 / q1..q4: longer sine / cosine
+ * polynomials used when the reduced argument is below 0x3fc40000 (5/32).
+ */
+static const double
+	zero = 0.0,
+	one = 1.0,
+	two24 = 16777216.0,
+	pp1 = -1.666666666605760465276263943134982554676e-0001,
+	pp2 = 8.333261209690963126718376566146180944442e-0003,
+	p1 = -1.666666666666629669805215138920301589656e-0001,
+	p2 = 8.333333332390951295683993455280336376663e-0003,
+	p3 = -1.984126237997976692791551778230098403960e-0004,
+	p4 = 2.753403624854277237649987622848330351110e-0006,
+	qq1 = -4.999999999977710986407023955908711557870e-0001,
+	qq2 = 4.166654863857219350645055881018842089580e-0002,
+	q1 = -4.999999999999931701464060878888294524481e-0001,
+	q2 = 4.166666666394861917535640593963708222319e-0002,
+	q3 = -1.388888552656142867832756687736851681462e-0003,
+	q4 = 2.478519423681460796618128289454530524759e-0005;
+
+/*
+ * __vlibm_vsincos_big: sine and cosine of the large-magnitude elements
+ * of a strided vector of doubles.
+ *
+ *	n		number of elements; n <= 0 does nothing
+ *	x, stridex	input vector and its stride (in elements)
+ *	ss, stridess	receives the sine of each processed element
+ *	cc, stridecc	receives the cosine of each processed element
+ *	thresh		high word of |x| at or below which an element
+ *			is skipped
+ *
+ * An element is processed only when the high word of |x| is above thresh
+ * and below 0x7ff00000 (i.e. finite).  For every other element *ss and
+ * *cc are not written.
+ */
+void
+__vlibm_vsincos_big( int n, double * restrict x, int stridex,
+	double * restrict ss, int stridess,
+	double * restrict cc, int stridecc, int thresh )
+{
+	/* "n-- > 0" rather than "n--" so a negative count cannot run away */
+	for ( ; n-- > 0; x += stridex, ss += stridess, cc += stridecc )
+	{
+		double ts, tc, tx, tt[3], ty[2], t, w, z, c, s;
+		unsigned hx, xsb;
+		int e0, nx, j;
+
+		hx = HI(x);
+		xsb = hx & 0x80000000;	/* sign of x */
+		hx &= ~0x80000000;	/* high word of |x| */
+		if ( hx <= (unsigned) thresh || hx >= 0x7ff00000 )
+			continue;
+
+		/*
+		 * Argument reduction part.
+		 *
+		 * Rebuild |x| with its exponent field forced to 0x416, peel
+		 * it into up to three integer-valued pieces tt[0..2] (each
+		 * later piece scaled by another 2^24), and hand the pieces
+		 * plus the exponent offset e0 to __vlibm_rem_pio2m.
+		 */
+		e0 = ( hx >> 20 ) - 1046;
+		HI(&tx) = 0x41600000 | ( hx & 0xfffff );
+		LO(&tx) = LO(x);
+		tt[0] = (double)( (int) tx );
+		tx = ( tx - tt[0] ) * two24;
+		if ( tx != zero )
+		{
+			nx = 2;
+			tt[1] = (double)( (int) tx );
+			tt[2] = ( tx - tt[1] ) * two24;
+			if ( tt[2] != zero )
+				nx = 3;
+		}
+		else
+		{
+			nx = 1;
+			tt[1] = tt[2] = zero;
+		}
+		nx = __vlibm_rem_pio2m( tt, ty, e0, nx, 2 );
+		if ( xsb )
+		{
+			/* the reduction was done on |x|; mirror it for x < 0 */
+			nx = -nx;
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+		}
+
+		/* now nx and ty[*] are the quadrant and reduced arg */
+		hx = HI(&ty[0]);
+		xsb = 0;
+		if ( hx & 0x80000000 )
+		{
+			/* work on |reduced arg|; xsb remembers to negate sin */
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+			hx &= ~0x80000000;
+			xsb = 1;
+		}
+		if ( hx < 0x3fc40000 )
+		{
+			/* small reduced arg: polynomials only */
+			z = ty[0] * ty[0];
+			t = z * ( q1 + z * ( q2 + z * ( q3 + z * q4 ) ) );
+			c = one + t;
+			t = z * ( p1 + z * ( p2 + z * ( p3 + z * p4 ) ) );
+			s = ty[0] + ( ty[1] + ty[0] * t );
+		}
+		else {
+			/*
+			 * Round the reduced arg to a table point t, combine
+			 * the table's sin/cos at t with short polynomials
+			 * in the remainder.
+			 */
+			j = ( hx + 0x4000 ) & 0x7fff8000;
+			HI(&t) = j;
+			LO(&t) = 0;
+			ty[0] = ( ty[0] - t ) + ty[1];
+			z = ty[0] * ty[0];
+			t = z * ( qq1 + z * qq2 );
+			w = ty[0] * ( one + z * ( pp1 + z * pp2 ) );
+			j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3;
+
+			c = __vlibm_TBL_sincos_hi[j+1];
+			tc = __vlibm_TBL_sincos_lo[j+1] - ( __vlibm_TBL_sincos_hi[j] * w - c * t );
+			c += tc;
+
+			s = __vlibm_TBL_sincos_hi[j];
+			ts = ( __vlibm_TBL_sincos_hi[j+1] * w + s * t ) + __vlibm_TBL_sincos_lo[j];
+			s += ts;
+		}
+		if ( xsb ) {
+			s = -s;
+		}
+
+		/* rotate (s, c) by nx quarter turns */
+		switch ( nx & 3 ) {
+		case 0:
+			*ss = s;
+			*cc = c;
+			break;
+
+		case 1:
+			*ss = c;
+			*cc = -s;
+			break;
+
+		case 2:
+			*ss = -s;
+			*cc = -c;
+			break;
+
+		case 3:
+			*ss = -c;
+			*cc = s;
+			break;
+		}
+	}
+}
diff --git a/usr/src/libm/src/mvec/__vsincosbigf.c b/usr/src/libm/src/mvec/__vsincosbigf.c
new file mode 100644
index 0000000..2ab0b35
--- /dev/null
+++ b/usr/src/libm/src/mvec/__vsincosbigf.c
@@ -0,0 +1,170 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__vsincosbigf.c 1.5 06/01/31 SMI"
+
+#include <sys/isa_defs.h>
+
+/*
+ * HI(x) / LO(x): the high / low 32-bit word of the double that x points
+ * to, selected by byte order.
+ * NOTE(review): these access a double through int pointers; confirm the
+ * build tolerates that kind of type punning.
+ */
+#ifdef _LITTLE_ENDIAN
+#define HI(x) *(1+(int*)x)
+#define LO(x) *(unsigned*)x
+#else
+#define HI(x) *(int*)x
+#define LO(x) *(1+(unsigned*)x)
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
+extern int __vlibm_rem_pio2m( double *, double *, int, int, int );
+
+/*
+ * pp1..pp2 / qq1..qq2: short sine / cosine polynomials used together
+ * with the table lookup; p1..p4 / q1..q4: longer sine / cosine
+ * polynomials used when the reduced argument is below 0x3fc40000 (5/32).
+ */
+static const double
+	zero = 0.0,
+	one = 1.0,
+	two24 = 16777216.0,
+	pp1 = -1.666666666605760465276263943134982554676e-0001,
+	pp2 = 8.333261209690963126718376566146180944442e-0003,
+	p1 = -1.666666666666629669805215138920301589656e-0001,
+	p2 = 8.333333332390951295683993455280336376663e-0003,
+	p3 = -1.984126237997976692791551778230098403960e-0004,
+	p4 = 2.753403624854277237649987622848330351110e-0006,
+	qq1 = -4.999999999977710986407023955908711557870e-0001,
+	qq2 = 4.166654863857219350645055881018842089580e-0002,
+	q1 = -4.999999999999931701464060878888294524481e-0001,
+	q2 = 4.166666666394861917535640593963708222319e-0002,
+	q3 = -1.388888552656142867832756687736851681462e-0003,
+	q4 = 2.478519423681460796618128289454530524759e-0005;
+
+/*
+ * __vlibm_vsincos_bigf: sine and cosine of the large-magnitude elements
+ * of a strided vector of floats, computed in double precision.
+ *
+ *	n		number of elements; n <= 0 does nothing
+ *	x, stridex	input vector and its stride (in elements)
+ *	ss, stridess	receives the sine of each processed element
+ *	cc, stridecc	receives the cosine of each processed element
+ *
+ * Each element is widened to double first.  It is processed only when
+ * the high word of its absolute value is above 0x413921fb and below
+ * 0x7ff00000 (i.e. finite).  For every other element *ss and *cc are
+ * not written.
+ */
+void
+__vlibm_vsincos_bigf( int n, float * restrict x, int stridex,
+	float * restrict ss, int stridess, float * restrict cc, int stridecc )
+{
+	/* "n-- > 0" rather than "n--" so a negative count cannot run away */
+	for ( ; n-- > 0; x += stridex, ss += stridess, cc += stridecc )
+	{
+		double ts, tc, tx, tt[3], ty[2], t, w, z, c, s;
+		unsigned hx, xsb;
+		int e0, nx, j;
+
+		tx = *x;
+		hx = HI(&tx);
+		xsb = hx & 0x80000000;	/* sign of x */
+		hx &= ~0x80000000;	/* high word of |x| */
+		if ( hx <= 0x413921fb || hx >= 0x7ff00000 )
+			continue;
+
+		/*
+		 * Argument reduction: force the exponent field of |x| to
+		 * 0x416 (the low word of tx is kept as converted), peel it
+		 * into up to three integer-valued pieces tt[0..2], and pass
+		 * them with the exponent offset e0 to __vlibm_rem_pio2m.
+		 */
+		e0 = ( hx >> 20 ) - 1046;
+		HI(&tx) = 0x41600000 | ( hx & 0xfffff );
+
+		tt[0] = (double)( (int) tx );
+		tx = ( tx - tt[0] ) * two24;
+		if ( tx != zero )
+		{
+			nx = 2;
+			tt[1] = (double)( (int) tx );
+			tt[2] = ( tx - tt[1] ) * two24;
+			if ( tt[2] != zero )
+				nx = 3;
+		}
+		else
+		{
+			nx = 1;
+			tt[1] = tt[2] = zero;
+		}
+		nx = __vlibm_rem_pio2m( tt, ty, e0, nx, 2 );
+		if ( xsb )
+		{
+			/* the reduction was done on |x|; mirror it for x < 0 */
+			nx = -nx;
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+		}
+
+		/* now nx and ty[*] are the quadrant and reduced arg */
+		xsb = 0;
+		hx = HI(&ty[0]);
+		if ( hx & 0x80000000 )
+		{
+			/* work on |reduced arg|; xsb remembers to negate sin */
+			ty[0] = -ty[0];
+			ty[1] = -ty[1];
+			hx &= ~0x80000000;
+			xsb = 1;
+		}
+		if ( hx < 0x3fc40000 )
+		{
+			/* small reduced arg: polynomials only */
+			z = ty[0] * ty[0];
+			t = z * ( q1 + z * ( q2 + z * ( q3 + z * q4 ) ) );
+			c = one + t;
+
+			t = z * ( p1 + z * ( p2 + z * ( p3 + z * p4 ) ) );
+			s = ty[0] + ( ty[1] + ty[0] * t );
+		}
+		else {
+			/*
+			 * Round the reduced arg to a table point t, combine
+			 * the table's sin/cos at t with short polynomials
+			 * in the remainder.
+			 */
+			j = ( hx + 0x4000 ) & 0x7fff8000;
+			HI(&t) = j;
+			LO(&t) = 0;
+			ty[0] = ( ty[0] - t ) + ty[1];
+			z = ty[0] * ty[0];
+			t = z * ( qq1 + z * qq2 );
+			w = ty[0] * ( one + z * ( pp1 + z * pp2 ) );
+			j = ( ( j - 0x3fc40000 ) >> 13 ) & ~3;
+
+			c = __vlibm_TBL_sincos_hi[j+1];
+			tc = __vlibm_TBL_sincos_lo[j+1] - ( __vlibm_TBL_sincos_hi[j] * w - c * t );
+			c += tc;
+
+			s = __vlibm_TBL_sincos_hi[j];
+			ts = ( __vlibm_TBL_sincos_hi[j+1] * w + s * t ) + __vlibm_TBL_sincos_lo[j];
+			s += ts;
+		}
+		if ( xsb ) {
+			s = -s;
+		}
+
+		/* rotate (s, c) by nx quarter turns; results narrow to float */
+		switch ( nx & 3 ) {
+		case 0:
+			*ss = s;
+			*cc = c;
+			break;
+
+		case 1:
+			*ss = c;
+			*cc = -s;
+			break;
+
+		case 2:
+			*ss = -s;
+			*cc = -c;
+			break;
+
+		case 3:
+			*ss = -c;
+			*cc = s;
+			break;
+		}
+	}
+}
diff --git a/usr/src/libm/src/mvec/__vsincosf.c b/usr/src/libm/src/mvec/__vsincosf.c
new file mode 100644
index 0000000..497e781
--- /dev/null
+++ b/usr/src/libm/src/mvec/__vsincosf.c
@@ -0,0 +1,313 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__vsincosf.c 1.4 06/01/23 SMI"
+
+/*
+ * __vsincosf: single precision vector sincos
+ *
+ * Algorithm:
+ *
+ * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
+ * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
+ * z*C2))), where z = x*x, all evaluated in double precision.
+ *
+ * Accuracy:
+ *
+ * The largest error is less than 0.6 ulps.
+ */ + +#include <sys/isa_defs.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int *)&x) +#define LO(x) *(unsigned *)&x +#else +#define HI(x) *(int *)&x +#define LO(x) *(1+(unsigned *)&x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + +static const double C[] = { + -1.66666552424430847168e-01, /* 2^ -3 * -1.5555460000000 */ + 8.33219196647405624390e-03, /* 2^ -7 * 1.11077E0000000 */ + -1.95187909412197768688e-04, /* 2^-13 * -1.9956B60000000 */ + 1.0, + -0.5, + 4.16666455566883087158e-02, /* 2^ -5 * 1.55554A0000000 */ + -1.38873036485165357590e-03, /* 2^-10 * -1.6C0C1E0000000 */ + 2.44309903791872784495e-05, /* 2^-16 * 1.99E24E0000000 */ + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 6755399441055744.0, /* 2^ 52 * 1.8000000000000 */ + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define S0 C[0] +#define S1 C[1] +#define S2 C[2] +#define one C[3] +#define mhalf C[4] +#define C0 C[5] +#define C1 C[6] +#define C2 C[7] +#define invpio2 C[8] +#define c3two51 C[9] +#define pio2_1 C[10] +#define pio2_t C[11] +/* + * PREPROCESS(N, sindex, cindex, label): fetch the next argument (x is + * advanced by stridex) and classify it for slot N. Zero, inf/NaN and + * arguments above the 2^19*pi threshold are finished here: the results + * go to s[sindex] and c[cindex] and control jumps to label. Otherwise + * y##N receives the argument as a double; n##N is set to 0 when + * |x| < pi/4, and medium is set when a pi/2 reduction is still pending. + */ +#define PREPROCESS(N, sindex, cindex, label) \ + hx = *(int *)x; \ + ix = hx & 0x7fffffff; \ + t = *x; \ + x += stridex; \ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ \ + if (ix == 0) { \ + s[sindex] = t; \ + c[cindex] = one; \ + goto label; \ + } \ + y##N = (double)t; \ + n##N = 0; \ + } else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */ \ + y##N = (double)t; \ + medium = 1; \ + } else { \ + if (ix >= 0x7f800000) { /* inf or nan */ \ + s[sindex] = c[cindex] = t / t; \ + goto label; \ + } \ + z##N = y##N = (double)t; \ + hx = HI(y##N); \ + n##N = ((hx >> 20) & 0x7ff) - 1046; \ + HI(z##N) = (hx & 0xfffff) | 0x41600000; \ + n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0); \ + if (hx < 0) { \ + y##N = -y##N; \ + n##N = -n##N; \ + } \ + z##N = y##N * y##N; \ + f##N = (float)(y##N + 
y##N * z##N * (S0 + z##N * \ + (S1 + z##N * S2))); \ + g##N = (float)(one + z##N * (mhalf + z##N * (C0 + \ + z##N * (C1 + z##N * C2)))); \ + if (n##N & 2) { \ + f##N = -f##N; \ + g##N = -g##N; \ + } \ + if (n##N & 1) { \ + s[sindex] = g##N; \ + c[cindex] = -f##N; \ + } else { \ + s[sindex] = f##N; \ + c[cindex] = g##N; \ + } \ + goto label; \ + } +/* + * PROCESS(N): finish slot N. Applies the pending pi/2 reduction when + * medium is set, evaluates the sin and cos polynomials, negates and swaps + * them according to n##N, stores the pair to *s and *c, and advances s + * and c by their strides. + */ +#define PROCESS(N) \ + if (medium) { \ + z##N = y##N * invpio2 + c3two51; \ + n##N = LO(z##N); \ + z##N -= c3two51; \ + y##N = (y##N - z##N * pio2_1) - z##N * pio2_t; \ + } \ + z##N = y##N * y##N; \ + f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + z##N * S2)));\ + g##N = (float)(one + z##N * (mhalf + z##N * (C0 + z##N * \ + (C1 + z##N * C2)))); \ + if (n##N & 2) { \ + f##N = -f##N; \ + g##N = -g##N; \ + } \ + if (n##N & 1) { \ + *s = g##N; \ + *c = -f##N; \ + } else { \ + *s = f##N; \ + *c = g##N; \ + } \ + s += strides; \ + c += stridec +/* + * __vsincosf: s[i*strides] = sin(x[i*stridex]) and c[i*stridec] = + * cos(x[i*stridex]) for i = 0 .. n-1. Arguments are taken four at a + * time; when PREPROCESS finishes a slot early or the input runs out, + * the matching process<k> tail completes the slots already loaded. + * The output offsets of slots 2 and 3 are formed by multiplication, + * not by left shifts, so a negative stride is not undefined behavior. + */ +void +__vsincosf(int n, float *restrict x, int stridex, + float *restrict s, int strides, float *restrict c, int stridec) +{ + double y0, y1, y2, y3; + double z0, z1, z2, z3; + float f0, f1, f2, f3, t; + float g0, g1, g2, g3; + int n0, n1, n2, n3, hx, ix, medium; + + s -= strides; + c -= stridec; + + for (;;) { +begin: + s += strides; + c += stridec; + + if (--n < 0) + break; + + medium = 0; + PREPROCESS(0, 0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, strides, stridec, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, (strides * 2), (stridec * 2), process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, strides * 3, + stridec * 3, process3); + + if (medium) { + z0 = y0 * invpio2 + c3two51; + z1 = y1 * invpio2 + c3two51; + z2 = y2 * invpio2 + c3two51; + z3 = y3 * invpio2 + c3two51; + + n0 = LO(z0); + n1 = LO(z1); + n2 = LO(z2); + n3 = LO(z3); + + z0 -= c3two51; + z1 -= c3two51; + z2 -= c3two51; + z3 -= c3two51; + + y0 = (y0 - z0 * pio2_1) - z0 * pio2_t; + y1 = (y1 - z1 * pio2_1) - z1 * pio2_t; + y2 = (y2 - z2 
* pio2_1) - z2 * pio2_t; + y3 = (y3 - z3 * pio2_1) - z3 * pio2_t; + } + + z0 = y0 * y0; + z1 = y1 * y1; + z2 = y2 * y2; + z3 = y3 * y3; + + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + + g0 = (float)(one + z0 * (mhalf + z0 * (C0 + z0 * + (C1 + z0 * C2)))); + g1 = (float)(one + z1 * (mhalf + z1 * (C0 + z1 * + (C1 + z1 * C2)))); + g2 = (float)(one + z2 * (mhalf + z2 * (C0 + z2 * + (C1 + z2 * C2)))); + g3 = (float)(one + z3 * (mhalf + z3 * (C0 + z3 * + (C1 + z3 * C2)))); + + if (n0 & 2) { + f0 = -f0; + g0 = -g0; + } + if (n1 & 2) { + f1 = -f1; + g1 = -g1; + } + if (n2 & 2) { + f2 = -f2; + g2 = -g2; + } + if (n3 & 2) { + f3 = -f3; + g3 = -g3; + } + + if (n0 & 1) { + *s = g0; + *c = -f0; + } else { + *s = f0; + *c = g0; + } + s += strides; + c += stridec; + + if (n1 & 1) { + *s = g1; + *c = -f1; + } else { + *s = f1; + *c = g1; + } + s += strides; + c += stridec; + + if (n2 & 1) { + *s = g2; + *c = -f2; + } else { + *s = f2; + *c = g2; + } + s += strides; + c += stridec; + + if (n3 & 1) { + *s = g3; + *c = -f3; + } else { + *s = f3; + *c = g3; + } + continue; + +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + } +} diff --git a/usr/src/libm/src/mvec/__vsinf.c b/usr/src/libm/src/mvec/__vsinf.c new file mode 100644 index 0000000..a40417b --- /dev/null +++ b/usr/src/libm/src/mvec/__vsinf.c @@ -0,0 +1,380 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__vsinf.c 1.4 06/01/23 SMI" + +/* + * __vsinf: single precision vector sin + * + * Algorithm: + * + * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+ + * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+ + * z*C2))), where z = x*x, all evaluated in double precision. + * + * Accuracy: + * + * The largest error is less than 0.6 ulps. + */ + +#include + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int *)&x) +#define LO(x) *(unsigned *)&x +#else +#define HI(x) *(int *)&x +#define LO(x) *(1+(unsigned *)&x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern int __vlibm_rem_pio2m(double *, double *, int, int, int); + +static const double C[] = { + -1.66666552424430847168e-01, /* 2^ -3 * -1.5555460000000 */ + 8.33219196647405624390e-03, /* 2^ -7 * 1.11077E0000000 */ + -1.95187909412197768688e-04, /* 2^-13 * -1.9956B60000000 */ + 1.0, + -0.5, + 4.16666455566883087158e-02, /* 2^ -5 * 1.55554A0000000 */ + -1.38873036485165357590e-03, /* 2^-10 * -1.6C0C1E0000000 */ + 2.44309903791872784495e-05, /* 2^-16 * 1.99E24E0000000 */ + 0.636619772367581343075535, /* 2^ -1 * 1.45F306DC9C883 */ + 6755399441055744.0, /* 2^ 52 * 1.8000000000000 */ + 1.570796326734125614166, /* 2^ 0 * 1.921FB54400000 */ + 6.077100506506192601475e-11, /* 2^-34 * 1.0B4611A626331 */ +}; + +#define S0 C[0] +#define S1 C[1] 
+#define S2 C[2] +#define one C[3] +#define mhalf C[4] +#define C0 C[5] +#define C1 C[6] +#define C2 C[7] +#define invpio2 C[8] +#define c3two51 C[9] +#define pio2_1 C[10] +#define pio2_t C[11] + +#define PREPROCESS(N, index, label) \ + hx = *(int *)x; \ + ix = hx & 0x7fffffff; \ + t = *x; \ + x += stridex; \ + if (ix <= 0x3f490fdb) { /* |x| < pi/4 */ \ + if (ix == 0) { \ + y[index] = t; \ + goto label; \ + } \ + y##N = (double)t; \ + n##N = 0; \ + } else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */ \ + y##N = (double)t; \ + medium = 1; \ + } else { \ + if (ix >= 0x7f800000) { /* inf or nan */ \ + y[index] = t / t; \ + goto label; \ + } \ + z##N = y##N = (double)t; \ + hx = HI(y##N); \ + n##N = ((hx >> 20) & 0x7ff) - 1046; \ + HI(z##N) = (hx & 0xfffff) | 0x41600000; \ + n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0); \ + if (hx < 0) { \ + y##N = -y##N; \ + n##N = -n##N; \ + } \ + z##N = y##N * y##N; \ + if (n##N & 1) { /* compute cos y */ \ + f##N = (float)(one + z##N * (mhalf + z##N * \ + (C0 + z##N * (C1 + z##N * C2)))); \ + } else { /* compute sin y */ \ + f##N = (float)(y##N + y##N * z##N * (S0 + \ + z##N * (S1 + z##N * S2))); \ + } \ + y[index] = (n##N & 2)? -f##N : f##N; \ + goto label; \ + } + +#define PROCESS(N) \ + if (medium) { \ + z##N = y##N * invpio2 + c3two51; \ + n##N = LO(z##N); \ + z##N -= c3two51; \ + y##N = (y##N - z##N * pio2_1) - z##N * pio2_t; \ + } \ + z##N = y##N * y##N; \ + if (n##N & 1) { /* compute cos y */ \ + f##N = (float)(one + z##N * (mhalf + z##N * (C0 + \ + z##N * (C1 + z##N * C2)))); \ + } else { /* compute sin y */ \ + f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + \ + z##N * S2))); \ + } \ + *y = (n##N & 2)? 
-f##N : f##N; \ + y += stridey + +void +__vsinf(int n, float *restrict x, int stridex, float *restrict y, + int stridey) +{ + double y0, y1, y2, y3; + double z0, z1, z2, z3; + float f0, f1, f2, f3, t; + int n0, n1, n2, n3, hx, ix, medium; + + y -= stridey; + + for (;;) { +begin: + y += stridey; + + if (--n < 0) + break; + + medium = 0; + PREPROCESS(0, 0, begin); + + if (--n < 0) + goto process1; + + PREPROCESS(1, stridey, process1); + + if (--n < 0) + goto process2; + + PREPROCESS(2, (stridey << 1), process2); + + if (--n < 0) + goto process3; + + PREPROCESS(3, (stridey << 1) + stridey, process3); + + if (medium) { + z0 = y0 * invpio2 + c3two51; + z1 = y1 * invpio2 + c3two51; + z2 = y2 * invpio2 + c3two51; + z3 = y3 * invpio2 + c3two51; + + n0 = LO(z0); + n1 = LO(z1); + n2 = LO(z2); + n3 = LO(z3); + + z0 -= c3two51; + z1 -= c3two51; + z2 -= c3two51; + z3 -= c3two51; + + y0 = (y0 - z0 * pio2_1) - z0 * pio2_t; + y1 = (y1 - z1 * pio2_1) - z1 * pio2_t; + y2 = (y2 - z2 * pio2_1) - z2 * pio2_t; + y3 = (y3 - z3 * pio2_1) - z3 * pio2_t; + } + + z0 = y0 * y0; + z1 = y1 * y1; + z2 = y2 * y2; + z3 = y3 * y3; + + hx = (n0 & 1) | ((n1 & 1) << 1) | ((n2 & 1) << 2) | + ((n3 & 1) << 3); + switch (hx) { + case 0: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 1: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 2: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + 
break; + + case 3: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 4: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 5: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 6: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 7: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2))); + break; + + case 8: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 9: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 10: + f0 = 
(float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 11: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 12: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 13: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + case 14: + f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + break; + + default: + f0 = (float)(one + z0 * (mhalf + z0 * (C0 + + z0 * (C1 + z0 * C2)))); + f1 = (float)(one + z1 * (mhalf + z1 * (C0 + + z1 * (C1 + z1 * C2)))); + f2 = (float)(one + z2 * (mhalf + z2 * (C0 + + z2 * (C1 + z2 * C2)))); + f3 = (float)(one + z3 * (mhalf + z3 * (C0 + + z3 * (C1 + z3 * C2)))); + } + + *y = (n0 & 2)? -f0 : f0; + y += stridey; + *y = (n1 & 2)? -f1 : f1; + y += stridey; + *y = (n2 & 2)? -f2 : f2; + y += stridey; + *y = (n3 & 2)? 
-f3 : f3; + continue; + +process1: + PROCESS(0); + continue; + +process2: + PROCESS(0); + PROCESS(1); + continue; + +process3: + PROCESS(0); + PROCESS(1); + PROCESS(2); + } +} diff --git a/usr/src/libm/src/mvec/__vsqrt.c b/usr/src/libm/src/mvec/__vsqrt.c new file mode 100644 index 0000000..f9c42de --- /dev/null +++ b/usr/src/libm/src/mvec/__vsqrt.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vsqrt.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +#define sqrt __sqrt + +extern double sqrt( double ); +/* + * __vsqrt: y[i*stridey] = sqrt(x[i*stridex]) for i = 0 .. n-1, one + * scalar sqrt call per element; nothing is read or written when n <= 0. + */ +void +__vsqrt( int n, double * restrict x, int stridex, double * restrict y, int stridey ) +{ + if ( n <= 0 ) + return; + + do + { + *y = sqrt( *x ); + x += stridex; + y += stridey; + } while ( --n > 0 ); +} + diff --git a/usr/src/libm/src/mvec/__vsqrtf.c b/usr/src/libm/src/mvec/__vsqrtf.c new file mode 100644 index 0000000..01f95b4 --- /dev/null +++ b/usr/src/libm/src/mvec/__vsqrtf.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vsqrtf.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +#define sqrtf __sqrtf + +extern float sqrtf( float ); +/* + * __vsqrtf: y[i*stridey] = sqrtf(x[i*stridex]) for i = 0 .. n-1, one + * scalar sqrtf call per element; nothing is read or written when n <= 0. + */ +void +__vsqrtf( int n, float * restrict x, int stridex, float * restrict y, int stridey ) +{ + if ( n <= 0 ) + return; + + do + { + *y = sqrtf( *x ); + x += stridex; + y += stridey; + } while ( --n > 0 ); +} + diff --git a/usr/src/libm/src/mvec/__vz_abs.c b/usr/src/libm/src/mvec/__vz_abs.c new file mode 100644 index 0000000..8a2d38e --- /dev/null +++ b/usr/src/libm/src/mvec/__vz_abs.c @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vz_abs.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vhypot( int, double *, int, double *, int, double *, int ); + +void +__vz_abs( int n, double * restrict x, int stridex, double * restrict y, + int stridey ) +{ + stridex <<= 1; + __vhypot( n, x, stridex, x + 1, stridex, y, stridey ); +} diff --git a/usr/src/libm/src/mvec/__vz_exp.c b/usr/src/libm/src/mvec/__vz_exp.c new file mode 100644 index 0000000..da1ac0b --- /dev/null +++ b/usr/src/libm/src/mvec/__vz_exp.c @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vz_exp.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vexp( int, double *, int, double *, int ); +extern void __vsincos( int, double *, int, double *, int, double *, int ); + +void +__vz_exp( int n, double * restrict x, int stridex, double * restrict y, + int stridey, double * restrict tmp ) +{ + int i, j, k; + + stridex <<= 1; + stridey <<= 1; + __vexp( n, x, stridex, tmp, 1 ); + __vsincos( n, x + 1, stridex, y + 1, stridey, y, stridey ); + for ( i = j = 0; i < n; i++, j += stridey ) + { + y[j] *= tmp[i]; + y[j+1] *= tmp[i]; + } +} diff --git a/usr/src/libm/src/mvec/__vz_log.c b/usr/src/libm/src/mvec/__vz_log.c new file mode 100644 index 0000000..462bce7 --- /dev/null +++ b/usr/src/libm/src/mvec/__vz_log.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vz_log.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vatan2( int, double *, int, double *, int, double *, int ); +extern void __vhypot( int, double *, int, double *, int, double *, int ); +extern void __vlog( int, double *, int, double *, int ); + +void +__vz_log( int n, double * restrict x, int stridex, double * restrict y, + int stridey ) +{ + stridex <<= 1; + stridey <<= 1; + __vhypot( n, x, stridex, x + 1, stridex, y + 1, stridey ); + __vlog( n, y + 1, stridey, y, stridey ); + __vatan2( n, x + 1, stridex, x, stridex, y + 1, stridey ); +} diff --git a/usr/src/libm/src/mvec/__vz_pow.c b/usr/src/libm/src/mvec/__vz_pow.c new file mode 100644 index 0000000..419f156 --- /dev/null +++ b/usr/src/libm/src/mvec/__vz_pow.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)__vz_pow.c 1.3 06/01/31 SMI" + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern void __vz_exp( int, double *, int, double *, int, double * ); +extern void __vz_log( int, double *, int, double *, int ); + +void +__vz_pow( int n, double * restrict x, int stridex, double * restrict y, + int stridey, double * restrict z, int stridez, double * restrict tmp ) +{ + double r; + int i, j, k; + + __vz_log( n, x, stridex, tmp, 1 ); + stridey <<= 1; + for ( i = j = 0; i < n; i++, j += stridey ) + { + k = i << 1; + r = y[j] * tmp[k] - y[j+1] * tmp[k+1]; + tmp[k+1] = y[j+1] * tmp[k] + y[j] * tmp[k+1]; + tmp[k] = r; + } + __vz_exp( n, tmp, 1, z, stridez, tmp + n + n ); +} diff --git a/usr/src/libm/src/mvec/amd64/__vsqrtf.S b/usr/src/libm/src/mvec/amd64/__vsqrtf.S new file mode 100644 index 0000000..96990c5 --- /dev/null +++ b/usr/src/libm/src/mvec/amd64/__vsqrtf.S @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vsqrtf.S 1.2 06/01/23 SMI" + + .file "__vsqrtf.S" + +#include "libm.h" +/* + * __vsqrtf: single precision vector square root, taking the arguments + * listed under "on entry" below. Both strides are sign extended and + * scaled to byte offsets (times 4). While at least four elements + * remain they are handled as a group with one packed sqrtps: if both + * byte strides equal 4 the group is loaded and stored with movups + * (.loop), otherwise the four inputs are gathered and the four results + * scattered with movss (.nonunit). Any remaining elements, and calls + * with n < 4, go through the scalar sqrtss loop at .finish_loop. + */ + ENTRY(__vsqrtf) + push %rbp + movq %rsp,%rbp + +/ on entry: +/ %edi = n +/ %rsi = x +/ %edx = stridex +/ %rcx = y +/ %r8d = stridey + + movslq %edx,%rdx / sign extend and scale strides + shlq $2,%rdx + movslq %r8d,%r8 + shlq $2,%r8 + + cmpl $4,%edi + jl .finish + + cmpq $4,%rdx + jne .nonunit + cmpq $4,%r8 + jne .nonunit + +/ unit-stride case + movq %rdx,%r9 + shlq $2,%r9 + movq %r8,%r10 + shlq $2,%r10 + + .zalign 16,8 +.loop: + movups (%rsi),%xmm0 + addq %r9,%rsi + sqrtps %xmm0,%xmm0 + movups %xmm0,(%rcx) + addq %r10,%rcx + subl $4,%edi + cmpl $4,%edi + jge .loop + +.finish: + testl %edi,%edi + jle .done + +.finish_loop: + movss (%rsi),%xmm0 + addq %rdx,%rsi + sqrtss %xmm0,%xmm0 + movss %xmm0,(%rcx) + addq %r8,%rcx + decl %edi + jg .finish_loop + +.done: + leave + ret + + .zalign 16,8 +.nonunit: + movss (%rsi),%xmm0 + addq %rdx,%rsi + movss (%rsi),%xmm1 + addq %rdx,%rsi + movss (%rsi),%xmm2 + addq %rdx,%rsi + movss (%rsi),%xmm3 + addq %rdx,%rsi + + movlhps %xmm1,%xmm0 / xmm0: 0 x1 0 x0 + movlhps %xmm3,%xmm2 / xmm2: 0 x3 0 x2 + shufps $0x88,%xmm2,%xmm0 / xmm0: x3 x2 x1 x0 + + sqrtps %xmm0,%xmm0 / xmm0: y3 y2 y1 y0 + + movaps %xmm0,%xmm1 / xmm1: y3 y2 y1 y0 + shufps $0xf5,%xmm0,%xmm1 / xmm1: y3 y3 y1 y1 + movhlps %xmm0,%xmm2 / xmm2: 0 x3 y3 y2 + movhlps %xmm1,%xmm3 / xmm3: 0 0 y3 y3 + + movss %xmm0,(%rcx) + addq %r8,%rcx + movss %xmm1,(%rcx) + addq %r8,%rcx + movss %xmm2,(%rcx) + addq %r8,%rcx + movss %xmm3,(%rcx) + addq %r8,%rcx + + subl $4,%edi + cmpl $4,%edi + jge .nonunit + + jmp .finish + + SET_SIZE(__vsqrtf) diff --git a/usr/src/libm/src/mvec/vatan2_.c b/usr/src/libm/src/mvec/vatan2_.c new file mode 100644 index 0000000..678e4c7 --- /dev/null +++ b/usr/src/libm/src/mvec/vatan2_.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vatan2_.c 1.5 06/01/31 SMI" + +extern void __vatan2( int, double *, int, double *, int, double *, int ); + +#pragma weak vatan2_ = __vatan2_ + +#ifndef LIBMTSK_BASED + +/* + * by-reference entry point: forward the dereferenced arguments to the + * serial __vatan2 + */ +void +__vatan2_( int *n, double *y, int *stridey, double *x, int *stridex, + double *z, int *stridez ) +{ + __vatan2( *n, y, *stridey, x, *stridex, z, *stridez ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp, *zp; +static int sx, sy, sz; + +/* + * worker for the parallel case: runs __vatan2 on elements LowerBound + * through UpperBound of the vectors recorded in the file-scope statics + */ +void +__vatan2_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + const int count = UpperBound - LowerBound + 1; + + __vatan2( count, yp + sy * LowerBound, sy, xp + sx * LowerBound, sx, + zp + sz * LowerBound, sz ); +} + +void +__vatan2_( int *n, double *y, int *stridey, double *x, int *stridex, + double *z, int *stridez ) +{ + struct MFunctionBlock blk; + int ncpus = __mt_getncpus_(); + + /* stay serial with fewer than two cpus, with fewer than eight + elements per cpu, or when already inside a parallel construct */ + if ( ncpus < 2 || *n < ( ncpus << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vatan2( *n, y, *stridey, x, 
*stridex, z, *stridez ); + return; + } + + /* record the vectors and strides for the workers; the checks above + established that we are not in a parallel region */ + xp = x; + yp = y; + zp = z; + sx = *stridex; + sy = *stridey; + sz = *stridez; + + blk.MFunctionPtr = &__vatan2_mfunc; + blk.LowerBound = 0; + blk.UpperBound = *n - 1; + blk.Step = 1; + __mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vatan2f_.c b/usr/src/libm/src/mvec/vatan2f_.c new file mode 100644 index 0000000..744d800 --- /dev/null +++ b/usr/src/libm/src/mvec/vatan2f_.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vatan2f_.c 1.4 06/01/31 SMI" + +extern void __vatan2f( int, float *, int, float *, int, float *, int ); + +#pragma weak vatan2f_ = __vatan2f_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vatan2f_( int *n, float *y, int *stridey, float *x, int *stridex, + float *z, int *stridez ) +{ + __vatan2f( *n, y, *stridey, x, *stridex, z, *stridez ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp, *zp; +static int sx, sy, sz; + +/* m-function for parallel vatan2f */ +void +__vatan2f_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vatan2f( UpperBound - LowerBound + 1, yp + sy * LowerBound, sy, + xp + sx * LowerBound, sx, zp + sz * LowerBound, sz ); +} + +void +__vatan2f_( int *n, float *y, int *stridey, float *x, int *stridex, + float *z, int *stridez ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vatan2f( *n, y, *stridey, x, *stridex, z, *stridez ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + zp = z; + sz = *stridez; + + m.MFunctionPtr = &__vatan2f_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vatan_.c b/usr/src/libm/src/mvec/vatan_.c new file mode 100644 index 0000000..3d9c65d --- /dev/null +++ b/usr/src/libm/src/mvec/vatan_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vatan_.c 1.4 06/01/31 SMI" + +extern void __vatan( int, double *, int, double *, int ); + +#pragma weak vatan_ = __vatan_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vatan_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + __vatan( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp; +static int sx, sy; + +/* m-function for parallel vatan */ +void +__vatan_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vatan( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +void +__vatan_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vatan( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vatan_mfunc; + m.LowerBound 
= 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vatanf_.c b/usr/src/libm/src/mvec/vatanf_.c new file mode 100644 index 0000000..90c7b94 --- /dev/null +++ b/usr/src/libm/src/mvec/vatanf_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vatanf_.c 1.4 06/01/31 SMI" + +extern void __vatanf( int, float *, int, float *, int ); + +#pragma weak vatanf_ = __vatanf_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vatanf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + __vatanf( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp; +static int sx, sy; + +/* m-function for parallel vatanf */ +void +__vatanf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vatanf( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +void +__vatanf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vatanf( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vatanf_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vc_abs_.c b/usr/src/libm/src/mvec/vc_abs_.c new file mode 100644 index 0000000..76efbf1 --- /dev/null +++ b/usr/src/libm/src/mvec/vc_abs_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vc_abs_.c 1.4 06/01/31 SMI" + +extern void __vc_abs( int, float *, int, float *, int ); + +#pragma weak vc_abs_ = __vc_abs_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vc_abs_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + __vc_abs( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp; +static int sx, sy; + +/* m-function for parallel vc_abs */ +void +__vc_abs_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vc_abs( UpperBound - LowerBound + 1, xp + ( sx << 1) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy ); +} + +void +__vc_abs_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vc_abs( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vc_abs_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, 
m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vc_exp_.c b/usr/src/libm/src/mvec/vc_exp_.c new file mode 100644 index 0000000..efb54d4 --- /dev/null +++ b/usr/src/libm/src/mvec/vc_exp_.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vc_exp_.c 1.4 06/01/31 SMI" + +extern void __vc_exp( int, float *, int, float *, int, float * ); + +#pragma weak vc_exp_ = __vc_exp_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vc_exp_( int *n, float *x, int *stridex, float *y, int *stridey, + float *tmp ) +{ + __vc_exp( *n, x, *stridex, y, *stridey, tmp ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp, *tp; +static int sx, sy; + +/* m-function for parallel vc_exp */ +void +__vc_exp_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vc_exp( UpperBound - LowerBound + 1, xp + ( sx << 1 ) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy, tp + LowerBound ); +} + +void +__vc_exp_( int *n, float *x, int *stridex, float *y, int *stridey, + float *tmp ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vc_exp( *n, x, *stridex, y, *stridey, tmp ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + tp = tmp; + + m.MFunctionPtr = &__vc_exp_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vc_log_.c b/usr/src/libm/src/mvec/vc_log_.c new file mode 100644 index 0000000..1ef6154 --- /dev/null +++ b/usr/src/libm/src/mvec/vc_log_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vc_log_.c 1.4 06/01/31 SMI" + +extern void __vc_log( int, float *, int, float *, int ); + +#pragma weak vc_log_ = __vc_log_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vc_log_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + __vc_log( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp; +static int sx, sy; + +/* m-function for parallel vc_log */ +void +__vc_log_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vc_log( UpperBound - LowerBound + 1, xp + ( sx << 1) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy ); +} + +void +__vc_log_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vc_log( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = 
&__vc_log_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vc_pow_.c b/usr/src/libm/src/mvec/vc_pow_.c new file mode 100644 index 0000000..42d3524 --- /dev/null +++ b/usr/src/libm/src/mvec/vc_pow_.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vc_pow_.c 1.4 06/01/31 SMI" + +extern void __vc_pow( int, float *, int, float *, int, float *, int, + float * ); + +#pragma weak vc_pow_ = __vc_pow_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vc_pow_( int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez, float *tmp ) +{ + __vc_pow( *n, x, *stridex, y, *stridey, z, *stridez, tmp ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp, *zp, *tp; +static int sx, sy, sz; + +/* m-function for parallel vc_pow */ +void +__vc_pow_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vc_pow( UpperBound - LowerBound + 1, xp + ( sx << 1 ) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy, zp + ( sz << 1 ) * LowerBound, sz, + tp + LowerBound ); +} + +void +__vc_pow_( int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez, float *tmp ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vc_pow( *n, x, *stridex, y, *stridey, z, *stridez, tmp ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + zp = z; + sz = *stridez; + tp = tmp; + + m.MFunctionPtr = &__vc_pow_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vcos_.c b/usr/src/libm/src/mvec/vcos_.c new file mode 100644 index 0000000..a2d11e7 --- /dev/null +++ b/usr/src/libm/src/mvec/vcos_.c @@ -0,0 +1,157 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution 
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vcos_.c 1.8 06/01/31 SMI" + +extern void __vcos( int, double *, int, double *, int ); + +#if !defined(LIBMVEC_SO_BUILD) +#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b) +#define CHECK_ULTRA3 +#endif +#endif /* !defined(LIBMVEC_SO_BUILD) */ + +#ifdef CHECK_ULTRA3 +#include <string.h> +#define sysinfo _sysinfo +#include <sys/systeminfo.h> + +#define BUFLEN 257 + +static int use_ultra3 = 0; + +extern void __vcos_ultra3( int, double *, int, double *, int ); +#endif + +#pragma weak vcos_ = __vcos_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vcos_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vcos_ultra3 on Cheetah (and ???) 
*/ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } + if (u & 2) + __vcos_ultra3( *n, x, *stridex, y, *stridey ); + else +#endif + __vcos( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp; +static int sx, sy; + +/* m-function for parallel vcos */ +void +__vcos_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vcos( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +#ifdef CHECK_ULTRA3 +/* m-function for ultra3 version of parallel vcos */ +void +__vcos_ultra3_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vcos_ultra3( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} +#endif + +void +__vcos_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vcos_ultra3 on Cheetah (and ???) 
*/ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } +#endif + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { +#ifdef CHECK_ULTRA3 + if (u & 2) + __vcos_ultra3( *n, x, *stridex, y, *stridey ); + else +#endif + __vcos( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + +#ifdef CHECK_ULTRA3 + if (u & 2) + m.MFunctionPtr = &__vcos_ultra3_mfunc; + else +#endif + m.MFunctionPtr = &__vcos_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vcosf_.c b/usr/src/libm/src/mvec/vcosf_.c new file mode 100644 index 0000000..05d6815 --- /dev/null +++ b/usr/src/libm/src/mvec/vcosf_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vcosf_.c 1.4 06/01/31 SMI" + +extern void __vcosf( int, float *, int, float *, int ); + +#pragma weak vcosf_ = __vcosf_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vcosf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + __vcosf( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp; +static int sx, sy; + +/* m-function for parallel vcosf */ +void +__vcosf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vcosf( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +void +__vcosf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vcosf( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vcosf_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vexp_.c b/usr/src/libm/src/mvec/vexp_.c new file mode 100644 index 0000000..a440bf8 --- /dev/null +++ b/usr/src/libm/src/mvec/vexp_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are 
subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vexp_.c 1.5 06/01/31 SMI" + +extern void __vexp( int, double *, int, double *, int ); + +#pragma weak vexp_ = __vexp_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vexp_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + __vexp( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp; +static int sx, sy; + +/* m-function for parallel vexp */ +void +__vexp_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vexp( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +void +__vexp_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vexp( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we 
already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vexp_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vexpf_.c b/usr/src/libm/src/mvec/vexpf_.c new file mode 100644 index 0000000..1c9f68a --- /dev/null +++ b/usr/src/libm/src/mvec/vexpf_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vexpf_.c 1.4 06/01/31 SMI" + +extern void __vexpf( int, float *, int, float *, int ); + +#pragma weak vexpf_ = __vexpf_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vexpf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + __vexpf( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp; +static int sx, sy; + +/* m-function for parallel vexpf */ +void +__vexpf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vexpf( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +void +__vexpf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vexpf( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vexpf_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vhypot_.c b/usr/src/libm/src/mvec/vhypot_.c new file mode 100644 index 0000000..7d53635 --- /dev/null +++ b/usr/src/libm/src/mvec/vhypot_.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vhypot_.c 1.5 06/01/31 SMI" + +extern void __vhypot( int, double *, int, double *, int, double *, int ); + +#pragma weak vhypot_ = __vhypot_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vhypot_( int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez ) +{ + __vhypot( *n, x, *stridex, y, *stridey, z, *stridez ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp, *zp; +static int sx, sy, sz; + +/* m-function for parallel vhypot */ +void +__vhypot_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vhypot( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy, zp + sz * LowerBound, sz ); +} + +void +__vhypot_( int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vhypot( *n, x, *stridex, y, *stridey, z, *stridez ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + zp = z; + sz = *stridez; + + 
m.MFunctionPtr = &__vhypot_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vhypotf_.c b/usr/src/libm/src/mvec/vhypotf_.c new file mode 100644 index 0000000..ee352fc --- /dev/null +++ b/usr/src/libm/src/mvec/vhypotf_.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vhypotf_.c 1.4 06/01/31 SMI" + +extern void __vhypotf( int, float *, int, float *, int, float *, int ); + +#pragma weak vhypotf_ = __vhypotf_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function */ +void +__vhypotf_( int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez ) +{ + __vhypotf( *n, x, *stridex, y, *stridey, z, *stridez ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp, *zp; +static int sx, sy, sz; + +/* m-function for parallel vhypotf */ +void +__vhypotf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vhypotf( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy, zp + sz * LowerBound, sz ); +} + +void +__vhypotf_( int *n, float *x, int *stridex, float *y, int *stridey, + float *z, int *stridez ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vhypotf( *n, x, *stridex, y, *stridey, z, *stridez ); + return; + } + + /* should be safe, we already know we're not in a parallel region */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + zp = z; + sz = *stridez; + + m.MFunctionPtr = &__vhypotf_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vis/__vatan.S b/usr/src/libm/src/mvec/vis/__vatan.S new file mode 100644 index 0000000..f531a1a --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vatan.S @@ -0,0 +1,571 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vatan.S 1.8 06/01/23 SMI" + + .file "__vatan.S" + +#include "libm.h" + + RO_DATA + +! following is the C version of the ATAN algorithm +! #include +! #include +! double jkatan(double *x) +! { +! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy; +! int index, sign, intf, intz; +! extern const double __vlibm_TBL_atan1[]; +! long *pf = (long *) &f, *pz = (long *) &z; +! +! /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 +! * Error = -3.08254E-18 On the interval |x| < 1/64 */ +! +! /* define dummy names for readability. Use parray to help compiler optimize loads */ +! #define p3 parray[0] +! #define p2 parray[1] +! #define p1 parray[2] +! #define soffset 3 +! +! static const double parray[] = { +! -1.428029046844299722E-01, /* p[3] */ +! 1.999999917247000615E-01, /* p[2] */ +! -3.333333333329292858E-01, /* p[1] */ +! 1.0, /* not used for p[0], though */ +! -1.0, /* used to flip sign of answer */ +! }; +! +! f = *x; /* fetch argument */ +! intf = pf[0]; /* grab upper half */ +! sign = intf & 0x80000000; /* sign of argument */ +! intf ^= sign; /* abs(upper argument) */ +! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */ +! pf[0] = intf; +! +! 
if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */ +! { +! if( (intf > 0x7ff00000) || +! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/ +! if( intf < 0x3e300000 ) /* avoid underflow for small arg */ +! { +! dummy = 1.0e37 + f; +! dummy = dummy; +! return (*x); +! } +! if( intf > 0x43600000 ) /* avoid underflow for big arg */ +! { +! index = 2; +! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */ +! f = parray[soffset + sign] * f; /* put sign bit on ans */ +! return (f); +! } +! } +! +! index = 0; /* points to 0,0 in table */ +! if (intf > 0x40500000) /* if(|x| > 64 */ +! { f = -1.0/f; +! index = 2; /* point to pi/2 upper, lower */ +! } +! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */ +! { +! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ +! pz[0] = intz; /* store as a double (z) */ +! pz[1] = 0; /* ...lower */ +! f = (f - z)/(1.0 + f*z); /* get reduced argument */ +! index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ +! index += 4; /* skip over 0,0,pi/2,pi/2 */ +! } +! conup = __vlibm_TBL_atan1[index]; /* upper table */ +! conlo = __vlibm_TBL_atan1[index+1]; /* lower table */ +! tmp = f*f; +! poly = (f*tmp)*((p3*tmp + p2)*tmp + p1); +! ansu = conup + f; /* compute atan(f) upper */ +! ansl = (((conup - ansu) + f) + poly) + conlo; +! ans = ansu + ansl; +! ans = parray[soffset + sign] * ans; +! return ans; +! } + +/* 8 bytes = 1 double f.p. 
word */
+#define WSIZE 8
+
+	.align 32	!align with full D-cache line
+.COEFFS:
+	.double	0r-1.428029046844299722E-01	!p[3]
+	.double	0r1.999999917247000615E-01	!p[2]
+	.double	0r-3.333333333329292858E-01	!p[1]
+	.double	0r-1.0,	!constant -1.0 (NOTE(review): trailing comma - confirm intended)
+	.word	0x00008000,0x0	!for fp rounding of reduced arg
+	.word	0x7fff0000,0x0	!for fp truncation
+	.word	0x47900000,0	!a number close to 1.0E37
+	.word	0x80000000,0x0	!mask for fp sign bit
+	.word	0x3f800000,0x0	!1.0/128.0 dummy "safe" argument
+	.type	.COEFFS,#object
+
+	ENTRY(__vatan)
+	save	%sp,-SA(MINFRAME)-16,%sp
+	PIC_SETUP(g5)
+	PIC_SET(g5,__vlibm_TBL_atan1,o4)
+	PIC_SET(g5,.COEFFS,o0)
+/*
+	__vatan(int n, double *x, int stridex, double *y, int stridey)
+	computes y(i) = atan( x(i) ), for i=1,n. Stridex, stridey are the
+	distances between successive x and y elements, counted in doubles
+
+	%i0	n
+	%i1	address of x
+	%i2	stride x
+	%i3	address of y
+	%i4	stride y
+*/
+	cmp	%i0,0	!if n <=0,
+	ble,pn	%icc,.RETURN	!....then do nothing
+	sll	%i2,3,%i2	!convert stride to byte count
+	sll	%i4,3,%i4	!convert stride to byte count
+
+/* pre-load constants and zero the pipeline state before beginning main loop */
+
+	ldd	[%o0],%f58	!load p[3]
+	mov	2,%i5	!argcount = 2 (dummy fetches allowed once n runs out)
+
+	ldd	[%o0+WSIZE],%f60	!load p[2]
+	add	%fp,STACK_BIAS-8,%l1	!yaddr1 = &dummy
+	fzero	%f18	!ansu1 = 0
+
+	ldd	[%o0+2*WSIZE],%f62	!load p[1]
+	add	%fp,STACK_BIAS-8,%l2	!yaddr2 = &dummy
+	fzero	%f12	!(poly1) = 0
+
+	ldd	[%o0+3*WSIZE],%f56	!-1.0
+	fzero	%f14	!tmp1 = 0
+
+	ldd	[%o0+4*WSIZE],%f52	!load rounding mask
+	fzero	%f16	!conup1 = 0
+
+	ldd	[%o0+5*WSIZE],%f54	!load truncation mask
+	fzero	%f36	!f1 = 0
+
+	ldd	[%o0+6*WSIZE],%f50	!1.0e37
+	fzero	%f38	!f2 = 0
+
+	ldd	[%o0+7*WSIZE],%f32	!mask for sign bit
+
+	ldd	[%o4+2*WSIZE],%f46	!pi/2 upper
+	ldd	[%o4+(2*WSIZE+8)],%f48	!pi/2 lower
+	sethi	%hi(0x40500000),%l6	!64.0
+	sethi	%hi(0x3f900000),%l7	!1/64.0
+	mov	0,%l4	!index1 = 0
+	mov	0,%l5	!index2 = 0
+
+.MAINLOOP:
+
+	/*--------------------------------------------------------------------------*/
+	/* LOOP0: fetch argument 0 (f0); continue argument 2; finish and store ans1 */
+	/*--------------------------------------------------------------------------*/
+
+.LOOP0:
+	deccc	%i0	!--n
+	bneg	1f
+	mov	%i1,%o5	!xuse = x (delay slot)
+
+	ba	2f
+	nop	!delay slot
+1:
+	PIC_SET(g5,.COEFFS+8*WSIZE,o5)
+	dec	%i5	!argcount--
+2:
+	sethi	%hi(0x80000000),%o7	!mask for sign bit
+/*2 */	sethi	%hi(0x43600000),%o1	!big = 0x43600000,0
+	ld	[%o5],%o0	!intf = pf[0] = f upper
+	ldd	[%o4+%l5],%f26	!conup2 = __vlibm_TBL_atan1[index2]
+
+	sethi	%hi(0x3e300000),%o2	!small = 0x3e300000,0
+/*4 */	andn	%o0,%o7,%o0	!intf = fabs(intf)
+	ldd	[%o5],%f34	!f = *x into f34
+
+	sub	%o1,%o0,%o1	!(-) if intf > big
+/*6 */	sub	%o0,%o2,%o2	!(-) if intf < small
+	fand	%f34,%f32,%f40	!sign0 = sign bit
+	fmuld	%f38,%f38,%f24	!tmp2= f2*f2
+
+/*7 */	orcc	%o1,%o2,%g0	!(-) if either true
+	bneg,pn	%icc,.SPECIAL0	!if (-) goto special cases below
+	fabsd	%f34,%f34	!abs(f) (delay slot)
+	!----------------------
+
+
+	sethi	%hi(0x8000),%o7	!rounding bit
+/*8 */	fpadd32	%f34,%f52,%f0	!intf + 0x00008000 (again)
+	faddd	%f26,%f38,%f28	!ansu2 = conup2 + f2
+
+	add	%o0,%o7,%o0	!intf + 0x00008000
+/*9*/	fand	%f0,%f54,%f0	!pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
+	fmuld	%f58,%f24,%f22	!p[3]*tmp2
+
+/*10 */	sethi	%hi(0x7fff0000),%o7	!mask for rounding argument
+	fmuld	%f34,%f0,%f10	!f*z
+	fsubd	%f34,%f0,%f20	!f - z
+	add	%o4,%l4,%l4	!base addr + index1
+	fmuld	%f14,%f12,%f12	!poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1)
+	faddd	%f16,%f36,%f16	!(conup1 - ansu1) + f1
+
+/*12 */	and	%o0,%o7,%o0	!intz = (intf + 0x00008000) & 0x7fff0000
+	faddd	%f22,%f60,%f22	!p[3]*tmp2 + p[2]
+	ldd	[%l4+WSIZE],%f14	!conlo1 = __vlibm_TBL_atan1[index+1]
+
+/*13 */	sub	%o0,%l7,%o2	!intz - 0x3f900000
+	fsubd	%f10,%f56,%f10	!(f*z - (-1.0))
+	faddd	%f16,%f12,%f12	!((conup1 - ansu1) + f1) + poly1
+
+	cmp	%o0,%l6	!(|f| > 64)
+	ble	.ELSE0	!if(|f| > 64) then
+/*15 */	sra	%o2,15,%o3	!index = (intz - 0x3f900000) >> 15
+	mov	2,%o1	!index == 2, point to conup, conlo = pi/2 upper, lower
+	ba	.ENDIF0	!continue
+/*16 */	fdivd	%f56,%f34,%f34	!f = -1.0/f (delay slot)
+	.ELSE0:	!else if( |x| >= (1/64))
+	cmp	%o0,%l7	!if intf >= 1/64
+	bl	.ENDIF0	!if( |x| >= (1/64) ) then...
+	mov	0,%o1	!index == 0 , point to conup,conlo = 0,0
+	add	%o3,4,%o1	!index = index + 4
+/*16 */	fdivd	%f20,%f10,%f34	!f = (f - z)/(1.0 + f*z), reduced argument
+	.ENDIF0:
+
+/*17*/	sll	%o1,3,%l3	!index0 = index
+	mov	%i3,%l0	!yaddr0 = address of y
+	faddd	%f12,%f14,%f12	!ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1
+	fmuld	%f22,%f24,%f22	!(p3*tmp2 + p2)*tmp2
+	fsubd	%f26,%f28,%f26	!conup2 - ansu2
+
+/*20*/	add	%i1,%i2,%i1	!x += stridex
+	add	%i3,%i4,%i3	!y += stridey
+	faddd	%f18,%f12,%f36	!ans1 = ansu1 + ansl1
+	fmuld	%f38,%f24,%f24	!f*tmp2
+	faddd	%f22,%f62,%f22	!(p3*tmp2 + p2)*tmp2 + p1
+
+/*23*/	for	%f36,%f42,%f36	!sign(ans1) = sign of argument
+	std	%f36,[%l1]	!*yaddr1 = ans1
+	add	%o4,%l5,%l5	!base addr + index2
+	fmuld	%f24,%f22,%f22	!poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1)
+	faddd	%f26,%f38,%f26	!(conup2 - ansu2) + f2
+	cmp	%i5,0	!if argcount =0, we are done
+	be	.RETURN
+	nop
+
+	/*--------------------------------------------------------------------------*/
+	/* LOOP1: fetch argument 1 (f1); finish and store ans2; continue argument 0 */
+	/*--------------------------------------------------------------------------*/
+
+.LOOP1:
+/*25*/	deccc	%i0	!--n
+	bneg	1f
+	mov	%i1,%o5	!xuse = x (delay slot)
+	ba	2f
+	nop	!delay slot
+1:
+	PIC_SET(g5,.COEFFS+8*WSIZE,o5)
+	dec	%i5	!argcount--
+2:
+
+/*26*/	sethi	%hi(0x80000000),%o7	!mask for sign bit
+	sethi	%hi(0x43600000),%o1	!big = 0x43600000,0
+	ld	[%o5],%o0	!intf = pf[0] = f upper
+
+/*28*/	sethi	%hi(0x3e300000),%o2	!small = 0x3e300000,0
+	andn	%o0,%o7,%o0	!intf = fabs(intf)
+	ldd	[%o5],%f36	!f = *x into f36
+
+/*30*/	sub	%o1,%o0,%o1	!(-) if intf > big
+	sub	%o0,%o2,%o2	!(-) if intf < small
+	fand	%f36,%f32,%f42	!sign1 = sign bit
+
+/*31*/	orcc	%o1,%o2,%g0	!(-) if either true
+	bneg,pn	%icc,.SPECIAL1	!if (-) goto special cases below
+	fabsd	%f36,%f36	!abs(f) (delay slot)
+	!----------------------
+
+/*32*/	fpadd32	%f36,%f52,%f0	!intf + 0x00008000 (again)
+	ldd	[%l5+WSIZE],%f24	!conlo2 = __vlibm_TBL_atan1[index2+1]
+
+/*33*/	fand	%f0,%f54,%f0	!pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
+	sethi	%hi(0x8000),%o7	!rounding bit
+	faddd	%f26,%f22,%f22	!((conup2 - ansu2) + f2) + poly2
+
+/*34*/	add	%o0,%o7,%o0	!intf + 0x00008000
+	sethi	%hi(0x7fff0000),%o7	!mask for rounding argument
+	fmuld	%f36,%f0,%f10	!f*z
+	fsubd	%f36,%f0,%f20	!f - z
+
+/*35*/	and	%o0,%o7,%o0	!intz = (intf + 0x00008000) & 0x7fff0000
+	faddd	%f22,%f24,%f22	!ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2
+
+/*37*/	sub	%o0,%l7,%o2	!intz - 0x3f900000
+	fsubd	%f10,%f56,%f10	!(f*z - (-1.0))
+	ldd	[%o4+%l3],%f6	!conup0 = __vlibm_TBL_atan1[index0]
+
+	cmp	%o0,%l6	!(|f| > 64)
+	ble	.ELSE1	!if(|f| > 64) then
+/*38*/	sra	%o2,15,%o3	!index = (intz - 0x3f900000) >> 15
+	mov	2,%o1	!index == 2, point to conup, conlo = pi/2 upper, lower
+	ba	.ENDIF1	!continue
+/*40*/	fdivd	%f56,%f36,%f36	!f = -1.0/f (delay slot)
+	.ELSE1:	!else if( |x| >= (1/64))
+	cmp	%o0,%l7	!if intf >= 1/64
+	bl	.ENDIF1	!if( |x| >= (1/64) ) then...
+	mov	0,%o1	!index == 0 , point to conup,conlo = 0,0
+	add	%o3,4,%o1	!index = index + 4
+/*40*/	fdivd	%f20,%f10,%f36	!f = (f - z)/(1.0 + f*z), reduced argument
+	.ENDIF1:
+
+/*41*/sll	%o1,3,%l4	!index1 = index
+	mov	%i3,%l1	!yaddr1 = address of y
+	fmuld	%f34,%f34,%f4	!tmp0= f0*f0
+	faddd	%f28,%f22,%f38	!ans2 = ansu2 + ansl2
+
+/*44*/add	%i1,%i2,%i1	!x += stridex
+	add	%i3,%i4,%i3	!y += stridey
+	fmuld	%f58,%f4,%f2	!p[3]*tmp0
+	faddd	%f6,%f34,%f8	!ansu0 = conup0 + f0
+	for	%f38,%f44,%f38	!sign(ans2) = sign of argument
+	std	%f38,[%l2]	!*yaddr2 = ans2
+	cmp	%i5,0	!if argcount =0, we are done
+	be	.RETURN
+	nop
+
+	/*--------------------------------------------------------------------------*/
+	/* LOOP2: fetch argument 2 (f2); finish and store ans0; continue argument 1 */
+	/*--------------------------------------------------------------------------*/
+
+.LOOP2:
+/*46*/	deccc	%i0	!--n
+	bneg	1f
+	mov	%i1,%o5	!xuse = x (delay slot)
+	ba	2f
+	nop	!delay slot
+1:
+	PIC_SET(g5,.COEFFS+8*WSIZE,o5)
+	dec	%i5	!argcount--
+2:
+
+/*47*/	sethi	%hi(0x80000000),%o7	!mask for sign bit
+	sethi	%hi(0x43600000),%o1	!big = 0x43600000,0
+	ld	[%o5],%o0	!intf = pf[0] = f upper
+
+/*49*/	sethi	%hi(0x3e300000),%o2	!small = 0x3e300000,0
+	andn	%o0,%o7,%o0	!intf = fabs(intf)
+	ldd	[%o5],%f38	!f = *x into f38
+
+/*51*/	sub	%o1,%o0,%o1	!(-) if intf > big
+	sub	%o0,%o2,%o2	!(-) if intf < small
+	fand	%f38,%f32,%f44	!sign2 = sign bit
+
+/*52*/	orcc	%o1,%o2,%g0	!(-) if either true
+	bneg,pn	%icc,.SPECIAL2	!if (-) goto special cases below
+	fabsd	%f38,%f38	!abs(f) (delay slot)
+	!----------------------
+
+/*53*/	fpadd32	%f38,%f52,%f0	!intf + 0x00008000 (again)
+	faddd	%f2,%f60,%f2	!p[3]*tmp0 + p[2]
+
+/*54*/	sethi	%hi(0x8000),%o7	!rounding bit
+	fand	%f0,%f54,%f0	!pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
+
+/*55*/	add	%o0,%o7,%o0	!intf + 0x00008000
+	sethi	%hi(0x7fff0000),%o7	!mask for rounding argument
+	fmuld	%f38,%f0,%f10	!f*z
+	fsubd	%f38,%f0,%f20	!f - z
+
+/*56*/	and	%o0,%o7,%o0	!intz = (intf + 0x00008000) & 0x7fff0000
+	fmuld	%f2,%f4,%f2	!(p3*tmp0 + p2)*tmp0
+	fsubd	%f6,%f8,%f6	!conup0 - ansu0
+
+/*58*/	sub	%o0,%l7,%o2	!intz - 0x3f900000
+	fsubd	%f10,%f56,%f10	!(f*z - (-1.0))
+	ldd	[%o4+%l4],%f16	!conup1 = __vlibm_TBL_atan1[index1]
+
+	cmp	%o0,%l6	!(|f| > 64)
+	ble	.ELSE2	!if(|f| > 64) then
+/*60*/	sra	%o2,15,%o3	!index = (intz - 0x3f900000) >> 15
+	mov	2,%o1	!index == 2, point to conup, conlo = pi/2 upper, lower
+	ba	.ENDIF2	!continue
+/*61*/	fdivd	%f56,%f38,%f38	!f = -1.0/f (delay slot)
+	.ELSE2:	!else if( |x| >= (1/64))
+	cmp	%o0,%l7	!if intf >= 1/64
+	bl	.ENDIF2	!if( |x| >= (1/64) ) then...
+	mov	0,%o1	!index == 0 , point to conup,conlo = 0,0
+	add	%o3,4,%o1	!index = index + 4
+/*61*/	fdivd	%f20,%f10,%f38	!f = (f - z)/(1.0 + f*z), reduced argument
+	.ENDIF2:
+
+
+/*62*/	sll	%o1,3,%l5	!index2 = index
+	mov	%i3,%l2	!yaddr2 = address of y
+	fmuld	%f34,%f4,%f4	!f0*tmp0
+	faddd	%f2,%f62,%f2	!(p3*tmp0 + p2)*tmp0 + p1
+	fmuld	%f36,%f36,%f14	!tmp1= f1*f1
+
+/*65*/add	%o4,%l3,%l3	!base addr + index0
+	fmuld	%f4,%f2,%f2	!poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1)
+	faddd	%f6,%f34,%f6	!(conup0 - ansu0) + f0
+	fmuld	%f58,%f14,%f12	!p[3]*tmp1
+	faddd	%f16,%f36,%f18	!ansu1 = conup1 + f1
+	ldd	[%l3+WSIZE],%f4	!conlo0 = __vlibm_TBL_atan1[index0+1]
+
+/*68*/	add	%i1,%i2,%i1	!x += stridex
+	add	%i3,%i4,%i3	!y += stridey
+	faddd	%f6,%f2,%f2	!((conup0 - ansu0) + f0) + poly0
+	faddd	%f12,%f60,%f12	!p[3]*tmp1 + p[2]
+
+/*71*/faddd	%f2,%f4,%f2	!ansl0 = (((conup0 - ansu0) + f0) + poly0) + conlo0
+	fmuld	%f12,%f14,%f12	!(p3*tmp1 + p2)*tmp1
+	fsubd	%f16,%f18,%f16	!conup1 - ansu1
+
+/*74*/faddd	%f8,%f2,%f34	!ans0 = ansu0 + ansl0
+	fmuld	%f36,%f14,%f14	!f1*tmp1
+	faddd	%f12,%f62,%f12	!(p3*tmp1 + p2)*tmp1 + p1
+
+/*77*/	for	%f34,%f40,%f34	!sign(ans0) = sign of argument
+	std	%f34,[%l0]	!*yaddr0 = ans, always gets stored
+	cmp	%i5,0	!if argcount =0, we are done
+	bg	.MAINLOOP
+	nop
+
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+.RETURN:
+	ret
+	restore	%g0,%g0,%g0
+
+	/*--------------------------------------------------------------------------*/
+	/*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+/* at this point
+	%i1	x address (%o5 = address the argument was actually loaded from)
+	%o0	intf
+	%o2	intf - 0x3e300000
+	%f34,36,38	f0,f1,f2
+	%f40,42,44	sign0,sign1,sign2
+*/
+
+	.align 32	!align on I-cache boundary
+.SPECIAL0:
+	orcc	%o2,%g0,%g0	!(-) if intf < 0x3e300000
+	bpos	1f	!if >=...continue
+	sethi	%hi(0x7ff00000),%g1	!upper word of Inf (we use 64-bit wide int for this)
+	ba	3f
+	faddd	%f34,%f50,%f30	!dummy op just to generate exception (delay slot)
+1:
+	ld	[%o5+4],%o5	!load x lower word
+	sllx	%o0,32,%o0	!left justify intf
+	sllx	%g1,32,%g1	!left justify Inf
+	or	%o0,%o5,%o0	!merge in lower intf
+	cmp	%o0,%g1	!if intf > 0x7ff00000 00000000
+	ble,pt	%xcc,2f	!pass thru if NaN
+	nop
+	fmuld	%f34,%f34,%f34	!...... (x*x) trigger invalid exception
+	ba	3f
+	nop
+2:
+	faddd	%f46,%f48,%f34	!ans = pi/2 upper + pi/2 lower
+3:
+	add	%i1,%i2,%i1	!x += stridex
+	for	%f34,%f40,%f34	!sign(ans) = sign of argument
+	std	%f34,[%i3]	!*y = ans
+	ba	.LOOP0	!keep looping
+	add	%i3,%i4,%i3	!y += stridey (delay slot)
+
+	/*--------------------------------------------------------------------------*/
+	/*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+	.align 32	!align on I-cache boundary
+.SPECIAL1:
+	orcc	%o2,%g0,%g0	!(-) if intf < 0x3e300000
+	bpos	1f	!if >=...continue
+	sethi	%hi(0x7ff00000),%g1	!upper word of Inf (we use 64-bit wide int for this)
+	ba	3f
+	faddd	%f36,%f50,%f30	!dummy op just to generate exception (delay slot)
+1:
+	ld	[%o5+4],%o5	!load x lower word
+	sllx	%o0,32,%o0	!left justify intf
+	sllx	%g1,32,%g1	!left justify Inf
+	or	%o0,%o5,%o0	!merge in lower intf
+	cmp	%o0,%g1	!if intf > 0x7ff00000 00000000
+	ble,pt	%xcc,2f	!pass thru if NaN
+	nop
+	fmuld	%f36,%f36,%f36	!...... (x*x) trigger invalid exception
+	ba	3f
+	nop
+2:
+	faddd	%f46,%f48,%f36	!ans = pi/2 upper + pi/2 lower
+3:
+	add	%i1,%i2,%i1	!x += stridex
+	for	%f36,%f42,%f36	!sign(ans) = sign of argument
+	std	%f36,[%i3]	!*y = ans
+	ba	.LOOP1	!keep looping
+	add	%i3,%i4,%i3	!y += stridey (delay slot)
+
+	/*--------------------------------------------------------------------------*/
+	/*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+	.align 32	!align on I-cache boundary
+.SPECIAL2:
+	orcc	%o2,%g0,%g0	!(-) if intf < 0x3e300000
+	bpos	1f	!if >=...continue
+	sethi	%hi(0x7ff00000),%g1	!upper word of Inf (we use 64-bit wide int for this)
+	ba	3f
+	faddd	%f38,%f50,%f30	!dummy op just to generate exception (delay slot)
+1:
+	ld	[%o5+4],%o5	!load x lower word
+	sllx	%o0,32,%o0	!left justify intf
+	sllx	%g1,32,%g1	!left justify Inf
+	or	%o0,%o5,%o0	!merge in lower intf
+	cmp	%o0,%g1	!if intf > 0x7ff00000 00000000
+	ble,pt	%xcc,2f	!pass thru if NaN
+	nop
+	fmuld	%f38,%f38,%f38	!...... (x*x) trigger invalid exception
+	ba	3f
+	nop
+2:
+	faddd	%f46,%f48,%f38	!ans = pi/2 upper + pi/2 lower
+3:
+	add	%i1,%i2,%i1	!x += stridex
+	for	%f38,%f44,%f38	!sign(ans) = sign of argument
+	std	%f38,[%i3]	!*y = ans
+	ba	.LOOP2	!keep looping
+	add	%i3,%i4,%i3	!y += stridey
+
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+	/*--------------------------------------------------------------------------*/
+
+	SET_SIZE(__vatan)
+
+!
.ident "03-20-96 Sparc V9 3-way-unrolled version" diff --git a/usr/src/libm/src/mvec/vis/__vatan2.S b/usr/src/libm/src/mvec/vis/__vatan2.S new file mode 100644 index 0000000..a696b07 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vatan2.S @@ -0,0 +1,1077 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vatan2.S 1.5 06/01/23 SMI" + + .file "__vatan2.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0x3ff921fb,0x54442d18 ! pio2 + .word 0x3c91a626,0x33145c07 ! pio2_lo + .word 0xbfd55555,0x555554ee ! p1 + .word 0x3fc99999,0x997a1559 ! p2 + .word 0xbfc24923,0x158dfe02 ! p3 + .word 0x3fbc639d,0x0ed1347b ! p4 + .word 0xffffffff,0x00000000 ! mask + .word 0x3fc00000,0x00000000 ! twom3 + .word 0x46d00000,0x00000000 ! two110 + .word 0x3fe921fb,0x54442d18 ! pio4 + +! local storage indices + +#define xscl STACK_BIAS-0x8 +#define yscl STACK_BIAS-0x10 +#define twom3 STACK_BIAS-0x18 +#define two110 STACK_BIAS-0x20 +#define pio4 STACK_BIAS-0x28 +#define junk STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +! 
register use + +! i0 n +! i1 y +! i2 stridey +! i3 x +! i4 stridex +! i5 z + +! l0 k0 +! l1 k1 +! l2 k2 +! l3 hx +! l4 pz0 +! l5 pz1 +! l6 pz2 +! l7 stridez + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_atan2 +! g5 + +! o0 hy +! o1 0x00004000 +! o2 0x1420 +! o3 0x7fe00000 +! o4 0x03600000 +! o5 0x00100000 +! o7 + +! f0 y0 +! f2 x0 +! f4 t0 +! f6 ah0 +! f8 al0 +! f10 y1 +! f12 x1 +! f14 t1 +! f16 ah1 +! f18 al1 +! f20 y2 +! f22 x2 +! f24 t2 +! f26 ah2 +! f28 al2 +! f30 +! f32 +! f34 +! f36 sx0 +! f38 sx1 +! f40 sx2 +! f42 sy0 +! f44 sy1 +! f46 sy2 + +#define mask %f48 +#define signbit %f50 +#define pio2 %f52 +#define pio2_lo %f54 +#define p1 %f56 +#define p2 %f58 +#define p3 %f60 +#define p4 %f62 + + ENTRY(__vatan2) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_atan2,o1) + wr %g0,0x82,%asi ! set %asi for non-faulting loads + mov %o1, %g1 +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+0xb0],%l7 +#else + ld [%fp+0x5c],%l7 +#endif + ldd [%o0+0x00],pio2 ! load/set up constants + ldd [%o0+0x08],pio2_lo + ldd [%o0+0x10],p1 + ldd [%o0+0x18],p2 + ldd [%o0+0x20],p3 + ldd [%o0+0x28],p4 + ldd [%o0+0x30],mask + fzero signbit + fnegd signbit,signbit + sethi %hi(0x00004000),%o1 + sethi %hi(0x1420),%o2 + or %o2,%lo(0x1420),%o2 + sethi %hi(0x7fe00000),%o3 + sethi %hi(0x03600000),%o4 + sethi %hi(0x00100000),%o5 + ldd [%o0+0x38],%f0 ! copy rarely used constants to stack + ldd [%o0+0x40],%f2 + ldd [%o0+0x48],%f4 + std %f0,[%fp+twom3] + std %f2,[%fp+two110] + std %f4,[%fp+pio4] + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + sll %l7,3,%l7 + fzero %f20 ! loop prologue + fzero %f22 + fzero %f24 + fzero %f26 + fzero %f46 + add %fp,junk,%l6 + ld [%i1],%f0 ! *y + ld [%i1+4],%f1 + ld [%i3],%f8 ! *x + ld [%i3+4],%f9 + ld [%i1],%o0 ! hy + ba .loop + ld [%i3],%l3 ! hx + +! 16-byte aligned + .align 16 +.loop: + fabsd %f0,%f4 + mov %i5,%l4 + add %i1,%i2,%i1 ! y += stridey + + fabsd %f8,%f2 + add %i3,%i4,%i3 ! 
x += stridex + add %i5,%l7,%i5 ! z += stridez + + fand %f0,signbit,%f42 + sethi %hi(0x80000000),%g5 + + fand %f8,signbit,%f36 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + + fcmpd %fcc0,%f4,%f2 + + fmovd %f4,%f0 + + fmovdg %fcc0,%f2,%f0 ! swap if |y| > |x| + + fmovdg %fcc0,%f4,%f2 + mov %o0,%o7 + lda [%i1]%asi,%f10 ! preload next argument + + faddd %f26,%f20,%f26 + lda [%i1+4]%asi,%f11 + + faddd %f22,%f24,%f22 + movg %fcc0,%l3,%o0 + + movg %fcc0,%o7,%l3 + + fbu,pn %fcc0,.nan0 ! if x or y is nan +! delay slot + lda [%i3]%asi,%f18 + + sub %l3,%o0,%l0 ! hx - hy + sub %l3,%o3,%g5 + fabsd %f10,%f14 + lda [%i3+4]%asi,%f19 + + sub %l0,%o4,%o7 + faddd %f22,%f26,%f26 + + andcc %g5,%o7,%g0 + bge,pn %icc,.big0 ! if |x| or |x/y| is big +! delay slot + nop + + fabsd %f18,%f12 + cmp %o0,%o5 + bl,pn %icc,.small0 ! if |y| is small +! delay slot + lda [%i1]%asi,%o0 + + add %l0,%o1,%l0 ! k + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + lda [%i3]%asi,%l3 + +.cont1: + srl %l0,10,%l0 + mov %i5,%l5 + fxor %f26,%f46,%f26 + st %f26,[%l6] + + fand %f10,signbit,%f44 + andn %l0,0x1f,%l0 + add %i1,%i2,%i1 + st %f27,[%l6+4] + + fand %f18,signbit,%f38 + cmp %l0,%o2 + movg %icc,%o2,%l0 + + fcmpd %fcc1,%f14,%f12 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + + fmovd %f14,%f10 + add %l0,%g1,%l0 + sethi %hi(0x80000000),%g5 + + ldd [%l0+0x10],%f4 + fand %f2,mask,%f6 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + + fmovdg %fcc1,%f12,%f10 + + fmovdg %fcc1,%f14,%f12 + mov %o0,%o7 + lda [%i1]%asi,%f20 + + fsubd %f2,%f6,%f30 + fmuld %f6,%f4,%f6 + movg %fcc1,%l3,%o0 + + fmuld %f0,%f4,%f8 + movg %fcc1,%o7,%l3 + + lda [%i1+4]%asi,%f21 + fbu,pn %fcc1,.nan1 +! delay slot + nop + + lda [%i3]%asi,%f28 + sub %l3,%o0,%l1 + sub %l3,%o3,%g5 + + lda [%i3+4]%asi,%f29 + fmuld %f30,%f4,%f30 + fsubd %f0,%f6,%f4 + sub %l1,%o4,%o7 + + fabsd %f20,%f24 + andcc %g5,%o7,%g0 + bge,pn %icc,.big1 +! delay slot + nop + + faddd %f2,%f8,%f8 + cmp %o0,%o5 + bl,pn %icc,.small1 +! 
delay slot + lda [%i1]%asi,%o0 + + fabsd %f28,%f22 + add %l1,%o1,%l1 + addcc %i0,-1,%i0 + lda [%i3]%asi,%l3 + + fsubd %f4,%f30,%f4 + srl %l1,10,%l1 + ble,pn %icc,.last2 +! delay slot + mov %i5,%l6 + +.cont2: + fand %f20,signbit,%f46 + andn %l1,0x1f,%l1 + add %i1,%i2,%i1 + + fand %f28,signbit,%f40 + cmp %l1,%o2 + movg %icc,%o2,%l1 + + fcmpd %fcc2,%f24,%f22 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + + fdivd %f4,%f8,%f4 + fmovd %f24,%f20 + add %l1,%g1,%l1 + sethi %hi(0x80000000),%g5 + + ldd [%l1+0x10],%f14 + fand %f12,mask,%f16 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + + fmovdg %fcc2,%f22,%f20 + + fmovdg %fcc2,%f24,%f22 + mov %o0,%o7 + + fsubd %f12,%f16,%f32 + fmuld %f16,%f14,%f16 + movg %fcc2,%l3,%o0 + + fnegd pio2_lo,%f8 ! al + fmuld %f10,%f14,%f18 + movg %fcc2,%o7,%l3 + + fzero %f0 + fbu,pn %fcc2,.nan2 +! delay slot + nop + + fmovdg %fcc0,signbit,%f0 + sub %l3,%o0,%l2 + sub %l3,%o3,%g5 + + fmuld %f32,%f14,%f32 + fsubd %f10,%f16,%f14 + sub %l2,%o4,%o7 + + faddd %f12,%f18,%f18 + andcc %g5,%o7,%g0 + bge,pn %icc,.big2 +! delay slot + nop + + fxor %f36,%f0,%f36 + cmp %o0,%o5 + bl,pn %icc,.small2 +! delay slot + nop + +.cont3: + fmovdg %fcc0,signbit,%f8 + add %l2,%o1,%l2 + + fsubd %f14,%f32,%f14 + srl %l2,10,%l2 + + fxor %f36,pio2_lo,%f30 ! al + andn %l2,0x1f,%l2 + + fxor %f36,pio2,%f0 ! ah + cmp %l2,%o2 + movg %icc,%o2,%l2 + + fxor %f42,%f36,%f42 ! 
sy + + faddd %f8,%f30,%f8 + ldd [%l0+0x8],%f30 + add %l2,%g1,%l2 + + fdivd %f14,%f18,%f14 + fzero %f10 + + ldd [%l2+0x10],%f24 + fand %f22,mask,%f26 + + fmovdg %fcc1,signbit,%f10 + + fmuld %f4,%f4,%f36 + faddd %f8,%f30,%f8 + + fsubd %f22,%f26,%f34 + fmuld %f26,%f24,%f26 + + fmuld %f20,%f24,%f28 + fxor %f38,%f10,%f38 + + fmuld %f4,p3,%f6 + fnegd pio2_lo,%f18 + + fmuld %f36,p2,%f2 + fmovdg %fcc1,signbit,%f18 + + fmuld %f36,%f4,%f36 + fxor %f38,pio2,%f10 + + fmuld %f34,%f24,%f34 + fsubd %f20,%f26,%f24 + + faddd %f22,%f28,%f28 + + faddd %f2,p1,%f2 + + fmuld %f36,p4,%f30 + fxor %f38,pio2_lo,%f32 + + fsubd %f24,%f34,%f24 + + fxor %f44,%f38,%f44 + + fmuld %f36,%f2,%f2 + faddd %f18,%f32,%f18 + ldd [%l1+0x8],%f32 + + fmuld %f36,%f36,%f36 + faddd %f6,%f30,%f30 + + fdivd %f24,%f28,%f24 + fzero %f20 + + fmovdg %fcc2,signbit,%f20 + + faddd %f2,%f8,%f2 + + fmuld %f14,%f14,%f38 + faddd %f18,%f32,%f18 + + fmuld %f36,%f30,%f36 + fxor %f40,%f20,%f40 + + fnegd pio2,%f6 ! ah + fmuld %f14,p3,%f16 + + fmovdg %fcc0,signbit,%f6 + + fmuld %f38,p2,%f12 + fnegd pio2_lo,%f28 + + faddd %f2,%f36,%f2 + fmuld %f38,%f14,%f38 + + faddd %f6,%f0,%f6 + ldd [%l0],%f0 + + fmovdg %fcc2,signbit,%f28 + + faddd %f12,p1,%f12 + + fmuld %f38,p4,%f32 + fxor %f40,pio2_lo,%f34 + + fxor %f40,pio2,%f20 + + faddd %f2,%f4,%f2 + + fmuld %f38,%f12,%f12 + fxor %f46,%f40,%f46 + + fmuld %f38,%f38,%f38 + faddd %f16,%f32,%f32 + + faddd %f28,%f34,%f28 + ldd [%l2+0x8],%f34 + + faddd %f6,%f0,%f6 + lda [%i1]%asi,%f0 ! 
preload next argument + + faddd %f12,%f18,%f12 + lda [%i1+4]%asi,%f1 + + fmuld %f24,%f24,%f40 + lda [%i3]%asi,%f8 + + fmuld %f38,%f32,%f38 + faddd %f28,%f34,%f28 + lda [%i3+4]%asi,%f9 + + fnegd pio2,%f16 + fmuld %f24,p3,%f26 + lda [%i1]%asi,%o0 + + fmovdg %fcc1,signbit,%f16 + lda [%i3]%asi,%l3 + + fmuld %f40,p2,%f22 + + faddd %f12,%f38,%f12 + fmuld %f40,%f24,%f40 + + faddd %f2,%f6,%f6 + + faddd %f16,%f10,%f16 + ldd [%l1],%f10 + + faddd %f22,p1,%f22 + + faddd %f12,%f14,%f12 + fmuld %f40,p4,%f34 + + fxor %f6,%f42,%f6 + st %f6,[%l4] + + faddd %f16,%f10,%f16 + st %f7,[%l4+4] + + fmuld %f40,%f22,%f22 + + fmuld %f40,%f40,%f40 + faddd %f26,%f34,%f34 + + fnegd pio2,%f26 + + faddd %f12,%f16,%f16 + + faddd %f22,%f28,%f22 + + fmuld %f40,%f34,%f40 + fmovdg %fcc2,signbit,%f26 + +! - + + fxor %f16,%f44,%f16 + st %f16,[%l5] + + faddd %f26,%f20,%f26 + st %f17,[%l5+4] + addcc %i0,-1,%i0 + + faddd %f22,%f40,%f22 + bg,pt %icc,.loop +! delay slot + ldd [%l2],%f20 + + + faddd %f26,%f20,%f26 + faddd %f22,%f24,%f22 + faddd %f22,%f26,%f26 +.done_from_special0: + fxor %f26,%f46,%f26 + st %f26,[%l6] + st %f27,[%l6+4] + ret + restore + + + + .align 16 +.last1: + fmovd pio2,%f10 ! set up dummy arguments + fmovd pio2,%f18 + fabsd %f10,%f14 + fabsd %f18,%f12 + sethi %hi(0x3ff921fb),%o0 + or %o0,%lo(0x3ff921fb),%o0 + mov %o0,%l3 + ba,pt %icc,.cont1 +! delay slot + add %fp,junk,%i5 + + + + .align 16 +.last2: + fmovd pio2,%f20 + fmovd pio2,%f28 + fabsd %f20,%f24 + fabsd %f28,%f22 + sethi %hi(0x3ff921fb),%o0 + or %o0,%lo(0x3ff921fb),%o0 + mov %o0,%l3 + ba,pt %icc,.cont2 +! delay slot + add %fp,junk,%l6 + + + + .align 16 +.nan0: + faddd %f22,%f26,%f26 +.nan0_from_special0: + fabsd %f10,%f14 + lda [%i3+4]%asi,%f19 + fabsd %f18,%f12 + lda [%i1]%asi,%o0 + lda [%i3]%asi,%l3 + ba,pt %icc,.special0 +! delay slot + fmuld %f0,%f2,%f6 + + + .align 16 +.big0: + fabsd %f18,%f12 + lda [%i1]%asi,%o0 + lda [%i3]%asi,%l3 + cmp %g5,%o5 + bge,pn %icc,.return_ah0 ! if hx >= 0x7ff00000 +! 
delay slot + nop + cmp %l0,%o4 + bge,pn %icc,1f ! if hx - hy >= 0x03600000 +! delay slot + nop + ldd [%fp+twom3],%f6 + fmuld %f0,%f6,%f0 + fmuld %f2,%f6,%f2 + add %l0,%o1,%l0 + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + nop + ba,pt %icc,.cont1 +! delay slot + nop +1: + fbg,pn %fcc0,.return_ah0 +! delay slot + nop + fcmpd %fcc3,%f8,signbit + fbl,pn %fcc3,.return_ah0 +! delay slot + nop + ba,pt %icc,.special0 +! delay slot + fdivd %f0,%f2,%f6 + + + .align 16 +.small0: + lda [%i3]%asi,%l3 + fcmpd %fcc3,%f0,signbit + fbe,pt %fcc3,.return_ah0 +! delay slot + nop + ldd [%fp+two110],%f6 + fmuld %f0,%f6,%f0 + fmuld %f2,%f6,%f2 + st %f0,[%fp+yscl] + ld [%fp+yscl],%o7 + st %f2,[%fp+xscl] + ld [%fp+xscl],%l0 + sub %l0,%o7,%l0 + add %l0,%o1,%l0 + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + nop + ba,pt %icc,.cont1 +! delay slot + nop + + + .align 16 +.return_ah0: + fzero %f0 + fmovdg %fcc0,signbit,%f0 + fxor %f36,%f0,%f36 + fxor %f36,pio2,%f0 + fxor %f42,%f36,%f42 + fnegd pio2,%f6 + fmovdg %fcc0,signbit,%f6 + faddd %f6,%f0,%f6 + sub %g5,%l0,%o7 + cmp %o7,%o5 + bl,pt %icc,1f ! if hy < 0x7ff00000 +! delay slot + nop + ldd [%fp+pio4],%f0 + faddd %f6,%f0,%f6 +1: + fdtoi %f6,%f4 +.special0: + fxor %f6,%f42,%f6 + st %f6,[%l4] + st %f7,[%l4+4] + addcc %i0,-1,%i0 + ble,pn %icc,.done_from_special0 +! delay slot + nop + fmovd %f10,%f0 + fmovd %f18,%f8 + fmovd %f14,%f4 + fmovd %f12,%f2 + mov %i5,%l4 + add %i1,%i2,%i1 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + fand %f0,signbit,%f42 + sethi %hi(0x80000000),%g5 + fand %f8,signbit,%f36 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + fcmpd %fcc0,%f4,%f2 + fmovd %f4,%f0 + fmovdg %fcc0,%f2,%f0 + fmovdg %fcc0,%f4,%f2 + mov %o0,%o7 + movg %fcc0,%l3,%o0 + movg %fcc0,%o7,%l3 + lda [%i1]%asi,%f10 + lda [%i1+4]%asi,%f11 + fbu,pn %fcc0,.nan0_from_special0 +! delay slot + lda [%i3]%asi,%f18 + fabsd %f10,%f14 + lda [%i3+4]%asi,%f19 + sub %l3,%o0,%l0 + sub %l3,%o3,%g5 + sub %l0,%o4,%o7 + andcc %g5,%o7,%g0 + bge,pn %icc,.big0 +! 
delay slot + nop + fabsd %f18,%f12 + cmp %o0,%o5 + bl,pn %icc,.small0 +! delay slot + lda [%i1]%asi,%o0 + add %l0,%o1,%l0 + addcc %i0,-1,%i0 + ble,pn %icc,.last1 +! delay slot + lda [%i3]%asi,%l3 + ba,pt %icc,.cont1 +! delay slot + nop + + + + .align 16 +.nan1: + fmuld %f30,%f4,%f30 + fsubd %f0,%f6,%f4 + faddd %f2,%f8,%f8 + fsubd %f4,%f30,%f4 +.nan1_from_special1: + lda [%i3]%asi,%f28 + lda [%i3+4]%asi,%f29 + fabsd %f20,%f24 + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + ba,pt %icc,.special1 +! delay slot + fmuld %f10,%f12,%f16 + + + .align 16 +.big1: + faddd %f2,%f8,%f8 + fsubd %f4,%f30,%f4 +.big1_from_special1: + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + cmp %g5,%o5 + bge,pn %icc,.return_ah1 +! delay slot + nop + cmp %l1,%o4 + bge,pn %icc,1f +! delay slot + nop + ldd [%fp+twom3],%f16 + fmuld %f10,%f16,%f10 + fmuld %f12,%f16,%f12 + add %l1,%o1,%l1 + srl %l1,10,%l1 + addcc %i0,-1,%i0 + ble,pn %icc,.last2 +! delay slot + nop + ba,pt %icc,.cont2 +! delay slot + nop +1: + fbg,pn %fcc1,.return_ah1 +! delay slot + nop + fcmpd %fcc3,%f18,signbit + fbl,pn %fcc3,.return_ah1 +! delay slot + nop + ba,pt %icc,.special1 +! delay slot + fdivd %f10,%f12,%f16 + + + .align 16 +.small1: + fsubd %f4,%f30,%f4 +.small1_from_special1: + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + fcmpd %fcc3,%f10,signbit + fbe,pt %fcc3,.return_ah1 +! delay slot + nop + ldd [%fp+two110],%f16 + fmuld %f10,%f16,%f10 + fmuld %f12,%f16,%f12 + st %f10,[%fp+yscl] + ld [%fp+yscl],%o7 + st %f12,[%fp+xscl] + ld [%fp+xscl],%l1 + sub %l1,%o7,%l1 + add %l1,%o1,%l1 + srl %l1,10,%l1 + addcc %i0,-1,%i0 + ble,pn %icc,.last2 +! delay slot + nop + ba,pt %icc,.cont2 +! delay slot + nop + + + .align 16 +.return_ah1: + fzero %f10 + fmovdg %fcc1,signbit,%f10 + fxor %f38,%f10,%f38 + fxor %f38,pio2,%f10 + fxor %f44,%f38,%f44 + fnegd pio2,%f16 + fmovdg %fcc1,signbit,%f16 + faddd %f16,%f10,%f16 + sub %g5,%l1,%o7 + cmp %o7,%o5 + bl,pt %icc,1f +! 
delay slot + nop + ldd [%fp+pio4],%f10 + faddd %f16,%f10,%f16 +1: + fdtoi %f16,%f14 +.special1: + fxor %f16,%f44,%f16 + st %f16,[%l5] + st %f17,[%l5+4] + addcc %i0,-1,%i0 + bg,pn %icc,1f +! delay slot + nop + fmovd pio2,%f20 ! set up dummy argument + fmovd pio2,%f28 + fabsd %f20,%f24 + fabsd %f28,%f22 + sethi %hi(0x3ff921fb),%o0 + or %o0,%lo(0x3ff921fb),%o0 + mov %o0,%l3 + add %fp,junk,%i5 +1: + fmovd %f20,%f10 + fmovd %f28,%f18 + fmovd %f24,%f14 + fmovd %f22,%f12 + mov %i5,%l5 + add %i1,%i2,%i1 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + fand %f10,signbit,%f44 + sethi %hi(0x80000000),%g5 + fand %f18,signbit,%f38 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + fcmpd %fcc1,%f14,%f12 + fmovd %f14,%f10 + fmovdg %fcc1,%f12,%f10 + fmovdg %fcc1,%f14,%f12 + mov %o0,%o7 + movg %fcc1,%l3,%o0 + movg %fcc1,%o7,%l3 + lda [%i1]%asi,%f20 + lda [%i1+4]%asi,%f21 + fbu,pn %fcc1,.nan1_from_special1 +! delay slot + nop + lda [%i3]%asi,%f28 + lda [%i3+4]%asi,%f29 + fabsd %f20,%f24 + sub %l3,%o0,%l1 + sub %l3,%o3,%g5 + sub %l1,%o4,%o7 + andcc %g5,%o7,%g0 + bge,pn %icc,.big1_from_special1 +! delay slot + nop + cmp %o0,%o5 + bl,pn %icc,.small1_from_special1 +! delay slot + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + add %l1,%o1,%l1 + srl %l1,10,%l1 + addcc %i0,-1,%i0 + ble,pn %icc,.last2 +! delay slot + mov %i5,%l6 + ba,pt %icc,.cont2 +! delay slot + nop + + + + .align 16 +.nan2: + fmovdg %fcc0,signbit,%f0 + fmuld %f32,%f14,%f32 + fsubd %f10,%f16,%f14 + faddd %f12,%f18,%f18 + fxor %f36,%f0,%f36 +.nan2_from_special2: + ba,pt %icc,.special2 +! delay slot + fmuld %f20,%f22,%f26 + + + .align 16 +.big2: + fxor %f36,%f0,%f36 +.big2_from_special2: + cmp %g5,%o5 + bge,pn %icc,.return_ah2 +! delay slot + nop + cmp %l2,%o4 + bge,pn %icc,1f +! delay slot + nop + ldd [%fp+twom3],%f26 + fmuld %f20,%f26,%f20 + fmuld %f22,%f26,%f22 + ba,pt %icc,.cont3 +! delay slot + nop +1: + fbg,pn %fcc2,.return_ah2 +! delay slot + nop + fcmpd %fcc3,%f28,signbit + fbl,pn %fcc3,.return_ah2 +! 
delay slot + nop + ba,pt %icc,.special2 +! delay slot + fdivd %f20,%f22,%f26 + + + .align 16 +.small2: + fcmpd %fcc3,%f20,signbit + fbe,pt %fcc3,.return_ah2 +! delay slot + nop + ldd [%fp+two110],%f26 + fmuld %f20,%f26,%f20 + fmuld %f22,%f26,%f22 + st %f20,[%fp+yscl] + ld [%fp+yscl],%o7 + st %f22,[%fp+xscl] + ld [%fp+xscl],%l2 + sub %l2,%o7,%l2 + ba,pt %icc,.cont3 +! delay slot + nop + + + .align 16 +.return_ah2: + fzero %f20 + fmovdg %fcc2,signbit,%f20 + fxor %f40,%f20,%f40 + fxor %f40,pio2,%f20 + fxor %f46,%f40,%f46 + fnegd pio2,%f26 + fmovdg %fcc2,signbit,%f26 + faddd %f26,%f20,%f26 + sub %g5,%l2,%o7 + cmp %o7,%o5 + bl,pt %icc,1f +! delay slot + nop + ldd [%fp+pio4],%f20 + faddd %f26,%f20,%f26 +1: + fdtoi %f26,%f24 +.special2: + fxor %f26,%f46,%f26 + st %f26,[%l6] + st %f27,[%l6+4] + addcc %i0,-1,%i0 + bg,pn %icc,1f +! delay slot + nop + fmovd pio2,%f20 ! set up dummy argument + fmovd pio2,%f22 + fzero %f40 + fzero %f46 + mov 0,%l2 + ba,pt %icc,.cont3 +! delay slot + add %fp,junk,%l6 +1: + lda [%i1]%asi,%f20 + lda [%i1+4]%asi,%f21 + lda [%i3]%asi,%f28 + lda [%i3+4]%asi,%f29 + fabsd %f20,%f24 + lda [%i1]%asi,%o0 + fabsd %f28,%f22 + lda [%i3]%asi,%l3 + mov %i5,%l6 + fand %f20,signbit,%f46 + add %i1,%i2,%i1 + fand %f28,signbit,%f40 + fcmpd %fcc2,%f24,%f22 + add %i3,%i4,%i3 + add %i5,%l7,%i5 + fmovd %f24,%f20 + sethi %hi(0x80000000),%g5 + andn %o0,%g5,%o0 + andn %l3,%g5,%l3 + fmovdg %fcc2,%f22,%f20 + fmovdg %fcc2,%f24,%f22 + mov %o0,%o7 + movg %fcc2,%l3,%o0 + movg %fcc2,%o7,%l3 + fbu,pn %fcc2,.nan2_from_special2 +! delay slot + nop + sub %l3,%o0,%l2 + sub %l3,%o3,%g5 + sub %l2,%o4,%o7 + andcc %g5,%o7,%g0 + bge,pn %icc,.big2_from_special2 +! delay slot + nop + cmp %o0,%o5 + bl,pn %icc,.small2 +! delay slot + nop + ba,pt %icc,.cont3 +! 
delay slot + nop + + SET_SIZE(__vatan2) + diff --git a/usr/src/libm/src/mvec/vis/__vatan2f.S b/usr/src/libm/src/mvec/vis/__vatan2f.S new file mode 100644 index 0000000..2451611 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vatan2f.S @@ -0,0 +1,3378 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vatan2f.S 1.6 06/01/23 SMI" + + .file "__vatan2f.S" + +#include "libm.h" + + RO_DATA + .align 64 +.CONST_TBL: + .word 0xbff921fb, 0x54442d18 ! -M_PI_2 + .word 0x3ff921fb, 0x54442d18 ! M_PI_2 + .word 0xbff921fb, 0x54442d18 ! -M_PI_2 + .word 0x3ff921fb, 0x54442d18 ! M_PI_2 + .word 0xc00921fb, 0x54442d18 ! -M_PI + .word 0x400921fb, 0x54442d18 ! M_PI + .word 0x80000000, 0x00000000 ! -0.0 + .word 0x00000000, 0x00000000 ! 0.0 + + .word 0xbff00000, 0x00000000 ! -1.0 + .word 0x3ff00000, 0x00000000 ! 1.0 + + .word 0x3fefffff, 0xfe79bf93 ! K0 = 9.99999997160545464888e-01 + .word 0xbfd55552, 0xf0db4320 ! K1 = -3.33332762919825514315e-01 + .word 0x3fc998f8, 0x2493d066 ! K2 = 1.99980752811487135558e-01 + .word 0xbfc240b8, 0xd994abf9 ! 
K3 = -1.42600160828209047720e-01 + .word 0x3fbbfc9e, 0x8c2b0243 ! K4 = 1.09323415013030928421e-01 + .word 0xbfb56013, 0x64b1cac3 ! K5 = -8.34972496830160174704e-02 + .word 0x3fad3ad7, 0x9f53e142 ! K6 = 5.70895559303061900411e-02 + .word 0xbf9f148f, 0x2a829af1 ! K7 = -3.03518647857811706139e-02 + .word 0x3f857a8c, 0x747ed314 ! K8 = 1.04876492549493055747e-02 + .word 0xbf5bdf39, 0x729124b6 ! K9 = -1.70117006406859722727e-03 + + .word 0x3fe921fb, 0x54442d18 ! M_PI_4 + .word 0x36a00000, 0x00000000 ! 2^(-149) + +#define counter %o3 +#define stridex %i4 +#define stridey %i5 +#define stridez %l1 +#define cmul_arr %i0 +#define cadd_arr %i2 +#define _0x7fffffff %l0 +#define _0x7f800000 %l2 + +#define K0 %f42 +#define K1 %f44 +#define K2 %f46 +#define K3 %f48 +#define K4 %f50 +#define K5 %f52 +#define K6 %f54 +#define K7 %f56 +#define K8 %f58 +#define K9 %f60 + +#define tmp_counter STACK_BIAS-32 +#define tmp_py STACK_BIAS-24 +#define tmp_px STACK_BIAS-16 +#define tmp_pz STACK_BIAS-8 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +!-------------------------------------------------------------------- +! !!!!! vatan2f algorithm !!!!! +! uy0 = *(int*)py; +! ux0 = *(int*)px; +! ay0 = uy0 & 0x7fffffff; +! ax0 = ux0 & 0x7fffffff; +! if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 ) +! { +! /* |X| or |Y| = Nan */ +! if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 ) +! { +! ftmp0 = *(float*)&ax0 * *(float*)&ay0; +! *pz = ftmp0; +! } +! signx0 = (unsigned)ux0 >> 30; +! signx0 &= 2; +! signy0 = uy0 >> 31; +! if (ay0 == 0x7f800000) +! signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2; +! else +! signx0 += signx0; +! res = signx0 * M_PI_4; +! signy0 <<= 3; +! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); +! res *= dtmp0; +! ftmp0 = (float) res; +! *pz = ftmp0; +! goto next; +! } +! if ( ax0 == 0 && ay0 == 0 ) +! { +! signy0 = uy0 >> 28; +! signx0 = ux0 >> 27; +! ldiff0 = ax0 - ay0; +! ldiff0 >>= 31; +! signx0 &= -16; +! signy0 &= -8; +! ldiff0 <<= 5; +! 
signx0 += signy0; +! res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0); +! ftmp0 = (float) res; +! *pz = ftmp0; +! goto next; +! } +! ldiff0 = ax0 - ay0; +! ldiff0 >>= 31; +! addrc0 = (char*)px - (char*)py; +! addrc0 &= ldiff0; +! fy0 = *(float*)((char*)py + addrc0); +! fx0 = *(float*)((char*)px - addrc0); +! itmp0 = *(int*)&fy0; +! if((itmp0 & 0x7fffffff) < 0x00800000) +! { +! itmp0 >>= 28; +! itmp0 &= -8; +! fy0 = fabsf(fy0); +! dtmp0 = (double) *(int*)&fy0; +! dtmp0 *= C2ONM149; +! dsign = *(double*)((char*)cmul_arr + itmp0); +! dtmp0 *= dsign; +! y0 = dtmp0; +! } +! else +! y0 = (double)fy0; +! itmp0 = *(int*)&fx0; +! if((itmp0 & 0x7fffffff) < 0x00800000) +! { +! itmp0 >>= 28; +! itmp0 &= -8; +! fx0 = fabsf(fx0); +! dtmp0 = (double) *(int*)&fx0; +! dtmp0 *= C2ONM149; +! dsign = *(double*)((char*)cmul_arr + itmp0); +! dtmp0 *= dsign; +! x0 = dtmp0; +! } +! else +! x0 = (double)fx0; +! px += stridex; +! py += stridey; +! x0 = y0 / x0; +! x20 = x0 * x0; +! dtmp0 = K9 * x20; +! dtmp0 += K8; +! dtmp0 *= x20; +! dtmp0 += K7; +! dtmp0 *= x20; +! dtmp0 += K6; +! dtmp0 *= x20; +! dtmp0 += K5; +! dtmp0 *= x20; +! dtmp0 += K4; +! dtmp0 *= x20; +! dtmp0 += K3; +! dtmp0 *= x20; +! dtmp0 += K2; +! dtmp0 *= x20; +! dtmp0 += K1; +! dtmp0 *= x20; +! dtmp0 += K0; +! x0 = dtmp0 * x0; +! signy0 = uy0 >> 28; +! signy0 &= -8; +! signx0 = ux0 >> 27; +! signx0 &= -16; +! ltmp0 = ldiff0 << 5; +! ltmp0 += (char*)cadd_arr; +! ltmp0 += signx0; +! cadd0 = *(double*)(ltmp0 + signy0); +! cmul0_ind = ldiff0 << 3; +! cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); +! dtmp0 = cmul0 * x0; +! dtmp0 = cadd0 + dtmp0; +! ftmp0 = (float)dtmp0; +! *pz = ftmp0; +! pz += stridez; +! 
+!-------------------------------------------------------------------- + + ENTRY(__vatan2f) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],%l7 +#else + ld [%fp+STACK_BIAS+92],%l7 +#endif + + st %i0,[%fp+tmp_counter] + sethi %hi(0x7ffffc00),_0x7fffffff + add _0x7fffffff,1023,_0x7fffffff + or %g0,%i2,%o2 + sll %l7,2,stridez + + sethi %hi(0x7f800000),_0x7f800000 + mov %g5,%g1 + + or %g0,stridey,%o4 + add %g1,56,cadd_arr + + sll %o2,2,stridey + add %g1,72,cmul_arr + + ldd [%g1+80],K0 + ldd [%g1+80+8],K1 + ldd [%g1+80+16],K2 + ldd [%g1+80+24],K3 + ldd [%g1+80+32],K4 + ldd [%g1+80+40],K5 + ldd [%g1+80+48],K6 + ldd [%g1+80+56],K7 + ldd [%g1+80+64],K8 + ldd [%g1+80+72],K9 + + sll stridex,2,stridex + + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_py],%i1 + ldx [%fp+tmp_px],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + subcc counter,1,counter + bneg,pn %icc,.exit + nop + + lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; + + lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; + + and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; + + cmp %l7,_0x7f800000 + bge,pn %icc,.spec0 + and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; + + cmp %l6,_0x7f800000 + bge,pn %icc,.spec0 + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.spec1 + sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; + + cmp %l7,%o5 + bl,pn %icc,.spec1 + nop + + stx %o4,[%fp+tmp_pz] + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i1,stridey,%i1 ! py += stridey + + add %i3,stridex,%i3 ! 
px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; + +.spec1_cont: + lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (0_0) signx0 &= -16; + + and %o4,-8,%o4 ! (0_0) signy0 &= -8; + + fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; + + add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; + + and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.u0 + and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; +.c0: + cmp %g1,%o5 + bl,pn %icc,.u1 + ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); +.c1: + cmp %l6,_0x7f800000 + bge,pn %icc,.u2 + sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; +.c2: + cmp %g1,_0x7f800000 + bge,pn %icc,.u3 + nop +.c3: + sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; + + and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; + + lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; + + lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); + sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; + + cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 + bge,pn %icc,.update0 ! (1_0) if ( b0 > 0x7f800000 ) + nop +.cont0: + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (1_0) y0 = (double)fy0; + + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (1_0) x0 = (double)fx0; +.d0: + and %o5,-16,%o5 ! (1_0) signx0 &= -16; + and %o4,-8,%o4 ! (1_0) signy0 &= -8; + + lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; + + lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; + fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; + + fmuld %f12,%f12,%f20 ! 
(0_0) x20 = x0 * x0; + + add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; + + and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.u4 + and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; +.c4: + cmp %g5,%o5 + bl,pn %icc,.u5 + fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; +.c5: + cmp %l6,_0x7f800000 + bge,pn %icc,.u6 + ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); +.c6: + cmp %g5,_0x7f800000 + bge,pn %icc,.u7 + sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; +.c7: + sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; + + faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; + and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; + + lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; + + lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); + + cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 + bge,pn %icc,.update1 ! (2_0) if ( b0 > 0x7f800000 ) + nop +.cont1: + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (2_0) y0 = (double)fy0; + + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + fstod %f2,%f2 ! (2_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; +.d1: + lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; + and %o5,-16,%o5 ! (2_0) signx0 &= -16; + faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; + + lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; + + fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; + + fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; + + add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; + and %o4,-8,%o4 ! (2_0) signy0 &= -8; + fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + + cmp %l6,%o5 + bl,pn %icc,.u8 + and %l3,_0x7fffffff,%o0 ! 
(3_0) ay0 = uy0 & 0x7fffffff; +.c8: + cmp %o0,%o5 + bl,pn %icc,.u9 + fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; +.c9: + cmp %l6,_0x7f800000 + bge,pn %icc,.u10 + faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; +.c10: + cmp %o0,_0x7f800000 + bge,pn %icc,.u11 + ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); +.c11: + sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; + + sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; + + faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; + and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; + fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; + + lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); + + cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 + bge,pn %icc,.update2 ! (3_0) if ( b0 > 0x7f800000 ) + nop +.cont2: + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (3_0) y0 = (double)fy0; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + fstod %f1,%f16 ! (3_0) x0 = (double)fx0; +.d2: + faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; + add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; + and %o5,-16,%o5 ! (3_0) signx0 &= -16; + + lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; + fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; + + lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; + fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; + + and %o4,-8,%o4 ! (3_0) signy0 &= -8; + fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; + + add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; + fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; + + and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; + + cmp %l6,%o5 + bl,pn %icc,.u12 + and %l4,_0x7fffffff,%l5 ! 
(4_0) ay0 = uy0 & 0x7fffffff; +.c12: + cmp %l5,%o5 + bl,pn %icc,.u13 + fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; +.c13: + cmp %l6,_0x7f800000 + bge,pn %icc,.u14 + faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; +.c14: + ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l5,_0x7f800000 + bge,pn %icc,.u15 + fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; +.c15: + sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; + + sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; + + faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; + and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; + fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; + faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); + + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bge,pn %icc,.update3 ! (4_0) if ( b0 > 0x7f800000 ) + nop +.cont3: + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (4_0) y0 = (double)fy0; + + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + add %i3,stridex,%i3 ! px += stridex + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + fstod %f2,%f2 ! (4_0) x0 = (double)fx0; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; +.d3: + lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; + faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; + + fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; + and %o5,-16,%o5 ! (4_0) signx0 &= -16; + + lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; + fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; + faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; + + and %o4,-8,%o4 ! (4_1) signy0 &= -8; + fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; + + add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; + fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; + + and %l4,_0x7fffffff,%l6 ! 
(5_1) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; + + cmp %l6,%o5 + bl,pn %icc,.u16 + and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; +.c16: + cmp %o7,%o5 + bl,pn %icc,.u17 + fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; +.c17: + cmp %l6,_0x7f800000 + bge,pn %icc,.u18 + fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; +.c18: + cmp %o7,_0x7f800000 + bge,pn %icc,.u19 + faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; +.c19: + ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; + + sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; + and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); + sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; + sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; + faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; + + lda [%o4]0x82,%f1 ! (5_1) fx0 = *(float*)((char*)px - addrc0); + + fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 + bge,pn %icc,.update4 ! (5_1) if ( b0 > 0x7f800000 ) + nop +.cont4: + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + fstod %f0,%f40 ! (5_1) y0 = (double)fy0; + + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + add %i3,stridex,%i3 ! px += stridex + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + fstod %f1,%f2 ! (5_1) x0 = (double)fx0; +.d4: + sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; + add %i1,stridey,%i1 ! py += stridey + + faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; + sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; + + lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; + fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! 
(0_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (5_1) signx0 &= -16; + fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; + faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; + + fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; + + ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; + and %o4,-8,%o4 ! (5_1) signy0 &= -8; + fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; + + fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; + cmp %l7,%o5 + bl,pn %icc,.u20 + fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; +.c20: + cmp %l6,%o5 + bl,pn %icc,.u21 + fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; +.c21: + cmp %l7,_0x7f800000 + bge,pn %icc,.u22 + faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; +.c22: + ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.u23 + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; +.c23: + sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + + fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 + bge,pn %icc,.update5 ! (0_0) if ( b0 > 0x7f800000 ) + faddd %f34,%f12,%f18 ! 
(0_1) dtmp0 = cadd0 + dtmp0; +.cont5: + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; +.d5: + lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (0_0) signx0 &= -16; + faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; + + ldx [%fp+tmp_pz],%o1 + fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; + and %o4,-8,%o4 ! (0_0) signy0 &= -8; + faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; + + fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; + faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; + + fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; + st %f2,[%o1] ! (0_1) *pz = ftmp0 + add %o1,stridez,%o2 + fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; + fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; + + fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u24 + fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; +.c24: + cmp %g1,%o5 + bl,pn %icc,.u25 + fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; +.c25: + cmp %l6,_0x7f800000 + bge,pn %icc,.u26 + faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; +.c26: + ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %g1,_0x7f800000 + bge,pn %icc,.u27 + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; +.c27: + sub %l6,%g1,%o1 ! 
(1_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; + sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; + and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); + sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; + faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); + sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; + add %o2,stridez,%o1 ! pz += stridez + + fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 + bge,pn %icc,.update6 ! (1_0) if ( b0 > 0x7f800000 ) + faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; +.cont6: + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (1_0) y0 = (double)fy0; + + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (1_0) x0 = (double)fx0; +.d6: + faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; + and %o5,-16,%o5 ! (1_0) signx0 &= -16; + and %o4,-8,%o4 ! (1_0) signy0 &= -8; + + lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; + fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; + fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; + faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; + + fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; + fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! (1_1) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%g5],%f0 ! 
(2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; + fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; + + fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; + + and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u28 + fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; +.c28: + cmp %g5,%o5 + bl,pn %icc,.u29 + fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; +.c29: + cmp %l6,_0x7f800000 + bge,pn %icc,.u30 + faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; +.c30: + ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %g5,_0x7f800000 + bge,pn %icc,.u31 + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; +.c31: + sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; + sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; + and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; + fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); + sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; + + fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 + bge,pn %icc,.update7 ! (2_0) if ( b0 > 0x7f800000 ) + faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; +.cont7: + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (2_0) y0 = (double)fy0; + + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! 
px += stridex + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + fstod %f2,%f2 ! (2_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; +.d7: + lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; + and %o5,-16,%o5 ! (2_0) signx0 &= -16; + faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; + + lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; + fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; + + fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; + faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; + + fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; + fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; + st %f1,[%o1] ! (2_1) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; + and %o4,-8,%o4 ! (2_0) signy0 &= -8; + fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; + + fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u32 + fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; +.c32: + cmp %o0,%o5 + bl,pn %icc,.u33 + fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; +.c33: + cmp %l6,_0x7f800000 + bge,pn %icc,.u34 + faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; +.c34: + ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %o0,_0x7f800000 + bge,pn %icc,.u35 + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; +.c35: + sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; + + fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; + sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; + and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; + fmuld %f16,%f20,%f16 ! 
(0_0) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; + add %o2,stridez,%o1 ! pz += stridez + faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; + + lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); + sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; + + fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 + bge,pn %icc,.update8 ! (3_0) if ( b0 > 0x7f800000 ) + faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; +.cont8: + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (3_0) y0 = (double)fy0; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + fstod %f1,%f16 ! (3_0) x0 = (double)fx0; +.d8: + faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; + add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; + and %o5,-16,%o5 ! (3_0) signx0 &= -16; + + lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; + fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; + fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; + faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; + + fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; + and %o4,-8,%o4 ! (3_0) signy0 &= -8; + st %f1,[%o2] ! (3_1) *pz = ftmp0; + fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; + fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; + + fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; + + and %l4,_0x7fffffff,%l5 ! 
(4_0) ay0 = uy0 & 0x7fffffff; + cmp %l6,%o5 + bl,pn %icc,.u36 + fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; +.c36: + cmp %l5,%o5 + bl,pn %icc,.u37 + fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; +.c37: + cmp %l6,_0x7f800000 + bge,pn %icc,.u38 + faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; +.c38: + ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l5,_0x7f800000 + bge,pn %icc,.u39 + fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; +.c39: + sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; + + fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; + sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; + and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; + fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); + sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; + + fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bge,pn %icc,.update9 ! (4_0) if ( b0 > 0x7f800000 ) + faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; +.cont9: + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (4_0) y0 = (double)fy0; + + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + fstod %f2,%f2 ! (4_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; +.d9: + lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; + faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; + + fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; + and %o5,-16,%o5 ! 
(4_0) signx0 &= -16; + faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; + + subcc counter,5,counter + bneg,pn %icc,.tail + nop + + ba .main_loop + nop + + .align 16 +.main_loop: + lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; + nop + fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; + faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; + + fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; + and %o4,-8,%o4 ! (4_1) signy0 &= -8; + st %f22,[%o1] ! (4_2) *pz = ftmp0; + fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; + + ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; + fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; + + fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up0 + fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; +.co0: + nop + cmp %o7,%o5 + bl,pn %icc,.up1 + faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; +.co1: + ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up2 + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; +.co2: + sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; + cmp %o7,_0x7f800000 + bge,pn %icc,.up3 + + fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; +.co3: + sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; + and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); + sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; + sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; + faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (5_1) fx0 = *(float*)((char*)px - addrc0); + + fmuld %f38,%f20,%f38 ! 
(0_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 + bge,pn %icc,.update10 ! (5_1) if ( b0 > 0x7f800000 ) + faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; +.cont10: + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + nop + fstod %f0,%f40 ! (5_1) y0 = (double)fy0; + + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + add %o2,stridez,%o1 ! pz += stridez + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; +.den0: + sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; + add %i1,stridey,%i1 ! py += stridey + + faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; + sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; + + lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; + add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; + fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (5_1) signx0 &= -16; + fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; + faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; + + fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! (5_2) *pz = ftmp0; + fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; + + ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; + and %o4,-8,%o4 ! (5_1) signy0 &= -8; + fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; + + fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; + fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; + + cmp %l7,%o5 + bl,pn %icc,.up4 + fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; +.co4: + nop + cmp %l6,%o5 + bl,pn %icc,.up5 + faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; +.co5: + ldd [%o2+%o4],%f36 ! 
(5_1) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l7,_0x7f800000 + bge,pn %icc,.up6 + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; +.co6: + sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; + cmp %l6,_0x7f800000 + bge,pn %icc,.up7 + + fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; +.co7: + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + add %o1,stridez,%o2 ! pz += stridez + + fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 + bge,pn %icc,.update11 ! (0_0) if ( b0 > 0x7f800000 ) + faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; +.cont11: + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; +.den1: + lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; + and %o5,-16,%o5 ! (0_0) signx0 &= -16; + faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; + + fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; + and %o4,-8,%o4 ! (0_0) signy0 &= -8; + faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; + + fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; + faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; + + fdtos %f18,%f2 ! 
(0_1) ftmp0 = (float)dtmp0; + nop + st %f2,[%o1] ! (0_1) *pz = ftmp0 + fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; + + ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; + fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; + + fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up8 + fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; +.co8: + nop + cmp %g1,%o5 + bl,pn %icc,.up9 + faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; +.co9: + ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up10 + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; +.co10: + sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; + cmp %g1,_0x7f800000 + bge,pn %icc,.up11 + + fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; +.co11: + sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; + and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); + sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; + sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; + faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); + sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; + add %o2,stridez,%o1 ! pz += stridez + + fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 + bge,pn %icc,.update12 ! (1_0) if ( b0 > 0x7f800000 ) + faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; +.cont12: + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + add %i1,stridey,%i1 ! 
py += stridey + nop + fstod %f0,%f40 ! (1_0) y0 = (double)fy0; + + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + fstod %f2,%f2 ! (1_0) x0 = (double)fx0; +.den2: + faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; + and %o5,-16,%o5 ! (1_0) signx0 &= -16; + and %o4,-8,%o4 ! (1_0) signy0 &= -8; + + lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; + fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; + fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; + faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; + + fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; + nop + st %f2,[%o2] ! (1_1) *pz = ftmp0; + fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; + + ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; + fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; + + fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; + + and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up12 + fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; +.co12: + nop + cmp %g5,%o5 + bl,pn %icc,.up13 + faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; +.co13: + ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up14 + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; +.co14: + sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; + cmp %g5,_0x7f800000 + bge,pn %icc,.up15 + + fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; +.co15: + sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! 
(3_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; + and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; + fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); + sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 + bge,pn %icc,.update13 ! (2_0) if ( b0 > 0x7f800000 ) + faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; +.cont13: + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (2_0) y0 = (double)fy0; + + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + fstod %f2,%f2 ! (2_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; +.den3: + lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; + and %o5,-16,%o5 ! (2_0) signx0 &= -16; + faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; + + lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; + fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; + + fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; + faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; + + fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; + st %f1,[%o1] ! (2_1) *pz = ftmp0; + fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; + + ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; + and %o4,-8,%o4 ! (2_0) signy0 &= -8; + fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; + + fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; + and %l4,_0x7fffffff,%l6 ! 
(3_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; + + and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; + fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up16 + fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; +.co16: + nop + cmp %o0,%o5 + bl,pn %icc,.up17 + faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; +.co17: + ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up18 + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; +.co18: + sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; + cmp %o0,_0x7f800000 + bge,pn %icc,.up19 + + fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; +.co19: + sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; + and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; + fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; + + lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; + add %o2,stridez,%o1 ! pz += stridez + faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; + + lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); + sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 + bge,pn %icc,.update14 ! (3_0) if ( b0 > 0x7f800000 ) + faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; +.cont14: + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (3_0) y0 = (double)fy0; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + fstod %f1,%f16 ! (3_0) x0 = (double)fx0; +.den4: + faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; + add %l6,cadd_arr,%l6 ! 
(3_0) ltmp0 += (char*)cadd_arr; + and %o5,-16,%o5 ! (3_0) signx0 &= -16; + + lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; + fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; + + lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; + fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; + faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; + + fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; + and %o4,-8,%o4 ! (3_0) signy0 &= -8; + st %f1,[%o2] ! (3_1) *pz = ftmp0; + fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; + + ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; + fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; + + fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; + and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; + sethi %hi(0x00800000),%o5 + faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; + + and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; + fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; + + cmp %l6,%o5 + bl,pn %icc,.up20 + fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; +.co20: + nop + cmp %l5,%o5 + bl,pn %icc,.up21 + faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; +.co21: + ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); + cmp %l6,_0x7f800000 + bge,pn %icc,.up22 + fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; +.co22: + sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; + cmp %l5,_0x7f800000 + bge,pn %icc,.up23 + + fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; +.co23: + sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; + faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; + + faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; + and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; + fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; + + lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; + add %o1,stridez,%o2 ! pz += stridez + faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; + + lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); + sll %o7,3,%o7 ! 
(5_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bge,pn %icc,.update15 ! (4_0) if ( b0 > 0x7f800000 ) + faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; +.cont15: + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + add %i1,stridey,%i1 ! py += stridey + fstod %f0,%f40 ! (4_0) y0 = (double)fy0; + + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + fstod %f2,%f2 ! (4_0) x0 = (double)fx0; + sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; +.den5: + lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; + subcc counter,6,counter ! counter? + add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; + faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; + + fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; + and %o5,-16,%o5 ! (4_0) signx0 &= -16; + bpos,pt %icc,.main_loop + faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; + +.tail: + addcc counter,5,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; + + fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; + st %f22,[%o1] ! (4_2) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; + + fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; + faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; + + fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; + + + faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; + + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; + faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; + + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; + + fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; + faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; + + faddd %f22,K5,%f14 ! 
(2_1) dtmp0 += K5; + add %o2,stridez,%o1 ! pz += stridez + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + + fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; + faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; + + faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; + + fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! (5_2) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + + fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; + faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; + + fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; + + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; + faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; + + sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; + faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; + + add %o1,stridez,%o2 ! pz += stridez + + fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; + faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; + + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; + + faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; + + fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; + st %f2,[%o1] ! (0_1) *pz = ftmp0 + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o2,%o4 + + ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + + fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; + + fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; + + fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; + faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; + + sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; + + add %o2,stridez,%o1 ! pz += stridez + + fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; + faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; + + faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; + + fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; + st %f2,[%o2] ! 
(1_1) *pz = ftmp0; + + subcc counter,1,counter + bneg,a,pn %icc,.begin + or %g0,%o1,%o4 + + ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); + + fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; + + fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; + + add %o1,stridez,%o2 ! pz += stridez + + faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; + + fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; + st %f1,[%o1] ! (2_1) *pz = ftmp0; + + ba .begin + or %g0,%o2,%o4 + + .align 16 +.spec0: + cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 + bg 2f ! if ( ax0 >= 0x7f800000 ) + srl %l3,30,%l3 ! signx0 = (unsigned)ux0 >> 30; + + cmp %l7,_0x7f800000 ! ay0 ? 0x7f800000 + bg 2f ! if ( ay0 >= 0x7f800000 ) + and %l3,2,%l3 ! signx0 &= 2; + + sra %l4,31,%l4 ! signy0 = uy0 >> 31; + bne,a 1f ! if (ay0 != 0x7f800000) + add %l3,%l3,%l3 ! signx0 += signx0; + + cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 + bne,a 1f ! if ( ax0 != 0x7f800000 ) + add %g0,2,%l3 ! signx0 = 2 + + add %l3,1,%l3 ! signx0 ++; +1: + sll %l4,3,%l4 ! signy0 <<= 3; + st %l3,[%fp+tmp_pz] ! STORE signx0 + + ldd [cmul_arr+88],%f0 ! LOAD M_PI_4 + + ld [%fp+tmp_pz],%f2 ! LOAD signx0 + + ldd [cmul_arr+%l4],%f4 ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); + + add %i1,stridey,%i1 ! py += stridey; + fitod %f2,%f2 ! dtmp1 = (double)signx0; + + add %i3,stridex,%i3 ! px += stridex; + + fmuld %f2,%f0,%f0 ! res = signx0 * M_PI_4; + + fmuld %f0,%f4,%f0 ! res *= dtmp0; + fdtos %f0,%f0 ! ftmp0 = (float) res; + st %f0,[%o4] ! *pz = ftmp0; + + ba .begin1 + add %o4,stridez,%o4 ! pz += stridez; +2: + std %l6,[%fp+tmp_pz] ! *(float*)&ax0, *(float*)&ay0 + ldd [%fp+tmp_pz],%f0 ! *(float*)&ax0, *(float*)&ay0 + + add %i1,stridey,%i1 ! py += stridey; + + fmuls %f0,%f1,%f0 ! ftmp0 = *(float*)&ax0 * *(float*)&ay0; + add %i3,stridex,%i3 ! pz += stridex; + st %f0,[%o4] ! *pz = ftmp0; + + ba .begin1 + add %o4,stridez,%o4 ! 
pz += stridez; + + .align 16 +.spec1: + cmp %l6,0 + bne,pn %icc,1f + nop + + cmp %l7,0 + bne,pn %icc,1f + nop + + sra %l4,28,%l4 ! signy0 = uy0 >> 28; + + sra %l3,27,%l3 ! signx0 = ux0 >> 27; + and %l4,-8,%l4 ! signy0 &= -8; + + sra %o2,31,%o2 ! ldiff0 >>= 31; + and %l3,-16,%l3 ! signx0 &= -16; + + sll %o2,5,%o2 ! ldiff0 <<= 5; + add %l4,%l3,%l3 ! signx0 += signy0; + + add %o2,%l3,%l3 ! signx0 += ldiff0; + add %i1,stridey,%i1 ! py += stridey; + + ldd [cadd_arr+%l3],%f0 ! res = *(double*)((char*)(cadd_arr + 7) + signx0); + add %i3,stridex,%i3 ! px += stridex; + + fdtos %f0,%f0 ! ftmp0 = (float) res; + st %f0,[%o4] ! *pz = ftmp0; + + ba .begin1 + add %o4,stridez,%o4 ! pz += stridez; +1: + stx %o4,[%fp+tmp_pz] + sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; + sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; + + and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; + + lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 + + lda [%i1+%o2]0x82,%l5 ! (0_0) fy0 = *(float*)((char*)py + addrc0); + + lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; + + lda [%o4]0x82,%g5 ! (0_0) fx0 = *(float*)((char*)px - addrc0); + + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i1,stridey,%i1 ! py += stridey + + add %i3,stridex,%i3 ! px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + + add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; + + and %l5,_0x7fffffff,%l4 + sethi %hi(0x00800000),%g1 + + cmp %l4,%g1 + bge,a %icc,1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + fabss %f0,%f0 ! fy0 = fabsf(fy0); + ldd [cmul_arr+96],%f40 + sra %l5,28,%l4 ! itmp0 >>= 28; + + and %l4,-8,%l4 + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f40,%f0,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f40,%f0,%f40 ! 
dtmp0 *= dsign; +1: + and %g5,_0x7fffffff,%l4 + cmp %l4,%g1 + bge,a %icc,.spec1_cont + fstod %f2,%f2 ! (0_0) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %g5,28,%l4 ! itmp0 >>= 28; + + and %l4,-8,%l4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + ba .spec1_cont + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; + + .align 16 +.update0: + cmp counter,0 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont0 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,0,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,0,counter + ba .cont0 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i3,stridex,%i3 ! px += stridex + add %i1,stridey,%i1 ! py += stridey + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! 
dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + ba .d0 + add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update1: + cmp counter,1 + bg,pn %icc,1f + nop + + fzero %f0 + ba .cont1 + ld [cmul_arr],%f2 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,1,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,1,counter + ba .cont1 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + + add %i1,stridey,%i1 ! py += stridey + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! 
(2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + ba .d1 + add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update2: /* review note on .update2: if counter <= 2, resume at .cont2 with %f0 zeroed and %f1 loaded from [cmul_arr]. If counter > 2 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 2, %i1 (py) and %i3 (px) in the frame and set counter to 2, then resume at .cont2 the same way. Otherwise widen fy0 (%f0) and fx0 (%f1) to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .d2. */ + cmp counter,2 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont2 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,2,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,2,counter + ba .cont2 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_px] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f1,%f16 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f16 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; +1: + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! 
px += stridex + ba .d2 + sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; + + .align 16 +.update3: /* review note on .update3: if counter <= 3, resume at .cont3 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 3 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 3, %i1 (py) and %i3 (px) in the frame and set counter to 3, then resume at .cont3 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .d3. */ + cmp counter,3 + bg,pn %icc,1f + nop + + fzero %f0 + ba .cont3 + ld [cmul_arr],%f2 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,3,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,3,counter + ba .cont3 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! px += stridex + ba .d3 + sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; + + .align 16 +.update4: /* review note on .update4: if counter <= 4, resume at .cont4 with %f0 zeroed and %f1 loaded from [cmul_arr]. If counter > 4 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 4, %i1 (py) and %i3 (px) in the frame and set counter to 4, then resume at .cont4 the same way. Otherwise widen fy0 (%f0) and fx0 (%f1) to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .d4. */ + cmp counter,4 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont4 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,4,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,4,counter + ba .cont4 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_px] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + + and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff + cmp %o1,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f14 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f14,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f14 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f14,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff + cmp %o1,%o5 + bge,a 1f + fstod %f1,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f22 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f22,%f22 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f22,%f0,%f22 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f22,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + ba .d4 + add %i3,stridex,%i3 ! px += stridex + + .align 16 +.update5: /* review note on .update5: if counter <= 5, resume at .cont5 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 5 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 5, %i1 (py) and %i3 (px) in the frame and set counter to 5, then resume at .cont5 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign), parking %l5 in the tmp_py slot meanwhile, and continue at .d5. */ + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont5 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 
0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont5 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_px] + st %f2,[%fp+tmp_px+4] + ld [%fp+tmp_px],%o4 + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_py] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + ld [%fp+tmp_px+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_py],%l5 + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + ba .d5 + add %l6,cadd_arr,%l6 ! 
(0_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update6: /* review note on .update6: if counter <= 5, resume at .cont6 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 5 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 5, %i1 (py) and %i3 (px) in the frame and set counter to 5, then resume at .cont6 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign), parking %l5 in the tmp_px slot meanwhile, and continue at .d6. */ + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont6 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont6 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_px] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + add %i3,stridex,%i3 ! px += stridex + add %i1,stridey,%i1 ! py += stridey + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_px],%l5 + + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + ba .d6 + add %l6,cadd_arr,%l6 ! 
(1_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update7: /* review note on .update7: if counter <= 5, resume at .cont7 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 5 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 5, %i1 (py) and %i3 (px) in the frame and set counter to 5, then resume at .cont7 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .d7. */ + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont7 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont7 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + ba .d7 + add %l6,cadd_arr,%l6 ! 
(2_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update8: /* review note on .update8: if counter <= 5, resume at .cont8 with %f0 zeroed and %f1 loaded from [cmul_arr]. If counter > 5 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 5, %i1 (py) and %i3 (px) in the frame and set counter to 5, then resume at .cont8 the same way. Otherwise widen fy0 (%f0) and fx0 (%f1) to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .d8. */ + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont8 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,5,counter + ba .cont8 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_pz] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f1,%f16 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f16 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; +1: + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! px += stridex + ba .d8 + sra %l3,28,%o4 ! 
(3_0) signy0 = uy0 >> 28; + + .align 16 +.update9: /* review note on .update9: if counter <= 5, resume at .cont9 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 5 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 5, %i1 (py) and %i3 (px) in the frame and set counter to 5, then resume at .cont9 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .d9. */ + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont9 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,5,counter + ba .cont9 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + + add %i3,stridex,%i3 ! px += stridex + ba .d9 + sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; + + .align 16 +.update10: /* review note on .update10: if counter <= 1, resume at .cont10 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 1 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 1, %i1 (py) and %i3 (px) in the frame and set counter to 1, then resume at .cont10 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .den0. */ + cmp counter,1 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont10 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,1,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,1,counter + ba .cont10 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o1 + fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; + + and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff + cmp %o4,%o5 + bge,a 1f + fstod %f0,%f40 ! (5_1) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o1,28,%o1 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o1,-8,%o1 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; + fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; + + sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; + add %i3,stridex,%i3 ! px += stridex + + ld [%fp+tmp_pz+4],%o1 + and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff + cmp %o4,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o1,28,%o1 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o1,-8,%o1 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ba .den0 + add %o2,stridez,%o1 ! pz += stridez + + .align 16 +.update11: /* review note on .update11: if counter <= 2, resume at .cont11 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 2 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 2, %i1 (py) and %i3 (px) in the frame and set counter to 2, then resume at .cont11 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign), parking %l5 in the tmp_px slot meanwhile, and continue at .den1. */ + cmp counter,2 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont11 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! 
(4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,2,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,2,counter + ba .cont11 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_px] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_px],%l5 + sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; + add %i3,stridex,%i3 ! px += stridex + + lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; + sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; + ba .den1 + add %l6,cadd_arr,%l6 ! 
(0_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update12: /* review note on .update12: if counter <= 3, resume at .cont12 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 3 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 3, %i1 (py) and %i3 (px) in the frame and set counter to 3, then resume at .cont12 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign), parking %l5 in the tmp_px slot meanwhile, and continue at .den2. */ + cmp counter,3 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont12 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,3,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + stx %i3,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,3,counter + ba .cont12 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; + + stx %l5,[%fp+tmp_px] + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; + add %i3,stridex,%i3 ! px += stridex + add %i1,stridey,%i1 ! py += stridey + fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff + cmp %l5,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + ldx [%fp+tmp_px],%l5 + + sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; + + sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; + ba .den2 + add %l6,cadd_arr,%l6 ! 
(1_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update13: /* review note on .update13: if counter <= 4, resume at .cont13 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 4 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 4, %i1 (py) and %i3 minus stridex (px stepped back one element) in the frame and set counter to 4, then resume at .cont13 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .den3. */ + cmp counter,4 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont13 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,4,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + sub %i3,stridex,%o5 + stx %o5,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,4,counter + ba .cont13 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; + add %i1,stridey,%i1 ! py += stridey + fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f2,%f2 ! fx0 = fabsf(fx0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; + + sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; + ba .den3 + add %l6,cadd_arr,%l6 ! 
(2_0) ltmp0 += (char*)cadd_arr; + + .align 16 +.update14: /* review note on .update14: if counter <= 5, resume at .cont14 with %f0 zeroed and %f1 loaded from [cmul_arr]. If counter > 5 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 5, %i1 (py) and %i3 minus stridex (px stepped back one element) in the frame and set counter to 5, then resume at .cont14 the same way. Otherwise widen fy0 (%f0) and fx0 (%f1) to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .den4. */ + cmp counter,5 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f1 + ba .cont14 + fzeros %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,5,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + sub %i3,stridex,%o5 + stx %o5,[%fp+tmp_px] + + ld [cmul_arr],%f1 + or %g0,5,counter + ba .cont14 + fzeros %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + std %f0,[%fp+tmp_pz] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; + + faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f1,%f16 ! (5_1) x0 = (double)fx0; + + fabss %f1,%f16 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; +1: + sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; + sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; + + ba .den4 + sra %l3,28,%o4 ! 
(3_0) signy0 = uy0 >> 28; + + .align 16 +.update15: /* review note on .update15: if counter <= 6, resume at .cont15 with %f0 zeroed and %f2 loaded from [cmul_arr]. If counter > 6 and either %o5 <= _0x7f800000 (signed) or %l3 and %l4 both mask to zero with _0x7fffffff, first save counter minus 6, %i1 (py) and %i3 minus stridex (px stepped back one element) in the frame and set counter to 6, then resume at .cont15 the same way. Otherwise widen fy0 and fx0 to double (fstod, or for magnitude bits below 0x00800000 fitod scaled by C2ONM149 and dsign; the "itmp0 = -8" notes describe the and-with-minus-8 mask) and continue at .den5. */ + cmp counter,6 + bg,pn %icc,1f + nop + + ld [cmul_arr],%f2 + ba .cont15 + fzero %f0 +1: + cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 + bg,pt %icc,1f + nop +2: + sub counter,6,counter + st counter,[%fp+tmp_counter] + stx %i1,[%fp+tmp_py] + sub %i3,stridex,%o5 + stx %o5,[%fp+tmp_px] + + ld [cmul_arr],%f2 + or %g0,6,counter + ba .cont15 + fzero %f0 +1: + andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + bne,pn %icc,1f + sethi %hi(0x00800000),%o5 + + andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff + be,pn %icc,2b + nop +1: + st %f0,[%fp+tmp_pz] + st %f2,[%fp+tmp_pz+4] + ld [%fp+tmp_pz],%o4 + fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; + + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f0,%f40 ! (0_0) y0 = (double)fy0; + + ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + fabss %f0,%f0 ! fy0 = fabsf(fy0); + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; + + fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; +1: + add %i1,stridey,%i1 ! py += stridey + faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; + fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; + + ld [%fp+tmp_pz+4],%o4 + and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff + cmp %l6,%o5 + bge,a 1f + fstod %f2,%f2 ! (5_1) x0 = (double)fx0; + + fabss %f2,%f2 ! fx0 = fabsf(fx0); + ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 + sra %o4,28,%o4 ! itmp0 >>= 28; + + and %o4,-8,%o4 ! itmp0 = -8; + fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; + + fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; + ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); + + fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; +1: + sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; + sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; + + ba .den5 + sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; + + .align 16 +.u0: /* review note: .uN and .upN are two-instruction stubs. Each branches to .cN (or .coN for .upN) and, in the branch delay slot, copies either _0x7fffffff or _0x7f800000 into %o5; the two values alternate in pairs of stubs. Presumably _0x7fffffff and _0x7f800000 are register aliases defined earlier in the file - confirm. */ + ba .c0 + or %g0,_0x7fffffff,%o5 +.u1: + ba .c1 + or %g0,_0x7fffffff,%o5 +.u2: + ba .c2 + or %g0,_0x7f800000,%o5 +.u3: + ba .c3 + or %g0,_0x7f800000,%o5 +.u4: + ba .c4 + or %g0,_0x7fffffff,%o5 +.u5: + ba .c5 + or %g0,_0x7fffffff,%o5 +.u6: + ba .c6 + or %g0,_0x7f800000,%o5 +.u7: + ba .c7 + or %g0,_0x7f800000,%o5 +.u8: + ba .c8 + or %g0,_0x7fffffff,%o5 +.u9: + ba .c9 + or %g0,_0x7fffffff,%o5 +.u10: + ba .c10 + or %g0,_0x7f800000,%o5 +.u11: + ba .c11 + or %g0,_0x7f800000,%o5 +.u12: + ba .c12 + or %g0,_0x7fffffff,%o5 +.u13: + ba .c13 + or %g0,_0x7fffffff,%o5 +.u14: + ba .c14 + or %g0,_0x7f800000,%o5 +.u15: + ba .c15 + or %g0,_0x7f800000,%o5 +.u16: + ba .c16 + or %g0,_0x7fffffff,%o5 +.u17: + ba .c17 + or %g0,_0x7fffffff,%o5 +.u18: + ba .c18 + or %g0,_0x7f800000,%o5 +.u19: + ba .c19 + or %g0,_0x7f800000,%o5 +.u20: + ba .c20 + or %g0,_0x7fffffff,%o5 +.u21: + ba .c21 + or %g0,_0x7fffffff,%o5 +.u22: + ba .c22 + or %g0,_0x7f800000,%o5 +.u23: + ba .c23 + or %g0,_0x7f800000,%o5 +.u24: + ba .c24 + or %g0,_0x7fffffff,%o5 +.u25: + ba .c25 + or %g0,_0x7fffffff,%o5 +.u26: + ba .c26 + or %g0,_0x7f800000,%o5 +.u27: + ba .c27 + or %g0,_0x7f800000,%o5 +.u28: + ba .c28 + or %g0,_0x7fffffff,%o5 +.u29: + ba .c29 + or %g0,_0x7fffffff,%o5 +.u30: + ba .c30 + or %g0,_0x7f800000,%o5 +.u31: + ba .c31 + or %g0,_0x7f800000,%o5 +.u32: + ba .c32 + or %g0,_0x7fffffff,%o5 +.u33: + ba .c33 + or %g0,_0x7fffffff,%o5 +.u34: + ba .c34 + or %g0,_0x7f800000,%o5 +.u35: + ba .c35 + or %g0,_0x7f800000,%o5 +.u36: + ba .c36 + or %g0,_0x7fffffff,%o5 +.u37: + ba .c37 + or %g0,_0x7fffffff,%o5 +.u38: + ba .c38 + or %g0,_0x7f800000,%o5 +.u39: + ba .c39 + or %g0,_0x7f800000,%o5 +.up0: + ba .co0 + or %g0,_0x7fffffff,%o5 +.up1: + ba .co1 + or %g0,_0x7fffffff,%o5 +.up2: + ba .co2 + or %g0,_0x7f800000,%o5 +.up3: + ba .co3 + or %g0,_0x7f800000,%o5 +.up4: + ba .co4 + or %g0,_0x7fffffff,%o5 +.up5: + ba .co5 + or %g0,_0x7fffffff,%o5 +.up6: + ba .co6 + or %g0,_0x7f800000,%o5 
+.up7: /* review note: .up7 to .up23 are two-instruction stubs: each branches to the matching .coN and, in the branch delay slot, copies _0x7fffffff or _0x7f800000 into %o5. */ + ba .co7 + or %g0,_0x7f800000,%o5 +.up8: + ba .co8 + or %g0,_0x7fffffff,%o5 +.up9: + ba .co9 + or %g0,_0x7fffffff,%o5 +.up10: + ba .co10 + or %g0,_0x7f800000,%o5 +.up11: + ba .co11 + or %g0,_0x7f800000,%o5 +.up12: + ba .co12 + or %g0,_0x7fffffff,%o5 +.up13: + ba .co13 + or %g0,_0x7fffffff,%o5 +.up14: + ba .co14 + or %g0,_0x7f800000,%o5 +.up15: + ba .co15 + or %g0,_0x7f800000,%o5 +.up16: + ba .co16 + or %g0,_0x7fffffff,%o5 +.up17: + ba .co17 + or %g0,_0x7fffffff,%o5 +.up18: + ba .co18 + or %g0,_0x7f800000,%o5 +.up19: + ba .co19 + or %g0,_0x7f800000,%o5 +.up20: + ba .co20 + or %g0,_0x7fffffff,%o5 +.up21: + ba .co21 + or %g0,_0x7fffffff,%o5 +.up22: + ba .co22 + or %g0,_0x7f800000,%o5 +.up23: + ba .co23 + or %g0,_0x7f800000,%o5 +.exit: /* review note: function return - ret, with the register-window restore executed in its delay slot. */ + ret + restore + SET_SIZE(__vatan2f) + diff --git a/usr/src/libm/src/mvec/vis/__vatanf.S b/usr/src/libm/src/mvec/vis/__vatanf.S new file mode 100644 index 0000000..b7191de --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vatanf.S @@ -0,0 +1,1891 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vatanf.S 1.7 06/01/23 SMI" + + .file "__vatanf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01 + .word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01 + .word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01 + .word 0x00020000, 0x00000000 ! DC1: increment added to the high word of x by vis_fpadd32 before masking with DC2 + .word 0xfffc0000, 0x00000000 ! DC2: mask keeping only the top 14 bits of the high word (sign, exponent, 2 mantissa bits) + .word 0x7ff00000, 0x00000000 ! DC3: exponent-field mask applied to div by vis_fand + .word 0x3ff00000, 0x00000000 ! DONE = 1.0 + .word 0x40000000, 0x00000000 ! DTWO = 2.0 + +! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127] + + .word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6 + .word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91 + .word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac + .word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26 + .word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd + .word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b + .word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741 + .word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24 + .word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f + .word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427 + .word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225 + .word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca + .word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6 + .word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f + .word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867 + .word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397 + .word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f + .word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805 + .word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5 + .word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60 + .word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce + .word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8 + .word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c + .word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d + .word 0x7fd23eb7, 0x7fd22a01,
0x7fd21579, 0x7fd20120 + .word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c + .word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d + .word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30 + .word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244 + .word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab + .word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949 + .word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804 + + .word 0x3ff00000, 0x00000000 ! sign_arr[0] = 1.0, selected when the sign bit of ux is clear + .word 0xbff00000, 0x00000000 ! sign_arr[1] = -1.0, selected when the sign bit of ux is set + +! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155] + + .word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f + .word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf + .word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2 + .word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3 + .word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19 + .word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30 + .word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195 + .word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302 + .word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a + .word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1 + .word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c + .word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c + .word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700 + .word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712 + .word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9 + .word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444 + .word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d + .word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4 + .word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c + .word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2 + .word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc + .word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd + .word 0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4 + .word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634 + .word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e + .word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f + .word
0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8 + .word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5 + .word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857 + .word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd + .word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054 + .word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0 + .word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f + .word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc + .word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45 + .word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f + .word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665 + .word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0 + .word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5 + .word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27 + .word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38 + .word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2 + .word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849 + .word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff + .word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619 + .word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa + .word 0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105 + .word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7 + .word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc + .word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb + .word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28 + .word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1 + .word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94 + .word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6 + .word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395 + .word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7 + .word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e + .word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5 + .word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2 + .word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886 + .word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5 + .word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf + .word 0x3ff921f9,
0x54442d18, 0x3ff921f9, 0xbaaa937f + .word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4 + .word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b + .word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886 + .word 0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2 + .word 0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf + .word 0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5 + .word 0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4 + .word 0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f + .word 0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886 + .word 0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b + .word 0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf + .word 0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2 + .word 0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4 + .word 0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5 + .word 0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886 + +#define DC2 %f2 +#define DTWO %f6 +#define DONE %f52 +#define K0 %f54 +#define K1 %f56 +#define K2 %f58 +#define DC1 %f60 +#define DC3 %f62 + +#define stridex %o2 +#define stridey %o3 +#define MASK_0x7fffffff %i1 +#define MASK_0x100000 %i5 + +#define tmp_px STACK_BIAS-32 +#define tmp_counter STACK_BIAS-24 +#define tmp0 STACK_BIAS-16 +#define tmp1 STACK_BIAS-8 + +#define counter %l1 + +! size of the temporary stack area holding tmp_px, tmp_counter, tmp0 and tmp1 - must be a multiple of 16 for V9 +#define tmps 0x20 + +!-------------------------------------------------------------------- +! !!!!! vatanf algorithm !!!!! +! ux = ((int*)px)[0]; +! ax = ux & 0x7fffffff; +! +! if ( ax < 0x39b89c55 ) +! { +! *(int*)py = ux; +! goto next; +! } +! +! if ( ax > 0x4c700518 ) +! { +! if ( ax > 0x7f800000 ) +! { +! float fpx = fabsf(*px); +! fpx *= fpx; +! *py = fpx; +! goto next; +! } +! +! sign = ux & 0x80000000; +! sign |= pi_2; +! *(int*)py = sign; +! goto next; +! } +! +! ftmp0 = *px; +! x = (double)ftmp0; +! px += stridex; +! y = vis_fpadd32(x,DC1); +! y = vis_fand(y,DC2); +! div = x * y; +! xx = x - y; +! div += DONE; +! i = ((unsigned long long*)&div)[0]; +!
y0 = vis_fand(div,DC3); +! i >>= 43; +! i &= 508; +! *(float*)&dtmp0 = *(float*)((char*)parr0 + i); +! y0 = vis_fpsub32(dtmp0, y0); +! dtmp0 = div0 * y0; +! dtmp0 = DTWO - dtmp0; +! y0 *= dtmp0; +! dtmp1 = div0 * y0; +! dtmp1 = DTWO - dtmp1; +! y0 *= dtmp1; +! ax = ux & 0x7fffffff; +! ax += 0x00100000; +! ax >>= 18; +! ax &= -8; +! res = *(double*)((char*)parr1 + ax); +! ux >>= 28; +! ux &= -8; +! dtmp0 = *(double*)((char*)sign_arr + ux); +! res *= dtmp0; +! xx *= y0; +! x2 = xx * xx; +! dtmp0 = K2 * x2; +! dtmp0 += K1; +! dtmp0 *= x2; +! dtmp0 += K0; +! dtmp0 *= xx; +! res += dtmp0; +! ftmp0 = (float)res; +! py[0] = ftmp0; +! py += stridey; +!-------------------------------------------------------------------- + + ENTRY(__vatanf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + + st %i0,[%fp+tmp_counter] + + sllx %i2,2,stridex + sllx %i4,2,stridey + + or %g0,%i3,%o1 + stx %i1,[%fp+tmp_px] + + ldd [%l2],K0 + ldd [%l2+8],K1 + ldd [%l2+16],K2 + ldd [%l2+24],DC1 + ldd [%l2+32],DC2 + ldd [%l2+40],DC3 + ldd [%l2+48],DONE + ldd [%l2+56],DTWO + + add %l2,64,%i4 + add %l2,64+512,%l0 + add %l2,64+512+16-0x1cc*8,%l7 + + sethi %hi(0x100000),MASK_0x100000 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + add MASK_0x7fffffff,1023,MASK_0x7fffffff + + sethi %hi(0x39b89c00),%o4 + add %o4,0x55,%o4 + sethi %hi(0x4c700400),%o5 + add %o5,0x118,%o5 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + + and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff; + lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; + + cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55 + bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 ) + nop + + cmp %l5,%o5 ! (0_0) ax ? 0x4c700518 + bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 ) + nop + + add %i3,stridex,%l5 ! px += stridex; + fstod %f0,%f22 ! (0_0) ftmp0 = *px; + mov %l6,%i3 + + lda [%l5]0x82,%l6 ! 
(1_0) ux = ((int*)px)[0]; + + and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; + lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; + add %l5,stridex,%l4 ! px += stridex; + fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 + bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 ) + nop +.cont0: + cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 + bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 ) + nop +.cont1: + fstod %f0,%f20 ! (1_0) x = (double)ftmp0; + mov %l6,%l5 + + fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); + + fmuld %f22,%f26,%f32 ! (0_0) div = x * y; + + lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; + fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; + + and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; + lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; + add %l4,stridex,%l3 ! px += stridex; + fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 + bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (0_0) div += done; +.cont2: + cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 + bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 ) + nop +.cont3: + std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%l4 + fstod %f0,%f18 ! (2_0) x = (double)ftmp0; + + fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); + + fmuld %f20,%f26,%f30 ! (1_0) div = x * y; + + lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; + fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; + + and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; + lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; + add %l3,stridex,%i0 ! px += stridex; + fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 + bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (1_0) div += done; +.cont4: + cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 + bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 ) + nop +.cont5: + std %f30,[%fp+tmp1] ! 
(1_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%l3 + fstod %f0,%f16 ! (3_0) x = (double)ftmp0; + + ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); + + fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); + + srlx %o0,43,%o0 ! (0_0) i >>= 43; + + and %o0,508,%l6 ! (0_0) i &= 508; + + ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f18,%f26,%f28 ! (2_0) div = x * y; + + lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; + fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; + + fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); + + and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; + lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; + add %i0,stridex,%i2 ! px += stridex; + fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 + bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (2_0) div += done; +.cont6: + fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 + bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 ) + nop +.cont7: + std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%i0 + fstod %f0,%f14 ! (4_0) x = (double)ftmp0; + + ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); + + fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); + + fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; + srlx %g1,43,%g1 ! (1_0) i >>= 43; + + and %g1,508,%l6 ! (1_0) i &= 508; + + ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f16,%f26,%f34 ! (3_0) div = x * y; + + lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; + fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; + + fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); + add %i2,stridex,%l2 ! px += stridex; + + fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! 
(5_0) ax = ux & 0x7fffffff; + lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; + fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 + bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (3_0) div += done; +.cont8: + fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 + bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 ) + nop +.cont9: + std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%i2 + fstod %f0,%f36 ! (5_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; + ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); + + fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); + + fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; + srlx %o0,43,%o0 ! (2_0) i >>= 43; + + and %o0,508,%l6 ! (2_0) i &= 508; + fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; + + ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f14,%f26,%f32 ! (4_0) div = x * y; + + lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; + fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; + add %l2,stridex,%g5 ! px += stridex; + fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; + lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; + fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 + bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (4_0) div += done; +.cont10: + fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 + bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 ) + nop +.cont11: + fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; + mov %l6,%l2 + std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f10 ! 
(6_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; + ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); + + fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); + + fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; + srlx %g1,43,%g1 ! (3_0) i >>= 43; + fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (3_0) i &= 508; + mov %i3,%o7 + fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; + + ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f36,%f26,%f30 ! (5_0) div = x * y; + srl %o7,28,%g1 ! (0_0) ux >>= 28; + add %g5,stridex,%i3 ! px += stridex; + + fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; + fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; + and %g1,-8,%g1 ! (0_0) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; + fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 + bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (5_0) div += done; +.cont12: + fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 + bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; +.cont13: + fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; + srl %o0,18,%o7 ! (0_0) ax >>= 18; + std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f8 ! (7_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (0_0) ux &= -8; + ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! 
(6_0) y = vis_fand(y,dconst2); + + add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax; + mov %l6,%g5 + ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; + srlx %o0,43,%o0 ! (4_0) i >>= 43; + ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); + fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); + + fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; + and %o0,508,%l6 ! (4_0) i &= 508; + mov %l5,%o7 + fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; + + fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; + srl %o7,28,%l5 ! (1_0) ux >>= 28; + ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f10,%f26,%f28 ! (6_0) div = x * y; + faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; + + subcc counter,8,counter + bneg,pn %icc,.tail + or %g0,%o1,%o0 + + add %fp,tmp0,%g1 + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + + ba .main_loop + add %i3,stridex,%l5 ! px += stridex; + + .align 16 +.main_loop: + fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; + and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (7_1) py[0] = ftmp0; + fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; + + fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; + srl %o7,28,%o7 ! (1_0) ux >>= 28; + add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; + fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff; + lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; + fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; + cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55 + bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (6_1) div += done; +.cont14: + fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; + cmp %o1,%o5 ! (0_0) ax ? 0x4c700518 + bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! 
(1_1) dtmp0 += K1; +.cont15: + fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; + srl %g1,18,%o1 ! (1_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f22 ! (0_0) ftmp0 = *px; + + fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; + and %o1,-8,%o1 ! (1_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2); + + ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); + and %o7,-8,%o7 ! (1_1) ux &= -8; + mov %l6,%i3 + faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; + + fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; + nop + ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); + fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); + + fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (5_1) i >>= 43; + mov %l4,%o7 + fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (5_1) i &= 508; + nop + bn,pn %icc,.exit + fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; + + fmuld %f8,%f26,%f34 ! (7_1) div = x * y; + srl %o7,28,%o1 ! (2_1) ux >>= 28; + lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (0_1) py[0] = ftmp0; + fsubd %f8,%f26,%f8 ! (7_1) xx = x - y; + + fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; + add %l5,stridex,%l4 ! px += stridex; + add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; + fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; + lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; + fpadd32 %f22,DC1,%f24 ! 
(0_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; + cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 + bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (7_1) div += done; +.cont16: + fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 + bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; +.cont17: + fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; + srl %o0,18,%o7 ! (2_1) ax >>= 18; + std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f20 ! (1_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; + ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; + and %o1,-8,%o1 ! (2_1) ux &= -8; + fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; + and %o7,-8,%o7 ! (2_1) ax &= -8; + ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); + mov %l6,%l5 + fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); + + fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (6_1) i >>= 43; + mov %l3,%o7 + fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (6_1) i &= 508; + add %l4,stridex,%l3 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; + + fmuld %f22,%f26,%f32 ! (0_0) div = x * y; + srl %o7,28,%o1 ! (3_1) ux >>= 28; + lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! 
(1_1) py[0] = ftmp0; + fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; + and %o1,-8,%o1 ! (3_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; + lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; + fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; + cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 + bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (0_0) div += done; +.cont18: + fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 + bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; +.cont19: + fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; + srl %g1,18,%o7 ! (3_1) ax >>= 18; + std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f18 ! (2_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (3_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; + mov %l6,%l4 + ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; + ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) + nop + fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3); + + fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (7_1) i >>= 43; + mov %i0,%o7 + fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (7_1) i &= 508; + add %l3,stridex,%i0 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! 
(7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; + + fmuld %f20,%f26,%f30 ! (1_0) div = x * y; + srl %o7,28,%o1 ! (4_1) ux >>= 28; + lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (2_1) py[0] = ftmp0; + fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; + and %o1,-8,%o1 ! (4_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; + lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; + fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; + cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 + bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (1_0) div += done; +.cont20: + fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 + bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; +.cont21: + fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; + srl %o0,18,%o7 ! (4_1) ax >>= 18; + std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f16 ! (3_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (4_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; + nop + ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); + mov %l6,%l3 + fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; + fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); + + fmuld %f36,%f36,%f50 ! 
(5_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (0_0) i >>= 43; + mov %i2,%o7 + fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (0_0) i &= 508; + add %i0,stridex,%i2 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; + + fmuld %f18,%f26,%f28 ! (2_0) div = x * y; + srl %o7,28,%o1 ! (5_1) ux >>= 28; + lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (3_1) py[0] = ftmp0; + fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; + and %o1,-8,%o1 ! (5_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; + lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; + fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; + cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 + bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (2_0) div += done; +.cont22: + fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 + bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; +.cont23: + fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; + srl %g1,18,%o7 ! (5_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f14 ! (4_0) x = (double)ftmp0; + + fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (5_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! 
(3_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; + mov %l6,%i0 + ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; + fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); + + fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (1_0) i >>= 43; + mov %l2,%o7 + fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (1_0) i &= 508; + add %i2,stridex,%l2 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; + + fmuld %f16,%f26,%f34 ! (3_0) div = x * y; + srl %o7,28,%o1 ! (6_1) ux >>= 28; + lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (4_1) py[0] = ftmp0; + fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; + and %o1,-8,%o1 ! (6_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; + lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; + fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; + cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 + bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (3_0) div += done; +.cont24: + fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 + bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! 
(6_1) dtmp0 += K1; +.cont25: + fmuld %f8,%f26,%f8 ! (7_1) xx *= y0; + srl %o0,18,%o7 ! (6_1) ax >>= 18; + std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f36 ! (5_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (6_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; + mov %l6,%i2 + ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); + + fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (2_0) i >>= 43; + mov %g5,%o7 + fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (2_0) i &= 508; + add %l2,stridex,%g5 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; + + fmuld %f14,%f26,%f32 ! (4_0) div = x * y; + srl %o7,28,%o1 ! (7_1) ux >>= 28; + lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (5_1) py[0] = ftmp0; + fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000; + and %o1,-8,%o1 ! (7_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; + lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; + fpadd32 %f36,DC1,%f24 ! 
(5_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; + cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 + bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (4_0) div += done; +.cont26: + fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 + bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1; +.cont27: + fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; + srl %g1,18,%o7 ! (7_1) ax >>= 18; + std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f10 ! (6_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (7_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; + mov %l6,%l2 + ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2; + fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); + + fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; + srlx %g1,43,%g1 ! (3_0) i >>= 43; + mov %i3,%o7 + fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (3_0) i &= 508; + add %g5,stridex,%i3 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; + + fmuld %f36,%f26,%f30 ! (5_0) div = x * y; + srl %o7,28,%o1 ! (0_0) ux >>= 28; + lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (6_1) py[0] = ftmp0; + fsubd %f36,%f26,%f36 ! 
(5_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; + and %o1,-8,%o1 ! (0_0) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; + fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx; + cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 + bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (5_0) div += done; +.cont28: + fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 + bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; +.cont29: + fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; + srl %o0,18,%o7 ! (0_0) ax >>= 18; + std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f8 ! (7_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (0_0) ax &= -8; + ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (7_1) res += dtmp0; + subcc counter,8,counter + ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; + mov %l6,%g5 + ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); + fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); + + fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; + srlx %o0,43,%l6 ! (4_0) i >>= 43; + mov %l5,%o7 + fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; + + add %g1,stridey,%o0 ! py += stridey; + and %l6,508,%l6 ! (4_0) i &= 508; + bn,pn %icc,.exit + fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; + ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + add %i3,stridex,%l5 ! 
px += stridex; + fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res; + + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + fmuld %f10,%f26,%f28 ! (6_0) div = x * y; + bpos,pt %icc,.main_loop + faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; + + srl %o7,28,%l5 ! (1_0) ux >>= 28; + st %f12,[%g1] ! (7_1) py[0] = ftmp0; + +.tail: + addcc counter,7,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; + and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff; + fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; + + fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; + and %l5,-8,%l5 ! (1_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; + + fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; + faddd DONE,%f28,%f28 ! (6_1) div += done; + + fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; + + fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; + srl %g1,18,%o7 ! (1_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; + + fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (1_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; + + faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; + add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax; + ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; + fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); + ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; + srlx %g1,43,%g1 ! (5_1) i >>= 43; + + and %g1,508,%l6 ! (5_1) i &= 508; + mov %l4,%o7 + fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! 
(0_1) ftmp0 = (float)res; + + srl %o7,28,%l4 ! (2_1) ux >>= 28; + st %f12,[%o0] ! (0_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff; + + fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; + and %l4,-8,%l4 ! (2_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; + + fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; + + fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; + + fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; + srl %o0,18,%o7 ! (2_1) ax >>= 18; + + fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (2_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; + + faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; + add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax; + ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); + ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; + srlx %o0,43,%o0 ! (6_1) i >>= 43; + + and %o0,508,%l6 ! (6_1) i &= 508; + mov %l3,%o7 + fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; + + srl %o7,28,%l3 ! (3_1) ux >>= 28; + st %f12,[%g1] ! (1_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%g1 ! 
(3_1) ax = ux & 0x7fffffff; + + fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; + and %l3,-8,%l3 ! (3_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; + + fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; + + fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; + + fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; + srl %g1,18,%o7 ! (3_1) ax >>= 18; + + fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (3_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; + add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax; + ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) + + fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; + + mov %i0,%o7 + fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; + + srl %o7,28,%i0 ! (4_1) ux >>= 28; + st %f12,[%o0] ! (2_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff; + + fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; + and %i0,-8,%i0 ! (4_1) ux &= -8; + + fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; + + fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; + + fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; + srl %o0,18,%o7 ! (4_1) ax >>= 18; + + fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (4_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; + add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax; + ldd [%l0+%i0],%f48 ! 
(4_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; + + mov %i2,%o7 + fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; + + srl %o7,28,%i2 ! (5_1) ux >>= 28; + st %f12,[%g1] ! (3_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff; + + fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; + and %i2,-8,%i2 ! (5_1) ux &= -8; + + fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; + + fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; + srl %g1,18,%o7 ! (5_1) ax >>= 18; + + and %o7,-8,%o7 ! (5_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; + add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax; + ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; + + mov %l2,%o7 + + fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; + + srl %o7,28,%l2 ! (6_1) ux >>= 28; + st %f12,[%o0] ! (4_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff; + + add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; + and %l2,-8,%l2 ! (6_1) ux &= -8; + + fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; + + srl %o0,18,%o7 ! 
(6_1) ax >>= 18; + + and %o7,-8,%o7 ! (6_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; + add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax; + ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; + + st %f12,[%g1] ! (5_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; + + faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; + + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; + + st %f12,[%o0] ! (6_1) py[0] = ftmp0; + + ba .begin + or %g0,%g1,%o1 ! py += stridey; + +.exit: + ret + restore %g0,%g0,%g0 + + .align 16 +.spec0: + add %i3,stridex,%i3 ! px += stridex; + sub counter,1,counter + st %l6,[%o1] ! *(int*)py = ux; + + ba .begin1 + add %o1,stridey,%o1 ! py += stridey; + + .align 16 +.spec1: + sethi %hi(0x7f800000),%l3 + sethi %hi(0x3fc90c00),%l4 ! pi_2 + + sethi %hi(0x80000000),%o0 + add %l4,0x3db,%l4 ! pi_2 + + cmp %l5,%l3 ! if ( ax > 0x7f800000 ) + bg,a,pn %icc,1f + fabss %f0,%f0 ! fpx = fabsf(*px); + + and %l6,%o0,%l6 ! sign = ux & 0x80000000; + + or %l6,%l4,%l6 ! sign |= pi_2; + + add %i3,stridex,%i3 ! px += stridex; + sub counter,1,counter + st %l6,[%o1] ! *(int*)py = sign; + + ba .begin1 + add %o1,stridey,%o1 ! py += stridey; + +1: + fmuls %f0,%f0,%f0 ! fpx *= fpx; + + add %i3,stridex,%i3 ! px += stridex + sub counter,1,counter + st %f0,[%o1] ! *py = fpx; + + ba .begin1 + add %o1,stridey,%o1 ! 
py += stridey; + + .align 16 +.update0: + cmp counter,1 + fzeros %f0 + ble,a .cont0 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + fzeros %f0 + ble,a .cont1 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + fzeros %f0 + ble,a .cont2 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + fzeros %f0 + ble,a .cont3 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + fzeros %f0 + ble,a .cont4 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + fzeros %f0 + ble,a .cont5 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + fzeros %f0 + ble,a .cont6 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + fzeros %f0 + ble,a .cont7 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + fzeros %f0 + ble,a .cont8 + sethi 
%hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + fzeros %f0 + ble,a .cont9 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + cmp counter,6 + fzeros %f0 + ble,a .cont10 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,6 + fzeros %f0 + ble,a .cont11 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont11 + or %g0,6,counter + + .align 16 +.update12: + cmp counter,7 + fzeros %f0 + ble,a .cont12 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont12 + or %g0,7,counter + + .align 16 +.update13: + cmp counter,7 + fzeros %f0 + ble,a .cont13 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont13 + or %g0,7,counter + + .align 16 +.update14: + cmp counter,0 + fzeros %f0 + ble,a .cont14 + sethi %hi(0x3fffffff),%l6 + + sub counter,0,counter + st counter,[%fp+tmp_counter] + + stx %i3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont14 + or %g0,0,counter + + .align 16 +.update15: + cmp counter,0 + fzeros %f0 + ble,a .cont15 + sethi %hi(0x3fffffff),%l6 + + sub counter,0,counter + st counter,[%fp+tmp_counter] + + stx %i3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont15 + or %g0,0,counter + + .align 16 +.update16: + cmp counter,1 + fzeros %f0 + ble,a .cont16 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st 
counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont16 + or %g0,1,counter + + .align 16 +.update17: + cmp counter,1 + fzeros %f0 + ble,a .cont17 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont17 + or %g0,1,counter + + .align 16 +.update18: + cmp counter,2 + fzeros %f0 + ble,a .cont18 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont18 + or %g0,2,counter + + .align 16 +.update19: + cmp counter,2 + fzeros %f0 + ble,a .cont19 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont19 + or %g0,2,counter + + .align 16 +.update20: + cmp counter,3 + fzeros %f0 + ble,a .cont20 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont20 + or %g0,3,counter + + .align 16 +.update21: + cmp counter,3 + fzeros %f0 + ble,a .cont21 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont21 + or %g0,3,counter + + .align 16 +.update22: + cmp counter,4 + fzeros %f0 + ble,a .cont22 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont22 + or %g0,4,counter + + .align 16 +.update23: + cmp counter,4 + fzeros %f0 + ble,a .cont23 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont23 + or %g0,4,counter + + .align 16 +.update24: + cmp counter,5 + fzeros %f0 + ble,a .cont24 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi 
%hi(0x3fffffff),%l6 + ba .cont24 + or %g0,5,counter + + .align 16 +.update25: + cmp counter,5 + fzeros %f0 + ble,a .cont25 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont25 + or %g0,5,counter + + .align 16 +.update26: + cmp counter,6 + fzeros %f0 + ble,a .cont26 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont26 + or %g0,6,counter + + .align 16 +.update27: + cmp counter,6 + fzeros %f0 + ble,a .cont27 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont27 + or %g0,6,counter + + .align 16 +.update28: + cmp counter,7 + fzeros %f0 + ble,a .cont28 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont28 + or %g0,7,counter + + .align 16 +.update29: + cmp counter,7 + fzeros %f0 + ble,a .cont29 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont29 + or %g0,7,counter + + SET_SIZE(__vatanf) + diff --git a/usr/src/libm/src/mvec/vis/__vcos.S b/usr/src/libm/src/mvec/vis/__vcos.S new file mode 100644 index 0000000..4cfee05 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vcos.S @@ -0,0 +1,3078 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vcos.S 1.8 06/01/23 SMI" + + .file "__vcos.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0x3ec718e3,0xa6972785 + .word 0x3ef9fd39,0x94293940 + .word 0xbf2a019f,0x75ee4be1 + .word 0xbf56c16b,0xba552569 + .word 0x3f811111,0x1108c703 + .word 0x3fa55555,0x554f5b35 + .word 0xbfc55555,0x555554d0 + .word 0xbfdfffff,0xffffff85 + .word 0x3ff00000,0x00000000 + .word 0xbfc55555,0x5551fc28 + .word 0x3f811107,0x62eacc9d + .word 0xbfdfffff,0xffff6328 + .word 0x3fa55551,0x5f7acf0c + .word 0x3fe45f30,0x6dc9c883 + .word 0x43380000,0x00000000 + .word 0x3ff921fb,0x54400000 + .word 0x3dd0b461,0x1a600000 + .word 0x3ba3198a,0x2e000000 + .word 0x397b839a,0x252049c1 + .word 0x80000000,0x00004000 + .word 0xffff8000,0x00000000 ! N.B.: low-order words used + .word 0x3fc90000,0x80000000 ! for sign bit hacking; see + .word 0x3fc40000,0x00000000 ! references to "thresh" below + +#define p4 0x0 +#define q4 0x08 +#define p3 0x10 +#define q3 0x18 +#define p2 0x20 +#define q2 0x28 +#define p1 0x30 +#define q1 0x38 +#define one 0x40 +#define pp1 0x48 +#define pp2 0x50 +#define qq1 0x58 +#define qq2 0x60 +#define invpio2 0x68 +#define round 0x70 +#define pio2_1 0x78 +#define pio2_2 0x80 +#define pio2_3 0x88 +#define pio2_3t 0x90 +#define f30val 0x98 +#define mask 0xa0 +#define thresh 0xa8 + +! 
local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define n2 STACK_BIAS-0x24 +#define n1 STACK_BIAS-0x28 +#define n0 STACK_BIAS-0x2c +#define x2_1 STACK_BIAS-0x40 +#define x1_1 STACK_BIAS-0x50 +#define x0_1 STACK_BIAS-0x60 +#define y2_0 STACK_BIAS-0x70 +#define y1_0 STACK_BIAS-0x80 +#define y0_0 STACK_BIAS-0x90 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x90 + +!-------------------------------------------------------------------- +! define pipes for easier reading + +#define P0_f0 %f0 +#define P0_f1 %f1 +#define P0_f2 %f2 +#define P0_f3 %f3 +#define P0_f4 %f4 +#define P0_f5 %f5 +#define P0_f6 %f6 +#define P0_f7 %f7 +#define P0_f8 %f8 +#define P0_f9 %f9 + +#define P1_f10 %f10 +#define P1_f11 %f11 +#define P1_f12 %f12 +#define P1_f13 %f13 +#define P1_f14 %f14 +#define P1_f15 %f15 +#define P1_f16 %f16 +#define P1_f17 %f17 +#define P1_f18 %f18 +#define P1_f19 %f19 + +#define P2_f20 %f20 +#define P2_f21 %f21 +#define P2_f22 %f22 +#define P2_f23 %f23 +#define P2_f24 %f24 +#define P2_f25 %f25 +#define P2_f26 %f26 +#define P2_f27 %f27 +#define P2_f28 %f28 +#define P2_f29 %f29 + +! define __vlibm_TBL_sincos_hi & lo for easy reading + +#define SC_HI %l3 +#define SC_LO %l4 + +! define constants for easy reading + +#define C_q1 %f46 +#define C_q2 %f48 +#define C_q3 %f50 +#define C_q4 %f52 + +! one ( 1 ) uno eins echi un +#define C_ONE %f54 +#define C_ONE_LO %f55 + +! masks +#define MSK_SIGN %i5 +#define MSK_BIT31 %f30 +#define MSK_BIT13 %f31 +#define MSK_BITSHI17 %f44 + + +! constants for pp and qq +#define C_pp1 %f56 +#define C_pp2 %f58 +#define C_qq1 %f60 +#define C_qq2 %f62 + +! sign mask +#define C_signM %i5 + +#define LIM_l5 %l5 +#define LIM_l6 %l6 +! when in pri range, using value as transition from poly to table. +! for Medium range,change use of %l6 and use to keep track of biguns. 
+#define LIM_l7 %l7 + +!-------------------------------------------------------------------- + + + ENTRY(__vcos) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(g5) + PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) + PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) + PIC_SET(g5,constants,o0) + mov %o0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + +! ========== primary range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 0x3fc40000 +! l6 0x3e400000 +! l7 0x3fe921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 oy0 +! o4 oy1 +! o5 oy2 +! o7 scratch + +! f0 x0 +! f2 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 +! f42 +! f44 0xffff800000000000 +! f46 q1 +! f48 q2 +! f50 q3 +! f52 q4 +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + sethi %hi(0x80000000),MSK_SIGN ! load/set up constants + sethi %hi(0x3fc40000),LIM_l5 + sethi %hi(0x3e400000),LIM_l6 + sethi %hi(0x3fe921fb),LIM_l7 + or LIM_l7,%lo(0x3fe921fb),LIM_l7 + ldd [%g1+f30val],MSK_BIT31 + ldd [%g1+mask],MSK_BITSHI17 + ldd [%g1+q1],C_q1 + ldd [%g1+q2],C_q2 + ldd [%g1+q3],C_q3 + ldd [%g1+q4],C_q4 + ldd [%g1+one],C_ONE + ldd [%g1+pp1],C_pp1 + ldd [%g1+pp2],C_pp2 + ldd [%g1+qq1],C_qq1 + ldd [%g1+qq2],C_qq2 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,x0_1,%o3 ! precondition loop + add %fp,x0_1,%o4 + add %fp,x0_1,%o5 + ld [%i1],%l0 ! 
hx = *x + ld [%i1],P0_f0 + ld [%i1+4],P0_f1 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + add %i1,%i2,%i1 ! x += stridex + + ba,pt %icc,.loop0 +!delay slot + nop + + .align 32 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,LIM_l6,%g1 + sub LIM_l7,%l0,%o7 + fands P0_f0,MSK_BIT31,P0_f9 ! save signbit + + lda [%i1]%asi,P1_f10 + orcc %o7,%g1,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,P1_f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop1 + +! delay slot + andn %l1,MSK_SIGN,%l1 + add %i1,%i2,%i1 ! x += stridex + fabsd P0_f0,P0_f0 + fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,LIM_l6,%g1 + sub LIM_l7,%l1,%o7 + fands P1_f10,MSK_BIT31,P1_f19 ! save signbit + + lda [%i1]%asi,P2_f20 + orcc %o7,%g1,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,P2_f21 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 + +! delay slot + andn %l2,MSK_SIGN,%l2 + add %i1,%i2,%i1 ! x += stridex + fabsd P1_f10,P1_f10 + fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only + +.loop2: + st P0_f6,[%o3] + sub %l2,LIM_l6,%g1 + sub LIM_l7,%l2,%o7 + fands P2_f20,MSK_BIT31,P2_f29 ! save signbit + + st P0_f7,[%o3+4] + orcc %g1,%o7,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + add %i3,%i4,%i3 ! y += stridey + cmp %l0,LIM_l5 + fabsd P2_f20,P2_f20 + bl,pn %icc,.case4 + +! delay slot + st P1_f16,[%o4] + cmp %l1,LIM_l5 + fpadd32s P0_f0,MSK_BIT13,P0_f8 + bl,pn %icc,.case2 + +! delay slot + st P1_f17,[%o4+4] + cmp %l2,LIM_l5 + fpadd32s P1_f10,MSK_BIT13,P1_f18 + bl,pn %icc,.case1 + +! 
delay slot + st P2_f26,[%o5] + mov %o0,%o3 + sethi %hi(0x3fc3c000),%o7 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + + st P2_f27,[%o5+4] + fand P0_f8,MSK_BITSHI17,P0_f2 + mov %o1,%o4 + + fand P1_f18,MSK_BITSHI17,P1_f12 + mov %o2,%o5 + sub %l0,%o7,%l0 + + fand P2_f28,MSK_BITSHI17,P2_f22 + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + add SC_HI,8,%g1;add SC_LO,8,%o7 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P2_f24,%f40,P2_f24 + lda [%i1+4]%asi,P0_f1 + + fmuld P0_f6,%f34,P0_f6 + add %i1,%i2,%i1 ! 
x += stridex + + fmuld P1_f16,%f38,P1_f16 + + fmuld P2_f26,%f42,P2_f26 + + fsubd P0_f6,P0_f4,P0_f6 + + fsubd P1_f16,P1_f14,P1_f16 + + fsubd P2_f26,P2_f24,P2_f26 + + fsubd P0_f2,P0_f6,P0_f6 + + fsubd P1_f12,P1_f16,P1_f16 + + fsubd P2_f22,P2_f26,P2_f26 + + faddd P0_f6,%f32,P0_f6 + + faddd P1_f16,%f36,P1_f16 + + faddd P2_f26,%f40,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case1: + st P2_f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + fand P0_f8,MSK_BITSHI17,P0_f2 + + sub %l0,%o7,%l0 + sub %l1,%o7,%l1 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fand P1_f18,MSK_BITSHI17,P1_f12 + fmuld P2_f20,P2_f20,P2_f22 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmuld P2_f22,C_q4,P2_f24 + mov %o2,%o5 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P0_f6,%f34,P0_f6 + lda [%i1+4]%asi,P0_f1 + + fmuld P1_f16,%f38,P1_f16 + add %i1,%i2,%i1 ! x += stridex + + fmuld P2_f22,P2_f24,P2_f24 + + fsubd P0_f6,P0_f4,P0_f6 + + fsubd P1_f16,P1_f14,P1_f16 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + fsubd P0_f2,P0_f6,P0_f6 + + fsubd P1_f12,P1_f16,P1_f16 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + + faddd P0_f6,%f32,P0_f6 + + faddd P1_f16,%f36,P1_f16 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case2: + st P2_f26,[%o5] + cmp %l2,LIM_l5 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + bl,pn %icc,.case3 + +! delay slot + st P2_f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + fand P0_f8,MSK_BITSHI17,P0_f2 + + sub %l0,%o7,%l0 + sub %l2,%o7,%l2 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fand P2_f28,MSK_BITSHI17,P2_f22 + fmuld P1_f10,P1_f10,P1_f12 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmuld P1_f12,C_q4,P1_f14 + mov %o1,%o4 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + faddd P1_f14,C_q3,P1_f14 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + faddd P1_f14,C_q2,P1_f14 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + faddd 
P1_f14,C_q1,P1_f14 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P2_f24,%f40,P2_f24 + lda [%i1]%asi,P0_f0 + + fmuld P0_f6,%f34,P0_f6 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f26,%f42,P2_f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld P1_f12,P1_f14,P1_f14 + + fsubd P0_f6,P0_f4,P0_f6 + + fsubd P2_f26,P2_f24,P2_f26 + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + fsubd P0_f2,P0_f6,P0_f6 + + fsubd P2_f22,P2_f26,P2_f26 + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + faddd P0_f6,%f32,P0_f6 + + faddd P2_f26,%f40,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case3: + sethi %hi(0x3fc3c000),%o7 + fand P0_f8,MSK_BITSHI17,P0_f2 + fmuld P1_f10,P1_f10,P1_f12 + + sub %l0,%o7,%l0 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fmuld P2_f20,P2_f20,P2_f22 + + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fmuld P1_f12,C_q4,P1_f14 + mov %o1,%o4 + + fmuld P2_f22,C_q4,P2_f24 + mov %o2,%o5 + + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + + faddd P1_f14,C_q3,P1_f14 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P0_f2,C_pp2,P0_f6 + ldd [%g1+%l0],%f32 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_pp1,P0_f6 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + + faddd P1_f14,C_q2,P1_f14 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P0_f2,P0_f6,P0_f6 + faddd P0_f4,C_qq1,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f6,C_ONE,P0_f6 + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f14,C_q1,P1_f14 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P0_f0,P0_f6,P0_f6 + ldd [%o7+%l0],P0_f2 + + fmuld P0_f4,%f32,P0_f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fmuld P1_f12,P1_f14,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P0_f6,%f34,P0_f6 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f22,P2_f24,P2_f24 + add %i1,%i2,%i1 ! x += stridex + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + fsubd P0_f6,P0_f4,P0_f6 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + fsubd P0_f2,P0_f6,P0_f6 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + faddd P0_f6,%f32,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case4: + st P1_f17,[%o4+4] + cmp %l1,LIM_l5 + fpadd32s P1_f10,MSK_BIT13,P1_f18 + bl,pn %icc,.case6 + +! delay slot + st P2_f26,[%o5] + cmp %l2,LIM_l5 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + bl,pn %icc,.case5 + +! delay slot + st P2_f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + fand P1_f18,MSK_BITSHI17,P1_f12 + + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fand P2_f28,MSK_BITSHI17,P2_f22 + fmuld P0_f0,P0_f0,P0_f2 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd P0_f0,P0_f6 !ID for processing + fmuld P0_f2,C_q4,P0_f4 + mov %o0,%o3 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + faddd P0_f4,C_q3,P0_f4 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + fmuld P0_f2,P0_f4,P0_f4 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + faddd P0_f4,C_q2,P0_f4 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + 
+ faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q1,P0_f4 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P2_f24,%f40,P2_f24 + lda [%i1]%asi,P0_f0 + + fmuld P1_f16,%f38,P1_f16 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f26,%f42,P2_f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld P0_f2,P0_f4,P0_f4 + + fsubd P1_f16,P1_f14,P1_f16 + + fsubd P2_f26,P2_f24,P2_f26 + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + fsubd P1_f12,P1_f16,P1_f16 + + fsubd P2_f22,P2_f26,P2_f26 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + faddd P1_f16,%f36,P1_f16 + + faddd P2_f26,%f40,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case5: + sethi %hi(0x3fc3c000),%o7 + fand P1_f18,MSK_BITSHI17,P1_f12 + fmuld P0_f0,P0_f0,P0_f2 + + sub %l1,%o7,%l1 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fmuld P2_f20,P2_f20,P2_f22 + + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmovd P0_f0,P0_f6 !ID for processing + fmuld P0_f2,C_q4,P0_f4 + mov %o0,%o3 + + fmuld P2_f22,C_q4,P2_f24 + mov %o2,%o5 + + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + + faddd P0_f4,C_q3,P0_f4 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P1_f12,C_pp2,P1_f16 + ldd [%g1+%l1],%f36 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P1_f16,C_pp1,P1_f16 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + + faddd P0_f4,C_q2,P0_f4 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P1_f12,P1_f16,P1_f16 + faddd P1_f14,C_qq1,P1_f14 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P1_f16,C_ONE,P1_f16 + fmuld P1_f12,P1_f14,P1_f14 + + faddd P0_f4,C_q1,P0_f4 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P1_f10,P1_f16,P1_f16 + ldd [%o7+%l1],P1_f12 + + fmuld P1_f14,%f36,P1_f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P0_f2,P0_f4,P0_f4 + lda [%i1]%asi,P0_f0 + + fmuld P1_f16,%f38,P1_f16 + lda [%i1+4]%asi,P0_f1 + + fmuld P2_f22,P2_f24,P2_f24 + add %i1,%i2,%i1 ! x += stridex + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + fsubd P1_f16,P1_f14,P1_f16 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + fsubd P1_f12,P1_f16,P1_f16 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + faddd P1_f16,%f36,P1_f16 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case6: + st P2_f27,[%o5+4] + cmp %l2,LIM_l5 + fpadd32s P2_f20,MSK_BIT13,P2_f28 + bl,pn %icc,.case7 + +! 
delay slot + sethi %hi(0x3fc3c000),%o7 + fand P2_f28,MSK_BITSHI17,P2_f22 + fmuld P0_f0,P0_f0,P0_f2 + + sub %l2,%o7,%l2 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fmuld P1_f10,P1_f10,P1_f12 + + fsubd P2_f20,P2_f22,P2_f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd P0_f0,P0_f6 !ID for processing + fmuld P0_f2,C_q4,P0_f4 + mov %o0,%o3 + + fmuld P1_f12,C_q4,P1_f14 + mov %o1,%o4 + + fmuld P2_f20,P2_f20,P2_f22 + andn %l2,0x1f,%l2 + + faddd P0_f4,C_q3,P0_f4 + + faddd P1_f14,C_q3,P1_f14 + + fmuld P2_f22,C_pp2,P2_f26 + ldd [%g1+%l2],%f40 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_pp1,P2_f26 + fmuld P2_f22,C_qq2,P2_f24 + ldd [SC_HI+%l2],%f42 + + faddd P0_f4,C_q2,P0_f4 + + faddd P1_f14,C_q2,P1_f14 + + fmuld P2_f22,P2_f26,P2_f26 + faddd P2_f24,C_qq1,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + faddd P2_f26,C_ONE,P2_f26 + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q1,P0_f4 + + faddd P1_f14,C_q1,P1_f14 + + fmuld P2_f20,P2_f26,P2_f26 + ldd [%o7+%l2],P2_f22 + + fmuld P2_f24,%f40,P2_f24 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P0_f2,P0_f4,P0_f4 + lda [%i1]%asi,P0_f0 + + fmuld P2_f26,%f42,P2_f26 + lda [%i1+4]%asi,P0_f1 + + fmuld P1_f12,P1_f14,P1_f14 + add %i1,%i2,%i1 ! x += stridex + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + fsubd P2_f26,P2_f24,P2_f26 + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + fsubd P2_f22,P2_f26,P2_f26 + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + faddd P2_f26,%f40,P2_f26 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case7: + fmuld P0_f0,P0_f0,P0_f2 + fmovd P0_f0,P0_f6 !ID for processing + mov %o0,%o3 + + fmuld P1_f10,P1_f10,P1_f12 + mov %o1,%o4 + + fmuld P2_f20,P2_f20,P2_f22 + mov %o2,%o5 + + fmuld P0_f2,C_q4,P0_f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld P1_f12,C_q4,P1_f14 + lda [%i1]%asi,P0_f0 + + fmuld P2_f22,C_q4,P2_f24 + lda [%i1+4]%asi,P0_f1 + + faddd P0_f4,C_q3,P0_f4 + add %i1,%i2,%i1 ! x += stridex + + faddd P1_f14,C_q3,P1_f14 + + faddd P2_f24,C_q3,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q2,P0_f4 + + faddd P1_f14,C_q2,P1_f14 + + faddd P2_f24,C_q2,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + faddd P0_f4,C_q1,P0_f4 + + faddd P1_f14,C_q1,P1_f14 + + faddd P2_f24,C_q1,P2_f24 + + fmuld P0_f2,P0_f4,P0_f4 + + fmuld P1_f12,P1_f14,P1_f14 + + fmuld P2_f22,P2_f24,P2_f24 + + !!(vsin)fmuld P0_f6,P0_f4,P0_f4 + + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + + !!(vsin)fmuld P2_f20,P2_f24,P2_f24 + + faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing + + faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16 + + faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 + andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 + + nop !!(vsin) fors P0_f6,P0_f9,P0_f6 + addcc %i0,-1,%i0 + + nop !!(vsin) fors P1_f16,P1_f19,P1_f16 + bg,pt %icc,.loop0 + +! delay slot + nop !!(vsin) fors P2_f26,P2_f29,P2_f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + + .align 32 +.endloop2: + cmp %l1,LIM_l5 + bl,pn %icc,1f +! 
delay slot + fabsd P1_f10,P1_f10 + sethi %hi(0x3fc3c000),%o7 + fpadd32s P1_f10,MSK_BIT13,P1_f18 + fand P1_f18,MSK_BITSHI17,P1_f12 + sub %l1,%o7,%l1 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fsubd P1_f10,P1_f12,P1_f10 + srl %l1,10,%l1 + fmuld P1_f10,P1_f10,P1_f12 + andn %l1,0x1f,%l1 + fmuld P1_f12,C_pp2,P2_f20 + ldd [%g1+%l1],%f36 + faddd P2_f20,C_pp1,P2_f20 + fmuld P1_f12,C_qq2,P1_f14 + ldd [SC_HI+%l1],%f38 + fmuld P1_f12,P2_f20,P2_f20 + faddd P1_f14,C_qq1,P1_f14 + faddd P2_f20,C_ONE,P2_f20 + fmuld P1_f12,P1_f14,P1_f14 + fmuld P1_f10,P2_f20,P2_f20 + ldd [%o7+%l1],P1_f12 + fmuld P1_f14,%f36,P1_f14 + fmuld P2_f20,%f38,P2_f20 + fsubd P2_f20,P1_f14,P2_f20 + fsubd P1_f12,P2_f20,P2_f20 + ba,pt %icc,2f +! delay slot + faddd P2_f20,%f36,P2_f20 +1: + fmuld P1_f10,P1_f10,P1_f12 + fmuld P1_f12,C_q4,P1_f14 + faddd P1_f14,C_q3,P1_f14 + fmuld P1_f12,P1_f14,P1_f14 + faddd P1_f14,C_q2,P1_f14 + fmuld P1_f12,P1_f14,P1_f14 + faddd P1_f14,C_q1,P1_f14 + fmuld P1_f12,P1_f14,P1_f14 + !!(vsin)fmuld P1_f10,P1_f14,P1_f14 + faddd C_ONE,P1_f14,P2_f20 !!(vsin)faddd P1_f10,P1_f14,P2_f20 +2: + nop !!(vsin) fors P2_f20,P1_f19,P2_f20 + st P2_f20,[%o1] + st P2_f21,[%o1+4] + +.endloop1: + cmp %l0,LIM_l5 + bl,pn %icc,1f +! delay slot + fabsd P0_f0,P0_f0 + sethi %hi(0x3fc3c000),%o7 + fpadd32s P0_f0,MSK_BIT13,P0_f8 + fand P0_f8,MSK_BITSHI17,P0_f2 + sub %l0,%o7,%l0 + add SC_HI,8,%g1;add SC_LO,8,%o7 + fsubd P0_f0,P0_f2,P0_f0 + srl %l0,10,%l0 + fmuld P0_f0,P0_f0,P0_f2 + andn %l0,0x1f,%l0 + fmuld P0_f2,C_pp2,P2_f20 + ldd [%g1+%l0],%f32 + faddd P2_f20,C_pp1,P2_f20 + fmuld P0_f2,C_qq2,P0_f4 + ldd [SC_HI+%l0],%f34 + fmuld P0_f2,P2_f20,P2_f20 + faddd P0_f4,C_qq1,P0_f4 + faddd P2_f20,C_ONE,P2_f20 + fmuld P0_f2,P0_f4,P0_f4 + fmuld P0_f0,P2_f20,P2_f20 + ldd [%o7+%l0],P0_f2 + fmuld P0_f4,%f32,P0_f4 + fmuld P2_f20,%f34,P2_f20 + fsubd P2_f20,P0_f4,P2_f20 + fsubd P0_f2,P2_f20,P2_f20 + ba,pt %icc,2f +! 
delay slot + faddd P2_f20,%f32,P2_f20 +1: + fmuld P0_f0,P0_f0,P0_f2 + fmuld P0_f2,C_q4,P0_f4 + faddd P0_f4,C_q3,P0_f4 + fmuld P0_f2,P0_f4,P0_f4 + faddd P0_f4,C_q2,P0_f4 + fmuld P0_f2,P0_f4,P0_f4 + faddd P0_f4,C_q1,P0_f4 + fmuld P0_f2,P0_f4,P0_f4 + !!(vsin)fmuld P0_f0,P0_f4,P0_f4 + faddd C_ONE,P0_f4,P2_f20 !!(vsin)faddd P0_f0,P0_f4,P2_f20 +2: + nop !!(vsin) fors P2_f20,P0_f9,P2_f20 + st P2_f20,[%o0] + st P2_f21,[%o0+4] + +.endloop0: + st P0_f6,[%o3] ! write out the three results still held in registers + st P0_f7,[%o3+4] + st P1_f16,[%o4] + st P1_f17,[%o4+4] + st P2_f26,[%o5] + st P2_f27,[%o5+4] + +! return. finished off with only primary range arguments + + ret + restore + + + .align 32 +.range0: + cmp %l0,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. +! delay slot, annulled if branch not taken + mov 0x1,LIM_l6 ! set biguns flag or + fdtoi P0_f0,P0_f2; fmovd C_ONE,P0_f0 ; st P0_f0,[%o0] ! *y = C_ONE (not *x), with inexact if x nonzero + st P0_f1,[%o0+4] + !nop ! (vsin) fdtoi P0_f0,P0_f2 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,MSK_SIGN,%l0 ! hx &= ~0x80000000 + fmovd P1_f10,P0_f0 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range1: + cmp %l1,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. +! delay slot, annulled if branch not taken + mov 0x2,LIM_l6 ! set biguns flag or + fdtoi P1_f10,P1_f12; fmovd C_ONE,P1_f10 ; st P1_f10,[%o1] ! *y = C_ONE (not *x), with inexact if x nonzero + st P1_f11,[%o1+4] + !nop ! (vsin) fdtoi P1_f10,P1_f12 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,MSK_SIGN,%l1 ! hx &= ~0x80000000 + fmovd P2_f20,P1_f10 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range2: + cmp %l2,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. +! delay slot, annulled if branch not taken + mov 0x3,LIM_l6 !
set biguns flag or + fdtoi P2_f20,P2_f22; fmovd C_ONE,P2_f20 ; st P2_f20,[%o2] ! *y = C_ONE (not *x), with inexact if x nonzero + st P2_f21,[%o2+4] + nop ! (vsin) fdtoi P2_f20,P2_f22 +1: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],P2_f20 + ld [%i1+4],P2_f21 + andn %l2,MSK_SIGN,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.MEDIUM: + +! ========== medium range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 constants +! l6 biguns stored here : still called LIM_l6 +! l7 0x413921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 n0 +! o4 n1 +! o5 n2 +! o7 scratch + +! f0 x0 +! f2 n0,y0 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 n1,y1 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 n2,y2 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 invpio2 +! f42 round +! f44 0xffff800000000000 +! f46 pio2_1 +! f48 pio2_2 +! f50 pio2_3 +! f52 pio2_3t +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + + + PIC_SET(g5,constants,l5) + + ! %o3,%o4,%o5 need to be stored + st P0_f6,[%o3] + sethi %hi(0x413921fb),%l7 + st P0_f7,[%o3+4] + or %l7,%lo(0x413921fb),%l7 + st P1_f16,[%o4] + st P1_f17,[%o4+4] + st P2_f26,[%o5] + st P2_f27,[%o5+4] + ldd [%l5+invpio2],%f40 + ldd [%l5+round],%f42 + ldd [%l5+pio2_1],%f46 + ldd [%l5+pio2_2],%f48 + ldd [%l5+pio2_3],%f50 + ldd [%l5+pio2_3t],%f52 + std %f54,[%fp+x0_1+8] ! set up stack data + std %f54,[%fp+x1_1+8] + std %f54,[%fp+x2_1+8] + stx %g0,[%fp+y0_0+8] + stx %g0,[%fp+y1_0+8] + stx %g0,[%fp+y2_0+8] + +! branched here in the middle of the array. Need to adjust +!
for the members of the triple that were selected in the primary +! loop. + +! no adjustment since all three selected here + subcc LIM_l6,0x1,%g0 ! continue in LOOP0? + bz,a %icc,.LOOP0 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! adjust 1st member of the triple since 2nd and 3rd are done here + subcc LIM_l6,0x2,%g0 ! continue in LOOP1? + fmuld %f0,%f40,%f2 ! adj LOOP0 + bz,a %icc,.LOOP1 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! adjust 1st and 2nd members of the triple since 3rd is done here + subcc LIM_l6,0x3,%g0 ! continue in LOOP2? + !done fmuld %f0,%f40,%f2 ! adj LOOP0 + sub %i3,%i4,%i3 ! adjust to not double increment + fmuld %f10,%f40,%f12 ! adj LOOP1 + faddd %f2,%f42,%f2 ! adj LOOP1 + bz,a %icc,.LOOP2 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + + ba .LOOP0 + nop + +! -- 16 byte aligned + + .align 32 +.LOOP0: + lda [%i1]%asi,%l1 ! preload next argument + mov %i3,%o0 ! py0 = y + + lda [%i1]%asi,%f10 + cmp %l0,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG0 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP1 + +! delay slot + andn %l1,%i5,%l1 + nop + fmuld %f0,%f40,%f2 + fabsd %f54,%f54 ! a nop for alignment only + +.LOOP1: + lda [%i1]%asi,%l2 ! preload next argument + mov %i3,%o1 ! py1 = y + + lda [%i1]%asi,%f20 + cmp %l1,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG1 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP2 + +! delay slot + andn %l2,%i5,%l2 + nop + fmuld %f10,%f40,%f12 + faddd %f2,%f42,%f2 + +.LOOP2: + st %f3,[%fp+n0] + mov %i3,%o2 ! py2 = y + + cmp %l2,%l7 + add %i3,%i4,%i3 ! y += stridey + fmuld %f20,%f40,%f22 + bg,pn %icc,.BIG2 ! if hx > 0x413921fb + +! delay slot + add %l5,thresh+4,%o7 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + +! - + + add %l5,thresh,%g1 + faddd %f22,%f42,%f22 + st %f23,[%fp+n2] + + fsubd %f2,%f42,%f2 ! n + + fsubd %f12,%f42,%f12 ! n + + fsubd %f22,%f42,%f22 !
n + + fmuld %f2,%f46,%f4 + + fmuld %f12,%f46,%f14 + + fmuld %f22,%f46,%f24 + + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + + fsubd %f20,%f24,%f24 + fmuld %f22,%f48,%f26 + + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 ; add %o3,1,%o3 + + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 ; add %o4,1,%o4 + + fsubd %f24,%f26,%f20 + ld [%fp+n2],%o5 ; add %o5,1,%o5 + + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + + fsubd %f24,%f20,%f36 + and %o5,1,%o5 + + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + + fsubd %f36,%f26,%f36 + fmuld %f22,%f50,%f28 + sll %o5,3,%o5 + + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + + fsubd %f28,%f36,%f28 + ld [%g1+%o5],%f26 + + fsubd %f0,%f8,%f4 + + fsubd %f10,%f18,%f14 + + fsubd %f20,%f28,%f24 + + fsubd %f0,%f4,%f32 + + fsubd %f10,%f14,%f34 + + fsubd %f20,%f24,%f36 + + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + + fsubd %f36,%f28,%f36 + fmuld %f22,%f52,%f22 + + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + + fsubd %f22,%f36,%f22 + ld [%o7+%o5],%f28 + + fsubd %f4,%f2,%f0 ! x + + fsubd %f14,%f12,%f10 ! x + + fsubd %f24,%f22,%f20 ! x + + fsubd %f4,%f0,%f4 + + fsubd %f14,%f10,%f14 + + fsubd %f24,%f20,%f24 + + fands %f0,%f30,%f9 ! save signbit + + fands %f10,%f30,%f19 ! save signbit + + fands %f20,%f30,%f29 ! save signbit + + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + + fabsd %f20,%f20 + std %f20,[%fp+x2_1] + + fsubd %f4,%f2,%f2 ! y + + fsubd %f14,%f12,%f12 ! y + + fsubd %f24,%f22,%f22 ! y + + fcmpgt32 %f6,%f0,%l0 + + fcmpgt32 %f16,%f10,%l1 + + fcmpgt32 %f26,%f20,%l2 + +! -- 16 byte aligned + fxors %f2,%f9,%f2 + + fxors %f12,%f19,%f12 + + fxors %f22,%f29,%f22 + + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,.CASE4 + +! 
delay slot + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,.CASE2 + +! delay slot + fands %f29,%f28,%f29 ! if (n & 1) clear sign bit + andcc %l2,2,%g0 + bne,pn %icc,.CASE1 + +! delay slot + fpadd32s %f0,%f31,%f8 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fpadd32s %f10,%f31,%f18 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + fmuld %f0,%f6,%f6 + ldd [%g1+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%g1+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%g1+%l2],%f22 + + fmuld %f4,%f32,%f4 + ldd [%l4+%l0],%f0 + + fmuld %f14,%f34,%f14 + ldd [%l4+%l1],%f10 + + fmuld %f24,%f36,%f24 + ldd [%l4+%l2],%f20 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 
+ + faddd %f26,%f36,%f26 + +.FIXSIGN: + ld [%fp+n0],%o3 ; add %o3,1,%o3 + add %l5,thresh-4,%g1 + + ld [%fp+n1],%o4 ; add %o4,1,%o4 + + ld [%fp+n2],%o5 ; add %o5,1,%o5 + and %o3,2,%o3 + + sll %o3,2,%o3 + and %o4,2,%o4 + lda [%i1]%asi,%l0 ! preload next argument + + sll %o4,2,%o4 + and %o5,2,%o5 + ld [%g1+%o3],%f8 + + sll %o5,2,%o5 + ld [%g1+%o4],%f18 + + ld [%g1+%o5],%f28 + fxors %f9,%f8,%f9 + + lda [%i1]%asi,%f0 + fxors %f29,%f28,%f29 + + lda [%i1+4]%asi,%f1 + fxors %f19,%f18,%f19 + + fors %f6,%f9,%f6 ! tack on sign + add %i1,%i2,%i1 ! x += stridex + st %f6,[%o0] + + fors %f26,%f29,%f26 ! tack on sign + st %f7,[%o0+4] + + fors %f16,%f19,%f16 ! tack on sign + st %f26,[%o2] + + st %f27,[%o2+4] + addcc %i0,-1,%i0 + + st %f16,[%o1] + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + bg,pt %icc,.LOOP0 + +! delay slot + st %f17,[%o1+4] + + ba,pt %icc,.ENDLOOP0 +! delay slot + nop + + .align 32 +.CASE1: + fpadd32s %f10,%f31,%f18 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fand %f8,%f44,%f4 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fand %f18,%f44,%f14 + sub %l0,%o7,%l0 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + sub %l1,%o7,%l1 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f16,%f54,%f16 + 
fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f4,%f32,%f4 + std %f22,[%fp+y2_0] + + fmuld %f14,%f34,%f14 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f24,%f22,%f24 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + .align 32 +.CASE2: + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + andcc %l2,2,%g0 + bne,pn %icc,.CASE3 + +! delay slot + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + add %l3,8,%g1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f4,%f32,%f4 + std %f12,[%fp+y1_0] + + fmuld %f24,%f36,%f24 
+ + fmuld %f6,%f2,%f6 + + fmuld %f26,%f22,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f24,%f26 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f6,%f0,%f6 + + faddd %f26,%f20,%f26 + + faddd %f14,%f12,%f14 + + faddd %f6,%f32,%f6 + + faddd %f26,%f36,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f34,%f14,%f16 + + .align 32 +.CASE3: + fand %f8,%f44,%f4 + add %l3,8,%g1 + sub %l0,%o7,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f14,%f16,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f24,%f26,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f4,%f32,%f4 + + fmuld %f20,%f24,%f24 + + fmuld %f6,%f2,%f6 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f6,%f0,%f6 + + faddd %f34,%f14,%f16 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f6,%f32,%f6 + + .align 32 +.CASE4: + fands %f29,%f28,%f29 ! 
if (n & 1) clear sign bit + sethi %hi(0x3fc3c000),%o7 + andcc %l1,2,%g0 + bne,pn %icc,.CASE6 + +! delay slot + andcc %l2,2,%g0 + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + bne,pn %icc,.CASE5 + +! delay slot + add %l3,8,%g1 + ld [%fp+x2_1],%l2 + fpadd32s %f20,%f31,%f28 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f0,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f4,%f6,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f14,%f34,%f14 + std %f2,[%fp+y0_0] + + fmuld %f24,%f36,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE5: + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f14,%f34,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f16,%f12,%f16 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f16,%f14,%f16 + + faddd %f4,%f2,%f4 + + faddd %f24,%f22,%f24 + + faddd %f16,%f10,%f16 + + faddd %f32,%f4,%f6 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f16,%f34,%f16 + + .align 32 +.CASE6: + ld [%fp+x2_1],%l2 + add %l3,8,%g1 + bne,pn %icc,.CASE7 +! 
delay slot + fpadd32s %f20,%f31,%f28 + + fand %f28,%f44,%f24 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f0,%f0,%f0 + sub %l2,%o7,%l2 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + faddd %f4,%f6,%f4 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f4,%f4 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f24,%f36,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE7: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f14,%f16,%f14 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f14,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + + .align 32 +.ENDLOOP2: + fmuld %f10,%f40,%f12 + add %l5,thresh,%g1 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + fsubd %f12,%f42,%f12 ! 
n + fmuld %f12,%f46,%f14 + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 ; add %o4,1,%o4 + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + fsubd %f10,%f18,%f14 + fsubd %f10,%f14,%f34 + add %l5,thresh+4,%o7 + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + fsubd %f14,%f12,%f10 ! x + fsubd %f14,%f10,%f14 + fands %f10,%f30,%f19 ! save signbit + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + fsubd %f14,%f12,%f12 ! y + fcmpgt32 %f16,%f10,%l1 + fxors %f12,%f19,%f12 + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + fand %f18,%f44,%f14 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f10,%f14,%f10 + sub %l1,%o7,%l1 + srl %l1,10,%l1 + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + fmuld %f14,%f34,%f14 + fmuld %f16,%f12,%f16 + faddd %f16,%f14,%f16 + faddd %f16,%f10,%f16 + ba,pt %icc,2f + faddd %f16,%f34,%f16 +1: + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + faddd %f14,%f16,%f14 + fmuld %f10,%f14,%f14 + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + faddd %f14,%f12,%f14 + faddd %f34,%f14,%f16 +2: + add %l5,thresh-4,%g1 + ld [%fp+n1],%o4 ; add %o4,1,%o4 + and %o4,2,%o4 + sll %o4,2,%o4 + ld [%g1+%o4],%f18 + fxors %f19,%f18,%f19 + fors %f16,%f19,%f16 ! 
tack on sign + st %f16,[%o1] + st %f17,[%o1+4] + +.ENDLOOP1: + fmuld %f0,%f40,%f2 + add %l5,thresh,%g1 + faddd %f2,%f42,%f2 + st %f3,[%fp+n0] + fsubd %f2,%f42,%f2 ! n + fmuld %f2,%f46,%f4 + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 ; add %o3,1,%o3 + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + fsubd %f0,%f8,%f4 + fsubd %f0,%f4,%f32 + add %l5,thresh+4,%o7 + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + fsubd %f4,%f2,%f0 ! x + fsubd %f4,%f0,%f4 + fands %f0,%f30,%f9 ! save signbit + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + fsubd %f4,%f2,%f2 ! y + fcmpgt32 %f6,%f0,%l0 + fxors %f2,%f9,%f2 + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + fand %f8,%f44,%f4 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f0,%f4,%f0 + sub %l0,%o7,%l0 + srl %l0,10,%l0 + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + fmuld %f4,%f32,%f4 + fmuld %f6,%f2,%f6 + faddd %f6,%f4,%f6 + faddd %f6,%f0,%f6 + ba,pt %icc,2f + faddd %f6,%f32,%f6 +1: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + faddd %f4,%f6,%f4 + fmuld %f0,%f4,%f4 + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + faddd %f4,%f2,%f4 + faddd %f32,%f4,%f6 +2: + add %l5,thresh-4,%g1 + ld [%fp+n0],%o3 ; add %o3,1,%o3 + and %o3,2,%o3 + sll %o3,2,%o3 + ld [%g1+%o3],%f8 + fxors %f9,%f8,%f9 + fors 
%f6,%f9,%f6 ! tack on sign + st %f6,[%o0] + st %f7,[%o0+4] + +.ENDLOOP0: + +! check for huge arguments remaining + + tst LIM_l6 + be,pt %icc,.exit +! delay slot + nop + +! ========== huge range (use C code) ========== + +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vcos_big + mov %l7,%o5 ! delay slot + +.exit: + ret + restore + + + .align 32 +.SKIP0: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f10,%f0 + ld [%i1+4],%f1 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f20,%f10 + ld [%i1+4],%f11 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP2: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG0: + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f0,%f0,%f0 ! y = x - x + st %f0,[%o0] + st %f1,[%o0+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovd %f10,%f0 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG1: + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! 
delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f10,%f10,%f10 ! y = x - x + st %f10,[%o1] + st %f11,[%o1+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovd %f20,%f10 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG2: + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f20,%f20,%f20 ! y = x - x + st %f20,[%o2] + st %f21,[%o2+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vcos) + diff --git a/usr/src/libm/src/mvec/vis/__vcos_ultra3.S b/usr/src/libm/src/mvec/vis/__vcos_ultra3.S new file mode 100644 index 0000000..2809bd9 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vcos_ultra3.S @@ -0,0 +1,3424 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + + .ident "@(#)__vcos_ultra3.S 1.8 06/01/23 SMI" + + .file "__vcos_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vcos + .type __vcos,#function + __vcos = __vcos_ultra3 +#endif + + RO_DATA + .align 64 +constants: + .word 0x42c80000,0x00000000 ! 3 * 2^44 + .word 0x43380000,0x00000000 ! 3 * 2^51 + .word 0x3fe45f30,0x6dc9c883 ! invpio2 + .word 0x3ff921fb,0x54442c00 ! pio2_1 + .word 0x3d318469,0x898cc400 ! pio2_2 + .word 0x3a71701b,0x839a2520 ! pio2_3 + .word 0xbfc55555,0x55555533 ! pp1 + .word 0x3f811111,0x10e7d53b ! pp2 + .word 0xbf2a0167,0xe6b3cf9b ! pp3 + .word 0xbfdfffff,0xffffff65 ! qq1 + .word 0x3fa55555,0x54f88ed0 ! qq2 + .word 0xbf56c12c,0xdd185f60 ! qq3 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define nk3 STACK_BIAS-0x24 +#define nk2 STACK_BIAS-0x28 +#define nk1 STACK_BIAS-0x2c +#define nk0 STACK_BIAS-0x30 +#define junk STACK_BIAS-0x38 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 hx3 +! l4 k0 +! l5 k1 +! l6 k2 +! l7 k3 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_sincos2 +! g5 scratch + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 0x3e400000 +! o5 0x3fe921fb,0x4099251e +! o7 scratch + +! f0 hx0 +! f2 +! f4 +! f6 +! f8 hx1 +! f10 +! f12 +! f14 +! f16 hx2 +! f18 +! f20 +! f22 +! f24 hx3 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 +! 
f38 + +#define c3two44 %f40 +#define c3two51 %f42 +#define invpio2 %f44 +#define pio2_1 %f46 +#define pio2_2 %f48 +#define pio2_3 %f50 +#define pp1 %f52 +#define pp2 %f54 +#define pp3 %f56 +#define qq1 %f58 +#define qq2 %f60 +#define qq3 %f62 + + ENTRY(__vcos_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_sincos2,o1) + mov %o1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + st %g0,[%fp+biguns] ! biguns = 0 + ldd [%o0+0x00],c3two44 ! load/set up constants + ldd [%o0+0x08],c3two51 + ldd [%o0+0x10],invpio2 + ldd [%o0+0x18],pio2_1 + ldd [%o0+0x20],pio2_2 + ldd [%o0+0x28],pio2_3 + ldd [%o0+0x30],pp1 + ldd [%o0+0x38],pp2 + ldd [%o0+0x40],pp3 + ldd [%o0+0x48],qq1 + ldd [%o0+0x50],qq2 + ldd [%o0+0x58],qq3 + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e400000),%o4 + sethi %hi(0x3fe921fb),%o5 + or %o5,%lo(0x3fe921fb),%o5 + sllx %o5,32,%o5 + sethi %hi(0x4099251e),%o7 + or %o7,%lo(0x4099251e),%o7 + or %o5,%o7,%o5 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,junk,%o1 ! loop prologue + add %fp,junk,%o2 + add %fp,junk,%o3 + ld [%i1],%l0 ! *x + ld [%i1],%f0 + ld [%i1+4],%f3 + andn %l0,%i5,%l0 ! mask off sign + add %i1,%i2,%i1 ! x += stridex + ba .loop0 + nop + +! 16-byte aligned + .align 16 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%o4,%g5 + sub %o5,%l0,%o7 + fabss %f0,%f2 + + lda [%i1]%asi,%f8 + orcc %o7,%g5,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] + +.loop1: + lda [%i1]%asi,%l2 ! 
preload next argument + sub %l1,%o4,%g5 + sub %o5,%l1,%o7 + fabss %f8,%f10 + + lda [%i1]%asi,%f16 + orcc %o7,%g5,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f19 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] + +.loop2: + lda [%i1]%asi,%l3 ! preload next argument + sub %l2,%o4,%g5 + sub %o5,%l2,%o7 + fabss %f16,%f18 + + lda [%i1]%asi,%f24 + orcc %o7,%g5,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f27 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last3 + +! delay slot + andn %l3,%i5,%l3 + add %i1,%i2,%i1 ! x += stridex + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + +.loop3: + sub %l3,%o4,%g5 + sub %o5,%l3,%o7 + fabss %f24,%f26 + st %f5,[%fp+nk0] + + orcc %o7,%g5,%g0 + mov %i3,%o3 ! py3 = y + bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e +! delay slot + st %f13,[%fp+nk1] + +!!! DONE? +.cont: + srlx %o5,32,%o7 + add %i3,%i4,%i3 ! y += stridey + fmovs %f3,%f1 + st %f21,[%fp+nk2] + + sub %o7,%l0,%l0 + sub %o7,%l1,%l1 + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + + sub %o7,%l2,%l2 + sub %o7,%l3,%l3 + fmovs %f11,%f9 + + or %l0,%l1,%l0 + or %l2,%l3,%l2 + fmovs %f19,%f17 + + fmovs %f27,%f25 + fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range + + fmuld %f8,invpio2,%f14 + ld [%fp+nk0],%l4 + + fmuld %f16,invpio2,%f22 + ld [%fp+nk1],%l5 + + orcc %l0,%l2,%g0 + bl,pn %icc,.medium +! delay slot + fmuld %f24,invpio2,%f30 + ld [%fp+nk2],%l6 + + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 + + sll %l5,5,%l5 + ldd [%l4+%g1],%f4 + fcmpd %fcc1,%f8,pio2_3 + + sll %l6,5,%l6 + ldd [%l5+%g1],%f12 + fcmpd %fcc2,%f16,pio2_3 + + sll %l7,5,%l7 + ldd [%l6+%g1],%f20 + fcmpd %fcc3,%f24,pio2_3 + + ldd [%l7+%g1],%f28 + fsubd %f2,%f4,%f2 ! 
x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + + fsubd %f26,%f28,%f26 + + fmuld %f2,%f2,%f0 ! z = x * x + + fmuld %f10,%f10,%f8 + + fmuld %f18,%f18,%f16 + + fmuld %f26,%f26,%f24 + + fmuld %f0,qq3,%f6 + + fmuld %f8,qq3,%f14 + + fmuld %f16,qq3,%f22 + + fmuld %f24,qq3,%f30 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + ldd [%l4+8],%f2 + + fmuld %f34,%f14,%f14 + ldd [%l5+8],%f10 + + fmuld %f36,%f22,%f22 + ldd [%l6+8],%f18 + + fmuld %f38,%f30,%f30 + ldd [%l7+8],%f26 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fsubd %f6,%f4,%f6 + lda [%i1]%asi,%l0 ! preload next argument + + fsubd %f14,%f12,%f14 + lda [%i1]%asi,%f0 + + fsubd %f22,%f20,%f22 + lda [%i1+4]%asi,%f3 + + fsubd %f30,%f28,%f30 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + faddd %f6,%f32,%f6 + st %f6,[%o0] + + faddd %f14,%f34,%f14 + st %f14,[%o1] + + faddd %f22,%f36,%f22 + st %f22,[%o2] + + faddd %f30,%f38,%f30 + st %f30,[%o3] + addcc %i0,-1,%i0 + + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.medium: + faddd %f6,c3two51,%f4 + st %f5,[%fp+nk0] + + faddd %f14,c3two51,%f12 + st %f13,[%fp+nk1] + + faddd %f22,c3two51,%f20 + st %f21,[%fp+nk2] + + faddd %f30,c3two51,%f28 + st %f29,[%fp+nk3] + + fsubd %f4,c3two51,%f6 + + fsubd %f12,c3two51,%f14 + + fsubd %f20,c3two51,%f22 + + fsubd %f28,c3two51,%f30 + + fmuld %f6,pio2_1,%f2 + ld [%fp+nk0],%l0 ! n + + fmuld %f14,pio2_1,%f10 + ld [%fp+nk1],%l1 + + fmuld %f22,pio2_1,%f18 + ld [%fp+nk2],%l2 + + fmuld %f30,pio2_1,%f26 + ld [%fp+nk3],%l3 + + fsubd %f0,%f2,%f0 + fmuld %f6,pio2_2,%f4 + add %l0,1,%l0 + + fsubd %f8,%f10,%f8 + fmuld %f14,pio2_2,%f12 + add %l1,1,%l1 + + fsubd %f16,%f18,%f16 + fmuld %f22,pio2_2,%f20 + add %l2,1,%l2 + + fsubd %f24,%f26,%f24 + fmuld %f30,pio2_2,%f28 + add %l3,1,%l3 + + fsubd %f0,%f4,%f32 + + fsubd %f8,%f12,%f34 + + fsubd %f16,%f20,%f36 + + fsubd %f24,%f28,%f38 + + fsubd %f0,%f32,%f0 + fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 + + fsubd %f8,%f34,%f8 + fcmple32 %f34,pio2_3,%l5 + + fsubd %f16,%f36,%f16 + fcmple32 %f36,pio2_3,%l6 + + fsubd %f24,%f38,%f24 + fcmple32 %f38,pio2_3,%l7 + + fsubd %f0,%f4,%f0 + fmuld %f6,pio2_3,%f6 + sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2 + + fsubd %f8,%f12,%f8 + fmuld %f14,pio2_3,%f14 + sll %l5,30,%l5 + + fsubd %f16,%f20,%f16 + fmuld %f22,pio2_3,%f22 + sll %l6,30,%l6 + + fsubd %f24,%f28,%f24 + fmuld %f30,pio2_3,%f30 + sll %l7,30,%l7 + + fsubd %f6,%f0,%f6 + sra %l4,31,%l4 + + fsubd %f14,%f8,%f14 + sra %l5,31,%l5 + + fsubd %f22,%f16,%f22 + sra %l6,31,%l6 + + fsubd %f30,%f24,%f30 + sra %l7,31,%l7 + + fsubd %f32,%f6,%f0 ! 
reduced x + xor %l0,%l4,%l0 + + fsubd %f34,%f14,%f8 + xor %l1,%l5,%l1 + + fsubd %f36,%f22,%f16 + xor %l2,%l6,%l2 + + fsubd %f38,%f30,%f24 + xor %l3,%l7,%l3 + + fabsd %f0,%f2 + sub %l0,%l4,%l0 + + fabsd %f8,%f10 + sub %l1,%l5,%l1 + + fabsd %f16,%f18 + sub %l2,%l6,%l2 + + fabsd %f24,%f26 + sub %l3,%l7,%l3 + + faddd %f2,c3two44,%f4 + st %f5,[%fp+nk0] + and %l4,2,%l4 + + faddd %f10,c3two44,%f12 + st %f13,[%fp+nk1] + and %l5,2,%l5 + + faddd %f18,c3two44,%f20 + st %f21,[%fp+nk2] + and %l6,2,%l6 + + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + and %l7,2,%l7 + + fsubd %f32,%f0,%f4 + xor %l0,%l4,%l0 + + fsubd %f34,%f8,%f12 + xor %l1,%l5,%l1 + + fsubd %f36,%f16,%f20 + xor %l2,%l6,%l2 + + fsubd %f38,%f24,%f28 + xor %l3,%l7,%l3 + + fzero %f38 + ld [%fp+nk0],%l4 + + fsubd %f4,%f6,%f6 ! w + ld [%fp+nk1],%l5 + + fsubd %f12,%f14,%f14 + ld [%fp+nk2],%l6 + + fnegd %f38,%f38 + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + + fsubd %f20,%f22,%f22 + sll %l5,5,%l5 + + fsubd %f28,%f30,%f30 + sll %l6,5,%l6 + + fand %f0,%f38,%f32 ! sign bit of x + ldd [%l4+%g1],%f4 + sll %l7,5,%l7 + + fand %f8,%f38,%f34 + ldd [%l5+%g1],%f12 + + fand %f16,%f38,%f36 + ldd [%l6+%g1],%f20 + + fand %f24,%f38,%f38 + ldd [%l7+%g1],%f28 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + nop + + fsubd %f26,%f28,%f26 + nop + +! 16-byte aligned + fmuld %f2,%f2,%f0 ! z = x * x + andcc %l0,1,%g0 + bz,pn %icc,.case8 +! delay slot + fxor %f6,%f32,%f32 + + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case4 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case2 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case1 +! delay slot + fxor %f30,%f38,%f38 + +!.case0: + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case3 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case6 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case5 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + 
faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! 
cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case7 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case8: + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case12 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case10 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case9 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case11 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd 
[%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! 
cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case14 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case13 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda 
[%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld 
%f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case15 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! 
delay slot + nop + + .align 16 +.case15: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.end: + st %f15,[%o1+4] + st %f23,[%o2+4] + st %f31,[%o3+4] + ld [%fp+biguns],%i5 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + nop +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vcos_big_ultra3 + sra %o5,0,%o5 ! delay slot + +.exit: + ret + restore + + + .align 16 +.last1: + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] +.last1_from_range1: + mov 0,%l1 + fzeros %f8 + fzero %f10 + add %fp,junk,%o1 +.last2: + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] +.last2_from_range2: + mov 0,%l2 + fzeros %f16 + fzero %f18 + add %fp,junk,%o2 +.last3: + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + st %f5,[%fp+nk0] + st %f13,[%fp+nk1] +.last3_from_range3: + mov 0,%l3 + fzeros %f24 + fzero %f26 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + cmp %l0,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f0 + fmuld %f2,%f0,%f2 + st %f2,[%o0] + ba,pt %icc,2f +! delay slot + st %f3,[%o0+4] +1: + fdtoi %f2,%f4 ! 
raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o0] + st %g0,[%o0+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.end +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f8,%f0 + fmovs %f11,%f3 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range1: + cmp %l1,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f8 + fmuld %f10,%f8,%f10 + st %f10,[%o1] + ba,pt %icc,2f +! delay slot + st %f11,[%o1+4] +1: + fdtoi %f10,%f12 ! raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o1] + st %g0,[%o1+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last1_from_range1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f16,%f8 + fmovs %f19,%f11 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range2: + cmp %l2,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f16 + fmuld %f18,%f16,%f18 + st %f18,[%o2] + ba,pt %icc,2f +! delay slot + st %f19,[%o2+4] +1: + fdtoi %f18,%f20 ! raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o2] + st %g0,[%o2+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last2_from_range2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l3,%i5,%l2 ! hx &= ~0x80000000 + fmovs %f24,%f16 + fmovs %f27,%f19 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range3: + cmp %l3,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! 
delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l3,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f24 + fmuld %f26,%f24,%f26 + st %f26,[%o3] + ba,pt %icc,2f +! delay slot + st %f27,[%o3+4] +1: + fdtoi %f26,%f28 ! raise inexact if not zero + sethi %hi(0x3ff00000),%o7 + st %o7,[%o3] + st %g0,[%o3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last3_from_range3 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l3 + ld [%i1],%f24 + ld [%i1+4],%f27 + andn %l3,%i5,%l3 ! hx &= ~0x80000000 + ba,pt %icc,.loop3 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vcos_ultra3) + diff --git a/usr/src/libm/src/mvec/vis/__vcosf.S b/usr/src/libm/src/mvec/vis/__vcosf.S new file mode 100644 index 0000000..31429c7 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vcosf.S @@ -0,0 +1,2101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vcosf.S 1.9 06/01/23 SMI" + + .file "__vcosf.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: /* 16 entries of 8 bytes each (high word, low word); the byte offsets are named by the #defines that follow the table */ + .word 0xbfc55554,0x60000000 /* S0 - presumably a sine polynomial coefficient; its use is not visible near this table (TODO confirm) */ + .word 0x3f811077,0xe0000000 /* S1 - see S0 */ + .word 0xbf29956b,0x60000000 /* S2 - see S0 */ + .word 0x3ff00000,0x00000000 /* one = 1.0 */ + .word 0xbfe00000,0x00000000 /* mhalf = -0.5 */ + .word 0x3fa55554,0xa0000000 /* C0 - cosine polynomial coefficient: cos(x) ~ one + mhalf*x^2 + x^4*(C0 + x^2*(C1 + x^2*C2)) */ + .word 0xbf56c0c1,0xe0000000 /* C1 - cosine polynomial coefficient */ + .word 0x3ef99e24,0xe0000000 /* C2 - cosine polynomial coefficient */ + .word 0x3fe45f30,0x6dc9c883 /* invpio2 = 2/pi */ + .word 0x43380000,0x00000000 /* round = 1.5 * 2^52; presumably added to force rounding to an integer (TODO confirm) */ + .word 0x3ff921fb,0x54400000 /* pio2_1 = leading bits of pi/2 (low-order bits zeroed) */ + .word 0x3dd0b461,0x1a626331 /* pio2_t = tail of pi/2, approximately pi/2 - pio2_1 */ + .word 0x3f490fdb,0 /* thresh1 = single-precision pi/4 in the high word, low word zero */ + .word 0x49c90fdb,0 /* thresh2 = single-precision 2^19 * pi in the high word */ + .word 0x7f800000,0 /* inf = single-precision +infinity bit pattern in the high word */ + .word 0x80000000,0 /* signbit = sign-bit mask in the high word */ + +#define S0 0x0 +#define S1 0x08 +#define S2 0x10 +#define one 0x18 +#define mhalf 0x20 +#define C0 0x28 +#define C1 0x30 +#define C2 0x38 +#define invpio2 0x40 +#define round 0x48 +#define pio2_1 0x50 +#define pio2_t 0x58 +#define thresh1 0x60 +#define thresh2 0x68 +#define inf 0x70 +#define signbit 0x78 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define junk STACK_BIAS-0x20 +#define n3 STACK_BIAS-0x24 +#define n2 STACK_BIAS-0x28 +#define n1 STACK_BIAS-0x2c +#define n0 STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 biguns + +! l0 n0 +! l1 n1 +! l2 n2 +! l3 n3 +! l4 +! l5 +! l6 +! l7 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 +! o5 +! o7 + +! f0 x0 +! f2 x1 +! f4 x2 +! f6 x3 +! f8 thresh1 (pi/4) +! f10 y0 +! f12 y1 +! f14 y2 +! f16 y3 +! f18 thresh2 (2^19 pi) +! f20 +! f22 +! f24 +! f26 +! f28 signbit +! f30 +! f32 +! f34 +! f36 +! f38 inf +! f40 S0 +! f42 S1 +! f44 S2 +! f46 one +! f48 mhalf +! f50 C0 +! f52 C1 +! f54 C2 +! f56 invpio2 +! f58 round +! f60 pio2_1 +! 
f62 pio2_t + + ENTRY(__vcosf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,l0) + mov %l0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + mov 0,%i5 ! biguns = 0 + ldd [%g1+S0],%f40 ! load constants + ldd [%g1+S1],%f42 + ldd [%g1+S2],%f44 + ldd [%g1+one],%f46 + ldd [%g1+mhalf],%f48 + ldd [%g1+C0],%f50 + ldd [%g1+C1],%f52 + ldd [%g1+C2],%f54 + ldd [%g1+invpio2],%f56 + ldd [%g1+round],%f58 + ldd [%g1+pio2_1],%f60 + ldd [%g1+pio2_t],%f62 + ldd [%g1+thresh1],%f8 + ldd [%g1+thresh2],%f18 + ldd [%g1+inf],%f38 + ldd [%g1+signbit],%f28 + sll %i2,2,%i2 ! scale strides + sll %i4,2,%i4 + fzero %f10 ! loop prologue + add %fp,junk,%o0 + fzero %f12 + add %fp,junk,%o1 + fzero %f14 + add %fp,junk,%o2 + fzero %f16 + ba .start + add %fp,junk,%o3 + + .align 16 +! 16-byte aligned +.start: + ld [%i1],%f0 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f10,%f10 + + st %f10,[%o0] + mov %i3,%o0 ! py0 = y + ble,pn %icc,.last1 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f2 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f12,%f12 + + st %f12,[%o1] + mov %i3,%o1 ! py1 = y + ble,pn %icc,.last2 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f4 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f14,%f14 + + st %f14,[%o2] + mov %i3,%o2 ! py2 = y + ble,pn %icc,.last3 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f6 ! *x + add %i1,%i2,%i1 ! x += stridex + nop + fdtos %f16,%f16 + + st %f16,[%o3] + mov %i3,%o3 ! py3 = y + add %i3,%i4,%i3 ! y += stridey +.cont: + fabsd %f0,%f30 + + fabsd %f2,%f32 + + fabsd %f4,%f34 + + fabsd %f6,%f36 + fcmple32 %f30,%f18,%l0 + + fcmple32 %f32,%f18,%l1 + + fcmple32 %f34,%f18,%l2 + + fcmple32 %f36,%f18,%l3 + nop + +! 
16-byte aligned + andcc %l0,2,%g0 + bz,pn %icc,.range0 ! branch if > 2^19 pi +! delay slot + fcmple32 %f30,%f8,%l0 + +.check1: + andcc %l1,2,%g0 + bz,pn %icc,.range1 ! branch if > 2^19 pi +! delay slot + fcmple32 %f32,%f8,%l1 + +.check2: + andcc %l2,2,%g0 + bz,pn %icc,.range2 ! branch if > 2^19 pi +! delay slot + fcmple32 %f34,%f8,%l2 + +.check3: + andcc %l3,2,%g0 + bz,pn %icc,.range3 ! branch if > 2^19 pi +! delay slot + fcmple32 %f36,%f8,%l3 + +.checkprimary: + fsmuld %f0,%f0,%f30 + fstod %f0,%f0 + + fsmuld %f2,%f2,%f32 + fstod %f2,%f2 + and %l0,%l1,%o4 + + fsmuld %f4,%f4,%f34 + fstod %f4,%f4 + + fsmuld %f6,%f6,%f36 + fstod %f6,%f6 + and %l2,%l3,%o5 + + fmuld %f30,%f54,%f10 + and %o4,%o5,%o5 + + fmuld %f32,%f54,%f12 + andcc %o5,2,%g0 + bz,pn %icc,.medium ! branch if any argument is > pi/4 +! delay slot + nop + + fmuld %f34,%f54,%f14 + + fmuld %f36,%f54,%f16 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + + fmuld %f30,%f10,%f10 + + fmuld %f32,%f12,%f12 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f16,%f16 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + faddd %f16,%f26,%f16 + + ba,pt %icc,.end +! 
delay slot + nop + + + .align 16 +.medium: + fmuld %f0,%f56,%f10 + + fmuld %f2,%f56,%f12 + + fmuld %f4,%f56,%f14 + + fmuld %f6,%f56,%f16 + + faddd %f10,%f58,%f10 + st %f11,[%fp+n0] + + faddd %f12,%f58,%f12 + st %f13,[%fp+n1] + + faddd %f14,%f58,%f14 + st %f15,[%fp+n2] + + faddd %f16,%f58,%f16 + st %f17,[%fp+n3] + + fsubd %f10,%f58,%f10 + + fsubd %f12,%f58,%f12 + + fsubd %f14,%f58,%f14 + + fsubd %f16,%f58,%f16 + + fmuld %f10,%f60,%f20 + ld [%fp+n0],%l0 + + fmuld %f12,%f60,%f22 + ld [%fp+n1],%l1 + + fmuld %f14,%f60,%f24 + ld [%fp+n2],%l2 + + fmuld %f16,%f60,%f26 + ld [%fp+n3],%l3 + + fsubd %f0,%f20,%f0 + fmuld %f10,%f62,%f30 + add %l0,1,%l0 + + fsubd %f2,%f22,%f2 + fmuld %f12,%f62,%f32 + add %l1,1,%l1 + + fsubd %f4,%f24,%f4 + fmuld %f14,%f62,%f34 + add %l2,1,%l2 + + fsubd %f6,%f26,%f6 + fmuld %f16,%f62,%f36 + add %l3,1,%l3 + + fsubd %f0,%f30,%f0 + + fsubd %f2,%f32,%f2 + + fsubd %f4,%f34,%f4 + + fsubd %f6,%f36,%f6 + andcc %l0,1,%g0 + + fmuld %f0,%f0,%f30 + bz,pn %icc,.case8 +! delay slot + andcc %l1,1,%g0 + + fmuld %f2,%f2,%f32 + bz,pn %icc,.case4 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case2 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case1 +! delay slot + nop + +!.case0: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case3 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case6 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case5 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case7 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.case8: + fmuld %f2,%f2,%f32 + bz,pn %icc,.case12 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case10 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case9 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case11 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case14 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case13 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case15 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case15: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 32 +.end: + fdtos %f10,%f10 + st %f10,[%o0] + fdtos %f12,%f12 + st %f12,[%o1] + fdtos %f14,%f14 + st %f14,[%o2] + fdtos %f16,%f16 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + st %f16,[%o3] +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vcos_bigf + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 32 +.last1: + fdtos %f12,%f12 + st %f12,[%o1] + fzeros %f2 + add %fp,junk,%o1 +.last2: + fdtos %f14,%f14 + st %f14,[%o2] + fzeros %f4 + add %fp,junk,%o2 +.last3: + fdtos %f16,%f16 + st %f16,[%o3] + fzeros %f6 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + fcmpgt32 %f38,%f30,%l0 + andcc %l0,2,%g0 + bnz,a,pt %icc,1f ! 
branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f1 + fmuls %f0,%f1,%f0 + st %f0,[%o0] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f0 + add %i1,%i2,%i1 + mov %i3,%o0 + add %i3,%i4,%i3 + fabsd %f0,%f30 + fcmple32 %f30,%f18,%l0 + andcc %l0,2,%g0 + bz,pn %icc,.range0 +! delay slot + nop + ba,pt %icc,.check1 +! delay slot + fcmple32 %f30,%f8,%l0 +1: + fzero %f0 ! set up dummy argument + add %fp,junk,%o0 + mov 2,%l0 + ba,pt %icc,.check1 +! delay slot + fzero %f30 + + + .align 16 +.range1: + fcmpgt32 %f38,%f32,%l1 + andcc %l1,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f3 + fmuls %f2,%f3,%f2 + st %f2,[%o1] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f2 + add %i1,%i2,%i1 + mov %i3,%o1 + add %i3,%i4,%i3 + fabsd %f2,%f32 + fcmple32 %f32,%f18,%l1 + andcc %l1,2,%g0 + bz,pn %icc,.range1 +! delay slot + nop + ba,pt %icc,.check2 +! delay slot + fcmple32 %f32,%f8,%l1 +1: + fzero %f2 ! set up dummy argument + add %fp,junk,%o1 + mov 2,%l1 + ba,pt %icc,.check2 +! delay slot + fzero %f32 + + + .align 16 +.range2: + fcmpgt32 %f38,%f34,%l2 + andcc %l2,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f5 + fmuls %f4,%f5,%f4 + st %f4,[%o2] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f4 + add %i1,%i2,%i1 + mov %i3,%o2 + add %i3,%i4,%i3 + fabsd %f4,%f34 + fcmple32 %f34,%f18,%l2 + andcc %l2,2,%g0 + bz,pn %icc,.range2 +! delay slot + nop + ba,pt %icc,.check3 +! delay slot + fcmple32 %f34,%f8,%l2 +1: + fzero %f4 ! set up dummy argument + add %fp,junk,%o2 + mov 2,%l2 + ba,pt %icc,.check3 +! delay slot + fzero %f34 + + + .align 16 +.range3: + fcmpgt32 %f38,%f36,%l3 + andcc %l3,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! 
set biguns + fzeros %f7 + fmuls %f6,%f7,%f6 + st %f6,[%o3] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f6 + add %i1,%i2,%i1 + mov %i3,%o3 + add %i3,%i4,%i3 + fabsd %f6,%f36 + fcmple32 %f36,%f18,%l3 + andcc %l3,2,%g0 + bz,pn %icc,.range3 +! delay slot + nop + ba,pt %icc,.checkprimary +! delay slot + fcmple32 %f36,%f8,%l3 +1: + fzero %f6 ! set up dummy argument + add %fp,junk,%o3 + mov 2,%l3 + ba,pt %icc,.checkprimary +! delay slot + fzero %f36 + + SET_SIZE(__vcosf) + diff --git a/usr/src/libm/src/mvec/vis/__vexp.S b/usr/src/libm/src/mvec/vis/__vexp.S new file mode 100644 index 0000000..b5f6200 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vexp.S @@ -0,0 +1,1281 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vexp.S 1.9 06/01/23 SMI" + + .file "__vexp.S" + +#include "libm.h" + + RO_DATA + +/******************************************************************** + * vexp() algorithm is from mopt:f_exp.c. Basics are included here + * to supplement comments within this file. vexp() has been unrolled + * to a depth of 3. 
Only element 0 is documented. + * + * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by + * 2^44 to allow *2^k w/o shifting within the FP registers. These + * had to be removed for CHEETAH to avoid the fdtox of a very large + * number, which would trap to kernel (2^52). + * + * Let x = (k + j/256)ln2 + r + * then exp(x) = exp((k+j/256)*ln2) * exp(r) + * = 2^k * 2^(j/256) * exp(r) + * where exp(r) is a polynomial approximation + * exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3 + * = 1 + r*(1+r*(B1+r*(B2+r*B3))) + * let + * p = r*(1+r*(B1+r*(B2+r*B3))) ! note: p = exp(r) - 1, not exp(r) + * q = 2^(j/256) (high 64 bits) + * t = 2^(j/256) (extra precision) ! both from _TBL_exp_z[] + * then + * 2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p ) + * then actual computation is 2^k * ( q + ( t + q*p ) ) + * + ********************************************************************/ + + .align 16 +TBL: + .word 0x3ff00000,0x00000000 + .word 0x00000000,0x00000000 + .word 0x3ff00b1a,0xfa5abcbf + .word 0xbc84f6b2,0xa7609f71 + .word 0x3ff0163d,0xa9fb3335 + .word 0x3c9b6129,0x9ab8cdb7 + .word 0x3ff02168,0x143b0281 + .word 0xbc82bf31,0x0fc54eb6 + .word 0x3ff02c9a,0x3e778061 + .word 0xbc719083,0x535b085d + .word 0x3ff037d4,0x2e11bbcc + .word 0x3c656811,0xeeade11a + .word 0x3ff04315,0xe86e7f85 + .word 0xbc90a31c,0x1977c96e + .word 0x3ff04e5f,0x72f654b1 + .word 0x3c84c379,0x3aa0d08c + .word 0x3ff059b0,0xd3158574 + .word 0x3c8d73e2,0xa475b465 + .word 0x3ff0650a,0x0e3c1f89 + .word 0xbc95cb7b,0x5799c396 + .word 0x3ff0706b,0x29ddf6de + .word 0xbc8c91df,0xe2b13c26 + .word 0x3ff07bd4,0x2b72a836 + .word 0x3c832334,0x54458700 + .word 0x3ff08745,0x18759bc8 + .word 0x3c6186be,0x4bb284ff + .word 0x3ff092bd,0xf66607e0 + .word 0xbc968063,0x800a3fd1 + .word 0x3ff09e3e,0xcac6f383 + .word 0x3c914878,0x18316136 + .word 0x3ff0a9c7,0x9b1f3919 + .word 0x3c85d16c,0x873d1d38 + .word 0x3ff0b558,0x6cf9890f + .word 0x3c98a62e,0x4adc610a + .word 0x3ff0c0f1,0x45e46c85 + .word 0x3c94f989,0x06d21cef + .word 
0x3ff0cc92,0x2b7247f7 + .word 0x3c901edc,0x16e24f71 + .word 0x3ff0d83b,0x23395dec + .word 0xbc9bc14d,0xe43f316a + .word 0x3ff0e3ec,0x32d3d1a2 + .word 0x3c403a17,0x27c57b53 + .word 0x3ff0efa5,0x5fdfa9c5 + .word 0xbc949db9,0xbc54021b + .word 0x3ff0fb66,0xaffed31b + .word 0xbc6b9bed,0xc44ebd7b + .word 0x3ff10730,0x28d7233e + .word 0x3c8d46eb,0x1692fdd5 + .word 0x3ff11301,0xd0125b51 + .word 0xbc96c510,0x39449b3a + .word 0x3ff11edb,0xab5e2ab6 + .word 0xbc9ca454,0xf703fb72 + .word 0x3ff12abd,0xc06c31cc + .word 0xbc51b514,0xb36ca5c7 + .word 0x3ff136a8,0x14f204ab + .word 0xbc67108f,0xba48dcf0 + .word 0x3ff1429a,0xaea92de0 + .word 0xbc932fbf,0x9af1369e + .word 0x3ff14e95,0x934f312e + .word 0xbc8b91e8,0x39bf44ab + .word 0x3ff15a98,0xc8a58e51 + .word 0x3c82406a,0xb9eeab0a + .word 0x3ff166a4,0x5471c3c2 + .word 0x3c58f23b,0x82ea1a32 + .word 0x3ff172b8,0x3c7d517b + .word 0xbc819041,0xb9d78a76 + .word 0x3ff17ed4,0x8695bbc0 + .word 0x3c709e3f,0xe2ac5a64 + .word 0x3ff18af9,0x388c8dea + .word 0xbc911023,0xd1970f6c + .word 0x3ff19726,0x58375d2f + .word 0x3c94aadd,0x85f17e08 + .word 0x3ff1a35b,0xeb6fcb75 + .word 0x3c8e5b4c,0x7b4968e4 + .word 0x3ff1af99,0xf8138a1c + .word 0x3c97bf85,0xa4b69280 + .word 0x3ff1bbe0,0x84045cd4 + .word 0xbc995386,0x352ef607 + .word 0x3ff1c82f,0x95281c6b + .word 0x3c900977,0x8010f8c9 + .word 0x3ff1d487,0x3168b9aa + .word 0x3c9e016e,0x00a2643c + .word 0x3ff1e0e7,0x5eb44027 + .word 0xbc96fdd8,0x088cb6de + .word 0x3ff1ed50,0x22fcd91d + .word 0xbc91df98,0x027bb78c + .word 0x3ff1f9c1,0x8438ce4d + .word 0xbc9bf524,0xa097af5c + .word 0x3ff2063b,0x88628cd6 + .word 0x3c8dc775,0x814a8494 + .word 0x3ff212be,0x3578a819 + .word 0x3c93592d,0x2cfcaac9 + .word 0x3ff21f49,0x917ddc96 + .word 0x3c82a97e,0x9494a5ee + .word 0x3ff22bdd,0xa27912d1 + .word 0x3c8d34fb,0x5577d69e + .word 0x3ff2387a,0x6e756238 + .word 0x3c99b07e,0xb6c70573 + .word 0x3ff2451f,0xfb82140a + .word 0x3c8acfcc,0x911ca996 + .word 0x3ff251ce,0x4fb2a63f + .word 0x3c8ac155,0xbef4f4a4 + .word 
0x3ff25e85,0x711ece75 + .word 0x3c93e1a2,0x4ac31b2c + .word 0x3ff26b45,0x65e27cdd + .word 0x3c82bd33,0x9940e9d9 + .word 0x3ff2780e,0x341ddf29 + .word 0x3c9e067c,0x05f9e76c + .word 0x3ff284df,0xe1f56381 + .word 0xbc9a4c3a,0x8c3f0d7e + .word 0x3ff291ba,0x7591bb70 + .word 0xbc82cc72,0x28401cbc + .word 0x3ff29e9d,0xf51fdee1 + .word 0x3c8612e8,0xafad1255 + .word 0x3ff2ab8a,0x66d10f13 + .word 0xbc995743,0x191690a7 + .word 0x3ff2b87f,0xd0dad990 + .word 0xbc410adc,0xd6381aa4 + .word 0x3ff2c57e,0x39771b2f + .word 0xbc950145,0xa6eb5124 + .word 0x3ff2d285,0xa6e4030b + .word 0x3c900247,0x54db41d5 + .word 0x3ff2df96,0x1f641589 + .word 0x3c9d16cf,0xfbbce198 + .word 0x3ff2ecaf,0xa93e2f56 + .word 0x3c71ca0f,0x45d52383 + .word 0x3ff2f9d2,0x4abd886b + .word 0xbc653c55,0x532bda93 + .word 0x3ff306fe,0x0a31b715 + .word 0x3c86f46a,0xd23182e4 + .word 0x3ff31432,0xedeeb2fd + .word 0x3c8959a3,0xf3f3fcd0 + .word 0x3ff32170,0xfc4cd831 + .word 0x3c8a9ce7,0x8e18047c + .word 0x3ff32eb8,0x3ba8ea32 + .word 0xbc9c45e8,0x3cb4f318 + .word 0x3ff33c08,0xb26416ff + .word 0x3c932721,0x843659a6 + .word 0x3ff34962,0x66e3fa2d + .word 0xbc835a75,0x930881a4 + .word 0x3ff356c5,0x5f929ff1 + .word 0xbc8b5cee,0x5c4e4628 + .word 0x3ff36431,0xa2de883b + .word 0xbc8c3144,0xa06cb85e + .word 0x3ff371a7,0x373aa9cb + .word 0xbc963aea,0xbf42eae2 + .word 0x3ff37f26,0x231e754a + .word 0xbc99f5ca,0x9eceb23c + .word 0x3ff38cae,0x6d05d866 + .word 0xbc9e958d,0x3c9904bd + .word 0x3ff39a40,0x1b7140ef + .word 0xbc99a9a5,0xfc8e2934 + .word 0x3ff3a7db,0x34e59ff7 + .word 0xbc75e436,0xd661f5e3 + .word 0x3ff3b57f,0xbfec6cf4 + .word 0x3c954c66,0xe26fff18 + .word 0x3ff3c32d,0xc313a8e5 + .word 0xbc9efff8,0x375d29c3 + .word 0x3ff3d0e5,0x44ede173 + .word 0x3c7fe8d0,0x8c284c71 + .word 0x3ff3dea6,0x4c123422 + .word 0x3c8ada09,0x11f09ebc + .word 0x3ff3ec70,0xdf1c5175 + .word 0xbc8af663,0x7b8c9bca + .word 0x3ff3fa45,0x04ac801c + .word 0xbc97d023,0xf956f9f3 + .word 0x3ff40822,0xc367a024 + .word 0x3c8bddf8,0xb6f4d048 + .word 
0x3ff4160a,0x21f72e2a + .word 0xbc5ef369,0x1c309278 + .word 0x3ff423fb,0x2709468a + .word 0xbc98462d,0xc0b314dd + .word 0x3ff431f5,0xd950a897 + .word 0xbc81c7dd,0xe35f7998 + .word 0x3ff43ffa,0x3f84b9d4 + .word 0x3c8880be,0x9704c002 + .word 0x3ff44e08,0x6061892d + .word 0x3c489b7a,0x04ef80d0 + .word 0x3ff45c20,0x42a7d232 + .word 0xbc686419,0x82fb1f8e + .word 0x3ff46a41,0xed1d0057 + .word 0x3c9c944b,0xd1648a76 + .word 0x3ff4786d,0x668b3237 + .word 0xbc9c20f0,0xed445733 + .word 0x3ff486a2,0xb5c13cd0 + .word 0x3c73c1a3,0xb69062f0 + .word 0x3ff494e1,0xe192aed2 + .word 0xbc83b289,0x5e499ea0 + .word 0x3ff4a32a,0xf0d7d3de + .word 0x3c99cb62,0xf3d1be56 + .word 0x3ff4b17d,0xea6db7d7 + .word 0xbc8125b8,0x7f2897f0 + .word 0x3ff4bfda,0xd5362a27 + .word 0x3c7d4397,0xafec42e2 + .word 0x3ff4ce41,0xb817c114 + .word 0x3c905e29,0x690abd5d + .word 0x3ff4dcb2,0x99fddd0d + .word 0x3c98ecdb,0xbc6a7833 + .word 0x3ff4eb2d,0x81d8abff + .word 0xbc95257d,0x2e5d7a52 + .word 0x3ff4f9b2,0x769d2ca7 + .word 0xbc94b309,0xd25957e3 + .word 0x3ff50841,0x7f4531ee + .word 0x3c7a249b,0x49b7465f + .word 0x3ff516da,0xa2cf6642 + .word 0xbc8f7685,0x69bd93ee + .word 0x3ff5257d,0xe83f4eef + .word 0xbc7c998d,0x43efef71 + .word 0x3ff5342b,0x569d4f82 + .word 0xbc807abe,0x1db13cac + .word 0x3ff542e2,0xf4f6ad27 + .word 0x3c87926d,0x192d5f7e + .word 0x3ff551a4,0xca5d920f + .word 0xbc8d689c,0xefede59a + .word 0x3ff56070,0xdde910d2 + .word 0xbc90fb6e,0x168eebf0 + .word 0x3ff56f47,0x36b527da + .word 0x3c99bb2c,0x011d93ad + .word 0x3ff57e27,0xdbe2c4cf + .word 0xbc90b98c,0x8a57b9c4 + .word 0x3ff58d12,0xd497c7fd + .word 0x3c8295e1,0x5b9a1de8 + .word 0x3ff59c08,0x27ff07cc + .word 0xbc97e2ce,0xe467e60f + .word 0x3ff5ab07,0xdd485429 + .word 0x3c96324c,0x054647ad + .word 0x3ff5ba11,0xfba87a03 + .word 0xbc9b77a1,0x4c233e1a + .word 0x3ff5c926,0x8a5946b7 + .word 0x3c3c4b1b,0x816986a2 + .word 0x3ff5d845,0x90998b93 + .word 0xbc9cd6a7,0xa8b45642 + .word 0x3ff5e76f,0x15ad2148 + .word 0x3c9ba6f9,0x3080e65e + .word 
0x3ff5f6a3,0x20dceb71 + .word 0xbc89eadd,0xe3cdcf92 + .word 0x3ff605e1,0xb976dc09 + .word 0xbc93e242,0x9b56de47 + .word 0x3ff6152a,0xe6cdf6f4 + .word 0x3c9e4b3e,0x4ab84c27 + .word 0x3ff6247e,0xb03a5585 + .word 0xbc9383c1,0x7e40b497 + .word 0x3ff633dd,0x1d1929fd + .word 0x3c984710,0xbeb964e5 + .word 0x3ff64346,0x34ccc320 + .word 0xbc8c483c,0x759d8932 + .word 0x3ff652b9,0xfebc8fb7 + .word 0xbc9ae3d5,0xc9a73e08 + .word 0x3ff66238,0x82552225 + .word 0xbc9bb609,0x87591c34 + .word 0x3ff671c1,0xc70833f6 + .word 0xbc8e8732,0x586c6134 + .word 0x3ff68155,0xd44ca973 + .word 0x3c6038ae,0x44f73e65 + .word 0x3ff690f4,0xb19e9538 + .word 0x3c8804bd,0x9aeb445c + .word 0x3ff6a09e,0x667f3bcd + .word 0xbc9bdd34,0x13b26456 + .word 0x3ff6b052,0xfa75173e + .word 0x3c7a38f5,0x2c9a9d0e + .word 0x3ff6c012,0x750bdabf + .word 0xbc728956,0x67ff0b0d + .word 0x3ff6cfdc,0xddd47645 + .word 0x3c9c7aa9,0xb6f17309 + .word 0x3ff6dfb2,0x3c651a2f + .word 0xbc6bbe3a,0x683c88ab + .word 0x3ff6ef92,0x98593ae5 + .word 0xbc90b974,0x9e1ac8b2 + .word 0x3ff6ff7d,0xf9519484 + .word 0xbc883c0f,0x25860ef6 + .word 0x3ff70f74,0x66f42e87 + .word 0x3c59d644,0xd45aa65f + .word 0x3ff71f75,0xe8ec5f74 + .word 0xbc816e47,0x86887a99 + .word 0x3ff72f82,0x86ead08a + .word 0xbc920aa0,0x2cd62c72 + .word 0x3ff73f9a,0x48a58174 + .word 0xbc90a8d9,0x6c65d53c + .word 0x3ff74fbd,0x35d7cbfd + .word 0x3c9047fd,0x618a6e1c + .word 0x3ff75feb,0x564267c9 + .word 0xbc902459,0x57316dd3 + .word 0x3ff77024,0xb1ab6e09 + .word 0x3c9b7877,0x169147f8 + .word 0x3ff78069,0x4fde5d3f + .word 0x3c9866b8,0x0a02162c + .word 0x3ff790b9,0x38ac1cf6 + .word 0x3c9349a8,0x62aadd3e + .word 0x3ff7a114,0x73eb0187 + .word 0xbc841577,0xee04992f + .word 0x3ff7b17b,0x0976cfdb + .word 0xbc9bebb5,0x8468dc88 + .word 0x3ff7c1ed,0x0130c132 + .word 0x3c9f124c,0xd1164dd6 + .word 0x3ff7d26a,0x62ff86f0 + .word 0x3c91bddb,0xfb72b8b4 + .word 0x3ff7e2f3,0x36cf4e62 + .word 0x3c705d02,0xba15797e + .word 0x3ff7f387,0x8491c491 + .word 0xbc807f11,0xcf9311ae + .word 
0x3ff80427,0x543e1a12 + .word 0xbc927c86,0x626d972b + .word 0x3ff814d2,0xadd106d9 + .word 0x3c946437,0x0d151d4d + .word 0x3ff82589,0x994cce13 + .word 0xbc9d4c1d,0xd41532d8 + .word 0x3ff8364c,0x1eb941f7 + .word 0x3c999b9a,0x31df2bd5 + .word 0x3ff8471a,0x4623c7ad + .word 0xbc88d684,0xa341cdfb + .word 0x3ff857f4,0x179f5b21 + .word 0xbc5ba748,0xf8b216d0 + .word 0x3ff868d9,0x9b4492ec + .word 0x3ca01c83,0xb21584a3 + .word 0x3ff879ca,0xd931a436 + .word 0x3c85d2d7,0xd2db47bc + .word 0x3ff88ac7,0xd98a6699 + .word 0x3c9994c2,0xf37cb53a + .word 0x3ff89bd0,0xa478580f + .word 0x3c9d5395,0x4475202a + .word 0x3ff8ace5,0x422aa0db + .word 0x3c96e9f1,0x56864b27 + .word 0x3ff8be05,0xbad61778 + .word 0x3c9ecb5e,0xfc43446e + .word 0x3ff8cf32,0x16b5448c + .word 0xbc70d55e,0x32e9e3aa + .word 0x3ff8e06a,0x5e0866d9 + .word 0xbc97114a,0x6fc9b2e6 + .word 0x3ff8f1ae,0x99157736 + .word 0x3c85cc13,0xa2e3976c + .word 0x3ff902fe,0xd0282c8a + .word 0x3c9592ca,0x85fe3fd2 + .word 0x3ff9145b,0x0b91ffc6 + .word 0xbc9dd679,0x2e582524 + .word 0x3ff925c3,0x53aa2fe2 + .word 0xbc83455f,0xa639db7f + .word 0x3ff93737,0xb0cdc5e5 + .word 0xbc675fc7,0x81b57ebc + .word 0x3ff948b8,0x2b5f98e5 + .word 0xbc8dc3d6,0x797d2d99 + .word 0x3ff95a44,0xcbc8520f + .word 0xbc764b7c,0x96a5f039 + .word 0x3ff96bdd,0x9a7670b3 + .word 0xbc5ba596,0x7f19c896 + .word 0x3ff97d82,0x9fde4e50 + .word 0xbc9d185b,0x7c1b85d0 + .word 0x3ff98f33,0xe47a22a2 + .word 0x3c7cabda,0xa24c78ed + .word 0x3ff9a0f1,0x70ca07ba + .word 0xbc9173bd,0x91cee632 + .word 0x3ff9b2bb,0x4d53fe0d + .word 0xbc9dd84e,0x4df6d518 + .word 0x3ff9c491,0x82a3f090 + .word 0x3c7c7c46,0xb071f2be + .word 0x3ff9d674,0x194bb8d5 + .word 0xbc9516be,0xa3dd8233 + .word 0x3ff9e863,0x19e32323 + .word 0x3c7824ca,0x78e64c6e + .word 0x3ff9fa5e,0x8d07f29e + .word 0xbc84a9ce,0xaaf1face + .word 0x3ffa0c66,0x7b5de565 + .word 0xbc935949,0x5d1cd533 + .word 0x3ffa1e7a,0xed8eb8bb + .word 0x3c9c6618,0xee8be70e + .word 0x3ffa309b,0xec4a2d33 + .word 0x3c96305c,0x7ddc36ab + .word 
0x3ffa42c9,0x80460ad8 + .word 0xbc9aa780,0x589fb120 + .word 0x3ffa5503,0xb23e255d + .word 0xbc9d2f6e,0xdb8d41e1 + .word 0x3ffa674a,0x8af46052 + .word 0x3c650f56,0x30670366 + .word 0x3ffa799e,0x1330b358 + .word 0x3c9bcb7e,0xcac563c6 + .word 0x3ffa8bfe,0x53c12e59 + .word 0xbc94f867,0xb2ba15a8 + .word 0x3ffa9e6b,0x5579fdbf + .word 0x3c90fac9,0x0ef7fd31 + .word 0x3ffab0e5,0x21356eba + .word 0x3c889c31,0xdae94544 + .word 0x3ffac36b,0xbfd3f37a + .word 0xbc8f9234,0xcae76cd0 + .word 0x3ffad5ff,0x3a3c2774 + .word 0x3c97ef3b,0xb6b1b8e4 + .word 0x3ffae89f,0x995ad3ad + .word 0x3c97a1cd,0x345dcc81 + .word 0x3ffafb4c,0xe622f2ff + .word 0xbc94b2fc,0x0f315ecc + .word 0x3ffb0e07,0x298db666 + .word 0xbc9bdef5,0x4c80e425 + .word 0x3ffb20ce,0x6c9a8952 + .word 0x3c94dd02,0x4a0756cc + .word 0x3ffb33a2,0xb84f15fb + .word 0xbc62805e,0x3084d708 + .word 0x3ffb4684,0x15b749b1 + .word 0xbc7f763d,0xe9df7c90 + .word 0x3ffb5972,0x8de5593a + .word 0xbc9c71df,0xbbba6de3 + .word 0x3ffb6c6e,0x29f1c52a + .word 0x3c92a8f3,0x52883f6e + .word 0x3ffb7f76,0xf2fb5e47 + .word 0xbc75584f,0x7e54ac3b + .word 0x3ffb928c,0xf22749e4 + .word 0xbc9b7216,0x54cb65c6 + .word 0x3ffba5b0,0x30a1064a + .word 0xbc9efcd3,0x0e54292e + .word 0x3ffbb8e0,0xb79a6f1f + .word 0xbc3f52d1,0xc9696205 + .word 0x3ffbcc1e,0x904bc1d2 + .word 0x3c823dd0,0x7a2d9e84 + .word 0x3ffbdf69,0xc3f3a207 + .word 0xbc3c2623,0x60ea5b52 + .word 0x3ffbf2c2,0x5bd71e09 + .word 0xbc9efdca,0x3f6b9c73 + .word 0x3ffc0628,0x6141b33d + .word 0xbc8d8a5a,0xa1fbca34 + .word 0x3ffc199b,0xdd85529c + .word 0x3c811065,0x895048dd + .word 0x3ffc2d1c,0xd9fa652c + .word 0xbc96e516,0x17c8a5d7 + .word 0x3ffc40ab,0x5fffd07a + .word 0x3c9b4537,0xe083c60a + .word 0x3ffc5447,0x78fafb22 + .word 0x3c912f07,0x2493b5af + .word 0x3ffc67f1,0x2e57d14b + .word 0x3c92884d,0xff483cad + .word 0x3ffc7ba8,0x8988c933 + .word 0xbc8e76bb,0xbe255559 + .word 0x3ffc8f6d,0x9406e7b5 + .word 0x3c71acbc,0x48805c44 + .word 0x3ffca340,0x5751c4db + .word 0xbc87f2be,0xd10d08f4 + .word 
0x3ffcb720,0xdcef9069 + .word 0x3c7503cb,0xd1e949db + .word 0x3ffccb0f,0x2e6d1675 + .word 0xbc7d220f,0x86009093 + .word 0x3ffcdf0b,0x555dc3fa + .word 0xbc8dd83b,0x53829d72 + .word 0x3ffcf315,0x5b5bab74 + .word 0xbc9a08e9,0xb86dff57 + .word 0x3ffd072d,0x4a07897c + .word 0xbc9cbc37,0x43797a9c + .word 0x3ffd1b53,0x2b08c968 + .word 0x3c955636,0x219a36ee + .word 0x3ffd2f87,0x080d89f2 + .word 0xbc9d487b,0x719d8578 + .word 0x3ffd43c8,0xeacaa1d6 + .word 0x3c93db53,0xbf5a1614 + .word 0x3ffd5818,0xdcfba487 + .word 0x3c82ed02,0xd75b3706 + .word 0x3ffd6c76,0xe862e6d3 + .word 0x3c5fe87a,0x4a8165a0 + .word 0x3ffd80e3,0x16c98398 + .word 0xbc911ec1,0x8beddfe8 + .word 0x3ffd955d,0x71ff6075 + .word 0x3c9a052d,0xbb9af6be + .word 0x3ffda9e6,0x03db3285 + .word 0x3c9c2300,0x696db532 + .word 0x3ffdbe7c,0xd63a8315 + .word 0xbc9b76f1,0x926b8be4 + .word 0x3ffdd321,0xf301b460 + .word 0x3c92da57,0x78f018c2 + .word 0x3ffde7d5,0x641c0658 + .word 0xbc9ca552,0x8e79ba8f + .word 0x3ffdfc97,0x337b9b5f + .word 0xbc91a5cd,0x4f184b5c + .word 0x3ffe1167,0x6b197d17 + .word 0xbc72b529,0xbd5c7f44 + .word 0x3ffe2646,0x14f5a129 + .word 0xbc97b627,0x817a1496 + .word 0x3ffe3b33,0x3b16ee12 + .word 0xbc99f4a4,0x31fdc68a + .word 0x3ffe502e,0xe78b3ff6 + .word 0x3c839e89,0x80a9cc8f + .word 0x3ffe6539,0x24676d76 + .word 0xbc863ff8,0x7522b734 + .word 0x3ffe7a51,0xfbc74c83 + .word 0x3c92d522,0xca0c8de2 + .word 0x3ffe8f79,0x77cdb740 + .word 0xbc910894,0x80b054b1 + .word 0x3ffea4af,0xa2a490da + .word 0xbc9e9c23,0x179c2893 + .word 0x3ffeb9f4,0x867cca6e + .word 0x3c94832f,0x2293e4f2 + .word 0x3ffecf48,0x2d8e67f1 + .word 0xbc9c93f3,0xb411ad8c + .word 0x3ffee4aa,0xa2188510 + .word 0x3c91c68d,0xa487568d + .word 0x3ffefa1b,0xee615a27 + .word 0x3c9dc7f4,0x86a4b6b0 + .word 0x3fff0f9c,0x1cb6412a + .word 0xbc932200,0x65181d45 + .word 0x3fff252b,0x376bba97 + .word 0x3c93a1a5,0xbf0d8e43 + .word 0x3fff3ac9,0x48dd7274 + .word 0xbc795a5a,0x3ed837de + .word 0x3fff5076,0x5b6e4540 + .word 0x3c99d3e1,0x2dd8a18b + .word 
0x3fff6632,0x798844f8 + .word 0x3c9fa37b,0x3539343e + .word 0x3fff7bfd,0xad9cbe14 + .word 0xbc9dbb12,0xd006350a + .word 0x3fff91d8,0x02243c89 + .word 0xbc612ea8,0xa779f689 + .word 0x3fffa7c1,0x819e90d8 + .word 0x3c874853,0xf3a5931e + .word 0x3fffbdba,0x3692d514 + .word 0xbc796773,0x15098eb6 + .word 0x3fffd3c2,0x2b8f71f1 + .word 0x3c62eb74,0x966579e7 + .word 0x3fffe9d9,0x6b2a23d9 + .word 0x3c74a603,0x7442fde3 + + .align 16 +constants: + .word 0x3ef00000,0x00000000 + .word 0x40862e42,0xfefa39ef + .word 0x01000000,0x00000000 + .word 0x7f000000,0x00000000 + .word 0x80000000,0x00000000 + .word 0x43f00000,0x00000000 ! scaling 2^12 two96 + .word 0xfff00000,0x00000000 + .word 0x3ff00000,0x00000000 + .word 0x3fdfffff,0xfffffff6 + .word 0x3fc55555,0x721a1d14 + .word 0x3fa55555,0x6e0896af + .word 0x41371547,0x652b82fe ! scaling 2^12 invln2_256 + .word 0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h + .word 0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l + + ! base set w/o scaling + ! .word 0x43300000,0x00000000 ! scaling two96 + ! .word 0x40771547,0x652b82fe ! scaling invln2_256 + ! .word 0x3f662e42,0xfee00000 ! scaling ln2_256h + ! .word 0x3d6a39ef,0x35793c76 ! scaling ln2_256l + +#define ox3ef 0x0 +#define thresh 0x8 +#define tiny 0x10 +#define huge 0x18 +#define signbit 0x20 +#define two96 0x28 +#define neginf 0x30 +#define one 0x38 +#define B1OFF 0x40 +#define B2OFF 0x48 +#define B3OFF 0x50 +#define invln2_256 0x58 +#define ln2_256h 0x60 +#define ln2_256l 0x68 + +! local storage indices + +#define m2 STACK_BIAS-0x4 +#define m1 STACK_BIAS-0x8 +#define m0 STACK_BIAS-0xc +#define jnk STACK_BIAS-0x20 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! g1 TBL + +! l0 m0 +! l1 m1 +! l2 m2 +! l3 j0,oy0 +! l4 j1,oy1 +! l5 j2,oy2 +! l6 0x3e300000 +! l7 0x40862e41 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 scratch +! o4 scratch +! o5 0x40874910 +! o7 0x7ff00000 + +! 
f0 x0 +! f2 +! f4 +! f6 y0 (result) +! f8 +! f10 x1 +! f12 +! f14 +! f16 y1 (result) +! f18 +! f20 x2 +! f22 +! f24 +! f26 y2 (result) +! f28 +! f30 +! f32 +! f34 +! f36 0x3ef0... +! f38 thresh +! f40 tiny +! f42 huge +! f44 signbit +! f46 two96 +! f48 neginf +! f50 one +! f52 B1 +! f54 B2 +! f56 B3 +! f58 invln2_256 +! f60 ln2_256h +! f62 ln2_256l +#define BOUNDRY %f36 +#define THRESH %f38 +#define TINY %f40 +#define HUGE %f42 +#define SIGNBIT %f44 +#define TWO96 %f46 +#define NEGINF %f48 +#define ONE %f50 +#define B1 %f52 +#define B2 %f54 +#define B3 %f56 +#define INVLN2_256 %f58 +#define LN2_256H %f60 +#define LN2_256L %f62 + + ENTRY(__vexp) + save %sp,-SA(MINFRAME)-tmps,%sp ! vector exp: y[i] = exp(x[i]), three elements per loop pass + PIC_SETUP(l7) + PIC_SET(l7,constants,o3) + PIC_SET(l7,TBL,o0) + mov %o0,%g1 ! g1 = TBL + wr %g0,0x82,%asi ! set %asi for non-faulting loads + + sethi %hi(0x80000000),%i5 ! sign bit of the high word + sethi %hi(0x3e300000),%l6 ! below this |hx|, x is treated as tiny + sethi %hi(0x40862e41),%l7 + or %l7,%lo(0x40862e41),%l7 ! above this |hx|, x goes to the range checks + sethi %hi(0x40874910),%o5 + or %o5,%lo(0x40874910),%o5 ! above this |hx|, x is huge, inf or nan + sethi %hi(0x7ff00000),%o7 ! |hx| at or above this is inf or nan + ldd [%o3+ox3ef],BOUNDRY + ldd [%o3+thresh],THRESH + ldd [%o3+tiny],TINY + ldd [%o3+huge],HUGE + ldd [%o3+signbit],SIGNBIT + ldd [%o3+two96],TWO96 + ldd [%o3+neginf],NEGINF + ldd [%o3+one],ONE + ldd [%o3+B1OFF],B1 + ldd [%o3+B2OFF],B2 + ldd [%o3+B3OFF],B3 + ldd [%o3+invln2_256],INVLN2_256 + ldd [%o3+ln2_256h],LN2_256H + ldd [%o3+ln2_256l],LN2_256L + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,jnk,%l3 ! precondition loop: first stores of previous results go to scratch + add %fp,jnk,%l4 + add %fp,jnk,%l5 + ld [%i1],%l0 ! hx = *x + ld [%i1],%f0 + ld [%i1+4],%f1 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + ba .loop0 + add %i1,%i2,%i1 ! x += stridex + + .align 16 +! -- 16 byte aligned +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%l6,%o3 ! negative if hx < 0x3e300000 + sub %l7,%l0,%o4 ! negative if hx > 0x40862e41 + fand %f0,SIGNBIT,%f2 ! get sign bit + + lda [%i1]%asi,%f10 + orcc %o3,%o4,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! if hx < 0x3e300000 or > 0x40862e41 + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! 
y += stridey + ble,pn %icc,.endloop1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + for %f2,TWO96,%f2 ! TWO96 with sign of x; added then subtracted to strip least sig bits + fmuld %f0,INVLN2_256,%f4 ! x/ (ln2/256) , creating k + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,%l6,%o3 + sub %l7,%l1,%o4 + fand %f10,SIGNBIT,%f12 + + lda [%i1]%asi,%f20 + orcc %o3,%o4,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! if hx < 0x3e300000 or > 0x40862e41 + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + for %f12,TWO96,%f12 + fmuld %f10,INVLN2_256,%f14 + +.loop2: + sub %l2,%l6,%o3 + sub %l7,%l2,%o4 + fand %f20,SIGNBIT,%f22 + fmuld %f20,INVLN2_256,%f24 ! okay to put this here; for alignment + + orcc %o3,%o4,%g0 + bl,pn %icc,.range2 ! if hx < 0x3e300000 or > 0x40862e41 +! delay slot + for %f22,TWO96,%f22 + faddd %f4,%f2,%f4 ! creating k+j/256, sra to zero bits + +.cont: + faddd %f14,%f12,%f14 + mov %i3,%o2 ! py2 = y + + faddd %f24,%f22,%f24 + add %i3,%i4,%i3 ! y += stridey + + ! BUBBLE USIII + + fsubd %f4,%f2,%f8 ! creating k+j/256: sll + st %f6,[%l3] ! store previous loop y0 (high word) + + fsubd %f14,%f12,%f18 + st %f7,[%l3+4] ! store previous loop y0 (low word) + + fsubd %f24,%f22,%f28 + st %f16,[%l4] ! store previous loop y1 + + ! BUBBLE USIII + + fmuld %f8,LN2_256H,%f2 ! closest LN2_256 to x + st %f17,[%l4+4] + + fmuld %f18,LN2_256H,%f12 + st %f26,[%l5] ! store previous loop y2 + + fmuld %f28,LN2_256H,%f22 + st %f27,[%l5+4] + + ! BUBBLE USIII + + fsubd %f0,%f2,%f0 ! r = x - n*LN2_256H, n = rounded multiple held in %f8 + fmuld %f8,LN2_256L,%f4 ! closest LN2_256 to x , added prec + + fsubd %f10,%f12,%f10 + fmuld %f18,LN2_256L,%f14 + + fsubd %f20,%f22,%f20 + fmuld %f28,LN2_256L,%f24 + + ! BUBBLE USIII + + fsubd %f0,%f4,%f0 ! r -= n*LN2_256L + + fsubd %f10,%f14,%f10 + + fsubd %f20,%f24,%f20 + +!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here + + ! Alternate polynomial grouping allowing non-sequential calc of p + ! 
OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) ) + ! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3 ) ] + ! + ! let SLi Ri SRi be accumulators + + fmuld %f0,B3,%f2 ! SR1 = r1 * B3 + fdtoi %f8,%f8 ! convert k+j/256 to int + st %f8,[%fp+m0] ! store k, to shift return/use + + fmuld %f10,B3,%f12 ! SR2 = r2 * B3 + fdtoi %f18,%f18 ! convert k+j/256 to int + st %f18,[%fp+m1] ! store k, to shift return/use + + fmuld %f20,B3,%f22 ! SR3 = r3 * B3 + fdtoi %f28,%f28 ! convert k+j/256 to int + st %f28,[%fp+m2] ! store k, to shift return/use + + fmuld %f0,%f0,%f4 ! R1 = r1 * r1 + + fmuld %f10,%f10,%f14 ! R2 = r2 * r2 + faddd %f2,B2,%f2 ! SR1 += B2 + + fmuld %f20,%f20,%f24 ! R3 = r3 * r3 + faddd %f12,B2,%f12 ! SR2 += B2 + + faddd %f22,B2,%f22 ! SR3 += B2 + fmuld %f0,B1,%f6 ! SL1 = r1 * B1 + + fmuld %f10,B1,%f32 ! SL2 = r2 * B1 + fand %f8,NEGINF,%f8 ! keep only the k bits, aligned with a double exponent field + ! best here for RAW BYPASS + ld [%fp+m0],%l0 ! get nonshifted k into intreg + + fmuld %f20,B1,%f34 ! SL3 = r3 * B1 + fand %f18,NEGINF,%f18 + ld [%fp+m1],%l1 ! get nonshifted k into intreg + + fmuld %f4,%f2,%f4 ! R1 = R1 * SR1 + fand %f28,NEGINF,%f28 + ld [%fp+m2],%l2 ! get nonshifted k into intreg + + fmuld %f14,%f12,%f14 ! R2 = R2 * SR2 + faddd %f6,ONE,%f6 ! SL1 += 1 + + fmuld %f24,%f22,%f24 ! R3 = R3 * SR3 + faddd %f32,ONE,%f32 ! SL2 += 1 + sra %l0,8,%l3 ! shift j field down to a TBL byte offset (masked with 0xff0 below) + + faddd %f34,ONE,%f34 ! SL3 += 1 + sra %l1,8,%l4 ! shift j field down to a TBL byte offset (masked with 0xff0 below) + sra %l2,8,%l5 ! shift j field down to a TBL byte offset (masked with 0xff0 below) + + ! BUBBLE in USIII + and %l3,0xff0,%l3 + and %l4,0xff0,%l4 + + + + faddd %f6,%f4,%f6 ! R1 = SL1 + R1 + ldd [%g1+%l3],%f4 ! tbl[j] high part (q) + add %l3,8,%l3 ! advance to tbl[j] low part + and %l5,0xff0,%l5 + + + faddd %f32,%f14,%f32 ! R2 = SL2 + R2 + ldd [%g1+%l4],%f14 ! tbl[j] high part (q) + add %l4,8,%l4 ! advance to tbl[j] low part + sra %l0,20,%o3 ! k + + faddd %f34,%f24,%f34 ! R3 = SL3 + R3 + ldd [%g1+%l5],%f24 ! tbl[j] high part (q) + add %l5,8,%l5 ! advance to tbl[j] low part + sra %l1,20,%l1 ! k + + ! BUBBLE in USIII + ldd [%g1+%l4],%f16 ! tbl[j] low part (t) + add %o3,1021,%o3 ! k+1021; negative selects .small + + fmuld %f0,%f6,%f0 ! 
p1 = r1 * R1 + ldd [%g1+%l3],%f6 ! tbl[j] low part (t) + add %l1,1021,%l1 ! k+1021; negative selects .small + sra %l2,20,%l2 ! k + + fmuld %f10,%f32,%f10 ! p2 = r2 * R2 + ldd [%g1+%l5],%f26 ! tbl[j] low part (t) + add %l2,1021,%l2 ! k+1021; negative selects .small + + fmuld %f20,%f34,%f20 ! p3 = r3 * R3 + + + + + +!!!!!!!!!!!!!!!!!!! poly-reorder - ends here + + fmuld %f0,%f4,%f0 ! start exp(x) : q*p + mov %o0,%l3 + + fmuld %f10,%f14,%f10 + mov %o1,%l4 + + fmuld %f20,%f24,%f20 + mov %o2,%l5 + + faddd %f0,%f6,%f6 ! cont exp(x) : t + q*p + lda [%i1]%asi,%l0 ! preload next argument + + faddd %f10,%f16,%f16 + lda [%i1]%asi,%f0 + + faddd %f20,%f26,%f26 + lda [%i1+4]%asi,%f1 + + faddd %f6,%f4,%f6 ! cont exp(x) : q + (t + q*p) + add %i1,%i2,%i1 ! x += stridex + + faddd %f16,%f14,%f16 + andn %l0,%i5,%l0 + or %o3,%l1,%o4 ! sign set if any k+1021 is negative + +! -- 16 byte aligned + orcc %o4,%l2,%o4 + bl,pn %icc,.small +! delay slot + faddd %f26,%f24,%f26 + + fpadd32 %f6,%f8,%f6 ! done exp(x) : apply 2^k + fpadd32 %f16,%f18,%f16 + + + addcc %i0,-1,%i0 + bg,pn %icc,.loop0 +! delay slot + fpadd32 %f26,%f28,%f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + + .align 16 +.small: + tst %o3 ! element 0: was k+1021 negative? + bge,pt %icc,1f +! delay slot + fpadd32 %f6,%f8,%f6 + fpadd32 %f6,BOUNDRY,%f6 ! raise the exponent field by 0x3ef + fmuld %f6,TINY,%f6 ! then scale back down with TINY +1: + tst %l1 + bge,pt %icc,1f +! delay slot + fpadd32 %f16,%f18,%f16 + fpadd32 %f16,BOUNDRY,%f16 + fmuld %f16,TINY,%f16 +1: + tst %l2 + bge,pt %icc,1f +! delay slot + fpadd32 %f26,%f28,%f26 + fpadd32 %f26,BOUNDRY,%f26 + fmuld %f26,TINY,%f26 +1: + addcc %i0,-1,%i0 + bg,pn %icc,.loop0 +! delay slot + nop + ba,pt %icc,.endloop0 +! 
delay slot + nop + + +.endloop2: + for %f12,TWO96,%f12 ! finish pending element 1 (x1 in %f10, result to py1) + fmuld %f10,INVLN2_256,%f14 + faddd %f14,%f12,%f14 + fsubd %f14,%f12,%f18 + fmuld %f18,LN2_256H,%f12 + fsubd %f10,%f12,%f10 + fmuld %f18,LN2_256L,%f14 + fsubd %f10,%f14,%f10 + fmuld %f10,B3,%f12 + fdtoi %f18,%f18 + st %f18,[%fp+m1] + fmuld %f10,%f10,%f14 + faddd %f12,B2,%f12 + fmuld %f10,B1,%f32 + fand %f18,NEGINF,%f18 + ld [%fp+m1],%l1 + fmuld %f14,%f12,%f14 + faddd %f32,ONE,%f32 + sra %l1,8,%o4 + and %o4,0xff0,%o4 + faddd %f32,%f14,%f32 + ldd [%g1+%o4],%f14 + add %o4,8,%o4 + sra %l1,20,%l1 + ldd [%g1+%o4],%f30 + addcc %l1,1021,%l1 + fmuld %f10,%f32,%f10 + fmuld %f10,%f14,%f10 + faddd %f10,%f30,%f30 + faddd %f30,%f14,%f30 + bge,pt %icc,1f +! delay slot + fpadd32 %f30,%f18,%f30 + fpadd32 %f30,BOUNDRY,%f30 + fmuld %f30,TINY,%f30 +1: + st %f30,[%o1] + st %f31,[%o1+4] + +.endloop1: + for %f2,TWO96,%f2 ! finish pending element 0 (x0 in %f0, result to py0) + fmuld %f0,INVLN2_256,%f4 + faddd %f4,%f2,%f4 + fsubd %f4,%f2,%f8 + fmuld %f8,LN2_256H,%f2 + fsubd %f0,%f2,%f0 + fmuld %f8,LN2_256L,%f4 + fsubd %f0,%f4,%f0 + fmuld %f0,B3,%f2 + fdtoi %f8,%f8 + st %f8,[%fp+m0] + fmuld %f0,%f0,%f4 + faddd %f2,B2,%f2 + fmuld %f0,B1,%f32 + fand %f8,NEGINF,%f8 + ld [%fp+m0],%l0 + fmuld %f4,%f2,%f4 + faddd %f32,ONE,%f32 + sra %l0,8,%o4 + and %o4,0xff0,%o4 + faddd %f32,%f4,%f32 + ldd [%g1+%o4],%f4 + add %o4,8,%o4 + sra %l0,20,%o3 + ldd [%g1+%o4],%f30 + addcc %o3,1021,%o3 + fmuld %f0,%f32,%f0 + fmuld %f0,%f4,%f0 + faddd %f0,%f30,%f30 + faddd %f30,%f4,%f30 + bge,pt %icc,1f +! delay slot + fpadd32 %f30,%f8,%f30 + fpadd32 %f30,BOUNDRY,%f30 + fmuld %f30,TINY,%f30 +1: + st %f30,[%o0] + st %f31,[%o0+4] + +.endloop0: + st %f6,[%l3] ! flush the results held from the last full pass (scratch if none) + st %f7,[%l3+4] + st %f16,[%l4] + st %f17,[%l4+4] + st %f26,[%l5] + st %f27,[%l5+4] + ret + restore + + +.range0: + cmp %l0,%l6 + bl,a,pt %icc,3f ! if x is tiny +! delay slot, annulled if branch not taken + faddd %f0,ONE,%f4 ! result = x + 1 + + cmp %l0,%o5 + bg,pt %icc,1f ! if x is huge, inf, nan +! delay slot + nop + + fcmpd %fcc0,%f0,THRESH + fbg,a,pt %fcc0,3f ! 
if x is huge and positive +! delay slot, annulled if branch not taken + fmuld HUGE,HUGE,%f4 + +! x is near the extremes but within range; return to the loop + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop1 +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + for %f2,TWO96,%f2 + ba,pt %icc,.loop1 +! delay slot + fmuld %f0,INVLN2_256,%f4 + +1: + cmp %l0,%o7 + bl,pn %icc,2f ! if x is finite +! delay slot + nop + fzero %f4 + fcmpd %fcc0,%f0,NEGINF + fmovdne %fcc0,%f0,%f4 ! x unless x == -inf + ba,pt %icc,3f + fmuld %f4,%f4,%f4 ! x*x or zero*zero +2: + fmovd HUGE,%f4 + fcmpd %fcc0,%f0,ONE + fmovdl %fcc0,TINY,%f4 ! x < 1 here means large negative x: use tiny + fmuld %f4,%f4,%f4 ! huge*huge or tiny*tiny +3: + st %f4,[%o0] + andn %l1,%i5,%l0 ! preloaded next argument becomes element 0 + add %i1,%i2,%i1 ! x += stridex + fmovd %f10,%f0 + st %f5,[%o0+4] + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + ba,pt %icc,.endloop0 +! delay slot + nop + + +.range1: + cmp %l1,%l6 + bl,a,pt %icc,3f ! if x is tiny +! delay slot, annulled if branch not taken + faddd %f10,ONE,%f14 ! result = x + 1 + + cmp %l1,%o5 + bg,pt %icc,1f ! if x is huge, inf, nan +! delay slot + nop + + fcmpd %fcc0,%f10,THRESH + fbg,a,pt %fcc0,3f ! if x is huge and positive +! delay slot, annulled if branch not taken + fmuld HUGE,HUGE,%f14 + +! x is near the extremes but within range; return to the loop + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + for %f12,TWO96,%f12 + ba,pt %icc,.loop2 +! delay slot + fmuld %f10,INVLN2_256,%f14 + +1: + cmp %l1,%o7 + bl,pn %icc,2f ! if x is finite +! delay slot + nop + fzero %f14 + fcmpd %fcc0,%f10,NEGINF + fmovdne %fcc0,%f10,%f14 ! x unless x == -inf + ba,pt %icc,3f + fmuld %f14,%f14,%f14 ! x*x or zero*zero +2: + fmovd HUGE,%f14 + fcmpd %fcc0,%f10,ONE + fmovdl %fcc0,TINY,%f14 ! x < 1 here means large negative x: use tiny + fmuld %f14,%f14,%f14 ! huge*huge or tiny*tiny +3: + st %f14,[%o1] + andn %l2,%i5,%l1 ! preloaded next argument becomes element 1 + add %i1,%i2,%i1 ! 
x += stridex + fmovd %f20,%f10 + st %f15,[%o1+4] + addcc %i0,-1,%i0 + bg,pt %icc,.loop1 +! delay slot + add %i3,%i4,%i3 ! y += stridey + ba,pt %icc,.endloop1 +! delay slot + nop + + +.range2: + cmp %l2,%l6 + bl,a,pt %icc,3f ! if x is tiny +! delay slot, annulled if branch not taken + faddd %f20,ONE,%f24 ! result = x + 1 + + cmp %l2,%o5 + bg,pt %icc,1f ! if x is huge, inf, nan +! delay slot + nop + + fcmpd %fcc0,%f20,THRESH + fbg,a,pt %fcc0,3f ! if x is huge and positive +! delay slot, annulled if branch not taken + fmuld HUGE,HUGE,%f24 + +! x is near the extremes but within range; return to the loop + ba,pt %icc,.cont +! delay slot + faddd %f4,%f2,%f4 + +1: + cmp %l2,%o7 + bl,pn %icc,2f ! if x is finite +! delay slot + nop + fzero %f24 + fcmpd %fcc0,%f20,NEGINF + fmovdne %fcc0,%f20,%f24 ! x unless x == -inf + ba,pt %icc,3f + fmuld %f24,%f24,%f24 ! x*x or zero*zero +2: + fmovd HUGE,%f24 + fcmpd %fcc0,%f20,ONE + fmovdl %fcc0,TINY,%f24 ! x < 1 here means large negative x: use tiny + fmuld %f24,%f24,%f24 ! huge*huge or tiny*tiny +3: + st %f24,[%i3] ! py2 is not set yet; y still points at this element + st %f25,[%i3+4] + lda [%i1]%asi,%l2 ! preload next argument + lda [%i1]%asi,%f20 + lda [%i1+4]%asi,%f21 + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + bg,pt %icc,.loop2 +! delay slot + add %i3,%i4,%i3 ! y += stridey + ba,pt %icc,.endloop2 +! delay slot + nop + + SET_SIZE(__vexp) + diff --git a/usr/src/libm/src/mvec/vis/__vexpf.S b/usr/src/libm/src/mvec/vis/__vexpf.S new file mode 100644 index 0000000..b533e3b --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vexpf.S @@ -0,0 +1,2113 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vexpf.S 1.7 06/01/23 SMI" + + .file "__vexpf.S" + +#include "libm.h" + + RO_DATA + .align 64 +!! 2^(i/256) - ((i & 0xf0) << 44), i = [0, 255] +.CONST_TBL: + .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf + .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 + .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc + .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 + .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 + .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 + .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 + .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 + .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85 + .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec + .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5 + .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e + .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6 + .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab + .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e + .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2 + .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0 + .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f + .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c + .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b + .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027 + .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d + .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819 + .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1 + .word 
0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a + .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75 + .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29 + .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70 + .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13 + .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f + .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589 + .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b + .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd + .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32 + .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d + .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b + .word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a + .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef + .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4 + .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173 + .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175 + .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024 + .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a + .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4 + .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232 + .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237 + .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2 + .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7 + .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114 + .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff + .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee + .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef + .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27 + .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2 + .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf + .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc + .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03 + .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93 + .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71 + .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4 + .word 0x3fef247e, 
0xb03a5585, 0x3fef33dd, 0x1d1929fd + .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7 + .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6 + .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538 + .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e + .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645 + .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5 + .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87 + .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a + .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd + .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09 + .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6 + .word 0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb + .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0 + .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491 + .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9 + .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7 + .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21 + .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436 + .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f + .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778 + .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9 + .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a + .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2 + .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5 + .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3 + .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2 + .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d + .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5 + .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e + .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb + .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8 + .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052 + .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59 + .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba + .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774 + .word 0x3feee89f, 0x995ad3ad, 
0x3feefb4c, 0xe622f2ff + .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952 + .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1 + .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a + .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4 + .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f + .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207 + .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d + .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c + .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22 + .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933 + .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db + .word 0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675 + .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74 + .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968 + .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6 + .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3 + .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075 + .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315 + .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658 + .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17 + .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12 + .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76 + .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740 + .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e + .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510 + .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a + .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274 + .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8 + .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89 + .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514 + .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9 + + .word 0x7149f2ca, 0x0da24260 ! 1.0e30f, 1.0e-30f + .word 0x3ecebfbe, 0x9d182250 ! KA2 = 3.66556671660783833261e-06 + .word 0x3f662e43, 0xe2528362 ! KA1 = 2.70760782821392980564e-03 + .word 0x40771547, 0x652b82fe ! 
K256ONLN2 = 369.3299304675746271 + .word 0x42aeac4f, 0x42b17218 ! THRESHOLD = 87.3365402f + ! THRESHOLDL = 88.7228394f +! local storage indices + +#define tmp0 STACK_BIAS-32 +#define tmp1 STACK_BIAS-28 +#define tmp2 STACK_BIAS-24 +#define tmp3 STACK_BIAS-20 +#define tmp4 STACK_BIAS-16 +#define tmp5 STACK_BIAS-12 +#define tmp6 STACK_BIAS-8 +#define tmp7 STACK_BIAS-4 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +#define I5_THRESHOLD %i5 +#define G1_CONST_TBL %g5 +#define G5_CONST %g1 + +#define F62_K256ONLN2 %f62 +#define F60_KA2 %f60 +#define F58_KA1 %f58 + +#define THRESHOLDL %f0 + +! register use +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey + +! i5 0x42aeac4f (87.3365402f) + +! g5 CONST_TBL +! g1 0x7fffffff + +! f62 K256ONLN2 = 369.3299304675746271 +! f60 KA2 = 3.66556671660783833261e-06 +! f58 KA1 = 2.70760782821392980564e-03 + + +! !!!!! Algorithm !!!!! +! +! double y, dtmp, drez; +! int k, sign, Xi; +! float X, Y; +! int THRESHOLD = 0x42aeac4f; /* 87.3365402f */ +! float THRESHOLDL = 88.7228394f; +! double KA2 = 3.66556671660783833261e-06; +! double KA1 = 2.70760782821392980564e-03; +! double K256ONLN2 = 369.3299304675746271; +! char *CONST_TBL; +! +! X = px[0]; +! Xi = ((int*)px)[0]; +! ax = Xi & 0x7fffffff; +! +! if (ax > THRESHOLD) { +! sign = ((unsigned)Xi >> 29) & 4; +! if (ax >= 0x7f800000) { /* Inf or NaN */ +! if (ax > 0x7f800000) { /* NaN */ +! Y = X * X; /* NaN -> NaN */ +! return Y; +! } +! Y = (sign) ? zero : X; /* +Inf -> +Inf , -Inf -> zero */ +! return Y; +! } +! +! if ( X < 0.0f || X >= THRESHOLDL ) { +! Y = ((float*)(CONST_TBL + 2048 + sign))[0]; +! /* X >= THRESHOLDL : Y = 1.0e+30f */ +! /* X < -THRESHOLD : Y = 1.0e-30f */ +! Y = Y * Y; +! /* X >= THRESHOLDL : +Inf + overflow */ +! /* X < -THRESHOLD : +0 + underflow */ +! return Y; +! } +! } +! vis_write_gsr(12 << 3); +! y = (double) X; +! y = K256ONLN2 * y; +! k = (int) y; +! dtmp = (double) k; +! y -= dtmp; +! dtmp = y * KA2; +! dtmp += KA1; +!
y *= dtmp; +! y = (y * KA2 + KA1) * y; +! ((int*)&drez)[0] = k; +! ((int*)&drez)[1] = 0; +! ((float*)&drez)[0] = vis_fpackfix(drez); +! k &= 255; +! k <<= 3; +! dtmp = ((double*)(CONST_TBL + k))[0]; +! drez = vis_fpadd32(drez,dtmp); +! y *= drez; +! y += drez; +! Y = (float) y; +! +! +! fstod %f16,%f40 ! y = (double) X +! fmuld F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2 +! fdtoi %f40,%f16 ! k = (int) y +! st %f16,[%fp+tmp0] ! store k +! fitod %f16,%f34 ! dtmp = (double) k +! fpackfix %f16,%f16 ! ((float*)&drez)[0] = vis_fpackfix(drez) +! fsubd %f40,%f34,%f40 ! y -= dtmp +! fmuld F60_KA2,%f40,%f34 ! dtmp = y * KA2 +! faddd F58_KA1,%f34,%f34 ! dtmp += KA1 +! ld [%fp+tmp0],%o0 ! load k +! fmuld %f34,%f40,%f40 ! y *= dtmp +! and %o0,255,%o0 ! k &= 255 +! sll %o0,3,%o0 ! k <<= 3 +! ldd [G1_CONST_TBL+%o0],%f34 ! dtmp = ((double*)(CONST_TBL + k))[0] +! fpadd32 %f16,%f34,%f34 ! drez = vis_fpadd32(drez,dtmp) +! fmuld %f34,%f40,%f40 ! y *= drez +! faddd %f34,%f40,%f40 ! y += drez +! fdtos %f40,%f26 ! (float) y +!-------------------------------------------------------------------- + + ENTRY(__vexpf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + + wr %g0,0x82,%asi ! set %asi for non-faulting loads + wr %g0,0x60,%gsr + + sll %i2,2,%i2 + sll %i4,2,%i4 + + ldd [G1_CONST_TBL+2056],F60_KA2 + sethi %hi(0x7ffffc00),G5_CONST + ldd [G1_CONST_TBL+2064],F58_KA1 + add G5_CONST,1023,G5_CONST + ldd [G1_CONST_TBL+2072],F62_K256ONLN2 + ld [G1_CONST_TBL+2080],I5_THRESHOLD + ld [G1_CONST_TBL+2084],THRESHOLDL + + subcc %i0,8,%i0 + bneg,pn %icc,.tail + fzeros %f3 + +.main_loop_preload: + +! preload 8 elements and get absolute values + ld [%i1],%l0 ! (0) Xi = ((int*)px)[0] + fzeros %f5 + ld [%i1],%f16 ! (0) X = px[0] + fzeros %f7 + add %i1,%i2,%o5 ! px += stridex + ld [%o5],%l1 ! (1) Xi = ((int*)px)[0] + and %l0,G5_CONST,%l0 ! (0) ax = Xi & 0x7fffffff + fzeros %f9 + ld [%o5],%f2 ! (1) X = px[0] + fzeros %f11 + add %o5,%i2,%i1 ! px += stridex + ld [%i1],%l2 ! 
(2) Xi = ((int*)px)[0] + and %l1,G5_CONST,%l1 ! (1) ax = Xi & 0x7fffffff + fzeros %f13 + ld [%i1],%f4 ! (2) X = px[0] + fzeros %f15 + add %i1,%i2,%o5 ! px += stridex + ld [%o5],%l3 ! (3) Xi = ((int*)px)[0] + and %l2,G5_CONST,%l2 ! (2) ax = Xi & 0x7fffffff + fzeros %f17 + ld [%o5],%f6 ! (3) X = px[0] + add %o5,%i2,%o0 ! px += stridex + ld [%o0],%l4 ! (4) Xi = ((int*)px)[0] + and %l3,G5_CONST,%l3 ! (3) ax = Xi & 0x7fffffff + add %o0,%i2,%o1 ! px += stridex + ld [%o1],%l5 ! (5) Xi = ((int*)px)[0] + add %o1,%i2,%o2 ! px += stridex + ld [%o2],%l6 ! (6) Xi = ((int*)px)[0] + and %l4,G5_CONST,%l4 ! (4) ax = Xi & 0x7fffffff + add %o2,%i2,%o3 ! px += stridex + ld [%o3],%l7 ! (7) Xi = ((int*)px)[0] + add %o3,%i2,%i1 ! px += stridex + and %l5,G5_CONST,%l5 ! (5) ax = Xi & 0x7fffffff + and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff + ba .main_loop + and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff + + .align 16 +.main_loop: + cmp %l0,I5_THRESHOLD + bg,pn %icc,.spec0 ! (0) if (ax > THRESHOLD) + lda [%o0]%asi,%f8 ! (4) X = px[0] + fstod %f16,%f40 ! (0) y = (double) X +.spec0_cont: + cmp %l1,I5_THRESHOLD + bg,pn %icc,.spec1 ! (1) if (ax > THRESHOLD) + lda [%o1]%asi,%f10 ! (5) X = px[0] + fstod %f2,%f42 ! (1) y = (double) X +.spec1_cont: + cmp %l2,I5_THRESHOLD + bg,pn %icc,.spec2 ! (2) if (ax > THRESHOLD) + lda [%o2]%asi,%f12 ! (6) X = px[0] + fstod %f4,%f44 ! (2) y = (double) X +.spec2_cont: + cmp %l3,I5_THRESHOLD + bg,pn %icc,.spec3 ! (3) if (ax > THRESHOLD) + lda [%o3]%asi,%f14 ! (7) X = px[0] + fstod %f6,%f46 ! (3) y = (double) X +.spec3_cont: + cmp %l4,I5_THRESHOLD + bg,pn %icc,.spec4 ! (4) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2 + fstod %f8,%f48 ! (4) y = (double) X +.spec4_cont: + cmp %l5,I5_THRESHOLD + bg,pn %icc,.spec5 ! (5) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2 + fstod %f10,%f50 ! (5) y = (double) X +.spec5_cont: + cmp %l6,I5_THRESHOLD + bg,pn %icc,.spec6 ! 
(6) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2 + fstod %f12,%f52 ! (6) y = (double) X +.spec6_cont: + cmp %l7,I5_THRESHOLD + bg,pn %icc,.spec7 ! (7) if (ax > THRESHOLD) + fmuld F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2 + fstod %f14,%f54 ! (7) y = (double) X +.spec7_cont: + fdtoi %f40,%f16 ! (0) k = (int) y + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2 + + fdtoi %f42,%f2 ! (1) k = (int) y + st %f2,[%fp+tmp1] + fmuld F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2 + + fdtoi %f44,%f4 ! (2) k = (int) y + st %f4,[%fp+tmp2] + fmuld F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2 + + fdtoi %f46,%f6 ! (3) k = (int) y + st %f6,[%fp+tmp3] + fmuld F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2 + + fdtoi %f48,%f8 ! (4) k = (int) y + st %f8,[%fp+tmp4] + + fdtoi %f50,%f10 ! (5) k = (int) y + st %f10,[%fp+tmp5] + + fitod %f16,%f34 ! (0) dtmp = (double) k + fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez) + nop + nop + + fdtoi %f52,%f12 ! (6) k = (int) y + st %f12,[%fp+tmp6] + + fdtoi %f54,%f14 ! (7) k = (int) y + st %f14,[%fp+tmp7] + + lda [%i1]%asi,%l0 ! (8) Xi = ((int*)px)[0] + add %i1,%i2,%o5 ! px += stridex + fitod %f2,%f18 ! (1) dtmp = (double) k + fpackfix %f2,%f2 ! (1) ((float*)&drez)[0] = vis_fpackfix(drez) + + lda [%o5]%asi,%l1 ! (9) Xi = ((int*)px)[0] + add %o5,%i2,%i1 ! px += stridex + fitod %f4,%f20 ! (2) dtmp = (double) k + fpackfix %f4,%f4 ! (2) ((float*)&drez)[0] = vis_fpackfix(drez) + + lda [%i1]%asi,%l2 ! (10) Xi = ((int*)px)[0] + add %i1,%i2,%o5 ! px += stridex + fitod %f6,%f22 ! (3) dtmp = (double) k + fpackfix %f6,%f6 ! (3) ((float*)&drez)[0] = vis_fpackfix(drez) + + lda [%o5]%asi,%l3 ! (11) Xi = ((int*)px)[0] + add %o5,%i2,%i1 ! px += stridex + fitod %f8,%f24 ! (4) dtmp = (double) k + fpackfix %f8,%f8 ! (4) ((float*)&drez)[0] = vis_fpackfix(drez) + + fitod %f10,%f26 ! (5) dtmp = (double) k + fpackfix %f10,%f10 ! (5) ((float*)&drez)[0] = vis_fpackfix(drez) + + fitod %f12,%f28 ! 
(6) dtmp = (double) k + fpackfix %f12,%f12 ! (6) ((float*)&drez)[0] = vis_fpackfix(drez) + + fitod %f14,%f30 ! (7) dtmp = (double) k + fpackfix %f14,%f14 ! (7) ((float*)&drez)[0] = vis_fpackfix(drez) + + ld [%fp+tmp0],%o0 ! (0) load k + and %l0,G5_CONST,%l0 ! (8) ax = Xi & 0x7fffffff + fsubd %f40,%f34,%f40 ! (0) y -= dtmp + + ld [%fp+tmp1],%o1 ! (1) load k + and %l1,G5_CONST,%l1 ! (9) ax = Xi & 0x7fffffff + fsubd %f42,%f18,%f42 ! (1) y -= dtmp + + ld [%fp+tmp2],%o2 ! (2) load k + and %l2,G5_CONST,%l2 ! (10) ax = Xi & 0x7fffffff + and %o0,255,%o0 ! (0) k &= 255 + fsubd %f44,%f20,%f44 ! (2) y -= dtmp + + ld [%fp+tmp3],%o3 ! (3) load k + and %o1,255,%o1 ! (1) k &= 255 + fsubd %f46,%f22,%f46 ! (3) y -= dtmp + + sll %o0,3,%o0 ! (0) k <<= 3 + sll %o1,3,%o1 ! (1) k <<= 3 + fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2 + fsubd %f48,%f24,%f48 ! (4) y -= dtmp + + and %l3,G5_CONST,%l3 ! (11) ax = Xi & 0x7fffffff + and %o2,255,%o2 ! (2) k &= 255 + fmuld F60_KA2,%f42,%f18 ! (1) dtmp = y * KA2 + fsubd %f50,%f26,%f50 ! (5) y -= dtmp + + sll %o2,3,%o2 ! (2) k <<= 3 + fmuld F60_KA2,%f44,%f20 ! (2) dtmp = y * KA2 + fsubd %f52,%f28,%f52 ! (6) y -= dtmp + + ld [%fp+tmp4],%o4 ! (4) load k + and %o3,255,%o3 ! (3) k &= 255 + fmuld F60_KA2,%f46,%f22 ! (3) dtmp = y * KA2 + fsubd %f54,%f30,%f54 ! (7) y -= dtmp + + ld [%fp+tmp5],%o5 ! (5) load k + sll %o3,3,%o3 ! (3) k <<= 3 + fmuld F60_KA2,%f48,%f24 ! (4) dtmp = y * KA2 + faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1 + + ld [%fp+tmp6],%o7 ! (6) load k + and %o4,255,%o4 ! (4) k &= 255 + fmuld F60_KA2,%f50,%f26 ! (5) dtmp = y * KA2 + faddd F58_KA1,%f18,%f18 ! (1) dtmp += KA1 + + ld [%fp+tmp7],%l4 ! (7) load k + and %o5,255,%o5 ! (5) k &= 255 + fmuld F60_KA2,%f52,%f28 ! (6) dtmp = y * KA2 + faddd F58_KA1,%f20,%f20 ! (2) dtmp += KA1 + + sll %o5,3,%o5 ! (5) k <<= 3 + fmuld F60_KA2,%f54,%f30 ! (7) dtmp = y * KA2 + faddd F58_KA1,%f22,%f22 ! (3) dtmp += KA1 + + fmuld %f34,%f40,%f40 ! (0) y *= dtmp + ldd [G1_CONST_TBL+%o0],%f34 ! 
(0) dtmp = ((double*)(CONST_TBL + k))[0] + and %l4,255,%l4 ! (7) k &= 255 + faddd F58_KA1,%f24,%f24 ! (4) dtmp += KA1 + + fmuld %f18,%f42,%f42 ! (1) y *= dtmp + ldd [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0] + sll %l4,3,%l4 ! (7) k <<= 3 + faddd F58_KA1,%f26,%f26 ! (5) dtmp += KA1 + + fmuld %f20,%f44,%f44 ! (2) y *= dtmp + ldd [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0] + faddd F58_KA1,%f28,%f28 ! (6) dtmp += KA1 + + fmuld %f22,%f46,%f46 ! (3) y *= dtmp + ldd [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0] + sll %o4,3,%o4 ! (4) k <<= 3 + faddd F58_KA1,%f30,%f30 ! (7) dtmp += KA1 + + fmuld %f24,%f48,%f48 ! (4) y *= dtmp + ldd [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0] + and %o7,255,%o7 ! (6) k &= 255 + fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp) + + fmuld %f26,%f50,%f50 ! (5) y *= dtmp + ldd [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0] + sll %o7,3,%o7 ! (6) k <<= 3 + fpadd32 %f2,%f18,%f18 ! (1) drez = vis_fpadd32(drez,dtmp) + + fmuld %f28,%f52,%f52 ! (6) y *= dtmp + ldd [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0] + sll %i2,2,%o0 + fpadd32 %f4,%f20,%f20 ! (2) drez = vis_fpadd32(drez,dtmp) + + fmuld %f30,%f54,%f54 ! (7) y *= dtmp + ldd [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0] + sub %i1,%o0,%o0 + fpadd32 %f6,%f22,%f22 ! (3) drez = vis_fpadd32(drez,dtmp) + + lda [%i1]%asi,%l4 ! (12) Xi = ((int*)px)[0] + add %i1,%i2,%o1 ! px += stridex + fpadd32 %f8,%f24,%f24 ! (4) drez = vis_fpadd32(drez,dtmp) + fmuld %f34,%f40,%f40 ! (0) y *= drez + + lda [%o1]%asi,%l5 ! (13) Xi = ((int*)px)[0] + add %o1,%i2,%o2 ! px += stridex + fpadd32 %f10,%f26,%f26 ! (5) drez = vis_fpadd32(drez,dtmp) + fmuld %f18,%f42,%f42 ! (1) y *= drez + + lda [%o2]%asi,%l6 ! (14) Xi = ((int*)px)[0] + add %o2,%i2,%o3 ! px += stridex + fpadd32 %f12,%f28,%f28 ! (6) drez = vis_fpadd32(drez,dtmp) + fmuld %f20,%f44,%f44 ! 
(2) y *= drez + + lda [%o3]%asi,%l7 ! (15) Xi = ((int*)px)[0] + add %o3,%i2,%i1 ! px += stridex + fpadd32 %f14,%f30,%f30 ! (7) drez = vis_fpadd32(drez,dtmp) + fmuld %f22,%f46,%f46 ! (3) y *= drez + + lda [%o0]%asi,%f16 ! (8) X = px[0] + add %o0,%i2,%o5 + fmuld %f24,%f48,%f48 ! (4) y *= drez + faddd %f34,%f40,%f40 ! (0) y += drez + + lda [%o5]%asi,%f2 ! (9) X = px[0] + add %o5,%i2,%o0 + fmuld %f26,%f50,%f50 ! (5) y *= drez + faddd %f18,%f42,%f42 ! (1) y += drez + + lda [%o0]%asi,%f4 ! (10) X = px[0] + add %o0,%i2,%o5 + fmuld %f28,%f52,%f52 ! (6) y *= drez + faddd %f20,%f44,%f44 ! (2) y += drez + + lda [%o5]%asi,%f6 ! (11) X = px[0] + add %o5,%i2,%o0 + fmuld %f30,%f54,%f54 ! (7) y *= drez + faddd %f22,%f46,%f46 ! (3) y += drez + + and %l4,G5_CONST,%l4 ! (12) ax = Xi & 0x7fffffff + faddd %f24,%f48,%f48 ! (4) y += drez + + and %l5,G5_CONST,%l5 ! (13) ax = Xi & 0x7fffffff + faddd %f26,%f50,%f50 ! (5) y += drez + + and %l6,G5_CONST,%l6 ! (14) ax = Xi & 0x7fffffff + faddd %f28,%f52,%f52 ! (6) y += drez + + and %l7,G5_CONST,%l7 ! (15) ax = Xi & 0x7fffffff + faddd %f30,%f54,%f54 ! (7) y += drez + + fdtos %f40,%f26 ! (0) (float) y + st %f26,[%i3] + add %i3,%i4,%o4 ! py += stridey + + fdtos %f42,%f18 ! (1) (float) y + st %f18,[%o4] + add %o4,%i4,%i3 ! py += stridey + + fdtos %f44,%f20 ! (2) (float) y + st %f20,[%i3] + add %i3,%i4,%o4 ! py += stridey + + fdtos %f46,%f22 ! (3) (float) y + st %f22,[%o4] + add %o4,%i4,%i3 ! py += stridey + + fdtos %f48,%f24 ! (4) (float) y + st %f24,[%i3] + subcc %i0,8,%i0 + add %i3,%i4,%o4 ! py += stridey + + fdtos %f50,%f26 ! (5) (float) y + st %f26,[%o4] + add %o4,%i4,%o5 ! py += stridey + add %i4,%i4,%o7 + + fdtos %f52,%f28 ! (6) (float) y + st %f28,[%o5] + add %o5,%i4,%o4 ! py += stridey + add %o5,%o7,%i3 ! py += stridey + + fdtos %f54,%f30 ! 
(7) (float) y + st %f30,[%o4] + bpos,pt %icc,.main_loop + nop +.after_main_loop: + sll %i2,3,%o2 + sub %i1,%o2,%i1 + +.tail: + add %i0,8,%i0 + subcc %i0,1,%i0 + bneg,pn %icc,.exit + + ld [%i1],%l0 + ld [%i1],%f2 + add %i1,%i2,%i1 + +.tail_loop: + and %l0,G5_CONST,%l1 + cmp %l1,I5_THRESHOLD + bg,pn %icc,.tail_spec + nop +.tail_spec_cont: + fstod %f2,%f40 + fmuld F62_K256ONLN2,%f40,%f40 + fdtoi %f40,%f2 + st %f2,[%fp+tmp0] + fitod %f2,%f16 + fpackfix %f2,%f2 + fsubd %f40,%f16,%f40 + fmuld F60_KA2,%f40,%f16 + faddd F58_KA1,%f16,%f16 + ld [%fp+tmp0],%o0 + fmuld %f16,%f40,%f40 + and %o0,255,%o0 + sll %o0,3,%o0 + ldd [G1_CONST_TBL+%o0],%f16 + fpadd32 %f2,%f16,%f16 + lda [%i1]%asi,%l0 + fmuld %f16,%f40,%f40 + lda [%i1]%asi,%f2 + faddd %f16,%f40,%f40 + add %i1,%i2,%i1 + fdtos %f40,%f16 + st %f16,[%i3] + add %i3,%i4,%i3 + subcc %i0,1,%i0 + bpos,pt %icc,.tail_loop + nop + +.exit: + ret + restore + +.tail_spec: + sethi %hi(0x7f800000),%o4 + cmp %l1,%o4 + bl,pt %icc,.tail_spec_out_of_range + nop + + srl %l0,29,%l0 + ble,pn %icc,.tail_spec_inf + andcc %l0,4,%g0 + +! NaN -> NaN + + fmuls %f2,%f2,%f2 + ba .tail_spec_exit + st %f2,[%i3] + +.tail_spec_inf: + be,a,pn %icc,.tail_spec_exit + st %f2,[%i3] + + ba .tail_spec_exit + st %f3,[%i3] + +.tail_spec_out_of_range: + fcmpes %fcc0,%f2,%f3 + fcmpes %fcc1,%f2,THRESHOLDL + fbl,pn %fcc0,1f ! if ( X < 0.0f ) + nop + fbl,pt %fcc1,.tail_spec_cont ! if ( X < THRESHOLDL ) + nop +1: + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.tail_spec_exit: + lda [%i1]%asi,%l0 + lda [%i1]%asi,%f2 + add %i1,%i2,%i1 + + subcc %i0,1,%i0 + bpos,pt %icc,.tail_loop + add %i3,%i4,%i3 + ba .exit + nop + + .align 16 +.spec0: + sethi %hi(0x7f800000),%o5 + cmp %l0,%o5 + bl,pt %icc,.spec0_out_of_range + sll %i2,3,%o4 + + ble,pn %icc,.spec0_inf + sub %i1,%o4,%o4 + +! 
NaN -> NaN + + fmuls %f16,%f16,%f16 + ba .spec0_exit + st %f16,[%i3] + +.spec0_inf: + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec0_exit + st %f16,[%i3] + + ba .spec0_exit + st %f3,[%i3] + +.spec0_out_of_range: + fcmpes %fcc0,%f16,%f3 + fcmpes %fcc1,%f16,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f16,%f40 ! (0) y = (double) X + fbl,a,pt %fcc1,.spec0_cont ! if ( X < THRESHOLDL ) + fstod %f16,%f40 ! (0) y = (double) X +1: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f16 + fmuls %f16,%f16,%f16 + st %f16,[%i3] + +.spec0_exit: + fmovs %f2,%f16 + mov %l1,%l0 + fmovs %f4,%f2 + mov %l2,%l1 + fmovs %f6,%f4 + mov %l3,%l2 + fmovs %f8,%f6 + mov %l4,%l3 + mov %l5,%l4 + mov %l6,%l5 + mov %l7,%l6 + lda [%i1]%asi,%l7 + add %i1,%i2,%i1 + mov %o1,%o0 + mov %o2,%o1 + mov %o3,%o2 + and %l7,G5_CONST,%l7 + add %o2,%i2,%o3 + + subcc %i0,1,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec1: + sethi %hi(0x7f800000),%o5 + cmp %l1,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f2,%f3 + fcmpes %fcc1,%f2,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f2,%f42 ! (1) y = (double) X + fbl,a,pt %fcc1,.spec1_cont ! if ( X < THRESHOLDL ) + fstod %f2,%f42 ! (1) y = (double) X +1: + fmuld F62_K256ONLN2,%f40,%f40 + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fitod %f16,%f34 + fpackfix %f16,%f16 + fsubd %f40,%f34,%f40 + fmuld F60_KA2,%f40,%f34 + faddd F58_KA1,%f34,%f34 + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + and %o0,255,%o0 + sll %o0,3,%o0 + ldd [G1_CONST_TBL+%o0],%f34 + fpadd32 %f16,%f34,%f34 + fmuld %f34,%f40,%f40 + faddd %f34,%f40,%f40 + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%i3 + + cmp %l1,%o5 + bl,pt %icc,.spec1_out_of_range + sll %i2,3,%o4 + + ble,pn %icc,.spec1_inf + sub %i1,%o4,%o4 + +! 
NaN -> NaN + + fmuls %f2,%f2,%f2 + ba .spec1_exit + st %f2,[%i3] + +.spec1_inf: + add %o4,%i2,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec1_exit + st %f2,[%i3] + + ba .spec1_exit + st %f3,[%i3] + +.spec1_out_of_range: + sub %i1,%o4,%o4 + add %o4,%i2,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec1_exit: + fmovs %f4,%f16 + mov %l2,%l0 + fmovs %f6,%f2 + mov %l3,%l1 + fmovs %f8,%f4 + mov %l4,%l2 + fmovs %f10,%f6 + mov %l5,%l3 + mov %l6,%l4 + mov %l7,%l5 + lda [%i1]%asi,%l6 + add %i1,%i2,%i1 + lda [%i1]%asi,%l7 + add %i1,%i2,%i1 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + mov %o2,%o0 + mov %o3,%o1 + add %o1,%i2,%o2 + add %o2,%i2,%o3 + + subcc %i0,2,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec2: + sethi %hi(0x7f800000),%o5 + cmp %l2,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f4,%f3 + fcmpes %fcc1,%f4,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f4,%f44 ! (2) y = (double) X + fbl,a,pt %fcc1,.spec2_cont ! if ( X < THRESHOLDL ) + fstod %f4,%f44 ! 
(2) y = (double) X +1: + fmuld F62_K256ONLN2,%f40,%f40 + + fmuld F62_K256ONLN2,%f42,%f42 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fsubd %f40,%f34,%f40 + + fsubd %f42,%f18,%f42 + + fmuld F60_KA2,%f40,%f34 + + fmuld F60_KA2,%f42,%f18 + + faddd F58_KA1,%f34,%f34 + + faddd F58_KA1,%f18,%f18 + + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + + ld [%fp+tmp1],%o1 + fmuld %f18,%f42,%f42 + + and %o0,255,%o0 + + and %o1,255,%o1 + + sll %o0,3,%o0 + + sll %o1,3,%o1 + + ldd [G1_CONST_TBL+%o0],%f34 + + ldd [G1_CONST_TBL+%o1],%f18 + + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + cmp %l2,%o5 + sll %i2,1,%o5 + bl,pt %icc,.spec2_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec2_inf + add %o4,%o5,%o4 + +! 
NaN -> NaN + + fmuls %f4,%f4,%f4 + ba .spec2_exit + st %f4,[%i3] + +.spec2_inf: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec2_exit + st %f4,[%i3] + + ba .spec2_exit + st %f3,[%i3] + +.spec2_out_of_range: + add %o4,%o5,%o4 + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec2_exit: + fmovs %f6,%f16 + mov %l3,%l0 + mov %o3,%o0 + fmovs %f8,%f2 + mov %l4,%l1 + add %o0,%i2,%o1 + fmovs %f10,%f4 + mov %l5,%l2 + add %o1,%i2,%o2 + fmovs %f12,%f6 + mov %l6,%l3 + mov %l7,%l4 + lda [%i1]%asi,%l5 + add %i1,%i2,%i1 + add %o2,%i2,%o3 + lda [%i1]%asi,%l6 + add %i1,%i2,%i1 + lda [%i1]%asi,%l7 + add %i1,%i2,%i1 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,3,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop +.spec3: + sethi %hi(0x7f800000),%o5 + cmp %l3,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f6,%f3 + fcmpes %fcc1,%f6,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f6,%f46 ! (3) y = (double) X + fbl,a,pt %fcc1,.spec3_cont ! if ( X < THRESHOLDL ) + fstod %f6,%f46 ! 
(3) y = (double) X +1: + fmuld F62_K256ONLN2,%f40,%f40 + + fmuld F62_K256ONLN2,%f42,%f42 + + fmuld F62_K256ONLN2,%f44,%f44 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fsubd %f40,%f34,%f40 + + fsubd %f42,%f18,%f42 + + fsubd %f44,%f20,%f44 + + fmuld F60_KA2,%f40,%f34 + + fmuld F60_KA2,%f42,%f18 + + fmuld F60_KA2,%f44,%f20 + + faddd F58_KA1,%f34,%f34 + + faddd F58_KA1,%f18,%f18 + + faddd F58_KA1,%f20,%f20 + + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + + ld [%fp+tmp1],%o1 + fmuld %f18,%f42,%f42 + + ld [%fp+tmp2],%o2 + fmuld %f20,%f44,%f44 + + and %o0,255,%o0 + and %o1,255,%o1 + + and %o2,255,%o2 + sll %o0,3,%o0 + + sll %o1,3,%o1 + sll %o2,3,%o2 + + ldd [G1_CONST_TBL+%o0],%f34 + + ldd [G1_CONST_TBL+%o1],%f18 + + ldd [G1_CONST_TBL+%o2],%f20 + + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%i3 + + cmp %l3,%o5 + bl,pt %icc,.spec3_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec3_inf + add %o4,%i2,%o4 + +! 
NaN -> NaN + + fmuls %f6,%f6,%f6 + ba .spec3_exit + st %f6,[%i3] + +.spec3_inf: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec3_exit + st %f6,[%i3] + + ba .spec3_exit + st %f3,[%i3] + +.spec3_out_of_range: + add %o4,%i2,%o4 + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec3_exit: + fmovs %f8,%f16 + mov %l4,%l0 + fmovs %f10,%f2 + mov %l5,%l1 + fmovs %f12,%f4 + mov %l6,%l2 + fmovs %f14,%f6 + mov %l7,%l3 + mov %i1,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,4,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec4: + sethi %hi(0x7f800000),%o5 + cmp %l4,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f8,%f3 + fcmpes %fcc1,%f8,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f8,%f48 ! (4) y = (double) X + fbl,a,pt %fcc1,.spec4_cont ! if ( X < THRESHOLDL ) + fstod %f8,%f48 ! 
(4) y = (double) X +1: + fmuld F62_K256ONLN2,%f42,%f42 + + fmuld F62_K256ONLN2,%f44,%f44 + + fmuld F62_K256ONLN2,%f46,%f46 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fsubd %f40,%f34,%f40 + + fsubd %f42,%f18,%f42 + + fsubd %f44,%f20,%f44 + + fsubd %f46,%f22,%f46 + + fmuld F60_KA2,%f40,%f34 + + fmuld F60_KA2,%f42,%f18 + + fmuld F60_KA2,%f44,%f20 + + fmuld F60_KA2,%f46,%f22 + + faddd F58_KA1,%f34,%f34 + + faddd F58_KA1,%f18,%f18 + + faddd F58_KA1,%f20,%f20 + + faddd F58_KA1,%f22,%f22 + + ld [%fp+tmp0],%o0 + fmuld %f34,%f40,%f40 + + ld [%fp+tmp1],%o1 + fmuld %f18,%f42,%f42 + + ld [%fp+tmp2],%o2 + fmuld %f20,%f44,%f44 + + ld [%fp+tmp3],%o3 + fmuld %f22,%f46,%f46 + + and %o0,255,%o0 + and %o1,255,%o1 + + and %o2,255,%o2 + and %o3,255,%o3 + + sll %o0,3,%o0 + sll %o1,3,%o1 + + sll %o2,3,%o2 + sll %o3,3,%o3 + + ldd [G1_CONST_TBL+%o0],%f34 + + ldd [G1_CONST_TBL+%o1],%f18 + + ldd [G1_CONST_TBL+%o2],%f20 + + ldd [G1_CONST_TBL+%o3],%f22 + + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + cmp %l4,%o5 + bl,pt %icc,.spec4_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec4_inf + sub %i1,%o4,%o4 + +! 
NaN -> NaN + + fmuls %f8,%f8,%f8 + ba .spec4_exit + st %f8,[%i3] + +.spec4_inf: + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec4_exit + st %f8,[%i3] + + ba .spec4_exit + st %f3,[%i3] + +.spec4_out_of_range: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec4_exit: + fmovs %f10,%f16 + mov %l5,%l0 + fmovs %f12,%f2 + mov %l6,%l1 + fmovs %f14,%f4 + mov %l7,%l2 + lda [%i1]%asi,%l3 + lda [%i1]%asi,%f6 + add %i1,%i2,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l3,G5_CONST,%l3 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,5,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec5: + sethi %hi(0x7f800000),%o5 + cmp %l5,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f10,%f3 + fcmpes %fcc1,%f10,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f10,%f50 ! (5) y = (double) X + fbl,a,pt %fcc1,.spec5_cont ! if ( X < THRESHOLDL ) + fstod %f10,%f50 ! 
(5) y = (double) X +1: + fmuld F62_K256ONLN2,%f44,%f44 + + fmuld F62_K256ONLN2,%f46,%f46 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fdtoi %f48,%f8 + st %f8,[%fp+tmp4] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fitod %f8,%f24 + fpackfix %f8,%f8 + + ld [%fp+tmp0],%o0 + fsubd %f40,%f34,%f40 + + ld [%fp+tmp1],%o1 + fsubd %f42,%f18,%f42 + + ld [%fp+tmp2],%o2 + and %o0,255,%o0 + fsubd %f44,%f20,%f44 + + ld [%fp+tmp3],%o3 + and %o1,255,%o1 + fsubd %f46,%f22,%f46 + + sll %o0,3,%o0 + sll %o1,3,%o1 + fmuld F60_KA2,%f40,%f34 + fsubd %f48,%f24,%f48 + + and %o2,255,%o2 + fmuld F60_KA2,%f42,%f18 + + sll %o2,3,%o2 + fmuld F60_KA2,%f44,%f20 + + ld [%fp+tmp4],%o4 + and %o3,255,%o3 + fmuld F60_KA2,%f46,%f22 + + sll %o3,3,%o3 + fmuld F60_KA2,%f48,%f24 + faddd F58_KA1,%f34,%f34 + + and %o4,255,%o4 + faddd F58_KA1,%f18,%f18 + + faddd F58_KA1,%f20,%f20 + + faddd F58_KA1,%f22,%f22 + + fmuld %f34,%f40,%f40 + ldd [G1_CONST_TBL+%o0],%f34 + faddd F58_KA1,%f24,%f24 + + fmuld %f18,%f42,%f42 + ldd [G1_CONST_TBL+%o1],%f18 + + fmuld %f20,%f44,%f44 + ldd [G1_CONST_TBL+%o2],%f20 + + fmuld %f22,%f46,%f46 + ldd [G1_CONST_TBL+%o3],%f22 + sll %o4,3,%o4 + + fmuld %f24,%f48,%f48 + ldd [G1_CONST_TBL+%o4],%f24 + fpadd32 %f16,%f34,%f34 + + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fpadd32 %f8,%f24,%f24 + fmuld %f34,%f40,%f40 + + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + fmuld %f24,%f48,%f48 + faddd %f34,%f40,%f40 + + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + faddd %f24,%f48,%f48 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + 
fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + fdtos %f48,%f24 + st %f24,[%i3] + add %i3,%i4,%i3 + + cmp %l5,%o5 + bl,pt %icc,.spec5_out_of_range + sll %i2,2,%o4 + + ble,pn %icc,.spec5_inf + sub %o4,%i2,%o4 + +! NaN -> NaN + + fmuls %f10,%f10,%f10 + ba .spec5_exit + st %f10,[%i3] + +.spec5_inf: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec5_exit + st %f10,[%i3] + + ba .spec5_exit + st %f3,[%i3] + +.spec5_out_of_range: + sub %o4,%i2,%o4 + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec5_exit: + fmovs %f12,%f16 + mov %l6,%l0 + fmovs %f14,%f2 + mov %l7,%l1 + lda [%i1]%asi,%l2 + lda [%i1]%asi,%f4 + add %i1,%i2,%i1 + lda [%i1]%asi,%l3 + lda [%i1]%asi,%f6 + add %i1,%i2,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l2,G5_CONST,%l2 + and %l3,G5_CONST,%l3 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,6,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop +.spec6: + sethi %hi(0x7f800000),%o5 + cmp %l6,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f12,%f3 + fcmpes %fcc1,%f12,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f12,%f52 ! (6) y = (double) X + fbl,a,pt %fcc1,.spec6_cont ! if ( X < THRESHOLDL ) + fstod %f12,%f52 ! 
(6) y = (double) X +1: + fmuld F62_K256ONLN2,%f46,%f46 + + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + fmuld F62_K256ONLN2,%f50,%f50 + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fdtoi %f48,%f8 + st %f8,[%fp+tmp4] + + fdtoi %f50,%f10 + st %f10,[%fp+tmp5] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fitod %f8,%f24 + fpackfix %f8,%f8 + + fitod %f10,%f26 + fpackfix %f10,%f10 + + ld [%fp+tmp0],%o0 + fsubd %f40,%f34,%f40 + + ld [%fp+tmp1],%o1 + fsubd %f42,%f18,%f42 + + ld [%fp+tmp2],%o2 + and %o0,255,%o0 + fsubd %f44,%f20,%f44 + + ld [%fp+tmp3],%o3 + and %o1,255,%o1 + fsubd %f46,%f22,%f46 + + sll %o0,3,%o0 + sll %o1,3,%o1 + fmuld F60_KA2,%f40,%f34 + fsubd %f48,%f24,%f48 + + and %o2,255,%o2 + fmuld F60_KA2,%f42,%f18 + fsubd %f50,%f26,%f50 + + sll %o2,3,%o2 + fmuld F60_KA2,%f44,%f20 + + ld [%fp+tmp4],%o4 + and %o3,255,%o3 + fmuld F60_KA2,%f46,%f22 + + ld [%fp+tmp5],%o5 + sll %o3,3,%o3 + fmuld F60_KA2,%f48,%f24 + faddd F58_KA1,%f34,%f34 + + and %o4,255,%o4 + fmuld F60_KA2,%f50,%f26 + faddd F58_KA1,%f18,%f18 + + and %o5,255,%o5 + faddd F58_KA1,%f20,%f20 + + sll %o5,3,%o5 + faddd F58_KA1,%f22,%f22 + + fmuld %f34,%f40,%f40 + ldd [G1_CONST_TBL+%o0],%f34 + faddd F58_KA1,%f24,%f24 + + fmuld %f18,%f42,%f42 + ldd [G1_CONST_TBL+%o1],%f18 + faddd F58_KA1,%f26,%f26 + + fmuld %f20,%f44,%f44 + ldd [G1_CONST_TBL+%o2],%f20 + + fmuld %f22,%f46,%f46 + ldd [G1_CONST_TBL+%o3],%f22 + sll %o4,3,%o4 + + fmuld %f24,%f48,%f48 + ldd [G1_CONST_TBL+%o4],%f24 + fpadd32 %f16,%f34,%f34 + + fmuld %f26,%f50,%f50 + ldd [G1_CONST_TBL+%o5],%f26 + fpadd32 %f2,%f18,%f18 + + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fpadd32 %f8,%f24,%f24 + fmuld %f34,%f40,%f40 + + fpadd32 %f10,%f26,%f26 + fmuld %f18,%f42,%f42 + + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + fmuld %f24,%f48,%f48 + 
faddd %f34,%f40,%f40 + + fmuld %f26,%f50,%f50 + faddd %f18,%f42,%f42 + + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + faddd %f24,%f48,%f48 + + faddd %f26,%f50,%f50 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + fdtos %f48,%f24 + st %f24,[%i3] + add %i3,%i4,%o4 + + fdtos %f50,%f26 + st %f26,[%o4] + add %o4,%i4,%i3 + + sethi %hi(0x7f800000),%o5 + cmp %l6,%o5 + bl,pt %icc,.spec6_out_of_range + sll %i2,1,%o4 + + ble,pn %icc,.spec6_inf + sub %i1,%o4,%o4 + +! NaN -> NaN + + fmuls %f12,%f12,%f12 + ba .spec6_exit + st %f12,[%i3] + +.spec6_inf: + ld [%o4],%l0 + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec6_exit + st %f12,[%i3] + + ba .spec6_exit + st %f3,[%i3] + +.spec6_out_of_range: + sub %i1,%o4,%o4 + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec6_exit: + fmovs %f14,%f16 + mov %l7,%l0 + lda [%i1]%asi,%l1 + lda [%i1]%asi,%f2 + add %i1,%i2,%i1 + lda [%i1]%asi,%l2 + lda [%i1]%asi,%f4 + add %i1,%i2,%i1 + lda [%i1]%asi,%l3 + lda [%i1]%asi,%f6 + add %i1,%i2,%o0 + lda [%o0]%asi,%l4 + add %o0,%i2,%o1 + lda [%o1]%asi,%l5 + add %o1,%i2,%o2 + lda [%o2]%asi,%l6 + add %o2,%i2,%o3 + lda [%o3]%asi,%l7 + add %o3,%i2,%i1 + and %l1,G5_CONST,%l1 + and %l2,G5_CONST,%l2 + and %l3,G5_CONST,%l3 + and %l4,G5_CONST,%l4 + and %l5,G5_CONST,%l5 + and %l6,G5_CONST,%l6 + and %l7,G5_CONST,%l7 + + subcc %i0,7,%i0 + bpos,pt %icc,.main_loop + add %i3,%i4,%i3 + ba .after_main_loop + nop + + .align 16 +.spec7: + sethi %hi(0x7f800000),%o5 + cmp %l7,%o5 + bge,pn %icc,1f + nop + fcmpes %fcc0,%f14,%f3 + fcmpes %fcc1,%f14,THRESHOLDL + fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) + fstod %f14,%f54 ! (7) y = (double) X + fbl,a,pt %fcc1,.spec7_cont ! if ( X < THRESHOLDL ) + fstod %f14,%f54 ! 
(7) y = (double) X +1: + fdtoi %f40,%f16 + st %f16,[%fp+tmp0] + fmuld F62_K256ONLN2,%f48,%f48 + + fdtoi %f42,%f2 + st %f2,[%fp+tmp1] + fmuld F62_K256ONLN2,%f50,%f50 + + fdtoi %f44,%f4 + st %f4,[%fp+tmp2] + fmuld F62_K256ONLN2,%f52,%f52 + + fdtoi %f46,%f6 + st %f6,[%fp+tmp3] + + fdtoi %f48,%f8 + st %f8,[%fp+tmp4] + + fdtoi %f50,%f10 + st %f10,[%fp+tmp5] + + fdtoi %f52,%f12 + st %f12,[%fp+tmp6] + + fitod %f16,%f34 + fpackfix %f16,%f16 + + fitod %f2,%f18 + fpackfix %f2,%f2 + + fitod %f4,%f20 + fpackfix %f4,%f4 + + fitod %f6,%f22 + fpackfix %f6,%f6 + + fitod %f8,%f24 + fpackfix %f8,%f8 + + fitod %f10,%f26 + fpackfix %f10,%f10 + + fitod %f12,%f28 + fpackfix %f12,%f12 + + ld [%fp+tmp0],%o0 + fsubd %f40,%f34,%f40 + + ld [%fp+tmp1],%o1 + fsubd %f42,%f18,%f42 + + ld [%fp+tmp2],%o2 + and %o0,255,%o0 + fsubd %f44,%f20,%f44 + + ld [%fp+tmp3],%o3 + and %o1,255,%o1 + fsubd %f46,%f22,%f46 + + sll %o0,3,%o0 + sll %o1,3,%o1 + fmuld F60_KA2,%f40,%f34 + fsubd %f48,%f24,%f48 + + and %o2,255,%o2 + fmuld F60_KA2,%f42,%f18 + fsubd %f50,%f26,%f50 + + sll %o2,3,%o2 + fmuld F60_KA2,%f44,%f20 + fsubd %f52,%f28,%f52 + + ld [%fp+tmp4],%o4 + and %o3,255,%o3 + fmuld F60_KA2,%f46,%f22 + + ld [%fp+tmp5],%o5 + sll %o3,3,%o3 + fmuld F60_KA2,%f48,%f24 + faddd F58_KA1,%f34,%f34 + + ld [%fp+tmp6],%o7 + and %o4,255,%o4 + fmuld F60_KA2,%f50,%f26 + faddd F58_KA1,%f18,%f18 + + and %o5,255,%o5 + fmuld F60_KA2,%f52,%f28 + faddd F58_KA1,%f20,%f20 + + sll %o5,3,%o5 + faddd F58_KA1,%f22,%f22 + + fmuld %f34,%f40,%f40 + ldd [G1_CONST_TBL+%o0],%f34 + faddd F58_KA1,%f24,%f24 + + fmuld %f18,%f42,%f42 + ldd [G1_CONST_TBL+%o1],%f18 + faddd F58_KA1,%f26,%f26 + + fmuld %f20,%f44,%f44 + ldd [G1_CONST_TBL+%o2],%f20 + faddd F58_KA1,%f28,%f28 + + fmuld %f22,%f46,%f46 + ldd [G1_CONST_TBL+%o3],%f22 + sll %o4,3,%o4 + + fmuld %f24,%f48,%f48 + ldd [G1_CONST_TBL+%o4],%f24 + and %o7,255,%o7 + fpadd32 %f16,%f34,%f34 + + fmuld %f26,%f50,%f50 + ldd [G1_CONST_TBL+%o5],%f26 + sll %o7,3,%o7 + fpadd32 %f2,%f18,%f18 + + fmuld 
%f28,%f52,%f52 + ldd [G1_CONST_TBL+%o7],%f28 + fpadd32 %f4,%f20,%f20 + + fpadd32 %f6,%f22,%f22 + + fpadd32 %f8,%f24,%f24 + fmuld %f34,%f40,%f40 + + fpadd32 %f10,%f26,%f26 + fmuld %f18,%f42,%f42 + + fpadd32 %f12,%f28,%f28 + fmuld %f20,%f44,%f44 + + fmuld %f22,%f46,%f46 + + fmuld %f24,%f48,%f48 + faddd %f34,%f40,%f40 + + fmuld %f26,%f50,%f50 + faddd %f18,%f42,%f42 + + fmuld %f28,%f52,%f52 + faddd %f20,%f44,%f44 + + faddd %f22,%f46,%f46 + + faddd %f24,%f48,%f48 + + faddd %f26,%f50,%f50 + + faddd %f28,%f52,%f52 + + fdtos %f40,%f26 + st %f26,[%i3] + add %i3,%i4,%o4 + + fdtos %f42,%f18 + st %f18,[%o4] + add %o4,%i4,%i3 + + fdtos %f44,%f20 + st %f20,[%i3] + add %i3,%i4,%o4 + + fdtos %f46,%f22 + st %f22,[%o4] + add %o4,%i4,%i3 + + fdtos %f48,%f24 + st %f24,[%i3] + add %i3,%i4,%o4 + + fdtos %f50,%f26 + st %f26,[%o4] + add %o4,%i4,%i3 + + fdtos %f52,%f28 + st %f28,[%i3] + add %i3,%i4,%i3 + + sethi %hi(0x7f800000),%o5 + cmp %l7,%o5 + bl,pt %icc,.spec7_out_of_range + sub %i1,%i2,%o4 + + ble,pn %icc,.spec7_inf + ld [%o4],%l0 + +! NaN -> NaN + + fmuls %f14,%f14,%f14 + ba .spec7_exit + st %f14,[%i3] + +.spec7_inf: + srl %l0,29,%l0 + andcc %l0,4,%l0 + be,a,pn %icc,.spec7_exit + st %f14,[%i3] + + ba .spec7_exit + st %f3,[%i3] + +.spec7_out_of_range: + ld [%o4],%l0 + srl %l0,29,%l0 + and %l0,4,%l0 + add %l0,2048,%l0 + ld [G1_CONST_TBL+%l0],%f2 + fmuls %f2,%f2,%f2 + st %f2,[%i3] + +.spec7_exit: + subcc %i0,8,%i0 + bpos,pt %icc,.main_loop_preload + add %i3,%i4,%i3 + + ba .tail + nop + SET_SIZE(__vexpf) + diff --git a/usr/src/libm/src/mvec/vis/__vhypot.S b/usr/src/libm/src/mvec/vis/__vhypot.S new file mode 100644 index 0000000..7d1962b --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vhypot.S @@ -0,0 +1,1242 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vhypot.S 1.7 06/01/23 SMI" + + .file "__vhypot.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x7ff00000, 0 ! DC0 + .word 0x7fe00000, 0 ! DC1 + .word 0x00100000, 0 ! DC2 + .word 0x41b00000, 0 ! D2ON28 = 268435456.0 + .word 0x7fd00000, 0 ! DC3 + +#define counter %i0 +#define tmp_counter %l3 +#define tmp_px %l5 +#define tmp_py %o7 +#define stridex %i2 +#define stridey %i4 +#define stridez %l0 + +#define DC0 %f8 +#define DC0_HI %f8 +#define DC0_LO %f9 +#define DC1 %f46 +#define DC2 %f48 +#define DC3 %f0 +#define D2ON28 %f62 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&x)[0] = ((float*)px)[0]; +! ((float*)&x)[1] = ((float*)px)[1]; +! +! ((float*)&y)[0] = ((float*)py)[0]; +! ((float*)&y)[1] = ((float*)py)[1]; +! +! x = fabs(x); +! y = fabs(y); +! +! c0 = vis_fcmple32(DC1,x); +! c2 = vis_fcmple32(DC1,y); +! c1 = vis_fcmpgt32(DC2,x); +! c3 = vis_fcmpgt32(DC2,y); +! +! c0 |= c2; +! c1 &= c3; +! if ( (c0 & 2) != 0 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! hx = *(int*)px; +! hy = *(int*)py; +! +! hx &= 0x7fffffff; +! hy &= 0x7fffffff; +! +! j0 = hx; +! if ( j0 < hy ) j0 = hy; +! j0 &= 0x7ff00000; +! if ( j0 >= 0x7ff00000 ) +! { +! 
if ( hx == 0x7ff00000 && lx == 0 ) res = x == y ? y : x; +! else if ( hy == 0x7ff00000 && ly == 0 ) res = x == y ? x : y; +! else res = x * y; +! +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! } +! else +! { +! diff = hy - hx; +! j0 = diff >> 31; +! if ( ((diff ^ j0) - j0) < 0x03600000 ) +! { +! x *= D2ONM1022; +! y *= D2ONM1022; +! +! x_hi = ( x + D2ON28 ) - D2ON28; +! x_lo = x - x_hi; +! y_hi = ( y + D2ON28 ) - D2ON28; +! y_lo = y - y_hi; +! res = (x_hi * x_hi + y_hi * y_hi); +! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); +! +! res = sqrt(res); +! +! res = D2ONP1022 * res; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! } +! else +! { +! res = x + y; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! } +! } +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! if ( (c1 & 2) != 0 ) +! { +! x *= D2ONP1022; +! y *= D2ONP1022; +! +! x_hi = ( x + D2ON28 ) - D2ON28; +! x_lo = x - x_hi; +! y_hi = ( y + D2ON28 ) - D2ON28; +! y_lo = y - y_hi; +! res = (x_hi * x_hi + y_hi * y_hi); +! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); +! +! res = sqrt(res); +! +! res = D2ONM1022 * res; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! +! dmax = x; +! if ( dmax < y ) dmax = y; +! +! dmax = vis_fand(dmax,DC0); +! dnorm = vis_fpsub32(DC1,dmax); +! +! x *= dnorm; +! y *= dnorm; +! +! x_hi = x + D2ON28; +! x_hi -= D2ON28; +! x_lo = x - x_hi; +! +! y_hi = y + D2ON28; +! y_hi -= D2ON28; +! y_lo = y - y_hi; +! +! res = x_hi * x_hi; +! dtmp1 = x + x_hi; +! dtmp0 = y_hi * y_hi; +! dtmp2 = y + y_hi; +! +! res += dtmp0; +! dtmp1 *= x_lo; +! dtmp2 *= y_lo; +! dtmp1 += dtmp2; +! res += dtmp1; +! +! res = sqrt(res); +! +! res = dmax * res; +! ((float*)pz)[0] = ((float*)&res)[0]; +! ((float*)pz)[1] = ((float*)&res)[1]; +! +! px += stridex; +! py += stridey; +! 
pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vhypot) + save %sp,-SA(MINFRAME),%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o3) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],%l0 +#else + ld [%fp+STACK_BIAS+92],%l0 +#endif + ldd [%o3],DC0 + sll %i2,3,stridex + mov %i0,tmp_counter + + ldd [%o3+8],DC1 + sll %i4,3,stridey + mov %i1,tmp_px + + ldd [%o3+16],DC2 + sll %l0,3,stridez + mov %i3,tmp_py + + ldd [%o3+24],D2ON28 + + ldd [%o3+32],DC3 + +.begin: + mov tmp_counter,counter + mov tmp_px,%i1 + mov tmp_py,%i3 + clr tmp_counter +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i1]%asi,%o0 + sethi %hi(0x7ffffc00),%o5 + + lda [%i3]%asi,%o2 + add %o5,1023,%o5 + + lda [%i1]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; + + lda [%i1+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; + add %i1,stridex,%o1 ! px += stridex + + lda [%i3]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; + sethi %hi(0x00100000),%l7 + and %o0,%o5,%o0 + + lda [%i3+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; + and %o2,%o5,%o2 + sethi %hi(0x7fe00000),%l6 + + fabsd %f26,%f36 ! (1_0) x = fabs(x); + cmp %o0,%o2 + mov %o2,%l4 + + fabsd %f24,%f54 ! (1_0) y = fabs(y); + add %i3,stridey,%o5 ! py += stridey + movg %icc,%o0,%o2 + lda [%o5]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; + + cmp %o2,%l6 + sethi %hi(0x7ff00000),%o4 + bge,pn %icc,.spec0 + lda [%o5+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; + + cmp %o2,%l7 + bl,pn %icc,.spec1 + nop + lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; + + lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; + add %i3,stridey,%i3 ! py += stridey + + fabsd %f28,%f34 ! (2_0) y = fabs(y); + + fabsd %f26,%f50 ! (2_0) x = fabs(x); + + fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); + + fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); + + fcmpgt32 DC2,%f50,%o4 ! 
(2_0) c1 = vis_fcmpgt32(DC2,x); + + fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); + + or %o3,%o0,%o3 ! (2_0) c0 |= c2; + + andcc %o3,2,%g0 ! (2_0) c0 & 2 + bnz,pn %icc,.update0 ! (2_0) if ( (c0 & 2) != 0 ) + and %o4,%o5,%o4 ! (2_0) c1 &= c3; +.cont0: + add %i3,stridey,%l4 ! py += stridey + andcc %o4,2,%g0 ! (2_0) c1 & 2 + bnz,pn %icc,.update1 ! (2_0) if ( (c1 & 2) != 0 ) + fmovd %f36,%f56 ! (1_0) dmax = x; +.cont1: + lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; + add %o1,stridex,%l2 ! px += stridex + + lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; + + lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0]; + + lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1]; + + fabsd %f30,%f30 ! (3_1) y = fabs(y); + + fabsd %f18,%f18 ! (3_1) x = fabs(x); + + fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y + + fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y; + + fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x); + + fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y); + + fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x); + + fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y); + + fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (3_1) c0 |= c2; + + andcc %o3,2,%g0 ! (3_1) c0 & 2 + bnz,pn %icc,.update2 ! (3_1) if ( (c0 & 2) != 0 ) + and %o4,%o1,%o4 ! (3_1) c1 &= c3; +.cont2: + add %l4,stridey,%i3 ! py += stridey + andcc %o4,2,%g0 ! (3_1) c1 & 2 + bnz,pn %icc,.update3 ! (3_1) if ( (c1 & 2) != 0 ) + fmovd %f50,%f32 ! (2_1) dmax = x; +.cont3: + fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax); + lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0]; + + lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1]; + + add %l2,stridex,%l1 ! px += stridex + + fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm; + lda [%l1]%asi,%f22 ! (0_0) ((float*)&x)[0] = ((float*)px)[0] + + lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f54,%f10,%f56 ! 
(1_1) y *= dnorm; + fabsd %f20,%f40 ! (0_0) y = fabs(y); + + fabsd %f22,%f20 ! (0_0) x = fabs(x); + + fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y + + + fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y; + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x); + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y); + + fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x); + + fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0); + + or %g5,%o2,%g5 ! (0_0) c0 |= c2; + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + andcc %g5,2,%g0 ! (0_0) c0 & 2 + bnz,pn %icc,.update4 ! (0_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; +.cont4: + and %g1,%o4,%g1 ! (0_0) c1 &= c3; + + add %i3,stridey,%l2 ! py += stridey + andcc %g1,2,%g0 ! (0_0) c1 & 2 + bnz,pn %icc,.update5 ! (0_0) if ( (c1 & 2) != 0 ) + fmovd %f18,%f44 ! (3_1) dmax = x; +.cont5: + fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax); + lda [%l2]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; + add %l1,stridex,%l7 ! px += stridex + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; + + fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm; + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm; + fabsd %f24,%f54 ! (1_0) y = fabs(y); + + fabsd %f26,%f36 ! (1_0) x = fabs(x); + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y + + fmuld %f28,%f56,%f26 ! 
(1_1) dtmp2 *= y_lo; + + fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y; + + faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28; + fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x); + + faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28; + fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y); + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0); + + or %g1,%g5,%g1 ! (1_0) c0 |= c2; + fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28; + + andcc %g1,2,%g0 ! (1_0) c0 & 2 + bnz,pn %icc,.update6 ! (1_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28; +.cont6: + and %o5,%o1,%o5 ! (1_0) c1 &= c3; + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + add %l2,stridey,%i3 ! py += stridey + andcc %o5,2,%g0 ! (1_0) c1 & 2 + bnz,pn %icc,.update7 ! (1_0) if ( (c1 & 2) != 0 ) + fmovd %f20,%f4 ! (0_0) dmax = x; +.cont7: + fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax); + lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi; + lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; + add %l7,stridex,%o1 ! px += stridex + faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi; + + fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm; + fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi; + lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi; + fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi; + + fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm; + fabsd %f28,%f34 ! (2_0) y = fabs(y); + + fabsd %f26,%f50 ! (2_0) x = fabs(x); + + fmuld %f56,%f18,%f10 ! 
(2_1) dtmp1 *= x_lo; + fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y + + fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo; + + fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y; + + faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28; + fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); + + faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28; + fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); + + faddd %f2,%f44,%f30 ! (2_1) res += dtmp0; + fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (2_0) c0 |= c2; + fsubd %f56,D2ON28,%f18 ! (3_1) x_hi -= D2ON28; + + andcc %o3,2,%g0 ! (2_0) c0 & 2 + bnz,pn %icc,.update8 ! (2_0) if ( (c0 & 2) != 0 ) + fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28; +.cont8: + and %o4,%o5,%o4 ! (2_0) c1 &= c3; + faddd %f30,%f26,%f12 ! (2_1) res += dtmp1; + + add %i3,stridey,%l4 ! py += stridey + andcc %o4,2,%g0 ! (2_0) c1 & 2 + bnz,pn %icc,.update9 ! (2_0) if ( (c1 & 2) != 0 ) + fmovd %f36,%f56 ! (1_0) dmax = x; +.cont9: + lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; + add %o1,stridex,%l2 ! px += stridex + fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax); + + fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi; + lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi; + + fsqrtd %f12,%f12 ! (2_1) res = sqrt(res); + faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi; + + cmp counter,4 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,4,counter + + .align 16 +.main_loop: + fmuld %f20,%f44,%f2 ! (0_1) x *= dnorm; + fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi; + lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0]; + + fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi; + lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1]; + fsubd %f58,%f4,%f58 ! 
(3_2) y_lo = y - y_hi; + + fmuld %f40,%f44,%f44 ! (0_1) y *= dnorm; + fabsd %f30,%f30 ! (3_1) y = fabs(y); + + fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res; + fabsd %f18,%f18 ! (3_1) x = fabs(x); + st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo; + st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y + + fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo; + + fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y; + + faddd %f2,D2ON28,%f10 ! (0_1) x_hi = x + D2ON28; + fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x); + + faddd %f44,D2ON28,%f20 ! (0_1) y_hi = y + D2ON28; + fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y); + + faddd %f60,%f22,%f22 ! (3_2) res += dtmp0; + fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x); + + faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2; + fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y); + + fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (3_1) c0 |= c2; + fsubd %f10,D2ON28,%f58 ! (0_1) x_hi -= D2ON28; + + andcc %o3,2,%g0 ! (3_1) c0 & 2 + bnz,pn %icc,.update10 ! (3_1) if ( (c0 & 2) != 0 ) + fsubd %f20,D2ON28,%f56 ! (0_1) y_hi -= D2ON28; +.cont10: + faddd %f22,%f26,%f28 ! (3_2) res += dtmp1; + and %o4,%o1,%o4 ! (3_1) c1 &= c3; + + add %l4,stridey,%i3 ! py += stridey + andcc %o4,2,%g0 ! (3_1) c1 & 2 + bnz,pn %icc,.update11 ! (3_1) if ( (c1 & 2) != 0 ) + fmovd %f50,%f32 ! (2_1) dmax = x; +.cont11: + fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax); + add %l2,stridex,%l1 ! px += stridex + lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f58,%f58,%f6 ! (0_1) res = x_hi * x_hi; + lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1]; + add %i5,stridez,%l6 ! pz += stridez + faddd %f44,%f56,%f60 ! (0_1) dtmp2 = y + y_hi; + + fsqrtd %f28,%f4 ! (3_2) res = sqrt(res); + lda [%l1]%asi,%f22 ! 
(0_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f2,%f58,%f24 ! (0_1) dtmp1 = x + x_hi; + + fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm; + fsubd %f2,%f58,%f26 ! (0_1) x_lo = x - x_hi; + lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f56,%f56,%f28 ! (0_1) dtmp0 = y_hi * y_hi; + fsubd %f44,%f56,%f44 ! (0_1) y_lo = y - y_hi; + + fmuld %f54,%f10,%f56 ! (1_1) y *= dnorm; + fabsd %f20,%f40 ! (0_0) y = fabs(y); + + fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res; + fabsd %f22,%f20 ! (0_0) x = fabs(x); + st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f24,%f26,%f10 ! (0_1) dtmp1 *= x_lo; + st %f13,[%l6+4] ! (2_2) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y + + fmuld %f60,%f44,%f12 ! (0_1) dtmp2 *= y_lo; + + fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y; + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x); + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y); + + faddd %f6,%f28,%f24 ! (0_1) res += dtmp0; + fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f12,%f26 ! (0_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0); + + or %g5,%o2,%g5 ! (0_0) c0 |= c2; + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + andcc %g5,2,%g0 ! (0_0) c0 & 2 + bnz,pn %icc,.update12 ! (0_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; +.cont12: + and %g1,%o4,%g1 ! (0_0) c1 &= c3; + faddd %f24,%f26,%f12 ! (0_1) res += dtmp1; + + add %i3,stridey,%l2 ! py += stridey + andcc %g1,2,%g0 ! (0_0) c1 & 2 + bnz,pn %icc,.update13 ! (0_0) if ( (c1 & 2) != 0 ) + fmovd %f18,%f44 ! (3_1) dmax = x; +.cont13: + fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax); + add %l1,stridex,%l7 ! px += stridex + lda [%l2]%asi,%f24 ! 
(1_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + add %l6,stridez,%i5 ! pz += stridez + lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + fsqrtd %f12,%f12 ! (0_1) res = sqrt(res); + lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + + fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm; + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm; + fabsd %f24,%f54 ! (1_0) y = fabs(y); + + fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res; + fabsd %f26,%f36 ! (1_0) x = fabs(x); + st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y + + fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; + + fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y; + + faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28; + fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x); + + faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28; + fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y); + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0); + + or %g1,%g5,%g1 ! (1_0) c0 |= c2; + fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28; + + andcc %g1,2,%g0 ! (1_0) c0 & 2 + bnz,pn %icc,.update14 ! (1_0) if ( (c0 & 2) != 0 ) + fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28; +.cont14: + and %o5,%o1,%o5 ! (1_0) c1 &= c3; + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + add %l2,stridey,%i3 ! 
py += stridey + andcc %o5,2,%g0 ! (1_0) c1 & 2 + bnz,pn %icc,.update15 ! (1_0) if ( (c1 & 2) != 0 ) + fmovd %f20,%f4 ! (0_0) dmax = x; +.cont15: + fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax); + add %l7,stridex,%o1 ! px += stridex + lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; + + fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi; + add %i5,stridez,%g5 ! pz += stridez + lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; + faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi; + + fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm; + fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi; + lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; + + fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi; + fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi; + + fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm; + fabsd %f28,%f34 ! (2_0) y = fabs(y); + + fmuld %f16,%f12,%f16 ! (0_1) res = dmax * res; + fabsd %f26,%f50 ! (2_0) x = fabs(x); + st %f16,[%g5] ! (0_1) ((float*)pz)[0] = ((float*)&res)[0]; + + fmuld %f56,%f18,%f10 ! (2_1) dtmp1 *= x_lo; + st %f17,[%g5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res)[1]; + fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y + + fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo; + + fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y; + + faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28; + fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); + + faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28; + fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); + + faddd %f2,%f44,%f30 ! (2_1) res += dtmp0; + fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); + + faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2; + fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); + + fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0); + + or %o3,%o0,%o3 ! (2_0) c0 |= c2; + fsubd %f56,D2ON28,%f18 ! 
(3_1) x_hi -= D2ON28; + + andcc %o3,2,%g0 ! (2_0) c0 & 2 + bnz,pn %icc,.update16 ! (2_0) if ( (c0 & 2) != 0 ) + fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28; +.cont16: + and %o4,%o5,%o4 ! (2_0) c1 &= c3; + faddd %f30,%f26,%f12 ! (2_1) res += dtmp1; + + add %i3,stridey,%l4 ! py += stridey + andcc %o4,2,%g0 ! (2_0) c1 & 2 + bnz,pn %icc,.update17 ! (2_0) if ( (c1 & 2) != 0 ) + fmovd %f36,%f56 ! (1_0) dmax = x; +.cont17: + lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; + add %o1,stridex,%l2 ! px += stridex + fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax); + + fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi; + add %g5,stridez,%i5 ! pz += stridez + lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; + faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi; + + fsqrtd %f12,%f12 ! (2_1) res = sqrt(res); + subcc counter,4,counter ! counter -= 4; + bpos,pt %icc,.main_loop + faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi; + + add counter,4,counter + +.tail: + subcc counter,1,counter + bneg,a .begin + nop + + fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi; + + fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi; + fsubd %f58,%f4,%f58 ! (3_2) y_lo = y - y_hi; + + fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res; + st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0]; + + st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1]; + + subcc counter,1,counter + bneg,a .begin + add %i5,stridez,%i5 + + fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo; + + fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo; + + faddd %f60,%f22,%f22 ! (3_2) res += dtmp0; + + faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2; + + faddd %f22,%f26,%f28 ! (3_2) res += dtmp1; + + add %i5,stridez,%l6 ! pz += stridez + + fsqrtd %f28,%f4 ! (3_2) res = sqrt(res); + add %l2,stridex,%l1 ! px += stridex + + fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res; + st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0]; + + st %f13,[%l6+4] ! 
(2_2) ((float*)pz)[1] = ((float*)&res)[1]; + + subcc counter,1,counter + bneg .begin + add %l6,stridez,%i5 + + fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res; + st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0]; + + st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1]; + + ba .begin + add %i5,stridez,%i5 + + .align 16 +.spec0: + ld [%i1+4],%l1 ! lx = ((int*)px)[1]; + cmp %o2,%o4 ! j0 ? 0x7ff00000 + bge,pn %icc,1f ! if ( j0 >= 0x7ff00000 ) + fabsd %f26,%f26 ! x = fabs(x); + + sub %o0,%l4,%o0 ! diff = hy - hx; + fabsd %f24,%f24 ! y = fabs(y); + + sra %o0,31,%l4 ! j0 = diff >> 31; + + xor %o0,%l4,%o0 ! diff ^ j0 + + sethi %hi(0x03600000),%l1 + sub %o0,%l4,%o0 ! (diff ^ j0) - j0 + + cmp %o0,%l1 ! ((diff ^ j0) - j0) ? 0x03600000 + bge,a,pn %icc,2f ! if ( ((diff ^ j0) - j0) >= 0x03600000 ) + faddd %f26,%f24,%f24 ! *pz = x + y + + fmuld %f26,DC2,%f36 ! (1_1) x *= dnorm; + + fmuld %f24,DC2,%f56 ! (1_1) y *= dnorm; + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + + fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + + fmuld DC3,%f24,%f24 ! (1_2) res = dmax * res; +2: + add %i3,stridey,%i3 + add %i1,stridex,%i1 + st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0]; + st %f25,[%i5+4] ! 
((float*)pz)[1] = ((float*)&res)[1]; + + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + +1: + ld [%i3+4],%l2 ! ly = ((int*)py)[1]; + cmp %o0,%o4 ! hx ? 0x7ff00000 + bne,pn %icc,1f ! if ( hx != 0x7ff00000 ) + fabsd %f24,%f24 ! y = fabs(y); + + cmp %l1,0 ! lx ? 0 + be,pn %icc,2f ! if ( lx == 0 ) + nop +1: + cmp %l4,%o4 ! hy ? 0x7ff00000 + bne,pn %icc,1f ! if ( hy != 0x7ff00000 ) + nop + + cmp %l2,0 ! ly ? 0 + be,pn %icc,2f ! if ( ly == 0 ) + nop +1: + add %i3,stridey,%i3 + add %i1,stridex,%i1 + fmuld %f26,%f24,%f24 ! res = x * y; + st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0]; + + st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1]; + + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + +2: + add %i1,stridex,%i1 + add %i3,stridey,%i3 + st DC0_HI,[%i5] ! ((int*)pz)[0] = 0x7ff00000; + st DC0_LO,[%i5+4] ! ((int*)pz)[1] = 0; + fcmpd %f26,%f24 ! x ? y + + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + + .align 16 +.spec1: + fmuld %f26,DC3,%f36 ! (1_1) x *= dnorm; + + fmuld %f24,DC3,%f56 ! (1_1) y *= dnorm; + + faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; + + faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; + + fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; + + fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; + + fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; + faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; + + faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; + + fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; + + fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; + fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; + + fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; + + fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; + + faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; + + faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; + + faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; + + fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); + + fmuld DC2,%f24,%f24 ! (1_2) res = dmax * res; + + add %i3,stridey,%i3 + add %i1,stridex,%i1 + st %f24,[%i5] ! 
((float*)pz)[0] = ((float*)&res)[0]; + + st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1]; + add %i5,stridez,%i5 + ba .begin1 + sub counter,1,counter + + .align 16 +.update0: + fzero %f50 + cmp counter,1 + ble .cont0 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,1,tmp_counter + ba .cont0 + mov 1,counter + + .align 16 +.update1: + fzero %f50 + cmp counter,1 + ble .cont1 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,1,tmp_counter + ba .cont1 + mov 1,counter + + .align 16 +.update2: + fzero %f18 + cmp counter,2 + ble .cont2 + fzero %f30 + + mov %l2,tmp_px + mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont2 ! resume at this handler's own continuation, same target as the early-out above + mov 2,counter + + .align 16 +.update3: + fzero %f18 + cmp counter,2 + ble .cont3 + fzero %f30 + + mov %l2,tmp_px + mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont3 + mov 2,counter + + .align 16 +.update4: + fzero %f20 + cmp counter,3 + ble .cont4 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont4 + mov 3,counter + + .align 16 +.update5: + fzero %f20 + cmp counter,3 + ble .cont5 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont5 + mov 3,counter + + .align 16 +.update6: + fzero %f36 + cmp counter,4 + ble .cont6 + fzero %f54 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont6 + mov 4,counter + + .align 16 +.update7: + fzero %f36 + cmp counter,4 + ble .cont7 + fzero %f54 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont7 + mov 4,counter + + .align 16 +.update8: + fzero %f50 + cmp counter,5 + ble .cont8 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont8 + mov 5,counter + + .align 16 +.update9: + fzero %f50 + cmp counter,5 + ble .cont9 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont9 + mov 5,counter + + + .align 16 +.update10: + fzero %f18 + cmp counter,2 + ble .cont10 + fzero %f30 + + mov %l2,tmp_px +
mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont10 + mov 2,counter + + .align 16 +.update11: + fzero %f18 + cmp counter,2 + ble .cont11 + fzero %f30 + + mov %l2,tmp_px + mov %l4,tmp_py + + sub counter,2,tmp_counter + ba .cont11 + mov 2,counter + + .align 16 +.update12: + fzero %f20 + cmp counter,3 + ble .cont12 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont12 + mov 3,counter + + .align 16 +.update13: + fzero %f20 + cmp counter,3 + ble .cont13 + fzero %f40 + + mov %l1,tmp_px + mov %i3,tmp_py + + sub counter,3,tmp_counter + ba .cont13 + mov 3,counter + + .align 16 +.update14: + fzero %f54 + cmp counter,4 + ble .cont14 + fzero %f36 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont14 + mov 4,counter + + .align 16 +.update15: + fzero %f54 + cmp counter,4 + ble .cont15 + fzero %f36 + + mov %l7,tmp_px + mov %l2,tmp_py + + sub counter,4,tmp_counter + ba .cont15 + mov 4,counter + + .align 16 +.update16: + fzero %f50 + cmp counter,5 + ble .cont16 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont16 + mov 5,counter + + .align 16 +.update17: + fzero %f50 + cmp counter,5 + ble .cont17 + fzero %f34 + + mov %o1,tmp_px + mov %i3,tmp_py + + sub counter,5,tmp_counter + ba .cont17 + mov 5,counter + + .align 16 +.exit: + ret + restore + SET_SIZE(__vhypot) + diff --git a/usr/src/libm/src/mvec/vis/__vhypotf.S b/usr/src/libm/src/mvec/vis/__vhypotf.S new file mode 100644 index 0000000..7bfddc3 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vhypotf.S @@ -0,0 +1,1226 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vhypotf.S 1.6 06/01/23 SMI" + + .file "__vhypotf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01 + .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01 + .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff + .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000 + .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000 + .word 0x7fe00000, 0x00000000 ! DA0 = 0x7fe0000000000000 + .word 0x47efffff, 0xe0000000 ! DFMAX = 3.402823e+38 + .word 0x7f7fffff, 0x80808080 ! FMAX = 3.402823e+38 , SCALE = 0x80808080 + .word 0x20000000, 0x00000000 ! DA1 = 0x2000000000000000 + +#define DC0 %f12 +#define DC1 %f10 +#define DC2 %f42 +#define DA0 %f6 +#define DA1 %f4 +#define K2 %f26 +#define K1 %f28 +#define SCALE %f3 +#define FMAX %f2 +#define DFMAX %f50 + +#define stridex %l6 +#define stridey %i4 +#define stridez %l5 +#define _0x7fffffff %o1 +#define _0x7f3504f3 %o2 +#define _0x1ff0 %l2 +#define TBL %l1 + +#define counter %l0 + +#define tmp_px STACK_BIAS-0x30 +#define tmp_py STACK_BIAS-0x28 +#define tmp_counter STACK_BIAS-0x20 +#define tmp0 STACK_BIAS-0x18 +#define tmp1 STACK_BIAS-0x10 +#define tmp2 STACK_BIAS-0x0c +#define tmp3 STACK_BIAS-0x08 +#define tmp4 STACK_BIAS-0x04 + +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! hx0 = *(int*)px; +! x0 = *px; +! px += stridex; +! +! hy0 = *(int*)py; +! y0 = *py; +! py += stridey; +! +! hx0 &= 0x7fffffff; +! hy0 &= 0x7fffffff; +! +! if ( hx >= 0x7f3504f3 || hy >= 0x7f3504f3 ) +! { +! if ( hx >= 0x7f800000 || hy >= 0x7f800000 ) +! { +! if ( hx == 0x7f800000 || hy == 0x7f800000 ) +! *(int*)pz = 0x7f800000; +! else *pz = x * y; +! } +! else +! { +! hyp = sqrt(x * (double)x + y * (double)y); +! if ( hyp <= DFMAX ) ftmp0 = (float)hyp; +! else ftmp0 = FMAX * FMAX; +! *pz = ftmp0; +! } +! pz += stridez; +! continue; +! } +! if ( (hx | hy) == 0 ) +! { +! *pz = 0; +! pz += stridez; +! continue; +! } +! dx0 = x0 * (double)x0; +! dy0 = y0 * (double)y0; +! db0 = dx0 + dy0; +! +! iexp0 = ((int*)&db0)[0]; +! +! h0 = vis_fand(db0,DC0); +! h0 = vis_for(h0,DC1); +! h_hi0 = vis_fand(h0,DC2); +! +! db0 = vis_fand(db0,DA0); +! db0 = vis_fmul8x16(SCALE, db0); +! db0 = vis_fpadd32(db0,DA1); +! +! iexp0 >>= 8; +! di0 = iexp0 & 0x1ff0; +! si0 = (char*)sqrt_arr + di0; +! +! dtmp0 = ((double*)((char*)div_arr + di0))[0]; +! xx0 = h0 - h_hi0; +! xx0 *= dtmp0; +! +! dtmp0 = ((double*)si0)[1]; +! res0 = K2 * xx0; +! res0 += K1; +! res0 *= xx0; +! res0 += DC1; +! res0 = dtmp0 * res0; +! res0 *= db0; +! ftmp0 = (float)res0; +! *pz = ftmp0; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ + ENTRY(__vhypotf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o3) + PIC_SET(l7,__vlibm_TBL_sqrtf,l1) + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + st %i0,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + ldd [%o3],K1 + sethi %hi(0x7ffffc00),%o1 + + ldd [%o3+8],K2 + sethi %hi(0x7f350400),%o2 + + ldd [%o3+16],DC0 + add %o1,1023,_0x7fffffff + add %o2,0xf3,_0x7f3504f3 + + ldd [%o3+24],DC1 + sll %i2,2,stridex + + ld [%o3+56],FMAX + + ldd [%o3+32],DC2 + sll %i4,2,stridey + + ldd [%o3+40],DA0 + sll stridez,2,stridez + + ldd [%o3+48],DFMAX + + ld [%o3+60],SCALE + or %g0,0xff8,%l2 + + ldd [%o3+64],DA1 + sll %l2,1,_0x1ff0 + or %g0,%i5,%l7 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i1 + ldx [%fp+tmp_py],%i2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px; + + lda [%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; + + lda [%i1]0x82,%f17 ! (3_0) x0 = *px; + and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; + + cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 + bge,pn %icc,.spec ! (3_0) if ( hx >= 0x7f3504f3 ) + and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; + + cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 + bge,pn %icc,.spec ! (3_0) if ( hy >= 0x7f3504f3 ) + or %g0,%i2,%o7 + + orcc %l3,%l4,%g0 + bz,pn %icc,.spec1 + + add %i1,stridex,%i1 ! px += stridex + fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; + lda [%i2]0x82,%f17 ! (3_0) y0 = *py; + + lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; + + lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; + + and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; + + fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 + bge,pn %icc,.update0 ! (4_0) if ( hx >= 0x7f3504f3 ) + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + + orcc %l3,%l4,%g0 + bz,pn %icc,.update0 + lda [%i1]0x82,%f17 ! 
(4_0) x0 = *px; +.cont0: + faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3 + lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py; + + add %o7,stridey,%i5 ! py += stridey + lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px; + + bge,pn %icc,.update1 ! (4_1) if ( hy >= 0x7f3504f3 ) + st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0]; +.cont1: + and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff; + + fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0; + lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px; + + add %i1,stridex,%i1 ! px += stridex + + lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py; + cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3 + bge,pn %icc,.update2 ! (0_0) if ( hx >= 0x7f3504f3 ) + add %i5,stridey,%o4 ! py += stridey +.cont2: + faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0; + + fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0; + and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff; + lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py; + + cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3 + bge,pn %icc,.update3 ! (0_0) if ( hy >= 0x7f3504f3 ) + st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0]; + + orcc %l3,%l4,%g0 + bz,pn %icc,.update3 +.cont3: + lda [%i1+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px; + + fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0); + + and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff; + + fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3 + lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py; + + add %i1,stridex,%i1 ! px += stridex + + lda [%i1]0x82,%f17 ! (1_0) x0 = *px; + bge,pn %icc,.update4 ! (1_0) if ( hx >= 0x7f3504f3 ) + add %o4,stridey,%i5 ! py += stridey +.cont4: + and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff; + for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1); + + cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3 + ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f34,%f0 ! 
(0_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0; + add %i1,stridex,%i1 ! px += stridex + lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py; + + srax %o0,8,%o0 ! (3_1) iexp0 >>= 8; + bge,pn %icc,.update5 ! (1_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + bz,pn %icc,.update5 +.cont5: + lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px; + + and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0; + st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0]; + fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0; + add %i5,stridey,%i2 ! py += stridey + lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py; + + and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff; + + lda [%i1]0x82,%f17 ! (2_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3 + + fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff; + for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1); + + bge,pn %icc,.update6 ! (2_0) if ( hx >= 0x7f3504f3 ) + ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0]; +.cont6: + faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3 + lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py; + + add %i1,stridex,%i1 ! px += stridex + bge,pn %icc,.update7 ! (2_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + bz,pn %icc,.update7 + nop +.cont7: + fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0; + srax %o3,8,%o3 ! (4_1) iexp0 >>= 8; + lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px; + + and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0; + st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0]; + fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o3],%f22 ! 
(4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %i2,stridey,%o7 ! py += stridey + fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0; + lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; + and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; + + faddd %f56,K1,%f54 ! (3_1) res0 += K1; + cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 + + lda [%i1]0x82,%f17 ! (3_0) x0 = *px; + add %i1,stridex,%i1 ! px += stridex + bge,pn %icc,.update8 ! (3_0) if ( hx >= 0x7f3504f3 ) + + fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0; +.cont8: + and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; + for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1); + + cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 + ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0]; + faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; + bge,pn %icc,.update9 ! (3_0) if ( hy >= 0x7f3504f3 ) + lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py; + + orcc %l3,%l4,%g0 + bz,pn %icc,.update9 + nop +.cont9: + fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0; + lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; + fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0; + srax %g1,8,%o5 ! (0_0) iexp0 >>= 8; + lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; + fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0); + + and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0; + st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0]; + fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0; + and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; + fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 + bge,pn %icc,.update10 ! (4_0) if ( hx >= 0x7f3504f3 ) + faddd %f40,DC1,%f40 ! 
(3_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + + lda [%i1]0x82,%f17 ! (4_0) x0 = *px; +.cont10: + fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0; + cmp counter,5 + for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1); + + ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0]; + fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0; + faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; + + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,5,counter + + .align 16 +.main_loop: + fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3 + lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py; + fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0; + add %o7,stridey,%i5 ! py += stridey + st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0]; + fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0; + srax %g1,8,%g5 ! (1_1) iexp0 >>= 8; + bge,pn %icc,.update11 ! (4_1) if ( hy >= 0x7f3504f3 ) + fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0); + + orcc %l3,%l4,%g0 + nop + bz,pn %icc,.update11 + fzero %f52 +.cont11: + fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0; + and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0; + lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px; + fand %f30,DC0,%f60 ! (2_1) h0 = vis_fand(db0,DC0); + + ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0; + add %i1,stridex,%i0 ! px += stridex + fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0; + nop + lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px; + faddd %f58,DC1,%f36 ! (4_2) res0 += DC1; + + faddd %f56,K1,%f58 ! (0_1) res0 += K1; + and %l3,_0x7fffffff,%l3 ! 
(0_0) hx0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0); + + lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py; + cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3 + bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7f3504f3 ) + fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0; +.cont12: + fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0; + add %l7,stridez,%o7 ! pz += stridez + st %f14,[%l7] ! (3_2) *pz = ftmp0; + for %f60,DC1,%f46 ! (2_1) h0 = vis_for(h0,DC1); + + fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0; + add %i5,stridey,%o4 ! py += stridey + ld [%fp+tmp4],%g1 ! (2_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0; + + fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0; + and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff; + lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py; + fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0; + cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3 + st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0]; + fand %f46,DC2,%f58 ! (2_1) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0; + srax %g1,8,%g1 ! (2_1) iexp0 >>= 8; + bge,pn %icc,.update13 ! (0_0) if ( hy >= 0x7f3504f3 ) + fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0); + + orcc %l3,%l4,%g0 + nop + bz,pn %icc,.update13 + fzero %f52 +.cont13: + fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0; + and %g1,_0x1ff0,%g1 ! (2_1) di0 = iexp0 & 0x1ff0; + lda [%i0+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px; + fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0); + + ldd [TBL+%g1],%f22 ! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0; + add %i0,stridex,%i1 ! px += stridex + fsubd %f46,%f58,%f58 ! (2_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0; + add %o7,stridez,%i0 ! pz += stridez + lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py; + faddd %f38,DC1,%f36 ! 
(0_1) res0 += DC1; + + faddd %f56,K1,%f38 ! (1_1) res0 += K1; + and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff; + ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0); + + lda [%i1]0x82,%f17 ! (1_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3 + bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7f3504f3 ) + fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0; +.cont14: + fmuld %f58,%f22,%f58 ! (2_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff; + add %o4,stridey,%i5 ! py += stridey + for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1); + + fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0; + cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3 + ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0; + add %i1,stridex,%i1 ! px += stridex + lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py; + fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0; + st %f14,[%o7] ! (4_2) *pz = ftmp0; + bge,pn %icc,.update15 ! (1_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + bz,pn %icc,.update15 + nop +.cont15: + fmuld K2,%f58,%f54 ! (2_1) res0 = K2 * xx0; + srax %o0,8,%o0 ! (3_1) iexp0 >>= 8; + st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0]; + fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0; + and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0; + lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px; + fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0; + add %i0,stridez,%i3 ! pz += stridez + fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0; + add %i5,stridey,%i2 ! 
py += stridey + lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py; + faddd %f44,DC1,%f44 ! (1_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0); + and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff; + ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (2_1) res0 += K1; + + lda [%i1]0x82,%f17 ! (2_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3 + add %i3,stridez,%o4 ! pz += stridez + fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0; + + fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff; + st %f14,[%i0] ! (0_1) *pz = ftmp0; + for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1); + + fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0; + bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7f3504f3 ) + ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0]; + faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; +.cont16: + fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0; + cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3 + lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py; + fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f54,%f58,%f54 ! (2_1) res0 *= xx0; + add %i1,stridex,%l7 ! px += stridex + bge,pn %icc,.update17 ! (2_0) if ( hy >= 0x7f3504f3 ) + fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2); + + orcc %l3,%l4,%g0 + nop + bz,pn %icc,.update17 + fzero %f52 +.cont17: + fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0; + srax %o3,8,%o3 ! (4_1) iexp0 >>= 8; + st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0]; + fand %f30,DA0,%f40 ! (2_1) db0 = vis_fand(db0,DA0); + + fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0; + and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0; + lda [%l7]0x82,%l3 ! (3_0) hx0 = *(int*)px; + fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0); + + ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %g1,TBL,%g1 ! (2_1) si0 = (char*)sqrt_arr + di0; + add %i2,stridey,%o7 ! py += stridey + fsubd %f46,%f58,%f58 ! 
(4_1) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0; + lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; + add %l7,stridex,%i1 ! px += stridex + faddd %f54,DC1,%f36 ! (2_1) res0 += DC1; + + faddd %f56,K1,%f54 ! (3_1) res0 += K1; + and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (2_1) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f40,%f40 ! (2_1) db0 = vis_fmul8x16(SCALE, db0); + + lda [%l7]0x82,%f17 ! (3_0) x0 = *px; + cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 + bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7f3504f3 ) + fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0; +.cont18: + fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; + st %f14,[%i3] ! (1_1) *pz = ftmp0; + for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1); + + fmuld %f56,%f36,%f36 ! (2_1) res0 = dtmp0 * res0; + cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 + ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0]; + faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0; + + fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; + bge,pn %icc,.update19 ! (3_0) if ( hy >= 0x7f3504f3 ) + lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py; + fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); + +.cont19: + fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0; + orcc %l3,%l4,%g0 + st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0]; + fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0; + srax %g1,8,%o5 ! (0_0) iexp0 >>= 8; + lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; + fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (2_1) res0 *= db0; + and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0; + bz,pn %icc,.update19a + fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0); +.cont19a: + ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0; + and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; + fsubd %f46,%f38,%f38 ! 
(0_0) xx0 = h0 - h_hi0; + + fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; + cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 + lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; + faddd %f40,DC1,%f40 ! (3_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); + bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7f3504f3 ) + ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + + lda [%i1]0x82,%f17 ! (4_0) x0 = *px; +.cont20: + subcc counter,5,counter ! counter -= 5 + add %o4,stridez,%l7 ! pz += stridez + fdtos %f62,%f14 ! (2_1) ftmp0 = (float)res0; + + fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0; + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + st %f14,[%o4] ! (2_1) *pz = ftmp0; + for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1); + + ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0]; + fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0; + bpos,pt %icc,.main_loop + faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; + + add counter,5,counter + +.tail: + subcc counter,1,counter + bneg .begin + nop + + fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0; + fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2); + + fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0; + srax %g1,8,%g5 ! (1_1) iexp0 >>= 8; + fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0); + + fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0; + and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0; + + ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; + add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0; + fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0; + + faddd %f58,DC1,%f36 ! (4_2) res0 += DC1; + + faddd %f56,K1,%f58 ! (0_1) res0 += K1; + ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0); + + fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0; + + fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0; + add %l7,stridez,%o7 ! 
pz += stridez + st %f14,[%l7] ! (3_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + or %g0,%o7,%l7 + + fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0; + + fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1); + + fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0; + + fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0; + fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0; + + add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0; + + faddd %f38,DC1,%f36 ! (0_1) res0 += DC1; + + faddd %f56,K1,%f38 ! (1_1) res0 += K1; + ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1]; + fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0); + + add %o7,stridez,%i0 ! pz += stridez + fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0; + + fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0; + + fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0; + add %i0,stridez,%i3 ! pz += stridez + st %f14,[%o7] ! (4_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + or %g0,%i0,%l7 + + fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0); + + fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0; + + add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0; + + faddd %f44,DC1,%f44 ! (1_1) res0 += DC1; + + fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0); + ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1]; + + add %i3,stridez,%o4 ! pz += stridez + fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0; + + st %f14,[%i0] ! (0_1) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + or %g0,%i3,%l7 + + fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0; + + fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1); + + fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0; + + fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0; + + st %f14,[%i3] ! (1_1) *pz = ftmp0; + + ba .begin + or %g0,%o4,%l7 + + .align 16 +.spec1: + st %g0,[%l7] ! *pz = 0; + add %l7,stridez,%l7 ! pz += stridez + + add %i2,stridey,%i2 ! 
py += stridey + ba .begin1 + sub counter,1,counter ! counter-- + + .align 16 +.spec: + sethi %hi(0x7f800000),%i0 + cmp %l3,%i0 ! hx ? 0x7f800000 + bge,pt %icc,2f ! if ( hx >= 0x7f800000 ) + ld [%i2],%f8 + + cmp %l4,%i0 ! hy ? 0x7f800000 + bge,pt %icc,2f ! if ( hy >= 0x7f800000 ) + nop + + fsmuld %f17,%f17,%f44 ! x * (double)x + fsmuld %f8,%f8,%f24 ! y * (double)y + faddd %f44,%f24,%f24 ! x * (double)x + y * (double)y + fsqrtd %f24,%f24 ! hyp = sqrt(x * (double)x + y * (double)y); + fcmped %f24,DFMAX ! hyp ? DMAX + fbug,a 1f ! if ( hyp > DMAX ) + fmuls FMAX,FMAX,%f20 ! ftmp0 = FMAX * FMAX; + + fdtos %f24,%f20 ! ftmp0 = (float)hyp; +1: + st %f20,[%l7] ! *pz = ftmp0; + add %l7,stridez,%l7 ! pz += stridez + add %i1,stridex,%i1 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + ba .begin1 + sub counter,1,counter ! counter-- +2: + fcmps %f17,%f8 ! exceptions + cmp %l3,%i0 ! hx ? 0x7f800000 + be,a %icc,1f ! if ( hx == 0x7f800000 ) + st %i0,[%l7] ! *(int*)pz = 0x7f800000; + + cmp %l4,%i0 ! hy ? 0x7f800000 + be,a %icc,1f ! if ( hy == 0x7f800000 + st %i0,[%l7] ! *(int*)pz = 0x7f800000; + + fmuls %f17,%f8,%f8 ! x * y + st %f8,[%l7] ! *pz = x * y; + +1: + add %l7,stridez,%l7 ! pz += stridez + add %i1,stridex,%i1 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + ba .begin1 + sub counter,1,counter ! 
counter-- + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + + add %o7,stridey,%i5 + stx %i5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + fzeros %f8 + + stx %i1,[%fp+tmp_px] + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + fzeros %f17 + + sub %i1,stridex,%i2 + stx %i2,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + fzeros %f17 + + sub %i1,stridex,%o7 + stx %o7,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + fzeros %f17 + + sub %i1,stridex,%o5 + stx %o5,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + ble 
.cont9 + fzeros %f17 + + sub %i1,stridex,%o5 + stx %o5,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); + and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; + ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + + cmp counter,6 + ble .cont10 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + add %o7,stridey,%i5 + stx %i5,[%fp+tmp_py] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,1 + ble .cont11 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont11 + or %g0,1,counter + + .align 16 +.update12: + cmp counter,2 + ble .cont12 + fzeros %f8 + + stx %i0,[%fp+tmp_px] + add %i5,stridey,%o4 + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + or %g0,2,counter + + .align 16 +.update13: + cmp counter,2 + ble .cont13 + fzeros %f17 + + stx %i0,[%fp+tmp_px] + stx %o4,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + or %g0,2,counter + + .align 16 +.update14: + cmp counter,3 + ble .cont14 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + add %o4,stridey,%i5 + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + or %g0,3,counter + + .align 16 +.update15: + cmp counter,3 + ble .cont15 + fzeros %f17 + + sub %i1,stridex,%i2 + stx %i2,[%fp+tmp_px] + stx %i5,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + or %g0,3,counter + + .align 16 +.update16: + faddd %f40,%f32,%f18 ! 
(1_0) db0 = dx0 + dy0; + cmp counter,4 + ble .cont16 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + or %g0,4,counter + + .align 16 +.update17: + cmp counter,4 + ble .cont17 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + stx %i2,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + or %g0,4,counter + + .align 16 +.update18: + cmp counter,5 + ble .cont18 + fzeros %f17 + + stx %l7,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + or %g0,5,counter + + .align 16 +.update19: + fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); + cmp counter,5 + ble .cont19 + fzeros %f17 + + stx %l7,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont19 + or %g0,5,counter + + .align 16 +.update19a: + cmp counter,5 + ble .cont19a + fzeros %f17 + + stx %l7,[%fp+tmp_px] + stx %o7,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont19a + or %g0,5,counter + + .align 16 +.update20: + faddd %f54,K1,%f54 ! (4_1) res0 += K1; + cmp counter,6 + ble .cont20 + fzeros %f17 + + stx %i1,[%fp+tmp_px] + add %o7,stridey,%g1 + stx %g1,[%fp+tmp_py] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + or %g0,6,counter + +.exit: + ret + restore + SET_SIZE(__vhypotf) + diff --git a/usr/src/libm/src/mvec/vis/__vlog.S b/usr/src/libm/src/mvec/vis/__vlog.S new file mode 100644 index 0000000..bf5e478 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vlog.S @@ -0,0 +1,670 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vlog.S 1.8 06/01/23 SMI" + + .file "__vlog.S" + +#include "libm.h" + + RO_DATA + .align 32 +TBL: + .word 0xbfd522ae, 0x0738a000 + .word 0xbd2ebe70, 0x8164c759 + .word 0xbfd3c252, 0x77333000 + .word 0xbd183b54, 0xb606bd5c + .word 0xbfd26962, 0x1134e000 + .word 0x3d31b61f, 0x10522625 + .word 0xbfd1178e, 0x8227e000 + .word 0xbd31ef78, 0xce2d07f2 + .word 0xbfcf991c, 0x6cb3c000 + .word 0x3d390d04, 0xcd7cc834 + .word 0xbfcd1037, 0xf2656000 + .word 0x3d084a7e, 0x75b6f6e4 + .word 0xbfca93ed, 0x3c8ae000 + .word 0x3d287243, 0x50562169 + .word 0xbfc823c1, 0x6551a000 + .word 0xbd1e0ddb, 0x9a631e83 + .word 0xbfc5bf40, 0x6b544000 + .word 0x3d127023, 0xeb68981c + .word 0xbfc365fc, 0xb015a000 + .word 0x3d3fd3a0, 0xafb9691b + .word 0xbfc1178e, 0x8227e000 + .word 0xbd21ef78, 0xce2d07f2 + .word 0xbfbda727, 0x63844000 + .word 0xbd1a8940, 0x1fa71733 + .word 0xbfb9335e, 0x5d594000 + .word 0xbd23115c, 0x3abd47da + .word 0xbfb4d311, 0x5d208000 + .word 0x3cf53a25, 0x82f4e1ef + .word 0xbfb08598, 0xb59e4000 + .word 0x3d17e5dd, 0x7009902c + .word 0xbfa894aa, 0x149f8000 + .word 0xbd39a19a, 0x8be97661 + .word 0xbfa0415d, 0x89e78000 + .word 0x3d3dddc7, 0xf461c516 + .word 0xbf902056, 0x58930000 + .word 0xbd3611d2, 0x7c8e8417 + .word 0x00000000, 0x00000000 + .word 0x00000000, 0x00000000 + .word 0x3f9f829b, 0x0e780000 + .word 0x3d298026, 0x7c7e09e4 + .word 0x3faf0a30, 0xc0110000 + .word 
0x3d48a998, 0x5f325c5c + .word 0x3fb6f0d2, 0x8ae58000 + .word 0xbd34b464, 0x1b664613 + .word 0x3fbe2707, 0x6e2b0000 + .word 0xbd2a342c, 0x2af0003c + .word 0x3fc29552, 0xf8200000 + .word 0xbd35b967, 0xf4471dfc + .word 0x3fc5ff30, 0x70a78000 + .word 0x3d43d3c8, 0x73e20a07 + .word 0x3fc9525a, 0x9cf44000 + .word 0x3d46b476, 0x41307539 + .word 0x3fcc8ff7, 0xc79a8000 + .word 0x3d4a21ac, 0x25d81ef3 + .word 0x3fcfb918, 0x6d5e4000 + .word 0xbd0d572a, 0xab993c87 + .word 0x3fd1675c, 0xababa000 + .word 0x3d38380e, 0x731f55c4 + .word 0x3fd2e8e2, 0xbae12000 + .word 0xbd267b1e, 0x99b72bd8 + .word 0x3fd4618b, 0xc21c6000 + .word 0xbd13d82f, 0x484c84cc + .word 0x3fd5d1bd, 0xbf580000 + .word 0x3d4394a1, 0x1b1c1ee4 +! constants: + .word 0x40000000,0x00000000 + .word 0x3fe55555,0x555571da + .word 0x3fd99999,0x8702be3a + .word 0x3fd24af7,0x3f4569b1 + .word 0x3ea62e42,0xfee00000 ! scaled by 2**-20 + .word 0x3caa39ef,0x35793c76 ! scaled by 2**-20 + .word 0xffff8000,0x00000000 + .word 0x43200000 + .word 0xfff00000 + .word 0xc0194000 + .word 0x4000 + +#define two 0x200 +#define A1 0x208 +#define A2 0x210 +#define A3 0x218 +#define ln2hi 0x220 +#define ln2lo 0x228 +#define mask 0x230 +#define ox43200000 0x238 +#define oxfff00000 0x23c +#define oxc0194000 0x240 +#define ox4000 0x244 + +! local storage indices + +#define jnk STACK_BIAS-0x8 +#define tmp2 STACK_BIAS-0x10 +#define tmp1 STACK_BIAS-0x18 +#define tmp0 STACK_BIAS-0x20 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 + +! g1 TBL + +! l0 j0 +! l1 j1 +! l2 j2 +! l3 +! l4 0x94000 +! l5 +! l6 0x000fffff +! l7 0x7ff00000 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 +! o4 +! o5 +! o7 + +! f0 u0,q0 +! f2 v0,(two-v0)-u0,z0 +! f4 n0,f0,q0 +! f6 s0 +! f8 q +! f10 u1,q1 +! f12 v1,(two-v1)-u1,z1 +! f14 n1,f1,q1 +! f16 s1 +! f18 t +! f20 u2,q2 +! f22 v2,(two-v2)-u2,z2 +! f24 n2,f2,q2 +! f26 s2 +! f28 0xfff00000 +! f29 0x43200000 +! f30 0x4000 +! 
f31 0xc0194000 +! f32 t0 +! f34 h0,f0-(c0-h0) +! f36 c0 +! f38 A1 +! f40 two +! f42 t1 +! f44 h1,f1-(c1-h1) +! f46 c1 +! f48 A2 +! f50 0xffff8000... +! f52 t2 +! f54 h2,f2-(c2-h2) +! f56 c2 +! f58 A3 +! f60 ln2hi +! f62 ln2lo +! __vlog: store the log of each of the n doubles read from x into y; three elements are in flight at once, one per .loop0/.loop1/.loop2 stage + ENTRY(__vlog) + save %sp,-SA(MINFRAME)-tmps,%sp ! new frame including tmps bytes of scratch + PIC_SETUP(l7) + PIC_SET(l7,TBL,o0) + mov %o0,%g1 ! g1 = TBL + wr %g0,0x82,%asi ! set %asi for non-faulting loads + sethi %hi(0x94000),%l4 ! l4 = 0x94000 + sethi %hi(0x000fffff),%l6 + or %l6,%lo(0x000fffff),%l6 ! l6 = 0x000fffff + sethi %hi(0x7ff00000),%l7 ! l7 = 0x7ff00000 + ldd [%g1+two],%f40 + ldd [%g1+A1],%f38 + ldd [%g1+A2],%f48 + ldd [%g1+A3],%f58 + ldd [%g1+ln2hi],%f60 + ldd [%g1+ln2lo],%f62 + ldd [%g1+mask],%f50 + ld [%g1+ox43200000],%f29 + ld [%g1+oxfff00000],%f28 + ld [%g1+oxc0194000],%f31 + ld [%g1+ox4000],%f30 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,jnk,%o0 ! precondition loop + add %fp,jnk,%o1 + add %fp,jnk,%o2 ! py0..py2 start at jnk, so warm-up stores go to scratch + fzero %f2 + fzero %f6 + fzero %f18 + fzero %f36 + fzero %f12 + fzero %f14 + fzero %f16 + fzero %f42 + fzero %f44 + fzero %f46 + std %f46,[%fp+tmp1] + fzero %f24 + fzero %f26 + fzero %f52 + fzero %f54 + std %f54,[%fp+tmp2] + sub %i3,%i4,%i3 ! y -= stridey; each stage adds stridey before saving its store pointer + ld [%i1],%l0 ! ix + ld [%i1],%f0 ! u.l[0] = *x + ba .loop0 + ld [%i1+4],%f1 ! u.l[1] = *(1+x) + + .align 16 +! -- 16 byte aligned +.loop0: + sub %l0,%l7,%o3 + sub %l6,%l0,%o4 + fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000 + fmuld %f6,%f2,%f8 ! (previous iteration) + + andcc %o3,%o4,%o4 + bge,pn %icc,.range0 ! ix <= 0x000fffff or >= 0x7ff00000 +! delay slot + fands %f4,%f28,%f4 + + add %i1,%i2,%i1 ! x += stridex + add %i3,%i4,%i3 ! y += stridey + fpsub32s %f0,%f4,%f0 ! u.l[0] -= n + +.cont0: + lda [%i1]%asi,%l1 ! preload next argument + add %l0,%l4,%l0 ! j = ix + 0x94000 + fpadd32s %f0,%f30,%f2 ! v.l[0] = u.l[0] + 0x4000 + + lda [%i1]%asi,%f10 ! next u.l[0] + srl %l0,11,%l0 ! j = (j >> 11) & 0x1f0 + fand %f2,%f50,%f2 ! v.l &= 0xffff8000... + + lda [%i1+4]%asi,%f11 ! next u.l[1] + and %l0,0x1f0,%l0 + fitod %f4,%f32 ! (double) n + + add %l0,8,%l3 ! byte offset of TBL[j+1] + fsubd %f0,%f2,%f4 ! 
f = u.d - v.d + + faddd %f0,%f2,%f6 ! s = f / (u.d + v.d) + + fsubd %f40,%f2,%f2 ! two - v.d + fmuld %f32,%f60,%f34 ! h = n * ln2hi + TBL[j] + + faddd %f8,%f18,%f8 ! y = c + (t + q) + fmuld %f32,%f62,%f32 ! t = n * ln2lo + TBL[j+1] + + fdivd %f4,%f6,%f6 ! the divide for s + + faddd %f54,%f24,%f56 ! c = h + f + fmuld %f26,%f26,%f22 ! z = s * s + + faddd %f8,%f36,%f8 + st %f8,[%o0] ! store y high word via py0 + + st %f9,[%o0+4] ! store y low word + mov %i3,%o0 ! py0 = y for the element now in this stage + faddd %f14,%f38,%f14 + + fsubd %f56,%f54,%f54 ! t += f - (c - h) + fmuld %f22,%f58,%f20 ! q = ... + + fsubd %f2,%f0,%f2 ! (two - v.d) - u.d + ldd [%g1+%l0],%f36 ! TBL[j] + + faddd %f42,%f44,%f18 + fmuld %f12,%f14,%f14 + ldd [%fp+tmp1],%f12 ! reload tmp1 (written at the end of .loop1) + + faddd %f20,%f48,%f20 + nop + + faddd %f34,%f36,%f34 + ldd [%g1+%l3],%f0 ! TBL[j+1] + + faddd %f14,%f12,%f12 + + fsubd %f24,%f54,%f54 + fmuld %f22,%f20,%f24 + + std %f2,[%fp+tmp0] ! spill; reloaded in .loop2 + addcc %i0,-1,%i0 ! i0-- + ble,pn %icc,.endloop0 +! delay slot + faddd %f32,%f0,%f32 + +! -- 16 byte aligned +.loop1: + sub %l1,%l7,%o3 + sub %l6,%l1,%o4 + fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000 + fmuld %f16,%f12,%f8 ! (previous iteration) + + andcc %o3,%o4,%o4 + bge,pn %icc,.range1 ! ix <= 0x000fffff or >= 0x7ff00000 +! delay slot + fands %f14,%f28,%f14 + + add %i1,%i2,%i1 ! x += stridex + add %i3,%i4,%i3 ! y += stridey + fpsub32s %f10,%f14,%f10 ! u.l[0] -= n + +.cont1: + lda [%i1]%asi,%l2 ! preload next argument + add %l1,%l4,%l1 ! j = ix + 0x94000 + fpadd32s %f10,%f30,%f12 ! v.l[0] = u.l[0] + 0x4000 + + lda [%i1]%asi,%f20 ! next u.l[0] + srl %l1,11,%l1 ! j = (j >> 11) & 0x1f0 + fand %f12,%f50,%f12 ! v.l &= 0xffff8000... + + lda [%i1+4]%asi,%f21 ! next u.l[1] + and %l1,0x1f0,%l1 + fitod %f14,%f42 ! (double) n + + add %l1,8,%l3 ! byte offset of TBL[j+1] + fsubd %f10,%f12,%f14 ! f = u.d - v.d + + faddd %f10,%f12,%f16 ! s = f / (u.d + v.d) + + fsubd %f40,%f12,%f12 ! two - v.d + fmuld %f42,%f60,%f44 ! h = n * ln2hi + TBL[j] + + faddd %f8,%f18,%f8 ! y = c + (t + q) + fmuld %f42,%f62,%f42 ! t = n * ln2lo + TBL[j+1] + + fdivd %f14,%f16,%f16 ! the divide for s + + faddd %f34,%f4,%f36 ! c = h + f + fmuld %f6,%f6,%f2 ! 
z = s * s + + faddd %f8,%f46,%f8 + st %f8,[%o1] ! store y high word via py1 + + st %f9,[%o1+4] ! store y low word + mov %i3,%o1 ! py1 = y for the element now in this stage + faddd %f24,%f38,%f24 + + fsubd %f36,%f34,%f34 ! t += f - (c - h) + fmuld %f2,%f58,%f0 ! q = ... + + fsubd %f12,%f10,%f12 ! (two - v.d) - u.d + ldd [%g1+%l1],%f46 ! TBL[j] + + faddd %f52,%f54,%f18 + fmuld %f22,%f24,%f24 + ldd [%fp+tmp2],%f22 ! reload tmp2 (written at the end of .loop2) + + faddd %f0,%f48,%f0 + nop + + faddd %f44,%f46,%f44 + ldd [%g1+%l3],%f10 ! TBL[j+1] + + faddd %f24,%f22,%f22 + + fsubd %f4,%f34,%f34 + fmuld %f2,%f0,%f4 + + std %f12,[%fp+tmp1] ! spill; reloaded in .loop0 + addcc %i0,-1,%i0 ! i0-- + ble,pn %icc,.endloop1 +! delay slot + faddd %f42,%f10,%f42 + +! -- 16 byte aligned +.loop2: + sub %l2,%l7,%o3 + sub %l6,%l2,%o4 + fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000 + fmuld %f26,%f22,%f8 ! (previous iteration) + + andcc %o3,%o4,%o4 + bge,pn %icc,.range2 ! ix <= 0x000fffff or >= 0x7ff00000 +! delay slot + fands %f24,%f28,%f24 + + add %i1,%i2,%i1 ! x += stridex + add %i3,%i4,%i3 ! y += stridey + fpsub32s %f20,%f24,%f20 ! u.l[0] -= n + +.cont2: + lda [%i1]%asi,%l0 ! preload next argument + add %l2,%l4,%l2 ! j = ix + 0x94000 + fpadd32s %f20,%f30,%f22 ! v.l[0] = u.l[0] + 0x4000 + + lda [%i1]%asi,%f0 ! next u.l[0] + srl %l2,11,%l2 ! j = (j >> 11) & 0x1f0 + fand %f22,%f50,%f22 ! v.l &= 0xffff8000... + + lda [%i1+4]%asi,%f1 ! next u.l[1] + and %l2,0x1f0,%l2 + fitod %f24,%f52 ! (double) n + + add %l2,8,%l3 ! byte offset of TBL[j+1] + fsubd %f20,%f22,%f24 ! f = u.d - v.d + + faddd %f20,%f22,%f26 ! s = f / (u.d + v.d) + + fsubd %f40,%f22,%f22 ! two - v.d + fmuld %f52,%f60,%f54 ! h = n * ln2hi + TBL[j] + + faddd %f8,%f18,%f8 ! y = c + (t + q) + fmuld %f52,%f62,%f52 ! t = n * ln2lo + TBL[j+1] + + fdivd %f24,%f26,%f26 ! the divide for s + + faddd %f44,%f14,%f46 ! c = h + f + fmuld %f16,%f16,%f12 ! z = s * s + + faddd %f8,%f56,%f8 + st %f8,[%o2] ! store y high word via py2 + + st %f9,[%o2+4] ! store y low word + mov %i3,%o2 ! py2 = y for the element now in this stage + faddd %f4,%f38,%f4 + + fsubd %f46,%f44,%f44 ! t += f - (c - h) + fmuld %f12,%f58,%f10 ! q = ... + + fsubd %f22,%f20,%f22 ! 
(two - v.d) - u.d + ldd [%g1+%l2],%f56 ! TBL[j] + + faddd %f32,%f34,%f18 + fmuld %f2,%f4,%f4 + ldd [%fp+tmp0],%f2 ! reload tmp0 (written at the end of .loop0) + + faddd %f10,%f48,%f10 + nop + + faddd %f54,%f56,%f54 + ldd [%g1+%l3],%f20 ! TBL[j+1] + + faddd %f4,%f2,%f2 + + fsubd %f14,%f44,%f44 + fmuld %f12,%f10,%f14 + + std %f22,[%fp+tmp2] ! spill; reloaded in .loop1 + addcc %i0,-1,%i0 ! i0-- + bg,pt %icc,.loop0 +! delay slot + faddd %f52,%f20,%f52 + + +! Once we get to the last element, we loop three more times to finish +! the computations in progress. This means we will load past the end +! of the argument vector, but since we use non-faulting loads and never +! use the data, the only potential problem is a cache miss. (Note that +! when the argument is 2, the only exception that occurs in the compu- +! tation is an inexact result in the final addition, and we break out +! of the "extra" iterations before then.) +.endloop2: + sethi %hi(0x40000000),%l0 ! "next argument" = two + cmp %i0,-3 ! keep draining while i0 > -3 + bg,a,pt %icc,.loop0 +! delay slot + fmovd %f40,%f0 ! annulled when the branch is not taken + ret + restore + + .align 16 +.endloop0: + sethi %hi(0x40000000),%l1 ! "next argument" = two + cmp %i0,-3 ! keep draining while i0 > -3 + bg,a,pt %icc,.loop1 +! delay slot + fmovd %f40,%f10 ! annulled when the branch is not taken + ret + restore + + .align 16 +.endloop1: + sethi %hi(0x40000000),%l2 ! "next argument" = two + cmp %i0,-3 ! keep draining while i0 > -3 + bg,a,pt %icc,.loop2 +! delay slot + fmovd %f40,%f20 ! annulled when the branch is not taken + ret + restore + + + .align 16 +.range0: + cmp %l0,%l7 + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [%i1+4],%o5 ! low word of x + fxtod %f0,%f0 ! scale by 2**1074 w/o trapping + st %f0,[%fp+tmp0] ! high word of the scaled value, reloaded into l0 below + add %i1,%i2,%i1 ! x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000 + fands %f4,%f28,%f4 + fpsub32s %f0,%f4,%f0 ! u.l[0] -= n + ld [%fp+tmp0],%l0 ! ix of the scaled value + ba,pt %icc,.cont0 +! delay slot + fpsub32s %f4,%f29,%f4 ! n -= 0x43200000 +1: + fdivs %f29,%f1,%f4 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st %f28,[%i3] ! store -inf +2: + sll %l0,1,%l0 ! lop off sign bit + add %i1,%i2,%i1 ! 
x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fabsd %f0,%f4 ! *y = (x + |x|) * inf + faddd %f0,%f4,%f0 + fand %f28,%f50,%f4 ! f28:f29 masked by f50 leaves 0xfff00000 00000000 (-inf) + fnegd %f4,%f4 ! negate to get inf + fmuld %f0,%f4,%f0 + st %f0,[%i3] ! high word of the result +3: + addcc %i0,-1,%i0 ! i0-- + ble,pn %icc,.endloop2 +! delay slot + st %f1,[%i3+4] ! low word of the result + ld [%i1],%l0 ! get next argument + ld [%i1],%f0 + ba,pt %icc,.loop0 +! delay slot + ld [%i1+4],%f1 + + + .align 16 +.range1: + cmp %l1,%l7 + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [%i1+4],%o5 ! low word of x + fxtod %f10,%f10 ! scale by 2**1074 w/o trapping + st %f10,[%fp+tmp1] ! high word of the scaled value, reloaded into l1 below + add %i1,%i2,%i1 ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000 + fands %f14,%f28,%f14 + fpsub32s %f10,%f14,%f10 ! u.l[0] -= n + ld [%fp+tmp1],%l1 ! ix of the scaled value + ba,pt %icc,.cont1 +! delay slot + fpsub32s %f14,%f29,%f14 ! n -= 0x43200000 +1: + fdivs %f29,%f11,%f14 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st %f28,[%i3] ! store -inf +2: + sll %l1,1,%l1 ! lop off sign bit + add %i1,%i2,%i1 ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fabsd %f10,%f14 ! *y = (x + |x|) * inf + faddd %f10,%f14,%f10 + fand %f28,%f50,%f14 ! f28:f29 masked by f50 leaves 0xfff00000 00000000 (-inf) + fnegd %f14,%f14 ! negate to get inf + fmuld %f10,%f14,%f10 + st %f10,[%i3] ! high word of the result +3: + addcc %i0,-1,%i0 ! i0-- + ble,pn %icc,.endloop0 +! delay slot + st %f11,[%i3+4] ! low word of the result + ld [%i1],%l1 ! get next argument + ld [%i1],%f10 + ba,pt %icc,.loop1 +! delay slot + ld [%i1+4],%f11 + + + .align 16 +.range2: + cmp %l2,%l7 + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [%i1+4],%o5 ! low word of x + fxtod %f20,%f20 ! scale by 2**1074 w/o trapping + st %f20,[%fp+tmp2] ! high word of the scaled value, reloaded into l2 below + add %i1,%i2,%i1 ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000 + fands %f24,%f28,%f24 + fpsub32s %f20,%f24,%f20 ! 
u.l[0] -= n + ld [%fp+tmp2],%l2 ! ix of the scaled value + ba,pt %icc,.cont2 +! delay slot + fpsub32s %f24,%f29,%f24 ! n -= 0x43200000 +1: + fdivs %f29,%f21,%f24 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st %f28,[%i3] ! store -inf +2: + sll %l2,1,%l2 ! lop off sign bit + add %i1,%i2,%i1 ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add %i3,%i4,%i3 ! y += stridey + fabsd %f20,%f24 ! *y = (x + |x|) * inf + faddd %f20,%f24,%f20 + fand %f28,%f50,%f24 ! f28:f29 masked by f50 leaves 0xfff00000 00000000 (-inf) + fnegd %f24,%f24 ! negate to get inf + fmuld %f20,%f24,%f20 + st %f20,[%i3] ! high word of the result +3: + addcc %i0,-1,%i0 ! i0-- + ble,pn %icc,.endloop1 +! delay slot + st %f21,[%i3+4] ! low word of the result + ld [%i1],%l2 ! get next argument + ld [%i1],%f20 + ba,pt %icc,.loop2 +! delay slot + ld [%i1+4],%f21 + + SET_SIZE(__vlog) + diff --git a/usr/src/libm/src/mvec/vis/__vlog_ultra3.S b/usr/src/libm/src/mvec/vis/__vlog_ultra3.S new file mode 100644 index 0000000..aed1b59 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vlog_ultra3.S @@ -0,0 +1,2904 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vlog_ultra3.S 1.9 06/01/23 SMI" + + .file "__vlog_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vlog + .type __vlog,#function + __vlog = __vlog_ultra3 +#endif + +/* + * ELEVENBIT table and order 5 POLYNOMIAL no explicit correction t + */ + + RO_DATA + .align 64 +!! this is a new 11 bit table. +TBL: + .word 0xbfd522ae, 0x0738a000 + .word 0xbd2ebe70, 0x8164c759 + .word 0xbfd5178d, 0x9ab55000 + .word 0xbd35c153, 0x0fe963b3 + .word 0xbfd50c6f, 0x1d11b000 + .word 0xbd42f8ca, 0x40bec1ea + .word 0xbfd50152, 0x8da1f000 + .word 0xbd42cfac, 0x6d29f4d7 + .word 0xbfd4f637, 0xebba9000 + .word 0xbd401f53, 0x9a676da3 + .word 0xbfd4eb1f, 0x36b07000 + .word 0xbd184047, 0x46e5797b + .word 0xbfd4e008, 0x6dd8b000 + .word 0xbd4594b6, 0xaf0ddc3c + .word 0xbfd4d4f3, 0x90890000 + .word 0xbd19fd79, 0x3a9f1441 + .word 0xbfd4c9e0, 0x9e172000 + .word 0xbd4877dd, 0xb93d49d7 + .word 0xbfd4becf, 0x95d97000 + .word 0xbd422662, 0x6ffee2c8 + .word 0xbfd4b3c0, 0x77267000 + .word 0xbd4d3497, 0x2fdf5a8c + .word 0xbfd4a8b3, 0x41552000 + .word 0xbd46127e, 0x3d0dc8d1 + .word 0xbfd49da7, 0xf3bcc000 + .word 0xbd307b33, 0x4daf4b9a + .word 0xbfd4929e, 0x8db4e000 + .word 0xbd3b9056, 0x556c70de + .word 0xbfd48797, 0x0e958000 + .word 0xbd3dc1b8, 0x465cf25f + .word 0xbfd47c91, 0x75b6f000 + .word 0xbd05acd1, 0x7009e35b + .word 0xbfd4718d, 0xc271c000 + .word 0xbd306c18, 0xfb4c14c5 + .word 0xbfd4668b, 0xf41ef000 + .word 0xbd432874, 0x4e9d2b85 + .word 0xbfd45b8c, 0x0a17d000 + .word 0xbd4e26ed, 0xf182f57b + .word 0xbfd4508e, 0x03b61000 + .word 0xbd40ef1c, 0x2579199c + .word 0xbfd44591, 0xe0539000 + .word 0xbd4e916a, 0x76d6dc28 + .word 0xbfd43a97, 0x9f4ac000 + .word 0xbd23ee07, 0x6a81f88e + .word 0xbfd42f9f, 0x3ff62000 + .word 0xbd390644, 0x0f7d3354 + .word 0xbfd424a8, 0xc1b0c000 + .word 0xbd2dc57c, 0x99ae2a25 + .word 0xbfd419b4, 0x23d5e000 + .word 0xbd418e43, 0x6ec90e0a + .word 0xbfd40ec1, 0x65c13000 + .word 0xbd3f59a8, 0xa01757f6 + .word 0xbfd403d0, 0x86cea000 + .word 
0xbd3e6ef5, 0x74487308 + .word 0xbfd3f8e1, 0x865a8000 + .word 0xbd26f338, 0x912773e3 + .word 0xbfd3edf4, 0x63c16000 + .word 0xbd407cc1, 0xeb4069e1 + .word 0xbfd3e309, 0x1e604000 + .word 0xbd43f634, 0xa2afb68d + .word 0xbfd3d81f, 0xb5946000 + .word 0xbd4b74e0, 0xf558b217 + .word 0xbfd3cd38, 0x28bb6000 + .word 0xbd489faf, 0xb06c8342 + .word 0xbfd3c252, 0x77333000 + .word 0xbd183b54, 0xb606bd5c + .word 0xbfd3b76e, 0xa059f000 + .word 0xbd47b5cf, 0x9912c7cb + .word 0xbfd3ac8c, 0xa38e5000 + .word 0xbd48bd04, 0x10ff506d + .word 0xbfd3a1ac, 0x802f3000 + .word 0xbd398ecf, 0x399abd8d + .word 0xbfd396ce, 0x359bb000 + .word 0xbd4ea7c6, 0x3a99c99c + .word 0xbfd38bf1, 0xc3337000 + .word 0xbd4ce9e9, 0x41e9516d + .word 0xbfd38117, 0x28564000 + .word 0xbd496386, 0xdb17e3f5 + .word 0xbfd3763e, 0x64645000 + .word 0xbd318b1f, 0x291dcb56 + .word 0xbfd36b67, 0x76be1000 + .word 0xbd116ecd, 0xb0f177c8 + .word 0xbfd36092, 0x5ec44000 + .word 0xbd4eb929, 0xf344bbd1 + .word 0xbfd355bf, 0x1bd82000 + .word 0xbd491599, 0x1da6c3c6 + .word 0xbfd34aed, 0xad5b1000 + .word 0xbd3a2aac, 0xf2be1fdd + .word 0xbfd3401e, 0x12aec000 + .word 0xbd4741c6, 0x5548eb71 + .word 0xbfd33550, 0x4b355000 + .word 0xbd446efc, 0x89cefc92 + .word 0xbfd32a84, 0x56512000 + .word 0xbd04f928, 0x139af5d6 + .word 0xbfd31fba, 0x3364c000 + .word 0xbd4a08d8, 0x6ce5a16e + .word 0xbfd314f1, 0xe1d35000 + .word 0xbd49c761, 0x4b37b0d2 + .word 0xbfd30a2b, 0x61001000 + .word 0xbd4a53e9, 0x6290ef5b + .word 0xbfd2ff66, 0xb04ea000 + .word 0xbd43a896, 0xd5f0c8e9 + .word 0xbfd2f4a3, 0xcf22e000 + .word 0xbd4b8693, 0xf85f2705 + .word 0xbfd2e9e2, 0xbce12000 + .word 0xbd24300c, 0x128d1dc2 + .word 0xbfd2df23, 0x78edd000 + .word 0xbce292b7, 0xcd95c595 + .word 0xbfd2d466, 0x02adc000 + .word 0xbd49dcbc, 0x88caaf9b + .word 0xbfd2c9aa, 0x59863000 + .word 0xbd4a7f90, 0xe829d4d2 + .word 0xbfd2bef0, 0x7cdc9000 + .word 0xbd2a9cfa, 0x4a5004f4 + .word 0xbfd2b438, 0x6c168000 + .word 0xbd4e1827, 0x3a343630 + .word 0xbfd2a982, 0x269a3000 + .word 0xbd4b7e9c, 
0x6aa35e8c + .word 0xbfd29ecd, 0xabcdf000 + .word 0xbd44073b, 0x3bdc2243 + .word 0xbfd2941a, 0xfb186000 + .word 0xbd46f79e, 0xa4678ebb + .word 0xbfd2896a, 0x13e08000 + .word 0xbd3a8ed0, 0x27e16952 + .word 0xbfd27eba, 0xf58d8000 + .word 0xbd49399d, 0xffd2d096 + .word 0xbfd2740d, 0x9f870000 + .word 0xbd45f660, 0x0b9a802a + .word 0xbfd26962, 0x1134d000 + .word 0xbd4724f0, 0x77d6ecee + .word 0xbfd25eb8, 0x49ff2000 + .word 0xbd310c25, 0x03f76b8e + .word 0xbfd25410, 0x494e5000 + .word 0xbd3b1d7a, 0xc0ef77f2 + .word 0xbfd2496a, 0x0e8b3000 + .word 0xbd003238, 0x687cfe2e + .word 0xbfd23ec5, 0x991eb000 + .word 0xbd44920d, 0xdbae8d6f + .word 0xbfd23422, 0xe8724000 + .word 0xbd40708a, 0x931c895b + .word 0xbfd22981, 0xfbef7000 + .word 0xbd42f5ef, 0x4fb53f93 + .word 0xbfd21ee2, 0xd3003000 + .word 0xbd40382e, 0x41be00e3 + .word 0xbfd21445, 0x6d0eb000 + .word 0xbd41a87d, 0xeba46baf + .word 0xbfd209a9, 0xc9857000 + .word 0xbd45b053, 0x3ba9c94d + .word 0xbfd1ff0f, 0xe7cf4000 + .word 0xbd3e9d5b, 0x513ff0c1 + .word 0xbfd1f477, 0xc7573000 + .word 0xbd26d6d4, 0x010d751a + .word 0xbfd1e9e1, 0x67889000 + .word 0xbd43e8a8, 0x961ba4d1 + .word 0xbfd1df4c, 0xc7cf2000 + .word 0xbd30b43f, 0x0455f7e4 + .word 0xbfd1d4b9, 0xe796c000 + .word 0xbd222a66, 0x7c42e56d + .word 0xbfd1ca28, 0xc64ba000 + .word 0xbd4ca760, 0xf7a15533 + .word 0xbfd1bf99, 0x635a6000 + .word 0xbd4729bb, 0x5451ef6e + .word 0xbfd1b50b, 0xbe2fc000 + .word 0xbd38ecd7, 0x3263201f + .word 0xbfd1aa7f, 0xd638d000 + .word 0xbd29f60a, 0x9616f7a0 + .word 0xbfd19ff5, 0xaae2f000 + .word 0xbce69fd9, 0x9ec05ba8 + .word 0xbfd1956d, 0x3b9bc000 + .word 0xbd27d2f7, 0x3ad1aa14 + .word 0xbfd18ae6, 0x87d13000 + .word 0xbd43a034, 0x64df39ff + .word 0xbfd18061, 0x8ef18000 + .word 0xbd45be80, 0x1bc9638d + .word 0xbfd175de, 0x506b3000 + .word 0xbd30c07c, 0x4da5752f + .word 0xbfd16b5c, 0xcbacf000 + .word 0xbd46e6b3, 0x7de945a0 + .word 0xbfd160dd, 0x0025e000 + .word 0xbd4ba5c1, 0xc499684a + .word 0xbfd1565e, 0xed455000 + .word 0xbd4f8629, 0x48125517 + 
.word 0xbfd14be2, 0x927ae000 + .word 0xbd49a817, 0xc85685e2 + .word 0xbfd14167, 0xef367000 + .word 0xbd3e0c07, 0x824daaf5 + .word 0xbfd136ef, 0x02e82000 + .word 0xbd4217d3, 0xe78d3ed8 + .word 0xbfd12c77, 0xcd007000 + .word 0xbd13b294, 0x8a11f797 + .word 0xbfd12202, 0x4cf00000 + .word 0xbd38fdd9, 0x76fabda5 + .word 0xbfd1178e, 0x8227e000 + .word 0xbd31ef78, 0xce2d07f2 + .word 0xbfd10d1c, 0x6c194000 + .word 0xbd4cb3de, 0x00324ee4 + .word 0xbfd102ac, 0x0a35c000 + .word 0xbd483810, 0x88080a5e + .word 0xbfd0f83d, 0x5bef2000 + .word 0xbd475fa0, 0x37a37ba8 + .word 0xbfd0edd0, 0x60b78000 + .word 0xbd0019b5, 0x2d8435f5 + .word 0xbfd0e365, 0x18012000 + .word 0xbd2a5943, 0x8bbdca93 + .word 0xbfd0d8fb, 0x813eb000 + .word 0xbd1ee8c8, 0x8753fa35 + .word 0xbfd0ce93, 0x9be30000 + .word 0xbd4e8266, 0xd788ddf1 + .word 0xbfd0c42d, 0x67616000 + .word 0xbd27188b, 0x163ceae9 + .word 0xbfd0b9c8, 0xe32d1000 + .word 0xbd42224e, 0x89208f94 + .word 0xbfd0af66, 0x0eb9e000 + .word 0xbd23c7c3, 0xf528d80a + .word 0xbfd0a504, 0xe97bb000 + .word 0xbd303094, 0xe6690c44 + .word 0xbfd09aa5, 0x72e6c000 + .word 0xbd3b50a1, 0xe1734342 + .word 0xbfd09047, 0xaa6f9000 + .word 0xbd3f18e8, 0x3ce75c0e + .word 0xbfd085eb, 0x8f8ae000 + .word 0xbd3e5d51, 0x3f45fe7b + .word 0xbfd07b91, 0x21adb000 + .word 0xbd4520ba, 0x8e9b8a72 + .word 0xbfd07138, 0x604d5000 + .word 0xbd40c4e6, 0xd8b76a75 + .word 0xbfd066e1, 0x4adf4000 + .word 0xbd47f6bb, 0x351a4a71 + .word 0xbfd05c8b, 0xe0d96000 + .word 0xbd2ad0f1, 0xc77ccb58 + .word 0xbfd05238, 0x21b1a000 + .word 0xbd4ec752, 0xd39776ce + .word 0xbfd047e6, 0x0cde8000 + .word 0xbd2dbdf1, 0x0d397f3c + .word 0xbfd03d95, 0xa1d67000 + .word 0xbd3a1788, 0x0f236109 + .word 0xbfd03346, 0xe0106000 + .word 0xbcf89ff8, 0xa966395c + .word 0xbfd028f9, 0xc7035000 + .word 0xbd483851, 0x858333c0 + .word 0xbfd01eae, 0x5626c000 + .word 0xbd3a43dc, 0xfade85ae + .word 0xbfd01464, 0x8cf23000 + .word 0xbd4d082a, 0x567b45ed + .word 0xbfd00a1c, 0x6adda000 + .word 0xbd31cd8d, 0x688b9e18 + .word 
0xbfcfffab, 0xdec23000 + .word 0xbd236a1a, 0xdb4a75a4 + .word 0xbfcfeb22, 0x33ea0000 + .word 0xbd2f3418, 0xde00938b + .word 0xbfcfd69b, 0xd4240000 + .word 0xbd3641a8, 0xff2ccc45 + .word 0xbfcfc218, 0xbe620000 + .word 0xbd34bba4, 0x6f1cf6a0 + .word 0xbfcfad98, 0xf1965000 + .word 0xbd16ee92, 0x73d7c2de + .word 0xbfcf991c, 0x6cb3b000 + .word 0xbd1bcbec, 0xca0cdf30 + .word 0xbfcf84a3, 0x2ead7000 + .word 0xbd386af1, 0xd33d9e37 + .word 0xbfcf702d, 0x36777000 + .word 0xbd3bdf9a, 0xba663077 + .word 0xbfcf5bba, 0x83060000 + .word 0xbd341b25, 0x4a43da63 + .word 0xbfcf474b, 0x134df000 + .word 0xbd1146d8, 0x38821289 + .word 0xbfcf32de, 0xe6448000 + .word 0xbd2efb83, 0x625f1609 + .word 0xbfcf1e75, 0xfadf9000 + .word 0xbd37bcea, 0x6d13e04a + .word 0xbfcf0a10, 0x50157000 + .word 0xbd3dad5f, 0x7347f55b + .word 0xbfcef5ad, 0xe4dcf000 + .word 0xbd3fcbbd, 0xd53488e4 + .word 0xbfcee14e, 0xb82d6000 + .word 0xbd39d172, 0x6f4de261 + .word 0xbfceccf2, 0xc8fe9000 + .word 0xbd104e71, 0x7062a6fe + .word 0xbfceb89a, 0x1648b000 + .word 0xbd32e26f, 0x74808b80 + .word 0xbfcea444, 0x9f04a000 + .word 0xbd35e916, 0x63732a36 + .word 0xbfce8ff2, 0x622ba000 + .word 0xbd378e13, 0xd33981e5 + .word 0xbfce7ba3, 0x5eb77000 + .word 0xbd3c5422, 0x3b90d937 + .word 0xbfce6757, 0x93a26000 + .word 0xbd01dc8e, 0xc0554762 + .word 0xbfce530e, 0xffe71000 + .word 0xbcc21227, 0x6041f430 + .word 0xbfce3ec9, 0xa280c000 + .word 0xbd14bd96, 0x3fb80bff + .word 0xbfce2a87, 0x7a6b2000 + .word 0xbd382381, 0x7787081a + .word 0xbfce1648, 0x86a27000 + .word 0xbd36ce95, 0xba645527 + .word 0xbfce020c, 0xc6235000 + .word 0xbd356a7f, 0xa92375ee + .word 0xbfcdedd4, 0x37eae000 + .word 0xbd3e0125, 0x53595898 + .word 0xbfcdd99e, 0xdaf6d000 + .word 0xbd2fa273, 0x2c71522a + .word 0xbfcdc56c, 0xae452000 + .word 0xbd3eb37a, 0xa24e1817 + .word 0xbfcdb13d, 0xb0d48000 + .word 0xbd32806a, 0x847527e6 + .word 0xbfcd9d11, 0xe1a3f000 + .word 0xbd19da04, 0xfa9fa4c6 + .word 0xbfcd88e9, 0x3fb2f000 + .word 0xbd2141af, 0xfb96815e + .word 0xbfcd74c3, 
0xca018000 + .word 0xbd393e4c, 0xfa17dce1 + .word 0xbfcd60a1, 0x7f903000 + .word 0xbd24523f, 0x207be58e + .word 0xbfcd4c82, 0x5f5fd000 + .word 0xbd3e3f04, 0x21df291e + .word 0xbfcd3866, 0x6871f000 + .word 0xbd21935e, 0x98ed9a88 + .word 0xbfcd244d, 0x99c85000 + .word 0xbd29cfb0, 0x0c890770 + .word 0xbfcd1037, 0xf2655000 + .word 0xbd3cf6b0, 0x31492124 + .word 0xbfccfc25, 0x714bd000 + .word 0xbd39fbd3, 0x34e03910 + .word 0xbfcce816, 0x157f1000 + .word 0xbd330faa, 0x2efb3576 + .word 0xbfccd409, 0xde02d000 + .word 0xbd132115, 0x39f1dcc5 + .word 0xbfccc000, 0xc9db3000 + .word 0xbd38a4a9, 0xe8aa1402 + .word 0xbfccabfa, 0xd80d0000 + .word 0xbd11e253, 0x70a10e3e + .word 0xbfcc97f8, 0x079d4000 + .word 0xbd23b161, 0xa8c6e6c5 + .word 0xbfcc83f8, 0x57919000 + .word 0xbd358740, 0x00c94a0f + .word 0xbfcc6ffb, 0xc6f00000 + .word 0xbd3ee138, 0xd3a69d43 + .word 0xbfcc5c02, 0x54bf2000 + .word 0xbd1d2f55, 0x73da163b + .word 0xbfcc480c, 0x0005c000 + .word 0xbd39a294, 0xd5e44e76 + .word 0xbfcc3418, 0xc7cb7000 + .word 0xbd234b5d, 0xe46e0516 + .word 0xbfcc2028, 0xab17f000 + .word 0xbd3368f8, 0x8d51c29d + .word 0xbfcc0c3b, 0xa8f3a000 + .word 0xbd3ac339, 0x48e7f56a + .word 0xbfcbf851, 0xc0675000 + .word 0xbd257be3, 0x67ef56a7 + .word 0xbfcbe46a, 0xf07c2000 + .word 0xbd350591, 0x910f505a + .word 0xbfcbd087, 0x383bd000 + .word 0xbd315a1d, 0xd355f6a5 + .word 0xbfcbbca6, 0x96b07000 + .word 0xbd3d0045, 0xea3f2624 + .word 0xbfcba8c9, 0x0ae4a000 + .word 0xbd3a32e7, 0xf44432da + .word 0xbfcb94ee, 0x93e36000 + .word 0xbd2f2a06, 0xe2db48a3 + .word 0xbfcb8117, 0x30b82000 + .word 0xbd1e9068, 0x3b9cd768 + .word 0xbfcb6d42, 0xe06ec000 + .word 0xbd302afe, 0x254869ba + .word 0xbfcb5971, 0xa213a000 + .word 0xbd39b50e, 0x83aa91df + .word 0xbfcb45a3, 0x74b39000 + .word 0xbd3701df, 0x22138fc3 + .word 0xbfcb31d8, 0x575bc000 + .word 0xbd3c794e, 0x562a63cb + .word 0xbfcb1e10, 0x4919e000 + .word 0xbd3fa006, 0x2597f33a + .word 0xbfcb0a4b, 0x48fc1000 + .word 0xbd368c69, 0x51e3338a + .word 0xbfcaf689, 0x5610d000 + 
.word 0xbd375beb, 0xba042b64 + .word 0xbfcae2ca, 0x6f672000 + .word 0xbd37a8d5, 0xae54f550 + .word 0xbfcacf0e, 0x940e7000 + .word 0xbd2800e3, 0xa7e64e07 + .word 0xbfcabb55, 0xc3169000 + .word 0xbd1d6694, 0xd43acc9f + .word 0xbfcaa79f, 0xfb8fc000 + .word 0xbd3a8bf1, 0x1c0d8aaa + .word 0xbfca93ed, 0x3c8ad000 + .word 0xbd33c6de, 0x57d4ef4c + .word 0xbfca803d, 0x8518d000 + .word 0xbd3e09d1, 0x87f293cc + .word 0xbfca6c90, 0xd44b7000 + .word 0xbce38901, 0xf909e74b + .word 0xbfca58e7, 0x29348000 + .word 0xbd3e867d, 0x504551b1 + .word 0xbfca4540, 0x82e6a000 + .word 0xbd360a77, 0xc81f7171 + .word 0xbfca319c, 0xe074a000 + .word 0xbcbd7dba, 0xe650d5b3 + .word 0xbfca1dfc, 0x40f1b000 + .word 0xbd2fc3e1, 0xff6190fe + .word 0xbfca0a5e, 0xa371a000 + .word 0xbd322191, 0x988b2e31 + .word 0xbfc9f6c4, 0x07089000 + .word 0xbd29904d, 0x6865817a + .word 0xbfc9e32c, 0x6acb0000 + .word 0xbd3e5e8d, 0xbc0fb4ac + .word 0xbfc9cf97, 0xcdce0000 + .word 0xbd3d862f, 0x10c414e3 + .word 0xbfc9bc06, 0x2f26f000 + .word 0xbd3874d8, 0x1809e6d5 + .word 0xbfc9a877, 0x8deba000 + .word 0xbd3470fa, 0x3efec390 + .word 0xbfc994eb, 0xe9325000 + .word 0xbd2a9c9d, 0x28bcbe25 + .word 0xbfc98163, 0x4011a000 + .word 0xbd34eadd, 0x9e9045e2 + .word 0xbfc96ddd, 0x91a0b000 + .word 0xbd32ac6b, 0x11cf6f2b + .word 0xbfc95a5a, 0xdcf70000 + .word 0xbd07f228, 0x58a0ff6f + .word 0xbfc946db, 0x212c6000 + .word 0xbd36cf76, 0x74ca02ba + .word 0xbfc9335e, 0x5d594000 + .word 0xbd33115c, 0x3abd47da + .word 0xbfc91fe4, 0x90965000 + .word 0xbd30369c, 0xf30a1c32 + .word 0xbfc90c6d, 0xb9fcb000 + .word 0xbd39b282, 0xa239ca0d + .word 0xbfc8f8f9, 0xd8a60000 + .word 0xbd2af16c, 0x8230ceca + .word 0xbfc8e588, 0xebac2000 + .word 0xbd3b7d5c, 0xab2d1140 + .word 0xbfc8d21a, 0xf2299000 + .word 0xbd14d652, 0x74757226 + .word 0xbfc8beaf, 0xeb38f000 + .word 0xbd3d1855, 0x6aa2da66 + .word 0xbfc8ab47, 0xd5f5a000 + .word 0xbd187eb8, 0x505d468f + .word 0xbfc897e2, 0xb17b1000 + .word 0xbd334a64, 0x63f9a0b1 + .word 0xbfc88480, 0x7ce56000 + .word 
0xbd1c77ce, 0xf4a8712c + .word 0xbfc87121, 0x3750e000 + .word 0xbd3328eb, 0x42f9af75 + .word 0xbfc85dc4, 0xdfda7000 + .word 0xbd3785ab, 0x048301ba + .word 0xbfc84a6b, 0x759f5000 + .word 0xbd02ebfe, 0xa903cfb8 + .word 0xbfc83714, 0xf7bd0000 + .word 0xbd2ed83a, 0xf85a2ced + .word 0xbfc823c1, 0x6551a000 + .word 0xbd1e0ddb, 0x9a631e83 + .word 0xbfc81070, 0xbd7b9000 + .word 0xbcafe80a, 0x6682e646 + .word 0xbfc7fd22, 0xff599000 + .word 0xbd3a9d05, 0x02ea120c + .word 0xbfc7e9d8, 0x2a0b0000 + .word 0xbd116849, 0xfa40e4f0 + .word 0xbfc7d690, 0x3caf5000 + .word 0xbd359fca, 0x741e7f15 + .word 0xbfc7c34b, 0x3666a000 + .word 0xbd3175c9, 0x81b45e10 + .word 0xbfc7b009, 0x16515000 + .word 0xbd146280, 0xd3e606a3 + .word 0xbfc79cc9, 0xdb902000 + .word 0xbd1e00d0, 0x375e70bd + .word 0xbfc7898d, 0x85444000 + .word 0xbd38e67b, 0xe3dbaf3f + .word 0xbfc77654, 0x128f6000 + .word 0xbd0274ba, 0xdf268e7c + .word 0xbfc7631d, 0x82935000 + .word 0xbd350c41, 0x1c1d060f + .word 0xbfc74fe9, 0xd4729000 + .word 0xbd249736, 0xd91da11e + .word 0xbfc73cb9, 0x074fd000 + .word 0xbd04cab7, 0x97ffd2cc + .word 0xbfc7298b, 0x1a4e3000 + .word 0xbd15accc, 0xe43ce383 + .word 0xbfc71660, 0x0c914000 + .word 0xbce51b15, 0x7cec3838 + .word 0xbfc70337, 0xdd3ce000 + .word 0xbd206a17, 0x8a5eab9c + .word 0xbfc6f012, 0x8b756000 + .word 0xbd357739, 0x0d31ef0f + .word 0xbfc6dcf0, 0x165f8000 + .word 0xbd1b9566, 0x9a33e4c6 + .word 0xbfc6c9d0, 0x7d203000 + .word 0xbd3f8e30, 0x14099349 + .word 0xbfc6b6b3, 0xbedd1000 + .word 0xbd1a8f73, 0xa64d3813 + .word 0xbfc6a399, 0xdabbd000 + .word 0xbd1c1b2c, 0x6657a967 + .word 0xbfc69082, 0xcfe2b000 + .word 0xbd2da1e7, 0x20b79662 + .word 0xbfc67d6e, 0x9d785000 + .word 0xbd2dc2ef, 0x9eb1f25a + .word 0xbfc66a5d, 0x42a3a000 + .word 0xbd3a6893, 0x3aa00298 + .word 0xbfc6574e, 0xbe8c1000 + .word 0xbd19cf8b, 0x2c3c2e78 + .word 0xbfc64443, 0x10594000 + .word 0xbd22f605, 0xb0281916 + .word 0xbfc6313a, 0x37335000 + .word 0xbd3aec82, 0xac378565 + .word 0xbfc61e34, 0x3242d000 + .word 0xbd32bb2d, 
0x97ecd861 + .word 0xbfc60b31, 0x00b09000 + .word 0xbd21d752, 0x6cee0fd8 + .word 0xbfc5f830, 0xa1a5c000 + .word 0xbd352268, 0x98ffc1bc + .word 0xbfc5e533, 0x144c1000 + .word 0xbd2c63e8, 0x189ade2b + .word 0xbfc5d238, 0x57cd7000 + .word 0xbd23530a, 0x5ba6e7ac + .word 0xbfc5bf40, 0x6b543000 + .word 0xbd3b63f7, 0x0525d9f9 + .word 0xbfc5ac4b, 0x4e0b2000 + .word 0xbd351709, 0xd7275f36 + .word 0xbfc59958, 0xff1d5000 + .word 0xbd178be9, 0xa258d7eb + .word 0xbfc58669, 0x7db62000 + .word 0xbd39e26c, 0x65e8cb44 + .word 0xbfc5737c, 0xc9018000 + .word 0xbd39baa7, 0xa6b887f6 + .word 0xbfc56092, 0xe02ba000 + .word 0xbd245850, 0x06899d98 + .word 0xbfc54dab, 0xc2610000 + .word 0xbd2746fe, 0xe5c8d0d8 + .word 0xbfc53ac7, 0x6ece9000 + .word 0xbd39ca8a, 0x2a8725d5 + .word 0xbfc527e5, 0xe4a1b000 + .word 0xbd2633e8, 0xe5697dc7 + .word 0xbfc51507, 0x2307f000 + .word 0xbd306b11, 0xecc0d77b + .word 0xbfc5022b, 0x292f6000 + .word 0xbd348a05, 0xff36a25b + .word 0xbfc4ef51, 0xf6466000 + .word 0xbd3bc83d, 0x21c8cd53 + .word 0xbfc4dc7b, 0x897bc000 + .word 0xbd0c79b6, 0x0ae1ff0f + .word 0xbfc4c9a7, 0xe1fe8000 + .word 0xbcff39f7, 0x50dbbb30 + .word 0xbfc4b6d6, 0xfefe2000 + .word 0xbd1522ec, 0xf56e7952 + .word 0xbfc4a408, 0xdfaa7000 + .word 0xbd33b41f, 0x86e5dd72 + .word 0xbfc4913d, 0x8333b000 + .word 0xbd258379, 0x54fdb678 + .word 0xbfc47e74, 0xe8ca5000 + .word 0xbd3ef836, 0xa48fdfcf + .word 0xbfc46baf, 0x0f9f5000 + .word 0xbd3b6d8c, 0xbe1bdef9 + .word 0xbfc458eb, 0xf6e3f000 + .word 0xbcf5c0fe, 0x1f2b8094 + .word 0xbfc4462b, 0x9dc9b000 + .word 0xbd1ede9d, 0x63b93e7a + .word 0xbfc4336e, 0x03829000 + .word 0xbd3ac363, 0xa859c2af + .word 0xbfc420b3, 0x2740f000 + .word 0xbd3ba75f, 0x4de97ddf + .word 0xbfc40dfb, 0x08378000 + .word 0xbc9bb453, 0xc4f7b685 + .word 0xbfc3fb45, 0xa5992000 + .word 0xbd319713, 0xc0cae559 + .word 0xbfc3e892, 0xfe995000 + .word 0xbd2b6aad, 0x914d5249 + .word 0xbfc3d5e3, 0x126bc000 + .word 0xbd13fb2f, 0x85096c4b + .word 0xbfc3c335, 0xe0447000 + .word 0xbd3ae77d, 0x114a8b5f + 
.word 0xbfc3b08b, 0x6757f000 + .word 0xbd15485c, 0x35b37c15 + .word 0xbfc39de3, 0xa6dae000 + .word 0xbd284fc7, 0x32ce95f1 + .word 0xbfc38b3e, 0x9e027000 + .word 0xbd21e21f, 0x5747d00e + .word 0xbfc3789c, 0x4c041000 + .word 0xbd19b4f4, 0x44d31e60 + .word 0xbfc365fc, 0xb0159000 + .word 0xbcc62fa8, 0x234b7289 + .word 0xbfc3535f, 0xc96d1000 + .word 0xbd013f1c, 0x3b1fab68 + .word 0xbfc340c5, 0x97411000 + .word 0xbd20b846, 0x104c58f3 + .word 0xbfc32e2e, 0x18c86000 + .word 0xbd3e6220, 0x6c327115 + .word 0xbfc31b99, 0x4d3a4000 + .word 0xbd3f098e, 0xe3a50810 + .word 0xbfc30907, 0x33ce3000 + .word 0xbd33f323, 0x7c4d853e + .word 0xbfc2f677, 0xcbbc0000 + .word 0xbd352b30, 0x2160f40d + .word 0xbfc2e3eb, 0x143bf000 + .word 0xbd218910, 0x2710016e + .word 0xbfc2d161, 0x0c868000 + .word 0xbd039d6c, 0xcb81b4a1 + .word 0xbfc2bed9, 0xb3d49000 + .word 0xbd095245, 0x4a40d26b + .word 0xbfc2ac55, 0x095f5000 + .word 0xbd38b2e6, 0x4bce4dd6 + .word 0xbfc299d3, 0x0c606000 + .word 0xbd3d4d00, 0x79dc08d9 + .word 0xbfc28753, 0xbc11a000 + .word 0xbd37494e, 0x359302e6 + .word 0xbfc274d7, 0x17ad4000 + .word 0xbd38a65b, 0xa0967592 + .word 0xbfc2625d, 0x1e6dd000 + .word 0xbd3ead69, 0xd0f61c28 + .word 0xbfc24fe5, 0xcf8e4000 + .word 0xbd318f96, 0x26b10d30 + .word 0xbfc23d71, 0x2a49c000 + .word 0xbd100d23, 0x8fd3df5c + .word 0xbfc22aff, 0x2ddbd000 + .word 0xbd32e1ea, 0xca7cb4f0 + .word 0xbfc2188f, 0xd9807000 + .word 0xbd131786, 0x02bce3fb + .word 0xbfc20623, 0x2c73c000 + .word 0xbd2351a5, 0x02bb95f5 + .word 0xbfc1f3b9, 0x25f25000 + .word 0xbd3a822c, 0x593df273 + .word 0xbfc1e151, 0xc5391000 + .word 0xbd38e5f5, 0xf578d80e + .word 0xbfc1ceed, 0x09853000 + .word 0xbd2d47c7, 0x8dcdaa0e + .word 0xbfc1bc8a, 0xf2143000 + .word 0xbd2acd64, 0xfb955458 + .word 0xbfc1aa2b, 0x7e23f000 + .word 0xbd2ca78e, 0x44389934 + .word 0xbfc197ce, 0xacf2a000 + .word 0xbd31ab14, 0x4caf6736 + .word 0xbfc18574, 0x7dbec000 + .word 0xbd3e6744, 0x45bd9b49 + .word 0xbfc1731c, 0xefc74000 + .word 0xbcfde27c, 0xd98317fd + .word 
0xbfc160c8, 0x024b2000 + .word 0xbd2ec2d2, 0xa9009e3d + .word 0xbfc14e75, 0xb489f000 + .word 0xbd3fdf84, 0x66dfe192 + .word 0xbfc13c26, 0x05c39000 + .word 0xbd318501, 0x13584d7c + .word 0xbfc129d8, 0xf5381000 + .word 0xbd1d77cc, 0x415a172e + .word 0xbfc1178e, 0x8227e000 + .word 0xbd21ef78, 0xce2d07f2 + .word 0xbfc10546, 0xabd3d000 + .word 0xbd00189b, 0x51d162e8 + .word 0xbfc0f301, 0x717cf000 + .word 0xbcff64bb, 0xe51793b4 + .word 0xbfc0e0be, 0xd264a000 + .word 0xbd3bafe2, 0x3aeb549c + .word 0xbfc0ce7e, 0xcdccc000 + .word 0xbd14652d, 0xabff5447 + .word 0xbfc0bc41, 0x62f73000 + .word 0xbd36ca04, 0x73bd9c29 + .word 0xbfc0aa06, 0x91267000 + .word 0xbd2755cc, 0x51f9bdae + .word 0xbfc097ce, 0x579d2000 + .word 0xbce33742, 0xda652881 + .word 0xbfc08598, 0xb59e3000 + .word 0xbd340d11, 0x47fb37ea + .word 0xbfc07365, 0xaa6d1000 + .word 0xbd16e172, 0x43f1226a + .word 0xbfc06135, 0x354d4000 + .word 0xbd363046, 0x28340ee9 + .word 0xbfc04f07, 0x5582d000 + .word 0xbd1a3d31, 0x4c780403 + .word 0xbfc03cdc, 0x0a51e000 + .word 0xbd381a9c, 0xf169fc5c + .word 0xbfc02ab3, 0x52ff2000 + .word 0xbd27ce63, 0x5d569b2b + .word 0xbfc0188d, 0x2ecf6000 + .word 0xbd03f965, 0x1cff9dfe + .word 0xbfc00669, 0x9d07c000 + .word 0xbd3b8775, 0x304686e1 + .word 0xbfbfe891, 0x39dbd000 + .word 0xbd159653, 0x60bdea07 + .word 0xbfbfc454, 0x5b8f0000 + .word 0xbd29cba7, 0xd5591204 + .word 0xbfbfa01c, 0x9db57000 + .word 0xbd29c32b, 0x816dd634 + .word 0xbfbf7be9, 0xfedbf000 + .word 0xbd2bcbe8, 0xb535310e + .word 0xbfbf57bc, 0x7d900000 + .word 0xbd176a6c, 0x9ea8b04e + .word 0xbfbf3394, 0x185fa000 + .word 0xbd1ea383, 0x09d097b7 + .word 0xbfbf0f70, 0xcdd99000 + .word 0xbd0718fb, 0x613960ee + .word 0xbfbeeb52, 0x9c8d1000 + .word 0xbd0b6260, 0x903c8f99 + .word 0xbfbec739, 0x830a1000 + .word 0xbcf1fcba, 0x80cdd0fe + .word 0xbfbea325, 0x7fe10000 + .word 0xbd2ef30d, 0x47e4627a + .word 0xbfbe7f16, 0x91a32000 + .word 0xbd2a7c74, 0xc871080d + .word 0xbfbe5b0c, 0xb6e22000 + .word 0xbd109021, 0x3b34d95f + .word 0xbfbe3707, 
0xee304000 + .word 0xbd20f684, 0xe6766abd + .word 0xbfbe1308, 0x36208000 + .word 0xbd21aeea, 0xf90019f9 + .word 0xbfbdef0d, 0x8d466000 + .word 0xbd2b715f, 0x7da2cb17 + .word 0xbfbdcb17, 0xf2361000 + .word 0xbd226a0a, 0x5ba47956 + .word 0xbfbda727, 0x63844000 + .word 0xbd1a8940, 0x1fa71733 + .word 0xbfbd833b, 0xdfc64000 + .word 0xbd24805c, 0x07408695 + .word 0xbfbd5f55, 0x65921000 + .word 0xbcec4739, 0x830a8d2a + .word 0xbfbd3b73, 0xf37e1000 + .word 0xbd2f3501, 0x33da5007 + .word 0xbfbd1797, 0x88219000 + .word 0xbd0b219d, 0xaf7df76b + .word 0xbfbcf3c0, 0x22142000 + .word 0xbce9d2b6, 0x6ddd996f + .word 0xbfbccfed, 0xbfee1000 + .word 0xbd0d4119, 0x7f3892ad + .word 0xbfbcac20, 0x60484000 + .word 0xbd2d53ed, 0xcc4f420b + .word 0xbfbc8858, 0x01bc4000 + .word 0xbd2646d1, 0xc65aacd3 + .word 0xbfbc6494, 0xa2e41000 + .word 0xbd214bd1, 0x564189cb + .word 0xbfbc40d6, 0x425a5000 + .word 0xbd296224, 0x3a3261b9 + .word 0xbfbc1d1c, 0xdeba5000 + .word 0xbd02f7e7, 0x23a02373 + .word 0xbfbbf968, 0x769fc000 + .word 0xbd24218c, 0x8d824283 + .word 0xbfbbd5b9, 0x08a72000 + .word 0xbd2236aa, 0x3ae84f31 + .word 0xbfbbb20e, 0x936d6000 + .word 0xbd22e8af, 0x9574c8e4 + .word 0xbfbb8e69, 0x15901000 + .word 0xbd22bef7, 0xf208fbd9 + .word 0xbfbb6ac8, 0x8dad5000 + .word 0xbd2637bf, 0xea044b8d + .word 0xbfbb472c, 0xfa63e000 + .word 0xbd1246f5, 0xc7f4588b + .word 0xbfbb2396, 0x5a52f000 + .word 0xbd2e009b, 0x115ec8f8 + .word 0xbfbb0004, 0xac1a8000 + .word 0xbd1aaf97, 0x037f2b35 + .word 0xbfbadc77, 0xee5ae000 + .word 0xbd25189b, 0xec79cdf7 + .word 0xbfbab8f0, 0x1fb52000 + .word 0xbd27f69d, 0xd23d3ac2 + .word 0xbfba956d, 0x3ecad000 + .word 0xbd2cc6f2, 0x9805895f + .word 0xbfba71ef, 0x4a3e2000 + .word 0xbd1bbc94, 0x7b201fbf + .word 0xbfba4e76, 0x40b1b000 + .word 0xbd286f52, 0x51aefe0e + .word 0xbfba2b02, 0x20c8e000 + .word 0xbd17d329, 0x8e6b7dbf + .word 0xbfba0792, 0xe9277000 + .word 0xbd2958c6, 0x4d94ab90 + .word 0xbfb9e428, 0x9871e000 + .word 0xbd22c483, 0xd0942b9c + .word 0xbfb9c0c3, 0x2d4d2000 + 
.word 0xbd1520fd, 0x85f1e661 + .word 0xbfb99d62, 0xa65eb000 + .word 0xbd22dd17, 0xd834450a + .word 0xbfb97a07, 0x024cb000 + .word 0xbd2ce867, 0xd19bed86 + .word 0xbfb956b0, 0x3fbdd000 + .word 0xbd286fb6, 0x03fe1b67 + .word 0xbfb9335e, 0x5d594000 + .word 0xbd23115c, 0x3abd47da + .word 0xbfb91011, 0x59c6c000 + .word 0xbd27af17, 0x9df80b59 + .word 0xbfb8ecc9, 0x33aeb000 + .word 0xbd1ba18c, 0x833010ab + .word 0xbfb8c985, 0xe9b9e000 + .word 0xbd290791, 0x0379ff94 + .word 0xbfb8a647, 0x7a91d000 + .word 0xbd285181, 0x5f37adbf + .word 0xbfb8830d, 0xe4e08000 + .word 0xbd05f60b, 0x79c8f66a + .word 0xbfb85fd9, 0x27506000 + .word 0xbd248fcf, 0xccd1e7c7 + .word 0xbfb83ca9, 0x408ca000 + .word 0xbd2326c8, 0xd744c7d1 + .word 0xbfb8197e, 0x2f40e000 + .word 0xbd0f80dc, 0xf96ffdf7 + .word 0xbfb7f657, 0xf2194000 + .word 0xbd21bef9, 0x43faf4d2 + .word 0xbfb7d336, 0x87c29000 + .word 0xbd0e4461, 0xf3833832 + .word 0xbfb7b019, 0xeeea0000 + .word 0xbd275649, 0xaee848d4 + .word 0xbfb78d02, 0x263d8000 + .word 0xbd069b57, 0x94b69fb7 + .word 0xbfb769ef, 0x2c6b5000 + .word 0xbd1a35d8, 0xc73b6a55 + .word 0xbfb746e1, 0x00226000 + .word 0xbd2db25d, 0x23c3bc5b + .word 0xbfb723d7, 0xa0123000 + .word 0xbd2c3cbb, 0x84fef08e + .word 0xbfb700d3, 0x0aeac000 + .word 0xbcec1e8d, 0xa99ded32 + .word 0xbfb6ddd3, 0x3f5c7000 + .word 0xbd2aeb06, 0x82906a06 + .word 0xbfb6bad8, 0x3c188000 + .word 0xbd0daf3c, 0xc08926ae + .word 0xbfb697e1, 0xffd06000 + .word 0xbd296c57, 0x15a12bb6 + .word 0xbfb674f0, 0x89365000 + .word 0xbd24f332, 0x993a6604 + .word 0xbfb65203, 0xd6fcf000 + .word 0xbd1ea006, 0x8199326b + .word 0xbfb62f1b, 0xe7d77000 + .word 0xbd1d0cd5, 0x02538764 + .word 0xbfb60c38, 0xba799000 + .word 0xbd1172c4, 0x3aec1296 + .word 0xbfb5e95a, 0x4d979000 + .word 0xbcfcb7ce, 0x1d171711 + .word 0xbfb5c680, 0x9fe63000 + .word 0xbd23c479, 0x935581b6 + .word 0xbfb5a3ab, 0xb01ad000 + .word 0xbd2c4ae9, 0x3cd5f430 + .word 0xbfb580db, 0x7ceb5000 + .word 0xbd1c07f6, 0xcbe60d53 + .word 0xbfb55e10, 0x050e0000 + .word 
0xbd0c1d74, 0x0c53c72e + .word 0xbfb53b49, 0x4739c000 + .word 0xbd221868, 0x5306aaa5 + .word 0xbfb51887, 0x42261000 + .word 0xbd0850ec, 0xb12c59ec + .word 0xbfb4f5c9, 0xf48ad000 + .word 0xbd0580c1, 0x2c81f8fd + .word 0xbfb4d311, 0x5d207000 + .word 0xbd2d58bb, 0x4fa163c2 + .word 0xbfb4b05d, 0x7aa01000 + .word 0xbd07029c, 0x6ef93715 + .word 0xbfb48dae, 0x4bc31000 + .word 0xbcb85b20, 0x8c200bea + .word 0xbfb46b03, 0xcf437000 + .word 0xbd2787a5, 0x2f0f6296 + .word 0xbfb4485e, 0x03dbd000 + .word 0xbd2f5a8d, 0xd1a4d56e + .word 0xbfb425bc, 0xe8474000 + .word 0xbd2365ac, 0x5219daef + .word 0xbfb40320, 0x7b414000 + .word 0xbd26fd84, 0xaa8157c0 + .word 0xbfb3e088, 0xbb85f000 + .word 0xbd248068, 0xbdc331fa + .word 0xbfb3bdf5, 0xa7d1e000 + .word 0xbd2cc85e, 0xa5db4ed7 + .word 0xbfb39b67, 0x3ee24000 + .word 0xbd0a759b, 0xa99f5667 + .word 0xbfb378dd, 0x7f749000 + .word 0xbd1c5044, 0xa3c7eb28 + .word 0xbfb35658, 0x68470000 + .word 0xbd2464d7, 0x0035b508 + .word 0xbfb333d7, 0xf8183000 + .word 0xbd2e96d4, 0x957e477c + .word 0xbfb3115c, 0x2da75000 + .word 0xbd25bc37, 0x00651448 + .word 0xbfb2eee5, 0x07b40000 + .word 0xbd08081e, 0xdd77c860 + .word 0xbfb2cc72, 0x84fe5000 + .word 0xbd2e38bd, 0x0cb32a28 + .word 0xbfb2aa04, 0xa4471000 + .word 0xbd1e922e, 0xa2c72d06 + .word 0xbfb2879b, 0x644f5000 + .word 0xbd1752b6, 0xf65943ec + .word 0xbfb26536, 0xc3d8c000 + .word 0xbd0b4bac, 0x097c5ba3 + .word 0xbfb242d6, 0xc1a58000 + .word 0xbd24b838, 0xac648481 + .word 0xbfb2207b, 0x5c785000 + .word 0xbd127633, 0xf0431efb + .word 0xbfb1fe24, 0x93144000 + .word 0xbd27a374, 0xe1a7c696 + .word 0xbfb1dbd2, 0x643d1000 + .word 0xbd221649, 0xb2ef8928 + .word 0xbfb1b984, 0xceb6e000 + .word 0xbd121a31, 0x2f307601 + .word 0xbfb1973b, 0xd1465000 + .word 0xbd159b45, 0x53e4c2cb + .word 0xbfb174f7, 0x6ab09000 + .word 0xbcf71031, 0x7ee2e483 + .word 0xbfb152b7, 0x99bb3000 + .word 0xbd299135, 0xbe3f3df6 + .word 0xbfb1307c, 0x5d2c7000 + .word 0xbd2357c9, 0xfa3dbf1f + .word 0xbfb10e45, 0xb3cae000 + .word 0xbd20612d, 
0xaf6b9737 + .word 0xbfb0ec13, 0x9c5da000 + .word 0xbd180247, 0xe54ebd73 + .word 0xbfb0c9e6, 0x15ac4000 + .word 0xbd2c2da8, 0x0974d976 + .word 0xbfb0a7bd, 0x1e7ef000 + .word 0xbd20f926, 0xcdf8dfb4 + .word 0xbfb08598, 0xb59e3000 + .word 0xbd240d11, 0x47fb37ea + .word 0xbfb06378, 0xd9d32000 + .word 0xbd104990, 0x672b0729 + .word 0xbfb0415d, 0x89e74000 + .word 0xbd1111c0, 0x5cf1d753 + .word 0xbfb01f46, 0xc4a4a000 + .word 0xbd11157c, 0x89ecf845 + .word 0xbfaffa69, 0x11ab9000 + .word 0xbcf80464, 0xc1c0d47a + .word 0xbfafb64d, 0xaa8b6000 + .word 0xbd13830d, 0xaeb373e0 + .word 0xbfaf723b, 0x517fc000 + .word 0xbd048a79, 0x154f796a + .word 0xbfaf2e32, 0x04209000 + .word 0xbcfb9ba8, 0x2f4d6e7f + .word 0xbfaeea31, 0xc006b000 + .word 0xbd10f760, 0xd81b6242 + .word 0xbfaea63a, 0x82cc0000 + .word 0xbd19f144, 0x08e210e7 + .word 0xbfae624c, 0x4a0b5000 + .word 0xbd1c368e, 0x2e6265dd + .word 0xbfae1e67, 0x13606000 + .word 0xbd1a0d3c, 0xb7b141db + .word 0xbfadda8a, 0xdc67e000 + .word 0xbd1c9ca7, 0x364c37a2 + .word 0xbfad96b7, 0xa2bf8000 + .word 0xbd12eb81, 0xf49d3d78 + .word 0xbfad52ed, 0x6405d000 + .word 0xbd10de8b, 0x575910a6 + .word 0xbfad0f2c, 0x1dda6000 + .word 0xbd0c6fc7, 0x04385ddf + .word 0xbfaccb73, 0xcdddb000 + .word 0xbcf65c36, 0xe09f5fe2 + .word 0xbfac87c4, 0x71b12000 + .word 0xbd13799a, 0xf29d923d + .word 0xbfac441e, 0x06f72000 + .word 0xbd153c7d, 0x26143455 + .word 0xbfac0080, 0x8b530000 + .word 0xbd003c05, 0x63baea2e + .word 0xbfabbceb, 0xfc68f000 + .word 0xbd0080f2, 0xe79d07ab + .word 0xbfab7960, 0x57de2000 + .word 0xbd0f5af1, 0xf7b24d0f + .word 0xbfab35dd, 0x9b58b000 + .word 0xbd1559d3, 0x5b3d5639 + .word 0xbfaaf263, 0xc47fb000 + .word 0xbd085458, 0x172a97ad + .word 0xbfaaaef2, 0xd0fb1000 + .word 0xbcdf8346, 0xa77685c1 + .word 0xbfaa6b8a, 0xbe73a000 + .word 0xbd1e988d, 0x46e25c90 + .word 0xbfaa282b, 0x8a936000 + .word 0xbce70a67, 0xf10371d7 + .word 0xbfa9e4d5, 0x3304e000 + .word 0xbcfec4a6, 0x991acef2 + .word 0xbfa9a187, 0xb573d000 + .word 0xbd1cf746, 0xc4ec9bca + 
.word 0xbfa95e43, 0x0f8ce000 + .word 0xbd01774c, 0x225e2c8d + .word 0xbfa91b07, 0x3efd7000 + .word 0xbcf8a0eb, 0x0224d5a9 + .word 0xbfa8d7d4, 0x4173f000 + .word 0xbcf24a7b, 0x7a089116 + .word 0xbfa894aa, 0x149fb000 + .word 0xbcfa19a8, 0xbe97660a + .word 0xbfa85188, 0xb630f000 + .word 0xbcca0544, 0x165f80aa + .word 0xbfa80e70, 0x23d8c000 + .word 0xbd1988fa, 0x435d02ec + .word 0xbfa7cb60, 0x5b495000 + .word 0xbcfc8af3, 0x69d6d0f4 + .word 0xbfa78859, 0x5a357000 + .word 0xbd0ee9e5, 0xef898b68 + .word 0xbfa7455b, 0x1e511000 + .word 0xbcfb28ce, 0xb91e296d + .word 0xbfa70265, 0xa550e000 + .word 0xbd0ddc83, 0xb80a8c63 + .word 0xbfa6bf78, 0xecea9000 + .word 0xbd163cc0, 0x0f16f7e9 + .word 0xbfa67c94, 0xf2d4b000 + .word 0xbd16b082, 0x09f3282f + .word 0xbfa639b9, 0xb4c6b000 + .word 0xbd14f37b, 0x6b7f9673 + .word 0xbfa5f6e7, 0x3078e000 + .word 0xbd1f6f4a, 0xffdb6d69 + .word 0xbfa5b41d, 0x63a49000 + .word 0xbd0abcc4, 0x7e8a0c20 + .word 0xbfa5715c, 0x4c03c000 + .word 0xbd1dddc8, 0x80ee2760 + .word 0xbfa52ea3, 0xe7519000 + .word 0xbd16ff79, 0x68012363 + .word 0xbfa4ebf4, 0x3349e000 + .word 0xbcf37578, 0x4620c465 + .word 0xbfa4a94d, 0x2da96000 + .word 0xbd18ace0, 0x8a56ed78 + .word 0xbfa466ae, 0xd42de000 + .word 0xbcff4c64, 0x521016be + .word 0xbfa42419, 0x2495d000 + .word 0xbd05f329, 0x88dd64a6 + .word 0xbfa3e18c, 0x1ca0a000 + .word 0xbd1d23b4, 0xfdb8de39 + .word 0xbfa39f07, 0xba0eb000 + .word 0xbd1ac4a7, 0x590b95de + .word 0xbfa35c8b, 0xfaa13000 + .word 0xbccabeaf, 0x7cf59aac + .word 0xbfa31a18, 0xdc1a1000 + .word 0xbd07dd58, 0xd860ceab + .word 0xbfa2d7ae, 0x5c3c5000 + .word 0xbd175b1a, 0xe989664c + .word 0xbfa2954c, 0x78cbc000 + .word 0xbd1c3526, 0x570c1572 + .word 0xbfa252f3, 0x2f8d1000 + .word 0xbd107d35, 0xc0436cf5 + .word 0xbfa210a2, 0x7e45c000 + .word 0xbcf8ceca, 0x131bef9c + .word 0xbfa1ce5a, 0x62bc3000 + .word 0xbd04e63c, 0x6c6fccc5 + .word 0xbfa18c1a, 0xdab7b000 + .word 0xbcf22af4, 0xd32f2ac0 + .word 0xbfa149e3, 0xe4005000 + .word 0xbd1519d5, 0x96fa5c0c + .word 
0xbfa107b5, 0x7c5f2000 + .word 0xbd152b81, 0xe94af0a6 + .word 0xbfa0c58f, 0xa19df000 + .word 0xbd155317, 0x53a74377 + .word 0xbfa08372, 0x51877000 + .word 0xbd1cc91e, 0xb2004222 + .word 0xbfa0415d, 0x89e74000 + .word 0xbd0111c0, 0x5cf1d753 + .word 0xbf9ffea2, 0x91136000 + .word 0xbd04dd01, 0xd7640dc2 + .word 0xbf9f7a9b, 0x16782000 + .word 0xbd00ab64, 0x9c6f9f5c + .word 0xbf9ef6a4, 0x9f98f000 + .word 0xbd0671e4, 0xe8f151a3 + .word 0xbf9e72bf, 0x2813c000 + .word 0xbd0ca2ba, 0xda22cae5 + .word 0xbf9deeea, 0xab883000 + .word 0xbd0c6e1d, 0x7741b591 + .word 0xbf9d6b27, 0x25979000 + .word 0xbd000425, 0x79723e3d + .word 0xbf9ce774, 0x91e4d000 + .word 0xbd00d7ce, 0xf3d25198 + .word 0xbf9c63d2, 0xec14a000 + .word 0xbd05e318, 0xfe7acbca + .word 0xbf9be042, 0x2fcd6000 + .word 0xbd01ec42, 0x87f2c9ca + .word 0xbf9b5cc2, 0x58b71000 + .word 0xbd01cc23, 0x715f7fd0 + .word 0xbf9ad953, 0x627b6000 + .word 0xbd0ab5a1, 0x1a805efd + .word 0xbf9a55f5, 0x48c5c000 + .word 0xbcf0fc7b, 0x0697e1b5 + .word 0xbf99d2a8, 0x07432000 + .word 0xbcf7cf80, 0x538b441e + .word 0xbf994f6b, 0x99a24000 + .word 0xbcf1d5ef, 0x96cf7f51 + .word 0xbf98cc3f, 0xfb937000 + .word 0xbd050394, 0x323f2c7a + .word 0xbf984925, 0x28c8c000 + .word 0xbd057d17, 0x3697cf30 + .word 0xbf97c61b, 0x1cf5d000 + .word 0xbd0dc0dc, 0x1ed96ee4 + .word 0xbf974321, 0xd3d00000 + .word 0xbcfb4a69, 0x0fe94778 + .word 0xbf96c039, 0x490e3000 + .word 0xbcff7b34, 0x02fd59ca + .word 0xbf963d61, 0x78690000 + .word 0xbd07abf3, 0x89596542 + .word 0xbf95ba9a, 0x5d9ac000 + .word 0xbcacbb84, 0xe08d78ac + .word 0xbf9537e3, 0xf45f3000 + .word 0xbcf592ce, 0x96bf9299 + .word 0xbf94b53e, 0x3873e000 + .word 0xbd0b6ee9, 0xbca265c1 + .word 0xbf9432a9, 0x25980000 + .word 0xbd098139, 0x928637fe + .word 0xbf93b024, 0xb78c5000 + .word 0xbcf9a5e2, 0x3a02f82a + .word 0xbf932db0, 0xea132000 + .word 0xbd0c432c, 0x4c2257ef + .word 0xbf92ab4d, 0xb8f09000 + .word 0xbcf82c84, 0xa532c74c + .word 0xbf9228fb, 0x1fea2000 + .word 0xbd0c4f8c, 0xa12647f9 + .word 0xbf91a6b9, 
0x1ac73000 + .word 0xbcec30e9, 0xb54e2dd6 + .word 0xbf912487, 0xa5507000 + .word 0xbd0edf2f, 0xf6a59c94 + .word 0xbf90a266, 0xbb508000 + .word 0xbcfa5be1, 0x7c2ec500 + .word 0xbf902056, 0x58935000 + .word 0xbd008e93, 0xe47420b7 + .word 0xbf8f3cac, 0xf1cd3000 + .word 0xbcf64d83, 0xc9a6875d + .word 0xbf8e38ce, 0x30333000 + .word 0xbcc0bbae, 0x12ebf308 + .word 0xbf8d3510, 0x63fa4000 + .word 0xbcea8d92, 0xdf000beb + .word 0xbf8c3173, 0x84c75000 + .word 0xbcfe0cc0, 0x31046026 + .word 0xbf8b2df7, 0x8a428000 + .word 0xbcf4c647, 0xa5d4542f + .word 0xbf8a2a9c, 0x6c170000 + .word 0xbce18876, 0x525971be + .word 0xbf892762, 0x21f33000 + .word 0xbcd456ba, 0x9344a27f + .word 0xbf882448, 0xa388a000 + .word 0xbcd55104, 0xb16137f1 + .word 0xbf87214f, 0xe88c0000 + .word 0xbcf27275, 0xd7338080 + .word 0xbf861e77, 0xe8b53000 + .word 0xbcff8c11, 0x507150cb + .word 0xbf851bc0, 0x9bbf4000 + .word 0xbcdae1ea, 0x5258a3c6 + .word 0xbf841929, 0xf9683000 + .word 0xbcd77c75, 0x5d013688 + .word 0xbf8316b3, 0xf9714000 + .word 0xbcfb8dcc, 0x8ba5563d + .word 0xbf82145e, 0x939ef000 + .word 0xbcce891c, 0x6274ffda + .word 0xbf811229, 0xbfb89000 + .word 0xbcf50ee4, 0x5fd053b1 + .word 0xbf801015, 0x7588d000 + .word 0xbcfce251, 0x998b505f + .word 0xbf7e1c43, 0x59bad000 + .word 0xbce9f504, 0xadbb6021 + .word 0xbf7c189c, 0xbb0e2000 + .word 0xbcdfeabb, 0x69dea7ed + .word 0xbf7a1536, 0xfeb35000 + .word 0xbcecb8e8, 0x91b69c25 + .word 0xbf781212, 0x14586000 + .word 0xbce6a81c, 0x14b9f937 + .word 0xbf760f2d, 0xebb16000 + .word 0xbcbb6835, 0x84891753 + .word 0xbf740c8a, 0x74787000 + .word 0xbce1c38e, 0xf838000c + .word 0xbf720a27, 0x9e6e0000 + .word 0xbce34d96, 0x922727aa + .word 0xbf700805, 0x59588000 + .word 0xbce66afc, 0xb31c67b2 + .word 0xbf6c0c47, 0x2a092000 + .word 0xbc657d36, 0x31cacba0 + .word 0xbf680904, 0x82898000 + .word 0xbcc701a5, 0xa9c30314 + .word 0xbf640642, 0x9be3c000 + .word 0xbcccf0de, 0xc26e96f3 + .word 0xbf600401, 0x55d58000 + .word 0xbcd13bce, 0x0ce3ddd8 + .word 0xbf580481, 0x20511000 + 
.word 0xbcc0a8ce, 0x7ceb0de6 + .word 0xbf500200, 0x55655000 + .word 0xbcc11266, 0xaf9afc3f + .word 0xbf400100, 0x15575000 + .word 0xbca62237, 0x79c0dc11 + .word 0x00000000, 0x00000000 + .word 0x00000000, 0x00000000 + .word 0x3f4ffc00, 0xaa8ab000 + .word 0x3c80fbc0, 0x4d051925 + .word 0x3f5ff802, 0xa9ab1000 + .word 0x3c8ccf14, 0xf1d0a9f2 + .word 0x3f67f704, 0x7d798000 + .word 0x3cbed344, 0xeb43240a + .word 0x3f6ff00a, 0xa2b10000 + .word 0x3cd78094, 0x10d6ad37 + .word 0x3f73f38a, 0x60f06000 + .word 0x3cd22569, 0x3c937494 + .word 0x3f77ee11, 0xebd82000 + .word 0x3ced274f, 0x0b48e81d + .word 0x3f7be79c, 0x70058000 + .word 0x3ced91f3, 0x4d808088 + .word 0x3f7fe02a, 0x6b106000 + .word 0x3cde23f0, 0xdda40e47 + .word 0x3f81ebde, 0x2d199000 + .word 0x3cef97c0, 0x0b723c9a + .word 0x3f83e729, 0x5d25a000 + .word 0x3cef63e0, 0x0d65eebc + .word 0x3f85e1f7, 0x03ecb000 + .word 0x3cfca09f, 0x585da1b5 + .word 0x3f87dc47, 0x5f810000 + .word 0x3cf4edba, 0x4a25e0b1 + .word 0x3f89d61a, 0xadc6b000 + .word 0x3cfb1963, 0x27b4256d + .word 0x3f8bcf71, 0x2c743000 + .word 0x3cf09782, 0x5ef65dc3 + .word 0x3f8dc84b, 0x19123000 + .word 0x3cf02950, 0x78e96cc1 + .word 0x3f8fc0a8, 0xb0fc0000 + .word 0x3cdf1e7c, 0xf6d3a69c + .word 0x3f90dc45, 0x18afc000 + .word 0x3d090f43, 0x1ff3b010 + .word 0x3f91d7f7, 0xeb9ee000 + .word 0x3d07cd8a, 0xf80670b5 + .word 0x3f92d36c, 0xefb55000 + .word 0x3cff0bb3, 0x41706c38 + .word 0x3f93cea4, 0x4346a000 + .word 0x3cf5d3bc, 0xd295bf53 + .word 0x3f94c99e, 0x04901000 + .word 0x3d0bd98c, 0xbbebe949 + .word 0x3f95c45a, 0x51b8d000 + .word 0x3cec449d, 0xe927827c + .word 0x3f96bed9, 0x48d1b000 + .word 0x3cff43be, 0x9f5bc086 + .word 0x3f97b91b, 0x07d5b000 + .word 0x3cd1aa92, 0x7f54c717 + .word 0x3f98b31f, 0xaca9b000 + .word 0x3c8c3ab4, 0x8db4decf + .word 0x3f99ace7, 0x551cc000 + .word 0x3cf45134, 0x09c1df81 + .word 0x3f9aa672, 0x1ee83000 + .word 0x3cf6a75a, 0xe2d7a49d + .word 0x3f9b9fc0, 0x27af9000 + .word 0x3cd97fbd, 0x465b7589 + .word 0x3f9c98d1, 0x8d00c000 + .word 
0x3d0027ab, 0xe9d883c3 + .word 0x3f9d91a6, 0x6c543000 + .word 0x3d0987c5, 0x9633ee68 + .word 0x3f9e8a3e, 0xe30cd000 + .word 0x3d095817, 0x086b1c01 + .word 0x3f9f829b, 0x0e783000 + .word 0x3ce80267, 0xc7e09e3e + .word 0x3fa03d5d, 0x85e73000 + .word 0x3d1dde25, 0x83b4a73b + .word 0x3fa0b94f, 0x7c196000 + .word 0x3ce76769, 0x0fdd87d3 + .word 0x3fa13523, 0x78597000 + .word 0x3cef29e2, 0x4702d328 + .word 0x3fa1b0d9, 0x8923d000 + .word 0x3d12ff85, 0x945dd915 + .word 0x3fa22c71, 0xbcea8000 + .word 0x3cfd2818, 0xf87f888f + .word 0x3fa2a7ec, 0x2214e000 + .word 0x3d10e631, 0x0add3804 + .word 0x3fa32348, 0xc7001000 + .word 0x3d0a5b6e, 0x42c7927d + .word 0x3fa39e87, 0xb9feb000 + .word 0x3d1abf52, 0x02b64055 + .word 0x3fa419a9, 0x09593000 + .word 0x3d0ae6e3, 0x3ea4753a + .word 0x3fa494ac, 0xc34d9000 + .word 0x3ce1c78a, 0x56fd2473 + .word 0x3fa50f92, 0xf60f9000 + .word 0x3d12d9f6, 0x1523ffc6 + .word 0x3fa58a5b, 0xafc8e000 + .word 0x3d035231, 0xaa3d4b1d + .word 0x3fa60506, 0xfe98d000 + .word 0x3d1516fd, 0xf9ac7f28 + .word 0x3fa67f94, 0xf094b000 + .word 0x3d1b307c, 0xf9f93b5b + .word 0x3fa6fa05, 0x93c7b000 + .word 0x3d0a0af2, 0x0eb1a504 + .word 0x3fa77458, 0xf632d000 + .word 0x3d19f88c, 0x69e543dd + .word 0x3fa7ee8f, 0x25cd4000 + .word 0x3ce7bd3d, 0xcb47c2e4 + .word 0x3fa868a8, 0x3083f000 + .word 0x3d0b3b8b, 0xd96a72db + .word 0x3fa8e2a4, 0x243a1000 + .word 0x3d173dd6, 0x0284c920 + .word 0x3fa95c83, 0x0ec8e000 + .word 0x3cff5beb, 0x41d00a41 + .word 0x3fa9d644, 0xfdffa000 + .word 0x3cf3c905, 0x39a473b6 + .word 0x3faa4fe9, 0xffa3d000 + .word 0x3cf1a7b5, 0xfbfd6db2 + .word 0x3faac972, 0x21711000 + .word 0x3d1f1a7d, 0xe0264459 + .word 0x3fab42dd, 0x71197000 + .word 0x3cebec28, 0xd14c7d9f + .word 0x3fabbc2b, 0xfc44f000 + .word 0x3d005cf2, 0xdd7d04a2 + .word 0x3fac355d, 0xd0921000 + .word 0x3d1e5999, 0x357f0710 + .word 0x3facae72, 0xfb95c000 + .word 0x3cf0540d, 0xfda4e418 + .word 0x3fad276b, 0x8adb0000 + .word 0x3d16a423, 0xc78a64b0 + .word 0x3fada047, 0x8be39000 + .word 0x3cf2963d, 
0x8fb7f02b + .word 0x3fae1907, 0x0c276000 + .word 0x3ca5b99b, 0x9d617a09 + .word 0x3fae91aa, 0x1914f000 + .word 0x3d10beaf, 0xf119cac5 + .word 0x3faf0a30, 0xc0116000 + .word 0x3cf5330b, 0xe64b8b77 + .word 0x3faf829b, 0x0e783000 + .word 0x3cf80267, 0xc7e09e3e + .word 0x3faffae9, 0x119b9000 + .word 0x3cf819ba, 0x13162a9c + .word 0x3fb0398d, 0x6b622000 + .word 0x3d153ac8, 0x0d00cc01 + .word 0x3fb07598, 0x3598e000 + .word 0x3d11c4c0, 0x6d2999e2 + .word 0x3fb0b194, 0xee0d1000 + .word 0x3d199ba9, 0x3da7b72e + .word 0x3fb0ed83, 0x9b552000 + .word 0x3d1bf82e, 0x4add5131 + .word 0x3fb12964, 0x4402e000 + .word 0x3d056224, 0x572ac464 + .word 0x3fb16536, 0xeea37000 + .word 0x3d25c1d0, 0xc4b82e7c + .word 0x3fb1a0fb, 0xa1bf8000 + .word 0x3d24a3fc, 0xc319d6dc + .word 0x3fb1dcb2, 0x63db1000 + .word 0x3d22889e, 0xbd3d1303 + .word 0x3fb2185b, 0x3b75a000 + .word 0x3cfce760, 0x70cdcfc5 + .word 0x3fb253f6, 0x2f0a1000 + .word 0x3d105be3, 0xeda69c04 + .word 0x3fb28f83, 0x450ed000 + .word 0x3d251aeb, 0x54232ed1 + .word 0x3fb2cb02, 0x83f5d000 + .word 0x3d2c3dc5, 0x94cae043 + .word 0x3fb30673, 0xf22c8000 + .word 0x3d24c9e2, 0x9dcf0ba5 + .word 0x3fb341d7, 0x961bd000 + .word 0x3cfd0929, 0x98376105 + .word 0x3fb37d2d, 0x76283000 + .word 0x3cfcfaab, 0x2400751e + .word 0x3fb3b875, 0x98b1b000 + .word 0x3d1bb7d4, 0xd6a6b9db + .word 0x3fb3f3b0, 0x04140000 + .word 0x3cee2474, 0xacdfcec5 + .word 0x3fb42edc, 0xbea64000 + .word 0x3d1bc0ee, 0xea7c9acd + .word 0x3fb469fb, 0xcebb5000 + .word 0x3d26cc78, 0x9e4ae327 + .word 0x3fb4a50d, 0x3aa1b000 + .word 0x3cd003d9, 0xeed183bb + .word 0x3fb4e011, 0x08a35000 + .word 0x3d25cb9f, 0xbe58b5c9 + .word 0x3fb51b07, 0x3f061000 + .word 0x3d207ed2, 0x4f1cd0d4 + .word 0x3fb555ef, 0xe40b5000 + .word 0x3ce692f1, 0x90d1c46b + .word 0x3fb590ca, 0xfdf01000 + .word 0x3d28509e, 0xae455754 + .word 0x3fb5cb98, 0x92ed4000 + .word 0x3d17be44, 0xa64fc52f + .word 0x3fb60658, 0xa9375000 + .word 0x3ce8763b, 0xdd389ef2 + .word 0x3fb6410b, 0x46fe7000 + .word 0x3d256038, 0x61a13976 + 
.word 0x3fb67bb0, 0x726ec000 + .word 0x3cef724b, 0x69ef5912 + .word 0x3fb6b648, 0x31afe000 + .word 0x3d1033d7, 0xb22085b8 + .word 0x3fb6f0d2, 0x8ae56000 + .word 0x3d269737, 0xc93373da + .word 0x3fb72b4f, 0x842ea000 + .word 0x3d21f666, 0x7fe6c45a + .word 0x3fb765bf, 0x23a6b000 + .word 0x3d2c2687, 0xf9477b53 + .word 0x3fb7a021, 0x6f649000 + .word 0x3d2c2499, 0x430831ff + .word 0x3fb7da76, 0x6d7b1000 + .word 0x3d066422, 0x240644d8 + .word 0x3fb814be, 0x23f8c000 + .word 0x3ccb2381, 0xda82fdfd + .word 0x3fb84ef8, 0x98e82000 + .word 0x3d205465, 0xb72d106e + .word 0x3fb88925, 0xd24fa000 + .word 0x3d2c55f5, 0x76088ff3 + .word 0x3fb8c345, 0xd6319000 + .word 0x3d2641eb, 0x596854cc + .word 0x3fb8fd58, 0xaa8c2000 + .word 0x3cf136fe, 0x4348da4e + .word 0x3fb9375e, 0x55595000 + .word 0x3d2dbb86, 0xe70186c9 + .word 0x3fb97156, 0xdc8f6000 + .word 0x3d0f01f3, 0x28123425 + .word 0x3fb9ab42, 0x46203000 + .word 0x3d0d66df, 0x661e3e7b + .word 0x3fb9e520, 0x97f9c000 + .word 0x3d235fac, 0xb52dd050 + .word 0x3fba1ef1, 0xd8061000 + .word 0x3d29a82e, 0xdbf2f796 + .word 0x3fba58b6, 0x0c2b2000 + .word 0x3d091c65, 0x1d1b06b1 + .word 0x3fba926d, 0x3a4ad000 + .word 0x3d158d94, 0x2f48aa71 + .word 0x3fbacc17, 0x68433000 + .word 0x3d0561f1, 0x7d2016d1 + .word 0x3fbb05b4, 0x9bee4000 + .word 0x3d0ff22c, 0x18f84a5e + .word 0x3fbb3f44, 0xdb221000 + .word 0x3d2fa2a7, 0xb1bc135d + .word 0x3fbb78c8, 0x2bb0e000 + .word 0x3d2b4210, 0x878cf032 + .word 0x3fbbb23e, 0x9368e000 + .word 0x3d22e9cf, 0x954c48ea + .word 0x3fbbeba8, 0x18146000 + .word 0x3d1d921d, 0x248382a6 + .word 0x3fbc2504, 0xbf79d000 + .word 0x3d1c5f13, 0x43bd2b70 + .word 0x3fbc5e54, 0x8f5bc000 + .word 0x3d1d0c57, 0x585fbe06 + .word 0x3fbc9797, 0x8d78e000 + .word 0x3d223fde, 0xd105cef9 + .word 0x3fbcd0cd, 0xbf8c1000 + .word 0x3d0f0a6d, 0xa86eba18 + .word 0x3fbd09f7, 0x2b4c4000 + .word 0x3d2048c0, 0x00354e33 + .word 0x3fbd4313, 0xd66cb000 + .word 0x3d0aeaf2, 0x1bb2a3b2 + .word 0x3fbd7c23, 0xc69cb000 + .word 0x3d0a046c, 0x8b35e23e + .word 
0x3fbdb527, 0x0187d000 + .word 0x3d224ef0, 0xad5c303f + .word 0x3fbdee1d, 0x8cd5e000 + .word 0x3d2ae4bf, 0x1ac200ee + .word 0x3fbe2707, 0x6e2af000 + .word 0x3d072f4f, 0x543fff10 + .word 0x3fbe5fe4, 0xab272000 + .word 0x3d240a2c, 0x11600366 + .word 0x3fbe98b5, 0x49671000 + .word 0x3d119dd2, 0x27143a5b + .word 0x3fbed179, 0x4e837000 + .word 0x3d20175e, 0x45b17dbe + .word 0x3fbf0a30, 0xc0116000 + .word 0x3d05330b, 0xe64b8b77 + .word 0x3fbf42db, 0xa3a22000 + .word 0x3d29da91, 0x9a4127e6 + .word 0x3fbf7b79, 0xfec37000 + .word 0x3d2bbd9e, 0x05da04c0 + .word 0x3fbfb40b, 0xd6ff4000 + .word 0x3d2c0bec, 0xb7b53b5b + .word 0x3fbfec91, 0x31dbe000 + .word 0x3d257554, 0x5ca333f2 + .word 0x3fc01285, 0x0a6df000 + .word 0x3d395e79, 0xadfe901b + .word 0x3fc02ebb, 0x42bf3000 + .word 0x3d3a95c1, 0x68c7fc69 + .word 0x3fc04aeb, 0x449f6000 + .word 0x3d2afa90, 0x65ccd35c + .word 0x3fc06715, 0x12ca5000 + .word 0x3d32dc54, 0x3191fae2 + .word 0x3fc08338, 0xaffa2000 + .word 0x3d30533c, 0xac823e27 + .word 0x3fc09f56, 0x1ee71000 + .word 0x3d33867d, 0x4754172c + .word 0x3fc0bb6d, 0x6247a000 + .word 0x3d35464f, 0x3ccd04b3 + .word 0x3fc0d77e, 0x7cd08000 + .word 0x3d3cb2cd, 0x2ee2f482 + .word 0x3fc0f389, 0x7134b000 + .word 0x3d02e530, 0xbb6149cf + .word 0x3fc10f8e, 0x42253000 + .word 0x3d336263, 0xde634e7c + .word 0x3fc12b8c, 0xf2518000 + .word 0x3d348a4a, 0x13c0a0fc + .word 0x3fc14785, 0x84674000 + .word 0x3d156345, 0x1027c750 + .word 0x3fc16377, 0xfb124000 + .word 0x3d091e1a, 0xbf41763e + .word 0x3fc17f64, 0x58fca000 + .word 0x3d2843fa, 0xd093c8dc + .word 0x3fc19b4a, 0xa0ced000 + .word 0x3d03bedb, 0x4ef663a7 + .word 0x3fc1b72a, 0xd52f6000 + .word 0x3d2e80a4, 0x1811a396 + .word 0x3fc1d304, 0xf8c35000 + .word 0x3d164aec, 0x82ebbef7 + .word 0x3fc1eed9, 0x0e2dc000 + .word 0x3d161563, 0x7097648f + .word 0x3fc20aa7, 0x18102000 + .word 0x3d3f2c94, 0x348552fe + .word 0x3fc2266f, 0x190a5000 + .word 0x3d3596fa, 0xa3df8c05 + .word 0x3fc24231, 0x13ba5000 + .word 0x3cfc5ff8, 0x71162641 + .word 0x3fc25ded, 
0x0abc6000 + .word 0x3d35a385, 0x4f176449 + .word 0x3fc279a3, 0x00ab4000 + .word 0x3d3ef432, 0xb3235108 + .word 0x3fc29552, 0xf81ff000 + .word 0x3d248d30, 0x1771c408 + .word 0x3fc2b0fc, 0xf3b1a000 + .word 0x3d177ca3, 0xe30a59ea + .word 0x3fc2cca0, 0xf5f5f000 + .word 0x3d128439, 0xb9403b82 + .word 0x3fc2e83f, 0x0180d000 + .word 0x3cee7aa7, 0xaf63c632 + .word 0x3fc303d7, 0x18e47000 + .word 0x3d3fa5fd, 0x28c704d4 + .word 0x3fc31f69, 0x3eb19000 + .word 0x3d32cc6c, 0x8d2e3482 + .word 0x3fc33af5, 0x75770000 + .word 0x3d3c9ecc, 0xa2fe72a5 + .word 0x3fc3567b, 0xbfc22000 + .word 0x3d3250d2, 0x53991a1f + .word 0x3fc371fc, 0x201e8000 + .word 0x3d3ee877, 0x9b2d8abc + .word 0x3fc38d76, 0x99164000 + .word 0x3d1844a5, 0x9e39bb70 + .word 0x3fc3a8eb, 0x2d31a000 + .word 0x3d1bafb7, 0x7d5d503e + .word 0x3fc3c459, 0xdef76000 + .word 0x3d3edc86, 0xf6b70d33 + .word 0x3fc3dfc2, 0xb0ecc000 + .word 0x3d28a72a, 0x62b8c13f + .word 0x3fc3fb25, 0xa5952000 + .word 0x3d3195be, 0x6b358ff7 + .word 0x3fc41682, 0xbf727000 + .word 0x3d377fdc, 0x7bf03db2 + .word 0x3fc431da, 0x01050000 + .word 0x3d304837, 0x836e0391 + .word 0x3fc44d2b, 0x6ccb7000 + .word 0x3d3a3ccf, 0xa7b2a1f1 + .word 0x3fc46877, 0x0542f000 + .word 0x3d03f5d0, 0x3957bc10 + .word 0x3fc483bc, 0xcce6e000 + .word 0x3d1eea52, 0x723f6369 + .word 0x3fc49efc, 0xc6313000 + .word 0x3d3cde14, 0xcc15551b + .word 0x3fc4ba36, 0xf39a5000 + .word 0x3d279568, 0x981bcc36 + .word 0x3fc4d56b, 0x5798e000 + .word 0x3d380580, 0x15a96555 + .word 0x3fc4f099, 0xf4a23000 + .word 0x3cf640d0, 0x50150d92 + .word 0x3fc50bc2, 0xcd29c000 + .word 0x3d1ada57, 0x28db8d4f + .word 0x3fc526e5, 0xe3a1b000 + .word 0x3d20de8b, 0x90075b8f + .word 0x3fc54203, 0x3a7a8000 + .word 0x3d268d68, 0xed855f0e + .word 0x3fc55d1a, 0xd4232000 + .word 0x3d3add94, 0xdda647e8 + .word 0x3fc5782c, 0xb3091000 + .word 0x3d28b739, 0x5d0d777d + .word 0x3fc59338, 0xd9982000 + .word 0x3cf0ba68, 0xb7555d4a + .word 0x3fc5ae3f, 0x4a3aa000 + .word 0x3d21ea25, 0xf012a8b9 + .word 0x3fc5c940, 0x07597000 + 
.word 0x3d15c9ad, 0xccb7337a + .word 0x3fc5e43b, 0x135bd000 + .word 0x3d278a96, 0x6224c79e + .word 0x3fc5ff30, 0x70a79000 + .word 0x3d1e9e43, 0x9f105039 + .word 0x3fc61a20, 0x21a0e000 + .word 0x3d3dd9dd, 0x1bdf3cdd + .word 0x3fc6350a, 0x28aaa000 + .word 0x3d2d5ec0, 0xab8163af + .word 0x3fc64fee, 0x8825f000 + .word 0x3d3896fc, 0xa298884b + .word 0x3fc66acd, 0x4272a000 + .word 0x3d3aa1bd, 0xbfc6c785 + .word 0x3fc685a6, 0x59eef000 + .word 0x3d3706ab, 0x49f7e6f6 + .word 0x3fc6a079, 0xd0f7a000 + .word 0x3d35a3f8, 0x448d14f5 + .word 0x3fc6bb47, 0xa9e80000 + .word 0x3d19f64d, 0x23ea3296 + .word 0x3fc6d60f, 0xe719d000 + .word 0x3d10e46a, 0xa3b2e266 + .word 0x3fc6f0d2, 0x8ae56000 + .word 0x3d369737, 0xc93373da + .word 0x3fc70b8f, 0x97a1a000 + .word 0x3d34ea64, 0xf6a95bef + .word 0x3fc72647, 0x0fa3f000 + .word 0x3d211641, 0xe3178b76 + .word 0x3fc740f8, 0xf5403000 + .word 0x3d2e9326, 0xcdfceabe + .word 0x3fc75ba5, 0x4ac8e000 + .word 0x3d3ddca5, 0x8bc4a7c0 + .word 0x3fc7764c, 0x128f2000 + .word 0x3d027490, 0x3479e3d1 + .word 0x3fc790ed, 0x4ee26000 + .word 0x3d199bbd, 0x4e7746f6 + .word 0x3fc7ab89, 0x0210d000 + .word 0x3d321237, 0xc6d65ad4 + .word 0x3fc7c61f, 0x2e673000 + .word 0x3d2b8da4, 0x99c82e40 + .word 0x3fc7e0af, 0xd630c000 + .word 0x3d139e7c, 0x1d8f1034 + .word 0x3fc7fb3a, 0xfbb75000 + .word 0x3d204815, 0xb73ec551 + .word 0x3fc815c0, 0xa1435000 + .word 0x3d2fab5a, 0x0dbfc630 + .word 0x3fc83040, 0xc91bc000 + .word 0x3d3e5b71, 0xc6e66f32 + .word 0x3fc84abb, 0x75865000 + .word 0x3d0392a9, 0x058ea173 + .word 0x3fc86530, 0xa8c70000 + .word 0x3d398bb0, 0xcb4ea3e3 + .word 0x3fc87fa0, 0x6520c000 + .word 0x3d322120, 0x401202fc + .word 0x3fc89a0a, 0xacd4e000 + .word 0x3d2c0bfb, 0xda8f5a72 + .word 0x3fc8b46f, 0x82236000 + .word 0x3d12d9f2, 0x102dd7c9 + .word 0x3fc8cece, 0xe74ad000 + .word 0x3d16917d, 0x56f5912d + .word 0x3fc8e928, 0xde886000 + .word 0x3d3a8154, 0xb13d72d5 + .word 0x3fc9037d, 0x6a180000 + .word 0x3d230dea, 0x57c1c8d9 + .word 0x3fc91dcc, 0x8c340000 + .word 
0x3d37bc6a, 0xbddeff46 + .word 0x3fc93816, 0x47159000 + .word 0x3d267385, 0x2b8b8c4f + .word 0x3fc9525a, 0x9cf45000 + .word 0x3d2ad1d9, 0x04c1d4e3 + .word 0x3fc96c99, 0x9006a000 + .word 0x3d2a88d5, 0x9cbb452c + .word 0x3fc986d3, 0x22818000 + .word 0x3cf93b56, 0x4dd44000 + .word 0x3fc9a107, 0x56988000 + .word 0x3d264aa6, 0x242cd098 + .word 0x3fc9bb36, 0x2e7df000 + .word 0x3d3706ab, 0xaf18f802 + .word 0x3fc9d55f, 0xac62d000 + .word 0x3ce732c0, 0x789487af + .word 0x3fc9ef83, 0xd2769000 + .word 0x3d3467a4, 0x26031900 + .word 0x3fca09a2, 0xa2e79000 + .word 0x3d311331, 0x195f76e6 + .word 0x3fca23bc, 0x1fe2b000 + .word 0x3d258c64, 0xdc46c1ea + .word 0x3fca3dd0, 0x4b938000 + .word 0x3d297da1, 0x366e2c5a + .word 0x3fca57df, 0x28244000 + .word 0x3d3b99c8, 0xca1d9abb + .word 0x3fca71e8, 0xb7bdf000 + .word 0x3d377a9a, 0xc887d66f + .word 0x3fca8bec, 0xfc882000 + .word 0x3d3e3185, 0xcf21b9cf + .word 0x3fcaa5eb, 0xf8a93000 + .word 0x3d2abead, 0x92d5cae2 + .word 0x3fcabfe5, 0xae461000 + .word 0x3d125c2b, 0x1a83b18e + .word 0x3fcad9da, 0x1f827000 + .word 0x3d1df520, 0xdff03ebe + .word 0x3fcaf3c9, 0x4e80b000 + .word 0x3d3fe5b1, 0x9cc03270 + .word 0x3fcb0db3, 0x3d620000 + .word 0x3d3fee14, 0x38eab906 + .word 0x3fcb2797, 0xee463000 + .word 0x3d105dd5, 0xbe4bfd5c + .word 0x3fcb4177, 0x634ba000 + .word 0x3d355d01, 0x5666069f + .word 0x3fcb5b51, 0x9e8fb000 + .word 0x3d2691ba, 0x27fdc19e + .word 0x3fcb7526, 0xa22e4000 + .word 0x3d2c0dbf, 0x2e785490 + .word 0x3fcb8ef6, 0x70420000 + .word 0x3d387533, 0x321788e0 + .word 0x3fcba8c1, 0x0ae46000 + .word 0x3d3a32e2, 0x9eee9d85 + .word 0x3fcbc286, 0x742d8000 + .word 0x3d39ac53, 0xf39d121c + .word 0x3fcbdc46, 0xae344000 + .word 0x3d3625b4, 0x023d6505 + .word 0x3fcbf601, 0xbb0e4000 + .word 0x3d2386a9, 0x47c378b5 + .word 0x3fcc0fb7, 0x9ccfd000 + .word 0x3d272000, 0xcc2eb551 + .word 0x3fcc2968, 0x558c1000 + .word 0x3d318146, 0x108e3ae0 + .word 0x3fcc4313, 0xe754e000 + .word 0x3d3279be, 0x74cad7d6 + .word 0x3fcc5cba, 0x543ae000 + .word 0x3d20929d, 
0xecb454fc + .word 0x3fcc765b, 0x9e4d6000 + .word 0x3d31ab6b, 0x36976f6c + .word 0x3fcc8ff7, 0xc79a9000 + .word 0x3d344358, 0x4bb03de6 + .word 0x3fcca98e, 0xd22f5000 + .word 0x3d3e9673, 0xe735df63 + .word 0x3fccc320, 0xc0176000 + .word 0x3d240903, 0x9a653794 + .word 0x3fccdcad, 0x935d1000 + .word 0x3d3cbe01, 0xf966cb77 + .word 0x3fccf635, 0x4e09c000 + .word 0x3d277123, 0x9a07d55b + .word 0x3fcd0fb7, 0xf2255000 + .word 0x3d3ca15a, 0x9bf3989b + .word 0x3fcd2935, 0x81b6b000 + .word 0x3d1f363f, 0xb5d55685 + .word 0x3fcd42ad, 0xfec35000 + .word 0x3d3a28ff, 0xc09fef63 + .word 0x3fcd5c21, 0x6b4fb000 + .word 0x3d3722b7, 0x221acbf2 + .word 0x3fcd758f, 0xc95ef000 + .word 0x3d3a97bd, 0x5d2fa755 + .word 0x3fcd8ef9, 0x1af31000 + .word 0x3d3abbe8, 0x0f26ce1f + .word 0x3fcda85d, 0x620ce000 + .word 0x3d240194, 0xc16cc7ec + .word 0x3fcdc1bc, 0xa0abe000 + .word 0x3d38fac1, 0xa628ccc6 + .word 0x3fcddb16, 0xd8ce9000 + .word 0x3d384421, 0xa3bed1d1 + .word 0x3fcdf46c, 0x0c722000 + .word 0x3d3a5e82, 0xb0b79039 + .word 0x3fce0dbc, 0x3d92a000 + .word 0x3d359233, 0xf0529bf1 + .word 0x3fce2707, 0x6e2af000 + .word 0x3d172f4f, 0x543fff10 + .word 0x3fce404d, 0xa034b000 + .word 0x3d2cf022, 0x3ecbb0ce + .word 0x3fce598e, 0xd5a87000 + .word 0x3d3c5d96, 0x861c2cec + .word 0x3fce72cb, 0x107da000 + .word 0x3d1dd48c, 0xcdf5471c + .word 0x3fce8c02, 0x52aa5000 + .word 0x3d34bfd2, 0x3f8b8c80 + .word 0x3fcea534, 0x9e23a000 + .word 0x3d381b93, 0x4c73ccb5 + .word 0x3fcebe61, 0xf4dd7000 + .word 0x3d3615d6, 0x67811ada + .word 0x3fced78a, 0x58ca8000 + .word 0x3d16f1b5, 0x3793387e + .word 0x3fcef0ad, 0xcbdc5000 + .word 0x3d326ca4, 0x31bca86e + .word 0x3fcf09cc, 0x50036000 + .word 0x3d3da094, 0x18d999db + .word 0x3fcf22e5, 0xe72f1000 + .word 0x3ce7561d, 0x7d037c19 + .word 0x3fcf3bfa, 0x934d6000 + .word 0x3d2d9f2a, 0x937b903b + .word 0x3fcf550a, 0x564b7000 + .word 0x3d366e0e, 0x2fb6fe81 + .word 0x3fcf6e15, 0x32153000 + .word 0x3d0b2b44, 0x29d89c5c + .word 0x3fcf871b, 0x28955000 + .word 0x3ce14052, 0xb5b2204b + 
.word 0x3fcfa01c, 0x3bb57000 + .word 0x3d397823, 0x81478a1f + .word 0x3fcfb918, 0x6d5e3000 + .word 0x3d3c551a, 0xaa8cd86f + .word 0x3fcfd20f, 0xbf76f000 + .word 0x3d3b8ea9, 0x234e4064 + .word 0x3fcfeb02, 0x33e60000 + .word 0x3d2f316e, 0x32d5e8c7 + .word 0x3fd001f7, 0xe6484000 + .word 0x3d38a957, 0x40c9abbc + .word 0x3fd00e6c, 0x45ad5000 + .word 0x3cdcc68d, 0x52e01203 + .word 0x3fd01ade, 0x39139000 + .word 0x3d4deed9, 0xe6647d5c + .word 0x3fd0274d, 0xc16c2000 + .word 0x3d2979e8, 0x9cf835c2 + .word 0x3fd033ba, 0xdfa74000 + .word 0x3d0c30bc, 0x1485bdff + .word 0x3fd04025, 0x94b4d000 + .word 0x3cf036b8, 0x9ef42d7f + .word 0x3fd04c8d, 0xe1841000 + .word 0x3d4c0328, 0xb5da628f + .word 0x3fd058f3, 0xc703e000 + .word 0x3d478bcc, 0xa196e4a9 + .word 0x3fd06557, 0x46227000 + .word 0x3d0131df, 0xb4868d6a + .word 0x3fd071b8, 0x5fcd5000 + .word 0x3d421a3a, 0x2e0ff2f8 + .word 0x3fd07e17, 0x14f1c000 + .word 0x3d40819c, 0xd863da16 + .word 0x3fd08a73, 0x667c5000 + .word 0x3d3ebc1d, 0x40c5a329 + .word 0x3fd096cd, 0x55591000 + .word 0x3d3f998d, 0x20550a31 + .word 0x3fd0a324, 0xe2739000 + .word 0x3d0c6bee, 0x7ef4030e + .word 0x3fd0af7a, 0x0eb6c000 + .word 0x3d23ccf9, 0x4945adad + .word 0x3fd0bbcc, 0xdb0d2000 + .word 0x3d32f32c, 0xcc5dcdfb + .word 0x3fd0c81d, 0x4860a000 + .word 0x3d40d218, 0x5ff17467 + .word 0x3fd0d46b, 0x579ab000 + .word 0x3d3d2c81, 0xf640e1e6 + .word 0x3fd0e0b7, 0x09a43000 + .word 0x3d32a038, 0xa7862f2a + .word 0x3fd0ed00, 0x5f657000 + .word 0x3d4b48e2, 0xb5e955ff + .word 0x3fd0f947, 0x59c66000 + .word 0x3d4356cf, 0x407bf3a5 + .word 0x3fd1058b, 0xf9ae4000 + .word 0x3d45aa31, 0x3f415699 + .word 0x3fd111ce, 0x4003e000 + .word 0x3d4c99b9, 0x1ed29693 + .word 0x3fd11e0e, 0x2dad9000 + .word 0x3d496e01, 0xdc0cc691 + .word 0x3fd12a4b, 0xc3911000 + .word 0x3d452c57, 0xcf5c66d4 + .word 0x3fd13687, 0x0293a000 + .word 0x3d4160bd, 0xb314c76f + .word 0x3fd142bf, 0xeb9a0000 + .word 0x3d31ce61, 0x85b58a9e + .word 0x3fd14ef6, 0x7f886000 + .word 0x3d40b42c, 0xd101b436 + .word 
0x3fd15b2a, 0xbf428000 + .word 0x3d489c71, 0x2d927594 + .word 0x3fd1675c, 0xababa000 + .word 0x3d38380e, 0x731f55c4 + .word 0x3fd1738c, 0x45a66000 + .word 0x3d431c8b, 0x7fe69f45 + .word 0x3fd17fb9, 0x8e150000 + .word 0x3d42baba, 0x2c5aecbe + .word 0x3fd18be4, 0x85d93000 + .word 0x3d3c167f, 0x6f3604ab + .word 0x3fd1980d, 0x2dd42000 + .word 0x3d2b7b3a, 0x7a361c9a + .word 0x3fd1a433, 0x86e67000 + .word 0x3d4e857a, 0xf9cb1f55 + .word 0x3fd1b057, 0x91f07000 + .word 0x3d46915c, 0xc91d50e9 + .word 0x3fd1bc79, 0x4fd1c000 + .word 0x3d419879, 0xc5c22c21 + .word 0x3fd1c898, 0xc1699000 + .word 0x3d43f5f7, 0x8d1cea80 + .word 0x3fd1d4b5, 0xe796a000 + .word 0x3d222a5b, 0xd197bac2 + .word 0x3fd1e0d0, 0xc3371000 + .word 0x3d3af8f2, 0xa9b0d4a0 + .word 0x3fd1ece9, 0x5528a000 + .word 0x3d4cf630, 0x9ec96b89 + .word 0x3fd1f8ff, 0x9e48a000 + .word 0x3d27946c, 0x040cbe77 + .word 0x3fd20513, 0x9f73b000 + .word 0x3cf6e15e, 0x1609e0a4 + .word 0x3fd21125, 0x59861000 + .word 0x3d382e78, 0xba2950c4 + .word 0x3fd21d34, 0xcd5b9000 + .word 0x3d3b552f, 0xb28badaa + .word 0x3fd22941, 0xfbcf7000 + .word 0x3d42cb44, 0x850a7b4f + .word 0x3fd2354c, 0xe5bc8000 + .word 0x3d414389, 0x7cfeacce + .word 0x3fd24155, 0x8bfd1000 + .word 0x3d300fff, 0x3228fcad + .word 0x3fd24d5b, 0xef6ae000 + .word 0x3d4ff114, 0x3f81b02a + .word 0x3fd25960, 0x10df7000 + .word 0x3d38e7bc, 0x224ea3e3 + .word 0x3fd26561, 0xf1338000 + .word 0x3d38b488, 0x66faa45f + .word 0x3fd27161, 0x913f8000 + .word 0x3d34f4f1, 0xf61564b4 + .word 0x3fd27d5e, 0xf1db5000 + .word 0x3d4e6dc8, 0xb8735361 + .word 0x3fd2895a, 0x13de8000 + .word 0x3d3a8d7a, 0xd24c13f0 + .word 0x3fd29552, 0xf81ff000 + .word 0x3d348d30, 0x1771c408 + .word 0x3fd2a149, 0x9f762000 + .word 0x3d479220, 0x57062a92 + .word 0x3fd2ad3e, 0x0ab73000 + .word 0x3d2b972e, 0x488c359f + .word 0x3fd2b930, 0x3ab89000 + .word 0x3d4a493b, 0x4a5013d7 + .word 0x3fd2c520, 0x304f8000 + .word 0x3d230852, 0x8c342f39 + .word 0x3fd2d10d, 0xec508000 + .word 0x3d360c61, 0xf7088353 + .word 0x3fd2dcf9, 
0x6f8fd000 + .word 0x3d20b4a2, 0x8e33c9ce + .word 0x3fd2e8e2, 0xbae11000 + .word 0x3d4a6138, 0x5992350a + .word 0x3fd2f4c9, 0xcf17a000 + .word 0x3d371f04, 0x9374b87b + .word 0x3fd300ae, 0xad063000 + .word 0x3d342f56, 0x8b75fcac + .word 0x3fd30c91, 0x557f1000 + .word 0x3d4d7ad4, 0xebd75d15 + .word 0x3fd31871, 0xc9544000 + .word 0x3d184fab, 0x94cecfd9 + .word 0x3fd32450, 0x09570000 + .word 0x3d3d271b, 0x9bdae59d + .word 0x3fd3302c, 0x16586000 + .word 0x3d36217d, 0xc2a3e08b + .word 0x3fd33c05, 0xf128d000 + .word 0x3d4b51be, 0x71fc7961 + .word 0x3fd347dd, 0x9a987000 + .word 0x3d4aa9ac, 0x8ace9fdc + .word 0x3fd353b3, 0x1376d000 + .word 0x3d4d99ca, 0x0327b24d + .word 0x3fd35f86, 0x5c932000 + .word 0x3d427c10, 0xd8af2d5b + .word 0x3fd36b57, 0x76bc1000 + .word 0x3d116978, 0x5a9c223f + .word 0x3fd37726, 0x62bfd000 + .word 0x3d40b5e4, 0xa9d627ef + .word 0x3fd382f3, 0x216c4000 + .word 0x3d4df3c5, 0xbc5cb012 + .word 0x3fd38ebd, 0xb38ed000 + .word 0x3d290582, 0xe67d4ca0 + .word 0x3fd39a86, 0x19f45000 + .word 0x3d18ee51, 0x937354f5 + .word 0x3fd3a64c, 0x55694000 + .word 0x3d37a71c, 0xbcd735d0 + .word 0x3fd3b210, 0x66b9b000 + .word 0x3d461f09, 0x33f754f9 + .word 0x3fd3bdd2, 0x4eb14000 + .word 0x3d46d425, 0xb478c893 + .word 0x3fd3c992, 0x0e1b2000 + .word 0x3d141c28, 0xaa680b76 + .word 0x3fd3d54f, 0xa5c1f000 + .word 0x3d3c3e1c, 0xd9a395e3 + .word 0x3fd3e10b, 0x16701000 + .word 0x3d3f3bcf, 0x145429c7 + .word 0x3fd3ecc4, 0x60ef5000 + .word 0x3d4e9fd7, 0x9d83ecff + .word 0x3fd3f87b, 0x86093000 + .word 0x3d451014, 0x55d3b3bc + .word 0x3fd40430, 0x8686a000 + .word 0x3d3f8ef4, 0x3049f7d3 + .word 0x3fd40fe3, 0x63303000 + .word 0x3d3e5c5f, 0xe79f05c6 + .word 0x3fd41b94, 0x1cce0000 + .word 0x3d47dcb7, 0xf60de01c + .word 0x3fd42742, 0xb427d000 + .word 0x3d433c6c, 0x7ea3ecc5 + .word 0x3fd432ef, 0x2a04e000 + .word 0x3d40276b, 0x3674752a + .word 0x3fd43e99, 0x7f2c1000 + .word 0x3d1c3f72, 0x40c41a04 + .word 0x3fd44a41, 0xb463c000 + .word 0x3d31ee28, 0xf37cf612 + .word 0x3fd455e7, 0xca720000 + 
.word 0x3d1ad8c6, 0x36629aed + .word 0x3fd4618b, 0xc21c5000 + .word 0x3d4d84fa, 0x16f66f66 + .word 0x3fd46d2d, 0x9c280000 + .word 0x3d359b27, 0x5f67f75a + .word 0x3fd478cd, 0x5959b000 + .word 0x3d2ec89b, 0xf0c8d098 + .word 0x3fd4846a, 0xfa75b000 + .word 0x3d4a7057, 0x47219c8d + .word 0x3fd49006, 0x80400000 + .word 0x3d43a198, 0x00f2f83a + .word 0x3fd49b9f, 0xeb7c1000 + .word 0x3d3dac1c, 0x58ab60d7 + .word 0x3fd4a737, 0x3cecf000 + .word 0x3d432ee5, 0x8a0655db + .word 0x3fd4b2cc, 0x75555000 + .word 0x3d43f81a, 0x1c3a02db + .word 0x3fd4be5f, 0x95777000 + .word 0x3d4141b6, 0x993293ee + .word 0x3fd4c9f0, 0x9e152000 + .word 0x3d487888, 0x63c7f488 + .word 0x3fd4d57f, 0x8fefe000 + .word 0x3d23f926, 0x7fd06868 + .word 0x3fd4e10c, 0x6bc8a000 + .word 0x3cf8283f, 0x1636f061 + .word 0x3fd4ec97, 0x32600000 + .word 0x3d234d7a, 0xaf04d104 + .word 0x3fd4f81f, 0xe4763000 + .word 0x3d4a00c2, 0x6f2c03dd + .word 0x3fd503a6, 0x82cb1000 + .word 0x3d4965cd, 0xc3a41929 + .word 0x3fd50f2b, 0x0e1e0000 + .word 0x3d3a0940, 0x8c47b8d8 + .word 0x3fd51aad, 0x872df000 + .word 0x3d405a13, 0x927ac19f + .word 0x3fd5262d, 0xeeb98000 + .word 0x3d40f230, 0x47bb5b00 + .word 0x3fd531ac, 0x457ee000 + .word 0x3d3df83b, 0x7d931501 + .word 0x3fd53d28, 0x8c3bd000 + .word 0x3d4ddd8d, 0x029240a7 + .word 0x3fd548a2, 0xc3add000 + .word 0x3d23167e, 0x63081cf7 + .word 0x3fd5541a, 0xec91b000 + .word 0x3d4f3f4a, 0xa91c688a + .word 0x3fd55f91, 0x07a43000 + .word 0x3d4dc337, 0x10e416b4 + .word 0x3fd56b05, 0x15a18000 + .word 0x3d29247b, 0xbc4a23fc + .word 0x3fd57677, 0x17455000 + .word 0x3d44d8a9, 0x356d941b + .word 0x3fd581e7, 0x0d4b2000 + .word 0x3d4c19c3, 0xc9da4e1c + .word 0x3fd58d54, 0xf86e0000 + .word 0x3d2791f3, 0x0a795215 + .word 0x3fd598c0, 0xd9687000 + .word 0x3d43d05b, 0x4793492e + .word 0x3fd5a42a, 0xb0f4c000 + .word 0x3d4fc338, 0xa1a4108b + .word 0x3fd5af92, 0x7fccd000 + .word 0x3d4c7f9a, 0x01400711 + .word 0x3fd5baf8, 0x46aa1000 + .word 0x3d46328b, 0x83c602e0 + .word 0x3fd5c65c, 0x06459000 + .word 
0x3d4300fc, 0xff3f88cd + .word 0x3fd5d1bd, 0xbf580000 + .word 0x3d4394a1, 0x1b1c1ee4 + .word 0x3fd5dd1d, 0x7299b000 + .word 0x3d43a84f, 0x3bf518f5 + .word 0x3fd5e87b, 0x20c29000 + .word 0x3d3527d1, 0x8f7738fa + .word 0x3fd5f3d6, 0xca8a2000 + .word 0x3d37af84, 0x8e19cc75 + .word 0x3fd5ff30, 0x70a79000 + .word 0x3d2e9e43, 0x9f105039 + .word 0x3fd60a88, 0x13d1a000 + .word 0x3d36e9b9, 0xc879af55 + .word 0x3fd615dd, 0xb4bec000 + .word 0x3d13c7ca, 0x90bc04b2 + .word 0x3fd62131, 0x5424e000 + .word 0x3d463e81, 0xdaacbccc + .word 0x3fd62c82, 0xf2b9c000 + .word 0x3d3e54bd, 0xbd7c8a98 + .word 0x3fd637d2, 0x91329000 + .word 0x3d450450, 0x865165ea + .word 0x3fd64320, 0x30444000 + .word 0x3d3efe02, 0x7a01d7df + .word 0x3fd64e6b, 0xd0a35000 + .word 0x3d2afe80, 0x69d61295 + .word 0x3fd659b5, 0x7303e000 + .word 0x3d1f281d, 0xb0af8efc + .word 0x3fd664fd, 0x1819b000 + .word 0x3d418e55, 0xe463b5fe + .word 0x3fd67042, 0xc0983000 + .word 0x3d4c6148, 0xdbdcf10d + .word 0x3fd67b86, 0x6d327000 + .word 0x3d438fd6, 0x3ea11c64 + .word 0x3fd686c8, 0x1e9b1000 + .word 0x3d32bb11, 0x0af84054 + .word 0x3fd69207, 0xd5845000 + .word 0x3d43a44f, 0x4861e4ab + .word 0x3fd69d45, 0x92a03000 + .word 0x3d38b1bd, 0xbf97ffa6 + .word 0x3fd6a881, 0x56a03000 + .word 0x3d420e9b, 0xd9d37351 + .word 0x3fd6b3bb, 0x22359000 + .word 0x3d30f625, 0x7a933268 + .word 0x3fd6bef2, 0xf6111000 + .word 0x3d48f8fc, 0x947d5965 + .word 0x3fd6ca28, 0xd2e34000 + .word 0x3d430ad0, 0xb8c49166 + .word 0x3fd6d55c, 0xb95c3000 + .word 0x3d39b9c8, 0xae9a6ee2 + .word 0x3fd6e08e, 0xaa2ba000 + .word 0x3d1e38c1, 0x39318d71 + .word 0x3fd6ebbe, 0xa600e000 + .word 0x3d4cce14, 0xc7dd17dd + .word 0x3fd6f6ec, 0xad8b2000 + .word 0x3d249058, 0xfdf08376 + .word 0x3fd70218, 0xc178e000 + .word 0x3d42a947, 0x0e225428 + .word 0x3fd70d42, 0xe2789000 + .word 0x3d21aead, 0x337ee287 + .word 0x3fd7186b, 0x11381000 + .word 0x3d1934e2, 0x677d272b + .word 0x3fd72391, 0x4e650000 + .word 0x3d0c1d52, 0xbdc87d8a + .word 0x3fd72eb5, 0x9aac9000 + .word 0x3d4dd010, 
0xd08a7a15 +!! TBL - end + +! constants: + .align 64 +CONSTANTS: + .word 0x40000000,0x00000000 + .word 0x3fe55555,0x555571da + .word 0x3fd99999,0x8702be3a + .word 0x3fd24af7,0x3f4569b1 + .word 0x3ea62e42,0xfee00000 ! scaled by 2**-20 + .word 0x3caa39ef,0x35793c76 ! scaled by 2**-20 + .word 0xfffffc00,0x00000000 ! ELEVENBIT + .word 0x43200000 + .word 0xfff00000 + .word 0xc0190200 ! ELEVENBIT + .word 0x0200 ! ELEVENBIT + +#define two 0x00 +#define A1 0x08 +#define A2 0x10 +#define A3 0x18 +#define ln2hi 0x20 +#define ln2lo 0x28 +#define mask 0x30 +#define ox43200000 0x38 +#define oxfff00000 0x3c +#define oxc0194000 0x40 +#define ox4000 0x44 + + +! local storage indices + +#define jnk STACK_BIAS-0x8 +#define tmp2 STACK_BIAS-0x10 +#define tmp1 STACK_BIAS-0x18 +#define tmp0 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x28 +#define tmp4 STACK_BIAS-0x30 +#define tmp5 STACK_BIAS-0x38 +#define tmp6 STACK_BIAS-0x40 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 + +! g1 TBL + +! l0 j0 +! l1 j1 +! l2 j2 +! l3 +! l4 0x94000 +! l5 CONSTANTS +! l6 0x000fffff +! l7 0x7ff00000 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 used in primary range bounds check +! o4 used in primary range bounds check +! o5 used in .rangeI check section as temporary +! o7 NOT USED + +! f0 u0,q0 +! f2 v0,(two-v0)-u0,z0 +! f4 n0,f0,q0 +! f6 s0 +! f8 q +! f10 u1,q1 +! f12 v1,(two-v1)-u1,z1 +! f14 n1,f1,q1 +! f16 s1 +! f18 t ! now tmp0 storage +! f20 u2,q2 +! f22 v2,(two-v2)-u2,q2 +! f24 n2,f2,q2 +! f26 s2 +! f28 0xfff00000 +! f29 0x43200000 +! f30 0x4000 +! f31 0xc0194000 +! f32 t0 +! f34 h0,f0-(c0-h0) +! f36 c0 +! f38 A1 +! f40 two +! f42 t1 +! f44 h1,f1-(c1-h1) +! f46 c1 +! f48 A2 +! f50 0xffff8000... or 0xfffffc00 for 6 or 11 bit tbl resp +! f52 t2 +! f54 h2,f2-(c2-h2) +! f56 c2 +! f58 A3 now tmp1 storage +! f60 ln2hi +! 
f62 ln2lo +!-------------------------------------------------------------------- +!-------------------------------------------------------------------- +! PREFETCH info +#define PREFETCH_MULT_READS 0 +!-------------------------------------------------------------------- +!-------------------------------------------------------------------- +! define pipes for easier reading + +#define ICNT %i0 + +#define XPTR %i1 +#define XSTR %i2 +#define YPTR %i3 +#define YSTR %i4 + +#define RANGE_LO %l6 +#define RANGE_HI %l7 + +#define P0_X1 %f0 +#define P0_f1 %f1 +#define P0_f2 %f2 +#define P0_f3 %f3 +#define P0_f4 %f4 +#define P0_f5 %f5 +#define P0_f6 %f6 +#define P0_f7 %f7 +!#define P0_f8 %f8 +#define T0_f8 %f8 +#define P0_f9 %f9 + +#define P1_X2 %f10 +#define P1_f11 %f11 +#define P1_f12 %f12 +#define P1_f13 %f13 +#define P1_f14 %f14 +#define P1_f15 %f15 +#define P1_f16 %f16 +#define P1_f17 %f17 + +!#define P1_f18 %f18 +#define T1_f18 %f18 + +#define P1_f19 %f19 + +#define P2_X3 %f20 +#define P2_f21 %f21 +#define P2_f22 %f22 +#define P2_f23 %f23 +#define P2_f24 %f24 +#define P2_f25 %f25 +#define P2_f26 %f26 +#define P2_f27 %f27 +#define INF_f28 %f28 +#define CONSTE432_f29 %f29 + +#define CONST_f30 %f30 + +#define TTOPMSK %f31 + +#define P0_f32 %f32 +#define P0_f34 %f34 +#define P0_f36 %f36 + +#define P1_f42 %f42 +#define P1_f44 %f44 +#define P1_f46 %f46 + +#define P2_f52 %f52 +#define P2_f54 %f54 +#define P2_f56 %f56 + +#define G1_TBL %g1 +#define L5_CONSTANTS %l5 +#define FP40_TWO %f40 +#define FP38_A1 %f38 +#define FP48_A2 %f48 +#define FP50_MASK %f50 +!!!#define FP58_A3 %f58 +#define T2_f58 %f58 +#define FP60_LN2HI %f60 +#define FP62_LN2LO %f62 + + +!-------------------------------------------------------------------- + + ENTRY(__vlog_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,CONSTANTS,l5) + PIC_SET(l7,TBL,o0) + mov %o0,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + + ld [XPTR],%l0 ! quickly !X1 + + sethi %hi(0x90200),%l4 ! 
ELEVENBIT + or %l4,%lo(0x90200),%l4 ! ELEVENBIT + ldd [XPTR],P0_X1 ! u.l[0] = *x !X1 + sethi %hi(0x000fffff),RANGE_LO + or RANGE_LO,%lo(0x000fffff),RANGE_LO + sethi %hi(0x7ff00000),RANGE_HI + ldd [L5_CONSTANTS+two],FP40_TWO + fzero P1_X2 + fzero P2_X3 + ldd [L5_CONSTANTS+A1],FP38_A1 + ldd [L5_CONSTANTS+A2],FP48_A2 + ldd [L5_CONSTANTS+ln2hi],FP60_LN2HI + ldd [L5_CONSTANTS+ln2lo],FP62_LN2LO + ldd [L5_CONSTANTS+mask],FP50_MASK + ld [L5_CONSTANTS+ox43200000],CONSTE432_f29 + ld [L5_CONSTANTS+oxfff00000],INF_f28 + ld [L5_CONSTANTS+oxc0194000],TTOPMSK + fpadd32s P0_X1,TTOPMSK,P0_f2 ! X+TTOP !X1 START + ld [L5_CONSTANTS+ox4000],CONST_f30 + sll XSTR,3,XSTR ! scale strides + sll YSTR,3,YSTR + add %fp,jnk,%o0 ! precondition loop + fands P0_f2,INF_f28,P0_f2 ! (X+TTOP)&INF->n X1 +! st P0_X1,[%fp+tmp0] !BYPASS in + fzero P0_f4 + fzero P0_f6 +! ld [%fp+tmp0],%l0 !BYPASS out ix X1 + add %fp,jnk,%o1 + add %fp,jnk,%o2 + fzero P0_f32 + fzero P0_f34 + fzero P0_f36 + fzero P1_f12 + sub %l0,RANGE_HI,%o3 ! bounds for X1 + sub RANGE_LO,%l0,%o4 ! bounds for X1 + fzero P1_f14 + fzero P1_f16 + sub YPTR,YSTR,YPTR + fzero P1_f42 + mov %g0,%l1 ! zero out for first pass + mov %g0,%l2 ! zero out for first pass + fzero P1_f44 + fzero P1_f46 + fzero T0_f8 + fzero T1_f18 + fzero T2_f58 + fzero P2_f24 + fzero P2_f26 + fzero P2_f52 + fzero P2_f54 + fzero P2_f56 + ba .loop0 + std P2_f26,[%fp+tmp2] + + .align 16 +! -- 16 byte aligned +.loop0: +!############################# AREA 1 (0-19) ###################################! +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<=0x7ff00000 ! X1 +! delay slot + nop + ! x , n , reduction + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 1 + fpsub32s P0_X1,P0_f2,P0_X1 ! X - n -> x ! X1 + add XPTR,XSTR,XPTR ! x += stridex + add YPTR,YSTR,YPTR ! y += stridey ! + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 2 +.cont0: + ! n*l2lo , lylo + faddd P0_f4,P0_f34,P0_f34 !n*l2lo,lylo ! X1-2 + ! TBL calc + add %l0,%l4,%l0 ! 
j = ix + 0x94000 X1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 3 + fsubd FP40_TWO,P2_f24,P2_f24 ! two - xT ! X3-2 + + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>11)&0x1f0 !ELEVENBIT ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 7 + fsubd P2_f24,P2_X3,P2_f24 ! (two - xT) - x ! !X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< xT i.e 11bit value of x + fand P0_f4,FP50_MASK,P0_f4 ! xT ! X1 + + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 9 + faddd P0_f36,P0_f32,P0_f36 ! + (x-xT) X1-2 + and %l0,0x3ff,%l0 ! ELEVENBIT ! X1 + st P1_X2,[%fp+tmp0] !BYPASS in ! X2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 10 + fmuld P1_f46,P1_f44,P1_f46 ! s^2*A2+A1 , s^2 X2-2 + ldd [G1_TBL+%l1],P1_f44 !lylo ! X2-2 + sub %l1,8,%l1 ! get back ptr to lyhi X2-2 + faddd P1_f12,P1_f44,P1_f44 !n*l2lo,lylo ! X2-2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 11 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< s + fdivd P0_f32,P0_f6,P0_f6 ! -> s ! X1 + faddd P1_f46,P1_f44,P1_f46 ! + n*l2lo+lylo X2-2 + ldd [G1_TBL+%l1],P1_f44 ! ld lyhi ! X2-2 + mov %l3,%l1 ! BYPASS temp ! X2 + ! wrap !!! done for X0 + std P0_f36,[%o0] ! X1-2 FINI + mov YPTR,%o0 ! X1-2 INC + + addcc ICNT,-1,ICNT ! + ble,pn %icc,.endloop0 ! +! delay slot + nop + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 21 +! -- 16 byte aligned +.loop1: + sub %l1,RANGE_HI,%o3 ! bounds for X2 + sub RANGE_LO,%l1,%o4 ! bounds for X2 + andcc %o3,%o4,%o4 ! X2 + bge,pn %icc,.range1 ! ix<=0x000fffff or >=0x7ff00000 ! X2 +! delay slot + nop + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 22 + fpsub32s P1_X2,P1_f12,P1_X2 ! X - n -> x ! X2 + add XPTR,XSTR,XPTR ! x += stridex + add YPTR,YSTR,YPTR ! y += stridey ! +.cont1: + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 23 + fmuld P2_f54,FP48_A2,P2_f56 ! s^2,A2! 
X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>11)&0x1f0 !ELEVENBIT ! X2 + faddd P2_f56,P2_f24,P2_f56 ! + 2-xT-x X3-2 + + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< s ! X2 +! -- 16 byte aligned + addcc ICNT,-1,ICNT ! + ble,pn %icc,.endloop1 ! + nop +.loop2: + + sub %l2,RANGE_HI,%o3 ! bounds for X3 + sub RANGE_LO,%l2,%o4 ! bounds for X3 + andcc %o3,%o4,%o4 ! X3 + bge,pn %icc,.range2 ! ix<=0x000fffff or >=0x7ff00000 ! X3 +! delay slot + nop +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 42 + fpsub32s P2_X3,P2_f22,P2_X3 ! X - n -> x ! X3 + add XPTR,XSTR,XPTR ! x += stridex + add YPTR,YSTR,YPTR ! y += stridey ! +.cont2: + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 43 + sll %l1,4,%l1 ! ELEVENBIT ! X2 + fmuld T2_f58,FP60_LN2HI,T2_f58 ! n*l2hi ! X3-2 + faddd P2_f56,P2_f54,P2_f56 ! + n*l2lo+lylo X3-2 + + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>11)&0x1f0 !ELEVENBIT ! X3 + ldda [XPTR]%asi,P0_X1 ! X1-nextX START + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 45 + st P0_X1,[%fp+tmp0] !BYPASS in ! X1-nextX + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 46 + fpadd32s P2_X3,CONST_f30,P2_f24 ! x round up X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 47 + add %l1,8,%l1 ! X2 + faddd P2_f56,P2_f52,P2_f56 ! + (x-xT) X3-2 + +!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< n + fands P0_f2,INF_f28,P0_f2 ! X1-nextX + ! n * l2lo + fmuld T0_f8,FP62_LN2LO,P0_f4 ! n*l2lo ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 58 + fsubd P2_X3,P2_f24,P2_f52 ! x-xT ! X3 +!BEST ld [%fp+tmp0],%l3 !BYPASS out ! 
X1-nextX + ld [%fp+tmp0],%l3 !BYPASS out ! X1-nextX + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 59 + fitod P2_f22,T2_f58 ! (double) n ! X3 + std P2_f56,[%o2] ! X3 FINI + mov YPTR,%o2 ! X3 INC + +!############################# AREA 4 (OVERFLOW) ###################################! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 60 + ! s * (s^2(s^2*A1+A1) + (2-xT-x)) + fmuld P0_f6,P0_f36,P0_f36 ! s*(POLY) ! X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 61 + fmuld P1_f16,P1_f16,P1_f44 ! z = s * s ! !X2 + ! lyhi + ldd [G1_TBL+%l0],P0_f6 ! ld lyhi ! X1 + mov %l3,%l0 ! BYPASS tmp for X1 ! X1 + sub %l0,RANGE_HI,%o3 ! bounds for X1 + sub RANGE_LO,%l0,%o4 ! bounds for X1 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 62 + addcc ICNT,-1,ICNT ! +! FALL THROUGH if running out of X array here + bg,pt %icc,.loop0 !62 +! delay slot + fdivd P2_f52,P2_f26,P2_f26 ! -> s ! X3 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 63 +!LOSTC + + + + + + ! Once we get to the last element, we loop three more times to finish + ! the computations in progress. This means we will load past the end + ! of the argument vector, but since we use non-faulting loads and never + ! use the data, the only potential problem is cache miss. (Strictly + ! speaking, since we pad the argument vector with twos, we incorrectly + ! raise inexact if the actual argument vector is all ones.) + .endloop2: + sethi %hi(0x40000000),%l0 ! "next argument" = two + sub %l0,RANGE_HI,%o3 ! bnds chk x1 !54 + sub RANGE_LO,%l0,%o4 ! bounds chk x1 !54 + fmovd FP40_TWO,P0_X1 + cmp ICNT,-3 + bg,a,pt %icc,.loop0 + ! delay slot + fpadd32s P0_X1,TTOPMSK,P0_f2 ! n=(ix+0xc0194000)&0xfff00000 + ret + restore + + .align 16 + .endloop0: + sethi %hi(0x40000000),%l1 ! "next argument" = two + fmovd FP40_TWO,P1_X2 + cmp ICNT,-3 + bg,a,pt %icc,.loop1 + ! delay slot + fpadd32s P1_X2,TTOPMSK,P1_f12 ! n=(ix+0xc0194000)&0xfff00000 + ret + restore + + .align 16 + .endloop1: + sethi %hi(0x40000000),%l2 ! "next argument" = two + fmovd FP40_TWO,P2_X3 + cmp ICNT,-3 + bg,a,pt %icc,.loop2 + ! 
delay slot + fpadd32s P2_X3,TTOPMSK,P2_f22 ! n=(ix+0xc0194000)&0xfff00000 + ret + restore + + + .align 16 + .range0: + cmp %l0,RANGE_HI + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 + ! delay slot + ld [XPTR+4],%o5 + !THERE + fxtod P0_X1,P0_X1 ! scale by 2**1074 w/o trapping + st P0_X1,[%fp+tmp0] !BYPASS in + add XPTR,XSTR,XPTR ! x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1f ! if x == 0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + ! HERE + fpadd32s P0_X1,TTOPMSK,P0_f2 ! n = (ix + 0xc0194000) & 0xfff00000 + fands P0_f2,INF_f28,P0_f2 + fpsub32s P0_X1,P0_f2,P0_X1 ! u.l[0] -= n + ld [%fp+tmp0],%l0 !BYPASS out + ba,pt %icc,.cont0 + ! delay slot + fpsub32s P0_f2,CONSTE432_f29,P0_f2 ! n -= 0x43200000 + 1: + fdivs CONSTE432_f29,P0_f1,P0_f2 ! raise div-by-zero + ba,pt %icc,3f + ! delay slot + st INF_f28,[YPTR] ! store -inf + 2: + sll %l0,1,%l0 ! lop off sign bit + add XPTR,XSTR,XPTR ! x += stridex + orcc %l0,%o5,%g0 + be,pn %icc,1b ! if x == -0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fzero P0_f2 ! *y = (x < 0.0? 0.0 : x) * inf + fcmpd %fcc0,P0_X1,P0_f2 + fmovdl %fcc0,P0_f2,P0_X1 + fand INF_f28,FP50_MASK,P0_f2 + fnegd P0_f2,P0_f2 + fmuld P0_X1,P0_f2,P0_X1 + st P0_X1,[YPTR] + 3: + addcc ICNT,-1,ICNT + ble,pn %icc,.endloop2 + ! delay slot + st P0_f1,[YPTR+4] + ld [XPTR],%l0 ! get next argument + sub %l0,RANGE_HI,%o3 ! bnds chk x1 !54 + sub RANGE_LO,%l0,%o4 ! bounds chk x1 !54 + ldd [XPTR],P0_X1 + fpadd32s P0_X1,TTOPMSK,P0_f2 ! n=(ix+0xc0194000)&0xfff00000 + ba,pt %icc,.loop0 + ! delay slot + fands P0_f2,INF_f28,P0_f2 !58 + + + .align 16 + .range1: + cmp %l1,RANGE_HI + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 + ! delay slot + ld [XPTR+4],%o5 + fxtod P1_X2,P1_X2 ! scale by 2**1074 w/o trapping + st P1_X2,[%fp+tmp1] + add XPTR,XSTR,XPTR ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1f ! if x == 0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fpadd32s P1_X2,TTOPMSK,P1_f12 ! 
n = (ix + 0xc0194000) & 0xfff00000 + fands P1_f12,INF_f28,P1_f12 + fpsub32s P1_X2,P1_f12,P1_X2 ! u.l[0] -= n + ld [%fp+tmp1],%l1 + ba,pt %icc,.cont1 + ! delay slot + fpsub32s P1_f12,CONSTE432_f29,P1_f12 ! n -= 0x43200000 + 1: + fdivs CONSTE432_f29,P1_f11,P1_f12 ! raise div-by-zero + ba,pt %icc,3f + ! delay slot + st INF_f28,[YPTR] ! store -inf + 2: + sll %l1,1,%l1 ! lop off sign bit + add XPTR,XSTR,XPTR ! x += stridex + orcc %l1,%o5,%g0 + be,pn %icc,1b ! if x == -0 + ! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fzero P1_f12 ! *y = (x < 0.0? 0.0 : x) * inf + fcmpd %fcc0,P1_X2,P1_f12 + fmovdl %fcc0,P1_f12,P1_X2 + fand INF_f28,FP50_MASK,P1_f12 + fnegd P1_f12,P1_f12 + fmuld P1_X2,P1_f12,P1_X2 + st P1_X2,[YPTR] + 3: + addcc ICNT,-1,ICNT + ble,pn %icc,.endloop0 + ! delay slot + st P1_f11,[YPTR+4] + ld [XPTR],%l1 ! get next argument + ldd [XPTR],P1_X2 + fpadd32s P1_X2,TTOPMSK,P1_f12 ! X + TTOP + ba,pt %icc,.loop1 + ! delay slot + fands P1_f12,INF_f28,P1_f12 ! & INF + + + .align 16 +.range2: + cmp %l2,RANGE_HI + bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000 +! delay slot + ld [XPTR+4],%o5 + fxtod P2_X3,P2_X3 ! scale by 2**1074 w/o trapping + st P2_X3,[%fp+tmp2] + add XPTR,XSTR,XPTR ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1f ! if x == 0 +! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fpadd32s P2_X3,TTOPMSK,P2_f22 ! n = (ix + 0xc0194000) & 0xfff00000 + fands P2_f22,INF_f28,P2_f22 + fpsub32s P2_X3,P2_f22,P2_X3 ! u.l[0] -= n + ld [%fp+tmp2],%l2 + ba,pt %icc,.cont2 +! delay slot + fpsub32s P2_f22,CONSTE432_f29,P2_f22 ! n -= 0x43200000 +1: + fdivs CONSTE432_f29,P2_f21,P2_f22 ! raise div-by-zero + ba,pt %icc,3f +! delay slot + st INF_f28,[YPTR] ! store -inf +2: + sll %l2,1,%l2 ! lop off sign bit + add XPTR,XSTR,XPTR ! x += stridex + orcc %l2,%o5,%g0 + be,pn %icc,1b ! if x == -0 +! delay slot + add YPTR,YSTR,YPTR ! y += stridey + fzero P2_f22 ! *y = (x < 0.0? 
0.0 : x) * inf + fcmpd %fcc0,P2_X3,P2_f22 + fmovdl %fcc0,P2_f22,P2_X3 + fand INF_f28,FP50_MASK,P2_f22 + fnegd P2_f22,P2_f22 + fmuld P2_X3,P2_f22,P2_X3 + st P2_X3,[YPTR] +3: + addcc ICNT,-1,ICNT + ble,pn %icc,.endloop1 +! delay slot + st P2_f21,[YPTR+4] + ld [XPTR],%l2 ! get next argument + ldd [XPTR],P2_X3 + fpadd32s P2_X3,TTOPMSK,P2_f22 ! X + TTOP + ba,pt %icc,.loop2 +! delay slot + fands P2_f22,INF_f28,P2_f22 ! X3 + nop !ld [XPTR+4],P2_f21 + + SET_SIZE(__vlog_ultra3) + diff --git a/usr/src/libm/src/mvec/vis/__vlogf.S b/usr/src/libm/src/mvec/vis/__vlogf.S new file mode 100644 index 0000000..a6fcd21 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vlogf.S @@ -0,0 +1,1276 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vlogf.S 1.11 06/01/23 SMI" + + .file "__vlogf.S" + +#include "libm.h" + + RO_DATA + .align 64 +!! CONST_TBL[2*i] = 127*log(2) - log(1+i/32), i = [0, 32] +!! 
CONST_TBL[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] + +.CONST_TBL: + .word 0x405601e6, 0x78fc457b, 0x3e800000, 0x00000000, + .word 0x4055ffee, 0x4f4b5df8, 0x3e7f07c1, 0xf07c1f08, + .word 0x4055fe05, 0x32e4434f, 0x3e7e1e1e, 0x1e1e1e1e, + .word 0x4055fc2a, 0x44598c21, 0x3e7d41d4, 0x1d41d41d, + .word 0x4055fa5c, 0xb720babf, 0x3e7c71c7, 0x1c71c71c, + .word 0x4055f89b, 0xcf803581, 0x3e7bacf9, 0x14c1bad0, + .word 0x4055f6e6, 0xe0c3f1b1, 0x3e7af286, 0xbca1af28, + .word 0x4055f53d, 0x4badcb50, 0x3e7a41a4, 0x1a41a41a, + .word 0x4055f39e, 0x7d18782e, 0x3e799999, 0x9999999a, + .word 0x4055f209, 0xecc5965c, 0x3e78f9c1, 0x8f9c18fa, + .word 0x4055f07f, 0x1c5099d5, 0x3e786186, 0x18618618, + .word 0x4055eefd, 0x9641645e, 0x3e77d05f, 0x417d05f4, + .word 0x4055ed84, 0xed3a291d, 0x3e7745d1, 0x745d1746, + .word 0x4055ec14, 0xbb3ced72, 0x3e76c16c, 0x16c16c17, + .word 0x4055eaac, 0xa10589ab, 0x3e7642c8, 0x590b2164, + .word 0x4055e94c, 0x45758439, 0x3e75c988, 0x2b931057, + .word 0x4055e7f3, 0x550f85e3, 0x3e755555, 0x55555555, + .word 0x4055e6a1, 0x818078ec, 0x3e74e5e0, 0xa72f0539, + .word 0x4055e556, 0x8134aae1, 0x3e747ae1, 0x47ae147b, + .word 0x4055e412, 0x0ef783b7, 0x3e741414, 0x14141414, + .word 0x4055e2d3, 0xe99c9674, 0x3e73b13b, 0x13b13b14, + .word 0x4055e19b, 0xd3b0f9d9, 0x3e73521c, 0xfb2b78c1, + .word 0x4055e069, 0x9333fb26, 0x3e72f684, 0xbda12f68, + .word 0x4055df3c, 0xf1565bd0, 0x3e729e41, 0x29e4129e, + .word 0x4055de15, 0xba3f64fa, 0x3e724924, 0x92492492, + .word 0x4055dcf3, 0xbcd73219, 0x3e71f704, 0x7dc11f70, + .word 0x4055dbd6, 0xca95a75a, 0x3e71a7b9, 0x611a7b96, + .word 0x4055dabe, 0xb7559927, 0x3e715b1e, 0x5f75270d, + .word 0x4055d9ab, 0x592bb896, 0x3e711111, 0x11111111, + .word 0x4055d89c, 0x8840e4fe, 0x3e70c971, 0x4fbcda3b, + .word 0x4055d792, 0x1eaf8df0, 0x3e708421, 0x08421084, + .word 0x4055d68b, 0xf863da3d, 0x3e704104, 0x10410410, + .word 0x4055d589, 0xf2fe5107, 0x3e700000, 0x00000000, + .word 0xbfcffb16, 0xbfa3db6e, !
K3 = -2.49850123953105416108e-01 + .word 0x3fd5561b, 0xa4b3110b, ! K2 = 3.33380614127478394992e-01 + .word 0xbfe00000, 0x0b666d0b, ! K1 = -5.00000021234343492201e-01 + .word 0x3fefffff, 0xff3fd118, ! K0 = 9.99999998601683029714e-01 + .word 0x3fe62e42, 0xfefa39ef, ! LN2 = 6.931471805599452862e-01 + .word 0xbf800000, 0x7f800000, ! MONE = -1.0f ; INF + +! local storage indices +#define tmp0 STACK_BIAS-0x8 +#define tmp1 STACK_BIAS-0x10 +#define tmp2 STACK_BIAS-0x18 +#define tmp3 STACK_BIAS-0x20 +#define tmp4 STACK_BIAS-0x28 +#define tmp5 STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +#define ZERO %f28 +#define K3 %f30 +#define K2 %f32 +#define K1 %f34 +#define K0 %f36 +#define LN2 %f38 + +#define stridex %o0 +#define stridex2 %o1 +#define stridey %o2 +#define x0 %o3 +#define x1 %o4 +#define y %o5 + +#define ind0 %i0 +#define ind1 %i1 +#define ind2 %i2 +#define ind3 %i3 +#define MASK_0x007fffff %i4 +#define MASK_0xfffc0000 %i5 +#define CONST_0x20000 %o7 +#define MASK_0x7f800000 %l3 + +#define ival0 %l0 +#define iy0 %l1 +#define ival1 %l2 +#define iy1 %l1 +#define ival2 %l4 +#define iy2 %l5 +#define ival3 %l6 +#define iy3 %l2 +#define counter %l7 + +#define LOGFTBL %g5 +#define LOGFTBL_P8 %g1 + +! register use + +! i0 ind0 +! i1 ind1 +! i2 ind2 +! i3 ind3 +! i4 0x007fffff +! i5 0xfffc0000 + +! l0 ival0 +! l1 iy0, iy1 +! l2 ival1, iy3 +! l3 0x7f800000 +! l4 ival2 +! l5 iy2 +! l6 ival3 +! l7 cycle counter + +! o0 stridex +! o1 stridex * 2 +! o2 stridey +! o3 x0 +! o4 x1 +! o5 y +! o7 0x20000 + +! g1 CONST_TBL + 8 +! g5 CONST_TBL + +! f2 +! f4 +! f6 +! f8 +! f9 +! f10 +! f12 +! f14 +! f16 +! f18 +! f19 +! f20 +! f22 +! f24 +! f26 +! f28 ZERO = 0 +! f30 K3 = -2.49850123953105416108e-01 +! f32 K2 = 3.33380614127478394992e-01 +! f34 K1 = -5.00000021234343492201e-01 +! f36 K0 = 9.99999998601683029714e-01 +! f38 LN2 = 6.931471805599452862e-01 +! f40 +! f42 +! f44 +! f46 +! f48 +! f50 +! f52 +! f54 +! f56 +! f58 +! f60 +! f62 + + +!
!!!!! Algorithm !!!!! +! +! double exp, ty, yy, ldtmp0, ldtmp1; +! double dtmp0, dtmp1, dtmp2, dtmp3, dtmp4, dtmp5; +! float value; +! int ival, iy, i, ind, iexp; +! double K3 = -2.49850123953105416108e-01; +! double K2 = 3.33380614127478394992e-01; +! double K1 = -5.00000021234343492201e-01; +! double K0 = 9.99999998601683029714e-01; +! double LN2 = 6.931471805599452862e-01; +! double ZERO = 0; +! float INF; +! +! ival = *(int*)(x); +! if (ival >= 0x7f800000) goto spec; +! if (ival <= 0x7fffff) goto spec; +! *(float*)&exp = *(float*)(x); +! exp = vis_fpack32(ZERO, exp); +! iy = ival & 0x007fffff; +! ival = iy + 0x20000; +! ival = ival & 0xfffc0000; +! i = ival >> 14; +! ind = i & (-8); +! iy = iy - ival; +! ty = LN2 * (double)(*(int*)&exp); +! ldtmp0 = *(double*)((char*)CONST_TBL+ind); +! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); +! ty = ty - ldtmp0; +! yy = (double) iy; +! yy = yy * ldtmp1; +! dtmp0 = K3 * yy; +! dtmp1 = dtmp0 + K2; +! dtmp2 = dtmp1 * yy; +! dtmp3 = dtmp2 + K1; +! dtmp4 = dtmp3 * yy; +! dtmp5 = dtmp4 + K0; +! yy = dtmp5 * yy; +! yy = yy + ty; +! y[0] = (float)(yy); +! return; +! +!spec: +! if ((ival & 0x7fffffff) >= 0x7f800000) { /* X = NaN or Inf */ +! value = *(float*) &ival; +! y[0] = (value < 0.0f? 0.0f : value) * value; +! return; +! } else if (ival <= 0) { +! y[0] = ((ival & 0x7fffffff) == 0) ? +! -1.0f / 0.0f : 0.0f / 0.0f; /* X = +-0 : X < 0 */ +! return; +! } else { /* Denorm. number */ +! value = (float) ival; +! ival = *(int*) &value; +! iexp = (ival >> 23) - 149; +! iy = ival & 0x007fffff; +! ival = iy + 0x20000; +! ival = ival & 0xfffc0000; +! i = ival >> 14; +! ind = i & (-8); +! iy = iy - ival; +! ty = LN2 * (double)iexp; +! ldtmp0 = *(double*)((char*)CONST_TBL+ind); +! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); +! ty = ty - ldtmp0; +! yy = (double) iy; +! yy = yy * ldtmp1; +! dtmp0 = K3 * yy; +! dtmp1 = dtmp0 + K2; +! dtmp2 = dtmp1 * yy; +! dtmp3 = dtmp2 + K1; +! dtmp4 = dtmp3 * yy; +! dtmp5 = dtmp4 + K0; +!
yy = dtmp5 * yy; +! yy = yy + ty; +! y[0] = (float)(yy); +! return; +! } +!-------------------------------------------------------------------- + + ENTRY(__vlogf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + wr %g0,0,%gsr + + st %i0,[%fp+tmp0] + stx %i1,[%fp+tmp5] + + sra %i2,0,%l4 + ldd [LOGFTBL+528],K3 + add %i3,0,y + sllx %l4,2,stridex + sllx %l4,3,stridex2 + ldd [LOGFTBL+536],K2 + sra %i4,0,%l3 + ldd [LOGFTBL+544],K1 + sllx %l3,2,stridey + sethi %hi(0x7ffc00),MASK_0x007fffff + add MASK_0x007fffff,1023,MASK_0x007fffff + ldd [LOGFTBL+552],K0 + sethi %hi(0xfffc0000),MASK_0xfffc0000 + ldd [LOGFTBL+560],LN2 + sethi %hi(0x20000),CONST_0x20000 + fzero ZERO + sethi %hi(0x7f800000),MASK_0x7f800000 + sub y,stridey,y + +.begin: + ld [%fp+tmp0],counter + ldx [%fp+tmp5],x0 + st %g0,[%fp+tmp0] +.begin1: + add x0,stridex2,x1! x += 2*stridex + subcc counter,1,counter + bneg,pn %icc,.end + lda [x0]0x82,ival0 ! (Y0_0) ival = *(int*)(x) + + add LOGFTBL,8,LOGFTBL_P8 + lda [stridex+x0]0x82,ival1 ! (Y1_0) ival = *(int*)(x) + + cmp ival0,MASK_0x7f800000 ! (Y0_0) if (ival >= 0x7f800000) + lda [x1]0x82,ival2 ! (Y2_0) ival = *(int*)(x); + + bge,pn %icc,.spec ! (Y0_0) if (ival >= 0x7f800000) + nop + + cmp ival0,MASK_0x007fffff ! (Y0_0) if (ival <= 0x7fffff) + ble,pn %icc,.spec ! (Y0_0) if (ival <= 0x7fffff) + nop + + cmp ival1,MASK_0x7f800000 ! (Y1_0) if (ival >= 0x7f800000) + and ival0,MASK_0x007fffff,iy0 ! (Y0_0) iy = ival & 0x007fffff + + + add iy0,CONST_0x20000,ival0 ! (Y0_0) ival = iy + 0x20000 + + and ival0,MASK_0xfffc0000,ival0 ! (Y0_0) ival = ival & 0xfffc0000 + bge,pn %icc,.update2 ! (Y1_0) if (ival >= 0x7f800000) + nop +.cont2: + sub iy0,ival0,iy0 ! (Y0_0) iy = iy - ival + cmp ival1,MASK_0x007fffff ! (Y1_0) if (ival <= 0x7fffff) + lda [stridex+x1]0x82,ival3 ! (Y3_0) ival = *(int*)(x) + + st iy0,[%fp+tmp1] ! (Y0_0) (double) iy + ble,pn %icc,.update3 ! (Y1_0) if (ival <= 0x7fffff) + nop +.cont3: + cmp ival2,MASK_0x7f800000 ! 
(Y2_0) if (ival >= 0x7f800000) + and ival1,MASK_0x007fffff,iy1 ! (Y1_0) iy = ival & 0x007fffff + bge,pn %icc,.update4 ! (Y2_0) if (ival >= 0x7f800000) + nop +.cont4: + cmp ival2,MASK_0x007fffff ! (Y2_0) if (ival <= 0x7fffff) + ble,pn %icc,.update5 ! (Y2_0) if (ival <= 0x7fffff) + nop +.cont5: + add iy1,CONST_0x20000,ival1 ! (Y1_0) ival = iy + 0x20000 + and ival2,MASK_0x007fffff,iy2 ! (Y2_0) iy = ival & 0x007fffff + + and ival1,MASK_0xfffc0000,ival1 ! (Y1_0) ival = ival & 0xfffc0000 + add iy2,CONST_0x20000,ival2 ! (Y2_0) ival = iy + 0x20000 + + sub iy1,ival1,iy1 ! (Y1_0) iy = iy - ival + and ival2,MASK_0xfffc0000,ival2 ! (Y2_0) ival = ival & 0xfffc0000 + + cmp ival3,MASK_0x7f800000 ! (Y3_0) (ival >= 0x7f800000) + sub iy2,ival2,iy2 ! (Y2_0) iy = iy - ival + st iy1,[%fp+tmp3] ! (Y1_0) (double) iy + + st iy2,[%fp+tmp2] ! (Y2_0) (double) iy + bge,pn %icc,.update6 ! (Y3_0) (ival >= 0x7f800000) + nop +.cont6: + cmp ival3,MASK_0x007fffff ! (Y3_0) if (ival <= 0x7fffff) + ld [%fp+tmp1],%f2 ! (Y0_0) (double) iy + ble,pn %icc,.update7 ! (Y3_0) if (ival <= 0x7fffff) + sra ival0,14,ival0 ! (Y0_0) i = ival >> 14; +.cont7: + sra ival1,14,ind1 ! (Y1_0) i = ival >> 14; + ld [%fp+tmp3],%f4 ! (Y1_0) (double) iy + + sra ival2,14,ival2 ! (Y2_0) i = ival >> 14; + and ival0,-8,ind0 ! (Y0_0) ind = i & (-8) + lda [x0]0x82,%f6 ! (Y0_0) *(float*)&exp = *(float*)(x) + + and ind1,-8,ind1 ! (Y1_0) ind = i & (-8) + ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f2,%f48 ! (Y0_0) yy = (double) iy + + and ival3,MASK_0x007fffff,iy3 ! (Y3_0) iy = ival & 0x007fffff + lda [stridex+x0]0x82,%f8 ! (Y1_0) *(float*)&exp = *(float*)(x) + + add iy3,CONST_0x20000,ival3 ! (Y3_0) iy + 0x20000 + ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f4,%f26 ! (Y1_0) yy = (double) iy + + sub y,stridey,y ! y += stridey + and ival3,MASK_0xfffc0000,ival3 ! (Y3_0) ival = ival & 0xfffc0000 + lda [x1]0x82,%f10 ! 
(Y2_0) *(float*)&exp = *(float*)(x) + + add x1,stridex2,x0 ! x += 2*stridex + sub iy3,ival3,iy3 ! (Y3_0) iy = iy - ival + ld [%fp+tmp2],%f2 ! (Y2_0) (double) iy + fmuld %f48,%f14,%f46 ! (Y0_0) yy = yy * ldtmp1 + + lda [stridex+x1]0x82,%f12 ! (Y3_0) *(float*)&exp = *(float*)(x) + fmuld %f26,%f16,%f62 ! (Y1_0) yy = yy * ldtmp1 + + sra ival3,14,ival3 ! (Y3_0) i = ival >> 14; + lda [x0]0x82,ival0 ! (Y0_1) ival = *(int*)(x) + + add x0,stridex2,x1 ! x += 2*stridex + st iy3,[%fp+tmp3] ! (Y3_0) (double) iy + fmuld K3,%f46,%f22 ! (Y0_0) dtmp0 = K3 * yy + + and ival2,-8,ind2 ! (Y2_0) ind = i & (-8) + lda [stridex+x0]0x82,ival1 ! (Y1_1) ival = *(int*)(x) + + cmp ival0,MASK_0x7f800000 ! (Y0_1) if (ival >= 0x7f800000) + lda [x1]0x82,ival2 ! (Y2_1) ival = *(int*)(x); + fmuld K3,%f62,%f50 ! (Y1_0) dtmp0 = K3 * yy + + bge,pn %icc,.update8 ! (Y0_1) if (ival >= 0x7f800000) + nop +.cont8: + cmp ival0,MASK_0x007fffff ! (Y0_1) if (ival <= 0x7fffff) + ble,pn %icc,.update9 ! (Y0_1) if (ival <= 0x7fffff) + faddd %f22,K2,%f48 ! (Y0_0) dtmp1 = dtmp0 + K2 + +.cont9: + cmp ival1,MASK_0x7f800000 ! (Y1_1) if (ival >= 0x7f800000) + and ival0,MASK_0x007fffff,iy0 ! (Y0_1) iy = ival & 0x007fffff + + add iy0,CONST_0x20000,ival0 ! (Y0_1) ival = iy + 0x20000 + ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); + fpack32 ZERO,%f6,%f6 ! (Y0_0) exp = vis_fpack32(ZERO, exp) + + and ival0,MASK_0xfffc0000,ival0 ! (Y0_1) ival = ival & 0xfffc0000 + faddd %f50,K2,%f26 ! (Y1_0) dtmp1 = dtmp0 + K2 + bge,pn %icc,.update10 ! (Y1_1) if (ival >= 0x7f800000) + nop +.cont10: + sub iy0,ival0,iy0 ! (Y0_1) iy = iy - ival + and ival3,-8,ind3 ! (Y3_0) ind = i & (-8) + ld [%fp+tmp3],%f4 ! (Y3_0) (double) iy + + cmp ival1,MASK_0x007fffff ! (Y1_1) if (ival <= 0x7fffff) + lda [stridex+x1]0x82,ival3 ! (Y3_1) ival = *(int*)(x) + fmuld %f48,%f46,%f50 ! (Y0_0) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y2_0) yy = (double) iy + + st iy0,[%fp+tmp1] ! (Y0_1) (double) iy + ble,pn %icc,.update11 ! 
(Y1_1) if (ival <= 0x7fffff) + nop +.cont11: + cmp ival2,MASK_0x7f800000 ! (Y2_1) if (ival >= 0x7f800000) + and ival1,MASK_0x007fffff,iy1 ! (Y1_1) iy = ival & 0x007fffff + bge,pn %icc,.update12 ! (Y2_1) if (ival >= 0x7f800000) + fmuld %f26,%f62,%f42 ! (Y1_0) dtmp2 = dtmp1 * yy +.cont12: + cmp ival2,MASK_0x007fffff ! (Y2_1) if (ival <= 0x7fffff) + ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + ble,pn %icc,.update13 ! (Y2_1) if (ival <= 0x7fffff) + fitod %f4,%f26 ! (Y3_0) yy = (double) iy +.cont13: + add iy1,CONST_0x20000,ival1 ! (Y1_1) ival = iy + 0x20000 + and ival2,MASK_0x007fffff,iy2 ! (Y2_1) iy = ival & 0x007fffff + + and ival1,MASK_0xfffc0000,ival1 ! (Y1_1) ival = ival & 0xfffc0000 + add iy2,CONST_0x20000,ival2 ! (Y2_1) ival = iy + 0x20000 + fmuld %f48,%f14,%f44 ! (Y2_0) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y0_0) dtmp3 = dtmp2 + K1 + + cmp ival3,MASK_0x7f800000 ! (Y3_1) if (ival >= 0x7f800000) + sub iy1,ival1,iy1 ! (Y1_1) iy = iy - ival + and ival2,MASK_0xfffc0000,ival2 ! (Y2_1) ival = ival & 0xfffc0000 + fpack32 ZERO,%f8,%f8 ! (Y1_0) exp = vis_fpack32(ZERO, exp) + + sub iy2,ival2,iy2 ! (Y2_1) iy = iy - ival + st iy1,[%fp+tmp3] ! (Y1_1) (double) iy + fmuld %f26,%f16,%f60 ! (Y3_0) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y1_0) dtmp3 = dtmp2 + K1 + + st iy2,[%fp+tmp2] ! (Y2_1) (double) iy + fmuld K3,%f44,%f22 ! (Y2_0) dtmp0 = K3 * yy + bge,pn %icc,.update14 ! (Y3_1) if (ival >= 0x7f800000) + fitod %f6,%f40 ! (Y0_0) (double)(*(int*)&exp) +.cont14: + cmp ival3,MASK_0x007fffff ! (Y3_1) if (ival <= 0x7fffff) + ldd [LOGFTBL+ind1],%f58 ! (Y1_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld %f50,%f46,%f52 ! (Y0_0) dtmp4 = dtmp3 * yy + fitod %f8,%f56 ! (Y1_0) (double)(*(int*)&exp) + + ld [%fp+tmp1],%f2 ! (Y0_1) (double) iy + fmuld K3,%f60,%f50 ! (Y3_0) dtmp0 = K3 * yy + ble,pn %icc,.update15 ! (Y3_1) if (ival <= 0x7fffff) + nop +.cont15: + subcc counter,7,counter + fmuld %f54,%f62,%f54 ! 
(Y1_0) dtmp4 = dtmp3 * yy + + sra ival0,14,ival0 ! (Y0_1) i = ival >> 14; + bneg,pn %icc,.tail + faddd %f22,K2,%f48 ! (Y2_0) dtmp1 = dtmp0 + K2 + + ba .main_loop + nop + + .align 16 +.main_loop: + sra ival2,14,ival2 ! (Y2_1) i = ival >> 14; + ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp) + faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0 + + sra ival1,14,ind1 ! (Y1_1) i = ival >> 14; + ld [%fp+tmp3],%f4 ! (Y1_1) (double) iy + fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp) + faddd %f50,K2,%f26 ! (Y3_0) dtmp1 = dtmp0 + K2 + + and ival0,-8,ind0 ! (Y0_1) ind = i & (-8) + lda [x0]0x82,%f6 ! (Y0_1) *(float*)&exp = *(float*)(x) + fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp) + faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0 + + and ind1,-8,ind1 ! (Y1_1) ind = i & (-8) + ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y0_1) yy = (double) iy + + and ival3,MASK_0x007fffff,iy3 ! (Y3_1) iy = ival & 0x007fffff + lda [stridex+x0]0x82,%f8 ! (Y1_1) *(float*)&exp = *(float*)(x) + fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0 + + add iy3,CONST_0x20000,ival3 ! (Y3_1) iy + 0x20000 + ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f26,%f60,%f42 ! (Y3_0) dtmp2 = dtmp1 * yy + fitod %f4,%f26 ! (Y1_1) yy = (double) iy + + and ival3,MASK_0xfffc0000,ival3 ! (Y3_1) ival = ival & 0xfffc0000 + lda [x1]0x82,%f10 ! (Y2_1) *(float*)&exp = *(float*)(x) + fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0 + + sub iy3,ival3,iy3 ! (Y3_1) iy = iy - ival + ld [%fp+tmp2],%f2 ! (Y2_1) (double) iy + fmuld %f48,%f14,%f46 ! (Y0_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1 + + add x1,stridex2,x0 ! x += 2*stridex + st iy3,[%fp+tmp3] ! 
(Y3_1) (double) iy + fpack32 ZERO,%f12,%f20 ! (Y3_0) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty + + add y,stridey,y ! y += stridey + lda [stridex+x1]0x82,%f12 ! (Y3_1) *(float*)&exp = *(float*)(x) + fmuld %f26,%f16,%f62 ! (Y1_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y3_0) dtmp3 = dtmp2 + K1 + + sra ival3,14,ival3 ! (Y3_1) i = ival >> 14; + add y,stridey,y ! y += stridey + lda [x0]0x82,ival0 ! (Y0_2) ival = *(int*)(x) + faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty + + add x0,stridex2,x1 ! x += 2*stridex + ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld K3,%f46,%f22 ! (Y0_1) dtmp0 = K3 * yy + fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp) + + and ival2,-8,ind2 ! (Y2_1) ind = i & (-8) + lda [stridex+x0]0x82,ival1 ! (Y1_2) ival = *(int*)(x) + fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy + fitod %f20,%f56 ! (Y3_0) (double)(*(int*)&exp) + + cmp ival0,MASK_0x7f800000 ! (Y0_2) if (ival >= 0x7f800000) + lda [x1]0x82,ival2 ! (Y2_2) ival = *(int*)(x); + fmuld K3,%f62,%f50 ! (Y1_1) dtmp0 = K3 * yy + fdtos %f48,%f4 ! (Y0_0) (float)(yy) + + st %f4,[y] ! (Y0_0) write into memory + fmuld %f54,%f60,%f54 ! (Y3_0) dtmp4 = dtmp3 * yy + bge,pn %icc,.update16 ! (Y0_2) if (ival >= 0x7f800000) + fdtos %f24,%f4 ! (Y1_0) (float)(yy) +.cont16: + cmp ival0,MASK_0x007fffff ! (Y0_2) if (ival <= 0x7fffff + ldd [LOGFTBL+ind3],%f58 ! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + ble,pn %icc,.update17 ! (Y0_2) if (ival <= 0x7fffff + faddd %f22,K2,%f48 ! (Y0_1) dtmp1 = dtmp0 + K2 +.cont17: + cmp ival1,MASK_0x7f800000 ! (Y1_2) if (ival >= 0x7f800000) + and ival0,MASK_0x007fffff,iy0 ! (Y0_2) iy = ival & 0x007fffff + st %f4,[stridey+y] ! (Y1_0) write into memory + fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp) + + add iy0,CONST_0x20000,ival0 ! (Y0_2) ival = iy + 0x20000 + ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); + faddd %f52,K0,%f22 ! 
(Y2_0) dtmp5 = dtmp4 + K0 + fpack32 ZERO,%f6,%f6 ! (Y0_1) exp = vis_fpack32(ZERO, exp) + + and ival0,MASK_0xfffc0000,ival0 ! (Y0_2) ival = ival & 0xfffc0000 + faddd %f50,K2,%f26 ! (Y1_1) dtmp1 = dtmp0 + K2 + bge,pn %icc,.update18 ! (Y1_2) if (ival >= 0x7f800000) + fmuld LN2,%f56,%f56 ! (Y3_0) ty = LN2 * (double)(*(int*)&exp) +.cont18: + sub iy0,ival0,iy0 ! (Y0_2) iy = iy - ival + and ival3,-8,ind3 ! (Y3_1) ind = i & (-8) + ld [%fp+tmp3],%f4 ! (Y3_1) (double) iy + faddd %f54,K0,%f24 ! (Y3_0) dtmp5 = dtmp4 + K0 + + cmp ival1,MASK_0x007fffff ! (Y1_2) if (ival <= 0x7fffff) + lda [stridex+x1]0x82,ival3 ! (Y3_2) ival = *(int*)(x) + fmuld %f48,%f46,%f50 ! (Y0_1) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y2_1) yy = (double) iy + + st iy0,[%fp+tmp1] ! (Y0_2) (double) iy + fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy + ble,pn %icc,.update19 ! (Y1_2) if (ival <= 0x7fffff) + fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0 +.cont19: + cmp ival2,MASK_0x7f800000 ! (Y2_2) if (ival >= 0x7f800000) + and ival1,MASK_0x007fffff,iy1 ! (Y1_2) iy = ival & 0x007fffff + bge,pn %icc,.update20 ! (Y2_2) if (ival >= 0x7f800000) + fmuld %f26,%f62,%f42 ! (Y1_1) dtmp2 = dtmp1 * yy +.cont20: + cmp ival2,MASK_0x007fffff ! (Y2_2) if (ival <= 0x7fffff) + ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + ble,pn %icc,.update21 ! (Y2_2) if (ival <= 0x7fffff) + fitod %f4,%f26 ! (Y3_1) yy = (double) iy +.cont21: + add iy1,CONST_0x20000,ival1 ! (Y1_2) ival = iy + 0x20000 + and ival2,MASK_0x007fffff,iy2 ! (Y2_2) iy = ival & 0x007fffff + fmuld %f24,%f60,%f24 ! (Y3_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y3_0) ty = ty - ldtmp0 + + and ival1,MASK_0xfffc0000,ival1 ! (Y1_2) ival = ival & 0xfffc0000 + add iy2,CONST_0x20000,ival2 ! (Y2_2) ival = iy + 0x20000 + fmuld %f48,%f14,%f44 ! (Y2_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y0_1) dtmp3 = dtmp2 + K1 + + sub iy1,ival1,iy1 ! (Y1_2) iy = iy - ival + and ival2,MASK_0xfffc0000,ival2 ! 
(Y2_2) ival = ival & 0xfffc0000 + fpack32 ZERO,%f8,%f8 ! (Y1_1) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty + + sub iy2,ival2,iy2 ! (Y2_2) iy = iy - ival + st iy1,[%fp+tmp3] ! (Y1_2) (double) iy + fmuld %f26,%f16,%f60 ! (Y3_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y1_1) dtmp3 = dtmp2 + K1 + + cmp ival3,MASK_0x7f800000 ! (Y3_2) if (ival >= 0x7f800000) + add y,stridey,y ! y += stridey + st iy2,[%fp+tmp2] ! (Y2_2) (double) iy + faddd %f24,%f58,%f24 ! (Y3_0) yy = yy + ty + + add y,stridey,y ! y += stridey + fmuld K3,%f44,%f22 ! (Y2_1) dtmp0 = K3 * yy + bge,pn %icc,.update22 ! (Y3_2) if (ival >= 0x7f800000) + fitod %f6,%f40 ! (Y0_1)(double)(*(int*)&exp) +.cont22: + cmp ival3,MASK_0x007fffff ! (Y3_2) if (ival <= 0x7fffff) + ldd [LOGFTBL+ind1],%f58 ! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld %f50,%f46,%f52 ! (Y0_1) dtmp4 = dtmp3 * yy + fitod %f8,%f56 ! (Y1_1) (double)(*(int*)&exp) + + ld [%fp+tmp1],%f2 ! (Y0_2) (double) iy + fmuld K3,%f60,%f50 ! (Y3_1) dtmp0 = K3 * yy + ble,pn %icc,.update23 ! (Y3_2) if (ival <= 0x7fffff) + fdtos %f48,%f4 ! (Y2_0) (float)(yy) +.cont23: + subcc counter,4,counter ! update cycle counter + st %f4,[y] ! (Y2_0) write into memory + fmuld %f54,%f62,%f54 ! (Y1_1) dtmp4 = dtmp3 * yy + fdtos %f24,%f4 ! (Y3_0)(float)(yy) + + sra ival0,14,ival0 ! (Y0_2) i = ival >> 14; + st %f4,[stridey+y] ! (Y3_0) write into memory + bpos,pt %icc,.main_loop + faddd %f22,K2,%f48 ! (Y2_1) dtmp1 = dtmp0 + K2 + +.tail: + addcc counter,7,counter + add y,stridey,y ! y += stridey + bneg,pn %icc,.end_loop + + sra ival2,14,ival2 ! (Y2_1) i = ival >> 14; + ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp) + faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0 + + sra ival1,14,ind1 ! (Y1_1) i = ival >> 14; + ld [%fp+tmp3],%f4 ! (Y1_1) (double) iy + fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp) + faddd %f50,K2,%f26 ! 
(Y3_0) dtmp1 = dtmp0 + K2 + + and ival0,-8,ind0 ! (Y0_1) ind = i & (-8) + lda [x0]0x82,%f6 ! (Y0_1) *(float*)&exp = *(float*)(x) + fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp) + faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0 + + and ind1,-8,ind1 ! (Y1_1) ind = i & (-8) + ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y0_1) yy = (double) iy + + and ival3,MASK_0x007fffff,ival1 ! (Y3_1) iy = ival & 0x007fffff + lda [stridex+x0]0x82,%f8 ! (Y1_1) *(float*)&exp = *(float*)(x) + fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0 + + add iy3,CONST_0x20000,ival3 ! (Y3_1) iy + 0x20000 + ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fmuld %f26,%f60,%f42 ! (Y3_0) dtmp2 = dtmp1 * yy + fitod %f4,%f26 ! (Y1_1) yy = (double) iy + + and ival3,MASK_0xfffc0000,ival3 ! (Y3_1) ival = ival & 0xfffc0000 + lda [x1]0x82,%f10 ! (Y2_1) *(float*)&exp = *(float*)(x) + fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0 + + sub iy3,ival3,iy3 ! (Y3_1) iy = iy - ival + ld [%fp+tmp2],%f2 ! (Y2_1) (double) iy + fmuld %f48,%f14,%f46 ! (Y0_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1 + + add x1,stridex2,x0 ! x += 2*stridex + st iy3,[%fp+tmp3] ! (Y3_1) (double) iy + fpack32 ZERO,%f12,%f20 ! (Y3_0) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty + + lda [stridex+x1]0x82,%f12 ! (Y3_1) *(float*)&exp = *(float*)(x) + fmuld %f26,%f16,%f62 ! (Y1_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y3_0) dtmp3 = dtmp2 + K1 + + sra ival3,14,ival3 ! (Y3_1) i = ival >> 14; + add y,stridey,y ! y += stridey + faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty + + subcc counter,1,counter + ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld K3,%f46,%f22 ! (Y0_1) dtmp0 = K3 * yy + fitod %f18,%f40 ! 
(Y2_0) (double)(*(int*)&exp) + + and ival2,-8,ind2 ! (Y2_1) ind = i & (-8) + fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy + fitod %f20,%f56 ! (Y3_0) (double)(*(int*)&exp) + + fmuld K3,%f62,%f50 ! (Y1_1) dtmp0 = K3 * yy + fdtos %f48,%f4 ! (Y0_0) (float)(yy) + + st %f4,[y] ! (Y0_0) write into memory + fmuld %f54,%f60,%f54 ! (Y3_0) dtmp4 = dtmp3 * yy + bneg,pn %icc,.end_loop + fdtos %f24,%f4 ! (Y1_0) (float)(yy) + + add y,stridey,y ! y += stridey + subcc counter,1,counter + ldd [LOGFTBL+ind3],%f58 ! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + faddd %f22,K2,%f48 ! (Y0_1) dtmp1 = dtmp0 + K2 + + st %f4,[y] ! (Y1_0) write into memory + bneg,pn %icc,.end_loop + fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp) + + ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8); + faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0 + fpack32 ZERO,%f6,%f6 ! (Y0_1) exp = vis_fpack32(ZERO, exp) + + faddd %f50,K2,%f26 ! (Y1_1) dtmp1 = dtmp0 + K2 + fmuld LN2,%f56,%f56 ! (Y3_0) ty = LN2 * (double)(*(int*)&exp) + + and ival3,-8,ind3 ! (Y3_1) ind = i & (-8) + ld [%fp+tmp3],%f4 ! (Y3_1) (double) iy + faddd %f54,K0,%f24 ! (Y3_0) dtmp5 = dtmp4 + K0 + + fmuld %f48,%f46,%f50 ! (Y0_1) dtmp2 = dtmp1 * yy + fitod %f2,%f48 ! (Y2_1) yy = (double) iy + + fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0 + + fmuld %f26,%f62,%f42 ! (Y1_1) dtmp2 = dtmp1 * yy + + ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f4,%f26 ! (Y3_1) yy = (double) iy + + fmuld %f24,%f60,%f24 ! (Y3_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y3_0) ty = ty - ldtmp0 + + fmuld %f48,%f14,%f44 ! (Y2_1) yy = yy * ldtmp1 + faddd %f50,K1,%f50 ! (Y0_1) dtmp3 = dtmp2 + K1 + + fpack32 ZERO,%f8,%f8 ! (Y1_1) exp = vis_fpack32(ZERO, exp) + faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty + + fmuld %f26,%f16,%f60 ! (Y3_1) yy = yy * ldtmp1 + faddd %f42,K1,%f54 ! (Y1_1) dtmp3 = dtmp2 + K1 + + add y,stridey,y ! 
y += stridey + faddd %f24,%f58,%f24 ! (Y3_0) yy = yy + ty + + subcc counter,1,counter + fmuld K3,%f44,%f22 ! (Y2_1) dtmp0 = K3 * yy + fitod %f6,%f40 ! (Y0_1)(double)(*(int*)&exp) + + ldd [LOGFTBL+ind1],%f58 ! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld %f50,%f46,%f52 ! (Y0_1) dtmp4 = dtmp3 * yy + fitod %f8,%f56 ! (Y1_1) (double)(*(int*)&exp) + + fmuld K3,%f60,%f50 ! (Y3_1) dtmp0 = K3 * yy + fdtos %f48,%f4 ! (Y2_0) (float)(yy) + + st %f4,[y] ! (Y2_0) write into memory + fmuld %f54,%f62,%f54 ! (Y1_1) dtmp4 = dtmp3 * yy + bneg,pn %icc,.end_loop + fdtos %f24,%f4 ! (Y3_0)(float)(yy) + + subcc counter,1,counter ! update cycle counter + add y,stridey,y + + st %f4,[y] ! (Y3_0) write into memory + bneg,pn %icc,.end_loop + faddd %f22,K2,%f48 ! (Y2_1) dtmp1 = dtmp0 + K2 + + ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp) + faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0 + + fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp) + + fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp) + faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0 + + fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy + + fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0 + + fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy + fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0 + + subcc counter,1,counter + faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1 + + faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty + + add y,stridey,y ! y += stridey + faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty + + ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind) + fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp) + + fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy + + fdtos %f48,%f4 ! (Y0_0) (float)(yy) + + st %f4,[y] ! (Y0_0) write into memory + bneg,pn %icc,.end_loop + fdtos %f24,%f4 ! (Y1_0) (float)(yy) + + add y,stridey,y ! 
y += stridey + subcc counter,1,counter + st %f4,[y] ! (Y1_0) write into memory + bneg,pn %icc,.end_loop + fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp) + + faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0 + + fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0 + + add y,stridey,y ! y += stridey + faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty + + fdtos %f48,%f4 ! (Y2_0) (float)(yy) + + st %f4,[y] ! (Y2_0) write into memory +.end_loop: + ba .begin + nop + +.end: + ret + restore %g0,0,%o0 + + .align 16 +.update2: + cmp counter,0 + ble .cont2 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont2 + nop + + .align 16 +.update3: + cmp counter,0 + ble .cont3 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont3 + nop + + .align 16 +.update4: + cmp counter,1 + ble .cont4 + nop + + stx x1,[%fp+tmp5] + sub counter,1,counter + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont4 + nop + + .align 16 +.update5: + cmp counter,1 + ble .cont5 + nop + + stx x1,[%fp+tmp5] + sub counter,1,counter + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont5 + nop + + .align 16 +.update6: + cmp counter,2 + ble .cont6 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,2,counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont6 + nop + + .align 16 +.update7: + cmp counter,2 + ble .cont7 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,2,counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont7 + nop + + .align 16 +.update8: + cmp counter,3 + ble .cont8 + nop + + stx x0,[%fp+tmp5] + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont8 + nop + + .align 16 +.update9: + cmp counter,3 + ble .cont9 + nop + + stx x0,[%fp+tmp5] + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont9 + nop + + 
.align 16 +.update10: + cmp counter,4 + ble .cont10 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0, stridex, x0 + sub counter,4,counter + st counter,[%fp+tmp0] + or %g0,4,counter + ba .cont10 + nop + + .align 16 +.update11: + cmp counter,4 + ble .cont11 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + sub counter,4,counter + st counter,[%fp+tmp0] + or %g0,4,counter + ba .cont11 + nop + + .align 16 +.update12: + cmp counter,5 + ble .cont12 + nop + + stx x1,[%fp+tmp5] + sub counter,5,counter + st counter,[%fp+tmp0] + or %g0,5,counter + ba .cont12 + nop + + .align 16 +.update13: + cmp counter,5 + ble .cont13 + nop + + stx x1,[%fp+tmp5] + sub counter,5,counter + st counter,[%fp+tmp0] + or %g0,5,counter + ba .cont13 + nop + + .align 16 +.update14: + cmp counter,6 + ble .cont14 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1, stridex, x1 + sub counter,6,counter + st counter,[%fp+tmp0] + or %g0,6,counter + ba .cont14 + nop + + .align 16 +.update15: + cmp counter,6 + ble .cont15 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1, stridex, x1 + sub counter,6,counter + st counter,[%fp+tmp0] + or %g0,6,counter + ba .cont15 + nop + + .align 16 +.update16: + cmp counter,0 + ble,pt %icc, .cont16 + nop + + stx x0,[%fp+tmp5] + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont16 + nop + + .align 16 +.update17: + cmp counter,0 + ble,pt %icc, .cont17 + nop + + stx x0,[%fp+tmp5] + st counter,[%fp+tmp0] + or %g0,0,counter + ba .cont17 + nop + + .align 16 +.update18: + cmp counter,1 + ble,pt %icc, .cont18 + nop + + add x0,stridex,x0 + stx x0,[%fp+tmp5] + sub x0,stridex,x0 + sub counter,1,counter + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont18 + nop + + .align 16 +.update19: + cmp counter,1 + ble,pt %icc, .cont19 + nop + + add x0,stridex,x0 + sub counter,1,counter + stx x0,[%fp+tmp5] + sub x0, stridex, x0 + st counter,[%fp+tmp0] + or %g0,1,counter + ba .cont19 + nop + + .align 16 +.update20: + cmp counter,2 + ble,pt %icc, .cont20 + 
nop + + stx x1,[%fp+tmp5] + sub counter,2,counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont20 + nop + + .align 16 +.update21: + cmp counter,2 + ble,pt %icc, .cont21 + nop + + stx x1,[%fp+tmp5] + sub counter, 2, counter + st counter,[%fp+tmp0] + or %g0,2,counter + ba .cont21 + nop + + .align 16 +.update22: + cmp counter,3 + ble,pt %icc, .cont22 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont22 + nop + + .align 16 +.update23: + cmp counter,3 + ble,pt %icc, .cont23 + nop + + add x1,stridex,x1 + stx x1,[%fp+tmp5] + sub x1,stridex,x1 + sub counter,3,counter + st counter,[%fp+tmp0] + or %g0,3,counter + ba .cont23 + nop + + .align 16 +.spec: + or %g0,1,ind3 ! ind3 = 1 + sll ind3,31,ind3 ! ind3 = 0x80000000 + add x0,stridex,x0 ! x += stridex + sub ind3,1,ind3 ! ind3 = 0x7fffffff + add y,stridey,y ! y += stridey + and ival0,ind3,iy0 ! ival & 0x7fffffff + cmp iy0,MASK_0x7f800000 ! if ((ival & 0x7fffffff) >= 0x7f800000) + bge,pn %icc, .spec0 ! if ((ival & 0x7fffffff) >= 0x7f800000) + st ival0,[%fp+tmp1] + cmp ival0,0 ! if (ival <= 0) + ble,pn %icc,.spec1 ! if (ival <= 0) + nop + + ld [%fp+tmp1],%f12 + fitos %f12,%f14 ! value = (float) ival + st %f14,[%fp+tmp2] ! ival = *(int*) &value + ld [%fp+tmp2],ival0 ! ival = *(int*) &value + + and ival0,MASK_0x007fffff,iy0 ! iy = ival & 0x007fffff + sra ival0,23,ival2 ! iexp = ival >> 23 + + add iy0,CONST_0x20000,ival0 ! ival = iy + 0x20000 + sub ival2,149,ival2 ! iexp = iexp - 149 + + and ival0,MASK_0xfffc0000,ival0 ! ival = ival & 0xfffc0000 + st ival2,[%fp+tmp2] ! (double) iexp + + sub iy0,ival0,iy0 ! iy = iy - ival + + sra ival0,14,ival0 ! i = ival >> 14; + st iy0,[%fp+tmp1] ! (double) iy + + and ival0,-8,ind0 ! ind = i & (-8) + ld [%fp+tmp1],%f2 ! (double) iy + + ldd [LOGFTBL_P8+ind0],%f14 ! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8) + fitod %f2,%f48 ! yy = (double) iy + + fmuld %f48,%f14,%f46 ! 
yy = yy * ldtmp1 + + ld [%fp+tmp2],%f6 ! (double) iexp + fmuld K3,%f46,%f22 ! dtmp0 = K3 * yy + + ldd [LOGFTBL+ind0],%f42 ! ldtmp0 = *(double*)((char*)CONST_TBL+ind) + faddd %f22,K2,%f48 ! dtmp1 = dtmp0 + K2 + + fmuld %f48,%f46,%f50 ! dtmp2 = dtmp1 * yy + + faddd %f50,K1,%f50 ! dtmp3 = dtmp2 + K1 + + fitod %f6,%f40 ! (double) iexp + fmuld %f50,%f46,%f52 ! dtmp4 = dtmp3 * yy + + fmuld LN2,%f40,%f40 ! ty = LN2 * (double) iexp + faddd %f52,K0,%f22 ! dtmp5 = dtmp4 + K0 + + fmuld %f22,%f46,%f22 ! yy = dtmp5 * yy + fsubd %f40,%f42,%f40 ! ty = ty - ldtmp0 + + faddd %f22,%f40,%f48 ! yy = yy + ty + + fdtos %f48,%f4 ! (float)(yy) + + ba .begin1 + st %f4,[y] ! write into memory + + .align 16 +.spec0: + ld [%fp+tmp1],%f12 ! value = *(float*) &ival + fzeros %f2 ! y[0] = (value < 0.0f? + fcmps %fcc0,%f12,%f2 ! 0.0f : value) * value + fmovsug %fcc0,%f12,%f2 + fmuls %f12,%f2,%f2 + ba .begin1 + st %f2,[y] ! write into memory + + .align 16 +.spec1: + cmp iy0,0 ! if ((ival & 0x7fffffff) == 0) + bne,pn %icc,.spec2 ! if ((ival & 0x7fffffff) == 0) + nop + ld [LOGFTBL+568],%f4 + fdivs %f4,ZERO,%f6 ! y[0] = -1.0f / 0f + ba .begin1 + st %f6,[y] ! write into memory + + .align 16 +.spec2: + fdivs ZERO,ZERO,%f6 ! y[0] = 0f / 0f + ba .begin1 + st %f6,[y] ! write into memory + + SET_SIZE(__vlogf) + diff --git a/usr/src/libm/src/mvec/vis/__vpow.S b/usr/src/libm/src/mvec/vis/__vpow.S new file mode 100644 index 0000000..a86d776 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vpow.S @@ -0,0 +1,4352 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vpow.S 1.8 06/01/23 SMI" + + .file "__vpow.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + +! __mt_constlog2[2*i] = high order rounded 32 bits log2(1+i/256)*256, i = [0, 255] +! __mt_constlog2[2*i+1] = low order least bits log2(1+i/256)*256, i = [0, 255] + + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000, + .word 0x3ff709c4, 0x00000000, 0x3e9b5eab, 0x1dd2b66f, + .word 0x4006fe51, 0x00000000, 0xbea2443d, 0xeba01c72, + .word 0x40113631, 0x00000000, 0x3e97a97b, 0x0c4bb41a, + .word 0x4016e797, 0x00000000, 0xbebe8f4b, 0x759d6476, + .word 0x401c9364, 0x00000000, 0xbeb15ebc, 0x1e666460, + .word 0x40211cd2, 0x00000000, 0xbeb57665, 0xf6893f5d, + .word 0x4023ed31, 0x00000000, 0xbecae5e9, 0x7677f62d, + .word 0x4026bad3, 0x00000000, 0x3ecd63bf, 0x61cc4d82, + .word 0x402985c0, 0x00000000, 0xbebe5b57, 0x35cfaf8e, + .word 0x402c4dfb, 0x00000000, 0xbec1bd55, 0x2842c1c2, + .word 0x402f138a, 0x00000000, 0xbecf336b, 0x18178cbe, + .word 0x4030eb39, 0x00000000, 0xbed81758, 0x19530c23, + .word 0x40324b5b, 0x00000000, 0x3edf84d6, 0x8f2268b4, + .word 0x4033aa30, 0x00000000, 0xbec16c07, 0x1e93fd97, + .word 0x403507b8, 0x00000000, 0x3ecb019d, 0xdb6a796a, + .word 0x403663f7, 0x00000000, 0xbe94dbb3, 0xa60cceb2, + .word 0x4037beef, 0x00000000, 0xbeda51d7, 0x5fb0ef94, + .word 0x403918a1, 0x00000000, 0x3edb918c, 0xd6ab9c8d, + .word 0x403a7112, 0x00000000, 0xbec065bd, 0xb60a5dd4, + .word 0x403bc842, 0x00000000, 0x3ed02b6a, 0xee98ecb1, + .word 0x403d1e35, 0x00000000, 
0xbebca47d, 0x25b2f4c7, + .word 0x403e72ec, 0x00000000, 0x3eb17fa5, 0xb21cbdb6, + .word 0x403fc66a, 0x00000000, 0x3eae1601, 0x49209a69, + .word 0x40408c59, 0x00000000, 0xbeecc961, 0x871a7611, + .word 0x404134e2, 0x00000000, 0xbee2ddbe, 0x74803297, + .word 0x4041dcd2, 0x00000000, 0xbeea2ab5, 0x212856eb, + .word 0x40428429, 0x00000000, 0x3ee2c1e9, 0x8fe35da3, + .word 0x40432aea, 0x00000000, 0xbecd8751, 0xe5e0ae0d, + .word 0x4043d114, 0x00000000, 0x3eeb66a2, 0x98fc02ce, + .word 0x404476aa, 0x00000000, 0xbea9f022, 0xcb3b1c5b, + .word 0x40451bac, 0x00000000, 0xbeebe168, 0xdd6dd3fe, + .word 0x4045c01a, 0x00000000, 0x3edcfdeb, 0x43cfd006, + .word 0x404663f7, 0x00000000, 0xbea4dbb3, 0xa60cceb2, + .word 0x40470743, 0x00000000, 0xbed5887e, 0xc06b1ff2, + .word 0x4047a9ff, 0x00000000, 0xbedc17d1, 0x108740d9, + .word 0x40484c2c, 0x00000000, 0xbed7e87e, 0x268116ee, + .word 0x4048edcb, 0x00000000, 0xbec7cad4, 0x944a32be, + .word 0x40498edd, 0x00000000, 0x3eadf9c3, 0x7c0beb3a, + .word 0x404a2f63, 0x00000000, 0x3ed1905c, 0x35651c43, + .word 0x404acf5e, 0x00000000, 0x3ed6da76, 0x49f7f08f, + .word 0x404b6ecf, 0x00000000, 0x3ec75f95, 0xe96bed8d, + .word 0x404c0db7, 0x00000000, 0xbed91359, 0x08df8ec9, + .word 0x404cac16, 0x00000000, 0x3ede3b86, 0xe44b6265, + .word 0x404d49ee, 0x00000000, 0x3ee30c96, 0x5bf23d2d, + .word 0x404de740, 0x00000000, 0xbecc4eb7, 0xf11e41be, + .word 0x404e840c, 0x00000000, 0xbec8b195, 0xb338360c, + .word 0x404f2053, 0x00000000, 0x3edc9047, 0x93a3ba95, + .word 0x404fbc17, 0x00000000, 0xbee1bf65, 0xfd7715ca, + .word 0x40502bac, 0x00000000, 0xbef76cbe, 0x67113a18, + .word 0x4050790b, 0x00000000, 0xbee227e7, 0xfb487e73, + .word 0x4050c629, 0x00000000, 0x3efd550a, 0xa3a93ec8, + .word 0x40511308, 0x00000000, 0xbee2967a, 0x451a7b48, + .word 0x40515fa6, 0x00000000, 0x3efdaec2, 0x3fd65f8e, + .word 0x4051ac06, 0x00000000, 0xbef35b83, 0xe3eb5ce3, + .word 0x4051f826, 0x00000000, 0xbec24ee3, 0xd9a82f2e, + .word 0x40524408, 0x00000000, 0xbef53c7e, 0x319f6e92, + .word 
0x40528fab, 0x00000000, 0x3eead993, 0x41b181d1, + .word 0x4052db11, 0x00000000, 0xbead932a, 0x8487642e, + .word 0x40532639, 0x00000000, 0x3ef8daca, 0x0d66b8f9, + .word 0x40537125, 0x00000000, 0xbee8ad99, 0x09933766, + .word 0x4053bbd4, 0x00000000, 0xbef7d788, 0xc15a9f3d, + .word 0x40540646, 0x00000000, 0x3eed8d82, 0x24bad97a, + .word 0x4054507d, 0x00000000, 0xbe922b03, 0xc6b2a5f6, + .word 0x40549a78, 0x00000000, 0x3ef2f346, 0xe2bf924b, + .word 0x4054e439, 0x00000000, 0xbeffc5c1, 0x258110a4, + .word 0x40552dbe, 0x00000000, 0xbead9b4a, 0x641184f9, + .word 0x40557709, 0x00000000, 0x3edb3378, 0xcab10782, + .word 0x4055c01a, 0x00000000, 0x3eecfdeb, 0x43cfd006, + .word 0x405608f2, 0x00000000, 0xbef2f5ad, 0xd49a43fc, + .word 0x40565190, 0x00000000, 0xbedb9884, 0x591add87, + .word 0x405699f5, 0x00000000, 0x3ee2466a, 0x5c3462a4, + .word 0x4056e222, 0x00000000, 0xbee93179, 0x90d43957, + .word 0x40572a16, 0x00000000, 0x3eebe5e0, 0xc14a1a6d, + .word 0x405771d3, 0x00000000, 0xbef16041, 0x3106e405, + .word 0x4057b958, 0x00000000, 0xbef4eb95, 0x4eea2724, + .word 0x405800a5, 0x00000000, 0x3ef8c587, 0x150cabae, + .word 0x405847bc, 0x00000000, 0x3ee9ec30, 0xc6e3e04a, + .word 0x40588e9c, 0x00000000, 0x3efcb82c, 0x89692d99, + .word 0x4058d546, 0x00000000, 0x3efced70, 0xdc6acf42, + .word 0x40591bbb, 0x00000000, 0xbefdb83a, 0x3dd2d353, + .word 0x405961f9, 0x00000000, 0x3eb49d02, 0x6e33d676, + .word 0x4059a802, 0x00000000, 0x3eec8f11, 0x979a5db7, + .word 0x4059edd6, 0x00000000, 0x3efd66c9, 0x77e236c7, + .word 0x405a3376, 0x00000000, 0x3ec4fec0, 0xa13af882, + .word 0x405a78e1, 0x00000000, 0x3ef1bdef, 0xbd14a081, + .word 0x405abe18, 0x00000000, 0x3efe5fc7, 0xd238691d, + .word 0x405b031c, 0x00000000, 0xbed01f9b, 0xcb999fe9, + .word 0x405b47ec, 0x00000000, 0xbec18efa, 0xbeb7d722, + .word 0x405b8c89, 0x00000000, 0xbee203bc, 0xc3346511, + .word 0x405bd0f3, 0x00000000, 0xbed6186f, 0xcf54bbd3, + .word 0x405c152a, 0x00000000, 0x3efb0932, 0xb9700973, + .word 0x405c5930, 0x00000000, 0xbef4b5a9, 
0x2a606047, + .word 0x405c9d03, 0x00000000, 0xbec26b70, 0x98590071, + .word 0x405ce0a5, 0x00000000, 0xbefb7169, 0xe0cda8bd, + .word 0x405d2415, 0x00000000, 0xbeebfa06, 0xc156f521, + .word 0x405d6754, 0x00000000, 0xbedfcd15, 0xf101c142, + .word 0x405daa62, 0x00000000, 0x3ee10327, 0xdc8093a5, + .word 0x405ded40, 0x00000000, 0xbee5dee4, 0xd9d8a273, + .word 0x405e2fed, 0x00000000, 0x3eee84b9, 0x4c06f913, + .word 0x405e726b, 0x00000000, 0xbef7862a, 0xcb7ceb98, + .word 0x405eb4b8, 0x00000000, 0x3ef1f456, 0xf394f972, + .word 0x405ef6d6, 0x00000000, 0x3efcca38, 0x881f4780, + .word 0x405f38c5, 0x00000000, 0x3ef9ef31, 0x50343f8e, + .word 0x405f7a85, 0x00000000, 0x3efa32c1, 0xb3b3864c, + .word 0x405fbc17, 0x00000000, 0xbef1bf65, 0xfd7715ca, + .word 0x405ffd7a, 0x00000000, 0xbef95f00, 0x19518ce0, + .word 0x40601f57, 0x00000000, 0x3ef3b932, 0x6ff91960, + .word 0x40603fdb, 0x00000000, 0xbf0d1a19, 0xa0331af3, + .word 0x40606047, 0x00000000, 0x3ee9f24e, 0xb23e991f, + .word 0x4060809d, 0x00000000, 0xbedb011f, 0x855b4988, + .word 0x4060a0dc, 0x00000000, 0x3efa7c70, 0xfde006c7, + .word 0x4060c105, 0x00000000, 0x3e9ac754, 0xcb104aea, + .word 0x4060e117, 0x00000000, 0x3f0d535f, 0x0444ebab, + .word 0x40610114, 0x00000000, 0xbf03ab0d, 0xc56138c9, + .word 0x406120fa, 0x00000000, 0xbef630f3, 0xfc695a97, + .word 0x406140ca, 0x00000000, 0xbec5786a, 0xf187a96b, + .word 0x40616084, 0x00000000, 0x3f012578, 0x0181e2b3, + .word 0x40618029, 0x00000000, 0xbef846b4, 0x4ad8a38b, + .word 0x40619fb8, 0x00000000, 0xbf01c336, 0xf7a3a78f, + .word 0x4061bf31, 0x00000000, 0x3eee95d0, 0x0de3b514, + .word 0x4061de95, 0x00000000, 0x3eed9cbb, 0xa6187a4d, + .word 0x4061fde4, 0x00000000, 0xbef678bf, 0x6cdedf51, + .word 0x40621d1d, 0x00000000, 0x3f06edb5, 0x668c543d, + .word 0x40623c42, 0x00000000, 0xbef5ec6c, 0x1bfbf89a, + .word 0x40625b51, 0x00000000, 0x3f062dcf, 0x4115a1a3, + .word 0x40627a4c, 0x00000000, 0x3ec6172f, 0xe015e13c, + .word 0x40629932, 0x00000000, 0xbed30dd5, 0x3f5c184c, + .word 0x4062b803, 
0x00000000, 0x3f01cfde, 0xb43cfd00, + .word 0x4062d6c0, 0x00000000, 0x3ee35013, 0x8064a94e, + .word 0x4062f568, 0x00000000, 0x3f0d7acf, 0xc98509e3, + .word 0x406313fd, 0x00000000, 0xbf0d7932, 0x43718371, + .word 0x4063327c, 0x00000000, 0x3f0aad27, 0x29b21ae5, + .word 0x406350e8, 0x00000000, 0x3ef92b83, 0xec743665, + .word 0x40636f40, 0x00000000, 0xbec249ba, 0x76fee235, + .word 0x40638d84, 0x00000000, 0xbeefd0a2, 0xf6d7e41e, + .word 0x4063abb4, 0x00000000, 0xbec57f7a, 0x64ccd537, + .word 0x4063c9d0, 0x00000000, 0x3f09242b, 0x8488b305, + .word 0x4063e7d9, 0x00000000, 0x3efbcfb8, 0x0b357154, + .word 0x406405cf, 0x00000000, 0xbf0cb1c2, 0xd10504b4, + .word 0x406423b0, 0x00000000, 0x3f0fa61a, 0xaa59c1d8, + .word 0x4064417f, 0x00000000, 0x3ef26410, 0xb256d8d7, + .word 0x40645f3b, 0x00000000, 0xbf09d77e, 0x31d6ca00, + .word 0x40647ce3, 0x00000000, 0xbeda5fb4, 0xf23978de, + .word 0x40649a78, 0x00000000, 0x3f02f346, 0xe2bf924b, + .word 0x4064b7fb, 0x00000000, 0xbf0106da, 0x1aa0e9e7, + .word 0x4064d56a, 0x00000000, 0x3f06ccf3, 0xb1129b7c, + .word 0x4064f2c7, 0x00000000, 0x3f006a7c, 0xcf9dd420, + .word 0x40651012, 0x00000000, 0xbf0e3dd5, 0xc1c885ae, + .word 0x40652d49, 0x00000000, 0x3f00b91e, 0x4253bd27, + .word 0x40654a6f, 0x00000000, 0xbf0cd6af, 0x1c9393cd, + .word 0x40656781, 0x00000000, 0x3f0ee1ac, 0x0b1ec5ea, + .word 0x40658482, 0x00000000, 0x3ef34c4e, 0x99e1c6c6, + .word 0x4065a171, 0x00000000, 0xbf06d01c, 0xa8f50e5f, + .word 0x4065be4d, 0x00000000, 0x3ed96a28, 0x6955d67e, + .word 0x4065db17, 0x00000000, 0x3f0d4210, 0x4f127092, + .word 0x4065f7d0, 0x00000000, 0xbed7c3ec, 0xa28e69ca, + .word 0x40661477, 0x00000000, 0xbf07f393, 0xbdd98c47, + .word 0x4066310c, 0x00000000, 0xbf0c2ab3, 0xedefe569, + .word 0x40664d8f, 0x00000000, 0xbef44732, 0x0833c207, + .word 0x40666a01, 0x00000000, 0xbf0c6e1d, 0xcd0cb449, + .word 0x40668661, 0x00000000, 0xbefb4848, 0x3c643a24, + .word 0x4066a2b0, 0x00000000, 0xbf08697c, 0x3d7dfd9b, + .word 0x4066beed, 0x00000000, 0x3ef12866, 0xd705c554, + 
.word 0x4066db19, 0x00000000, 0x3f0a9d86, 0x52765f7c, + .word 0x4066f735, 0x00000000, 0xbf0d0e8e, 0x7a165e04, + .word 0x4067133f, 0x00000000, 0xbf093aa4, 0xe106ba60, + .word 0x40672f38, 0x00000000, 0xbf04bace, 0x940d18ba, + .word 0x40674b20, 0x00000000, 0xbef4d8fc, 0x561c8d44, + .word 0x406766f7, 0x00000000, 0x3ef5931e, 0xf6e6f15b, + .word 0x406782be, 0x00000000, 0xbf000896, 0x6a210de0, + .word 0x40679e74, 0x00000000, 0xbf05dbfe, 0x780eccdb, + .word 0x4067ba19, 0x00000000, 0xbecb2bf4, 0x6fd85522, + .word 0x4067d5ae, 0x00000000, 0xbefd2fc3, 0xaddfdee2, + .word 0x4067f132, 0x00000000, 0x3ef0c167, 0x8ae89767, + .word 0x40680ca6, 0x00000000, 0x3ef034a6, 0xfc6488d1, + .word 0x4068280a, 0x00000000, 0xbef520c7, 0xc69211fe, + .word 0x4068435d, 0x00000000, 0x3f05328d, 0xdcedf39e, + .word 0x40685ea1, 0x00000000, 0xbf03d361, 0x367bde41, + .word 0x406879d4, 0x00000000, 0xbebc2624, 0x7a0cdfbb, + .word 0x406894f7, 0x00000000, 0x3f02c1bb, 0xe2d01ba9, + .word 0x4068b00b, 0x00000000, 0xbf043a4a, 0xd5c7a4dd, + .word 0x4068cb0e, 0x00000000, 0x3efda59d, 0xded9b445, + .word 0x4068e602, 0x00000000, 0x3eb11eb3, 0x043f5602, + .word 0x406900e6, 0x00000000, 0x3ee60002, 0xccfe43f5, + .word 0x40691bbb, 0x00000000, 0xbf0db83a, 0x3dd2d353, + .word 0x4069367f, 0x00000000, 0x3f0b682a, 0xcba73219, + .word 0x40695135, 0x00000000, 0xbef53d8e, 0x8e4c59c3, + .word 0x40696bdb, 0x00000000, 0xbef6a9a5, 0x050809db, + .word 0x40698671, 0x00000000, 0x3f0db68e, 0x0ba15359, + .word 0x4069a0f9, 0x00000000, 0xbef6278f, 0xd810b546, + .word 0x4069bb71, 0x00000000, 0xbec528c6, 0xcdef4d8d, + .word 0x4069d5da, 0x00000000, 0xbeb57f7a, 0x64ccd537, + .word 0x4069f034, 0x00000000, 0xbee33716, 0xa9ae332f, + .word 0x406a0a7f, 0x00000000, 0xbef2d9f7, 0x698ce769, + .word 0x406a24bb, 0x00000000, 0xbef48c02, 0x44aa8cfc, + .word 0x406a3ee8, 0x00000000, 0xbed8e3cf, 0xc25f0ce6, + .word 0x406a5906, 0x00000000, 0x3f0044c5, 0x590979a0, + .word 0x406a7316, 0x00000000, 0xbef7e86f, 0x9c2154fb, + .word 0x406a8d17, 0x00000000, 
0xbf03a076, 0x2ed351cd, + .word 0x406aa709, 0x00000000, 0xbed4ffd6, 0x59064390, + .word 0x406ac0ed, 0x00000000, 0xbf04d9bb, 0x3135f0b1, + .word 0x406adac2, 0x00000000, 0xbee8ee37, 0xcd2ea9d3, + .word 0x406af489, 0x00000000, 0xbf02ba1b, 0x4a95229c, + .word 0x406b0e41, 0x00000000, 0x3ef35e64, 0x35ebd377, + .word 0x406b27eb, 0x00000000, 0x3f02fe3c, 0x2291b5ad, + .word 0x406b4187, 0x00000000, 0x3efa5480, 0x45ecbc5d, + .word 0x406b5b15, 0x00000000, 0xbedee0d3, 0x3432f2c3, + .word 0x406b7495, 0x00000000, 0xbf0c2ab3, 0x496d2d24, + .word 0x406b8e06, 0x00000000, 0x3ef04439, 0x848e9d1e, + .word 0x406ba76a, 0x00000000, 0xbf03186d, 0xa6fc41e0, + .word 0x406bc0bf, 0x00000000, 0x3f05fc8d, 0x8164754e, + .word 0x406bda07, 0x00000000, 0x3eecc67e, 0x6db516de, + .word 0x406bf341, 0x00000000, 0x3ee14464, 0xa6bcdf48, + .word 0x406c0c6d, 0x00000000, 0x3f011f17, 0x74d8b66a, + .word 0x406c258c, 0x00000000, 0xbefd4cdb, 0xebaa4121, + .word 0x406c3e9d, 0x00000000, 0xbf074797, 0xeab3259d, + .word 0x406c57a0, 0x00000000, 0xbee44a49, 0xa82ed669, + .word 0x406c7096, 0x00000000, 0xbf045b87, 0x8e27d0d9, + .word 0x406c897e, 0x00000000, 0xbec7c929, 0xc9e33277, + .word 0x406ca259, 0x00000000, 0xbef1ab66, 0x74e5008e, + .word 0x406cbb26, 0x00000000, 0x3f09333f, 0x3d6bb35f, + .word 0x406cd3e7, 0x00000000, 0xbf07cd5d, 0xbe4f6f23, + .word 0x406cec9a, 0x00000000, 0xbf0848eb, 0x7f40a752, + .word 0x406d053f, 0x00000000, 0x3f0b4982, 0x259cc626, + .word 0x406d1dd8, 0x00000000, 0x3ee9b4c3, 0xf0c92723, + .word 0x406d3664, 0x00000000, 0xbf036033, 0x8ab5a1f2, + .word 0x406d4ee2, 0x00000000, 0x3f015971, 0x8aacb6ec, + .word 0x406d6754, 0x00000000, 0xbeefcd15, 0xf101c142, + .word 0x406d7fb9, 0x00000000, 0xbf0bd935, 0x64ee1bf6, + .word 0x406d9810, 0x00000000, 0x3f090f59, 0x8530f102, + .word 0x406db05b, 0x00000000, 0x3f0a28be, 0xd929effb, + .word 0x406dc89a, 0x00000000, 0xbf053002, 0xa4e86631, + .word 0x406de0cb, 0x00000000, 0x3efcb99c, 0x5233429f, + .word 0x406df8f0, 0x00000000, 0x3ef04357, 0x9625f7a4, + .word 
0x406e1108, 0x00000000, 0x3f0b6bdd, 0x258a7b23, + .word 0x406e2914, 0x00000000, 0x3ef70700, 0xa00fdd55, + .word 0x406e4113, 0x00000000, 0x3f0bab95, 0x4f46b93f, + .word 0x406e5906, 0x00000000, 0x3efe4411, 0x672b0c89, + .word 0x406e70ed, 0x00000000, 0xbf06e041, 0xe4467502, + .word 0x406e88c7, 0x00000000, 0xbf032765, 0x63557797, + .word 0x406ea094, 0x00000000, 0x3f0d7b8f, 0x0e7b8e75, + .word 0x406eb856, 0x00000000, 0xbeccd5dc, 0x13cad28e, + .word 0x406ed00b, 0x00000000, 0x3f0222fb, 0x08d5c3f2, + .word 0x406ee7b4, 0x00000000, 0x3f0c6cea, 0x541f5b70, + .word 0x406eff52, 0x00000000, 0xbf0fd40b, 0x070e6c33, + .word 0x406f16e3, 0x00000000, 0xbf0f8922, 0x73f1379b, + .word 0x406f2e68, 0x00000000, 0xbf0fa051, 0xeebd4f74, + .word 0x406f45e1, 0x00000000, 0xbf0d0c3e, 0x6aac6ca9, + .word 0x406f5d4e, 0x00000000, 0xbf04c432, 0x5068bc88, + .word 0x406f74af, 0x00000000, 0xbede20a0, 0xa450bc93, + .word 0x406f8c04, 0x00000000, 0x3f08f3a3, 0x1a23946e, + .word 0x406fa34e, 0x00000000, 0x3ee177c2, 0x3362928c, + .word 0x406fba8c, 0x00000000, 0x3ec71513, 0x7cfebaa0, + .word 0x406fd1be, 0x00000000, 0x3f031fca, 0xbe50ac88, + .word 0x406fe8e5, 0x00000000, 0xbedd485c, 0xbfb44c3b, +! + .word 0x01a56e1f, 0xc2f8f359, ! _TINY = 1.0e-300 + .word 0x7e37e43c, 0x8800759c, ! _HUGE = 1.0e+300 + .word 0x3f6d94ae, 0x0bf85de6, ! KA1_LO = (1.41052154268147309568e-05*256) + .word 0x40871540, 0x00000000, ! KA1_HI = (2.8853759765625e+00*256) + .word 0x3cd5d528, 0x93bc7fec, ! KB5 = 1.21195555854068860923e-15 + .word 0x3e2c6b08, 0xd71f5d1e, ! KB3 = 3.30830268126604677436e-09 + .word 0x3ecebfbd, 0xff82c4ed, ! KB2 = 3.66556559691003767877e-06 + .word 0x3f662e42, 0xfefa39ef, ! KB1 = 2.70760617406228636578e-03 +! +! __mt_constexp2[2*i] = high order bits 2^(i/256), i = [0, 255] +! 
__mt_constexp2[2*i+1] = least bits 2^(i/256), i = [0, 255] + + .word 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, + .word 0x3ff00b1a, 0xfa5abcbf, 0xbc84f6b2, 0xa7609f71, + .word 0x3ff0163d, 0xa9fb3335, 0x3c9b6129, 0x9ab8cdb7, + .word 0x3ff02168, 0x143b0281, 0xbc82bf31, 0x0fc54eb6, + .word 0x3ff02c9a, 0x3e778061, 0xbc719083, 0x535b085d, + .word 0x3ff037d4, 0x2e11bbcc, 0x3c656811, 0xeeade11a, + .word 0x3ff04315, 0xe86e7f85, 0xbc90a31c, 0x1977c96e, + .word 0x3ff04e5f, 0x72f654b1, 0x3c84c379, 0x3aa0d08c, + .word 0x3ff059b0, 0xd3158574, 0x3c8d73e2, 0xa475b465, + .word 0x3ff0650a, 0x0e3c1f89, 0xbc95cb7b, 0x5799c397, + .word 0x3ff0706b, 0x29ddf6de, 0xbc8c91df, 0xe2b13c27, + .word 0x3ff07bd4, 0x2b72a836, 0x3c832334, 0x54458700, + .word 0x3ff08745, 0x18759bc8, 0x3c6186be, 0x4bb284ff, + .word 0x3ff092bd, 0xf66607e0, 0xbc968063, 0x800a3fd1, + .word 0x3ff09e3e, 0xcac6f383, 0x3c914878, 0x18316136, + .word 0x3ff0a9c7, 0x9b1f3919, 0x3c85d16c, 0x873d1d38, + .word 0x3ff0b558, 0x6cf9890f, 0x3c98a62e, 0x4adc610b, + .word 0x3ff0c0f1, 0x45e46c85, 0x3c94f989, 0x06d21cef, + .word 0x3ff0cc92, 0x2b7247f7, 0x3c901edc, 0x16e24f71, + .word 0x3ff0d83b, 0x23395dec, 0xbc9bc14d, 0xe43f316a, + .word 0x3ff0e3ec, 0x32d3d1a2, 0x3c403a17, 0x27c57b52, + .word 0x3ff0efa5, 0x5fdfa9c5, 0xbc949db9, 0xbc54021b, + .word 0x3ff0fb66, 0xaffed31b, 0xbc6b9bed, 0xc44ebd7b, + .word 0x3ff10730, 0x28d7233e, 0x3c8d46eb, 0x1692fdd5, + .word 0x3ff11301, 0xd0125b51, 0xbc96c510, 0x39449b3a, + .word 0x3ff11edb, 0xab5e2ab6, 0xbc9ca454, 0xf703fb72, + .word 0x3ff12abd, 0xc06c31cc, 0xbc51b514, 0xb36ca5c7, + .word 0x3ff136a8, 0x14f204ab, 0xbc67108f, 0xba48dcf0, + .word 0x3ff1429a, 0xaea92de0, 0xbc932fbf, 0x9af1369e, + .word 0x3ff14e95, 0x934f312e, 0xbc8b91e8, 0x39bf44ab, + .word 0x3ff15a98, 0xc8a58e51, 0x3c82406a, 0xb9eeab0a, + .word 0x3ff166a4, 0x5471c3c2, 0x3c58f23b, 0x82ea1a32, + .word 0x3ff172b8, 0x3c7d517b, 0xbc819041, 0xb9d78a76, + .word 0x3ff17ed4, 0x8695bbc0, 0x3c709e3f, 0xe2ac5a64, + .word 0x3ff18af9, 0x388c8dea, 
0xbc911023, 0xd1970f6c, + .word 0x3ff19726, 0x58375d2f, 0x3c94aadd, 0x85f17e08, + .word 0x3ff1a35b, 0xeb6fcb75, 0x3c8e5b4c, 0x7b4968e4, + .word 0x3ff1af99, 0xf8138a1c, 0x3c97bf85, 0xa4b69280, + .word 0x3ff1bbe0, 0x84045cd4, 0xbc995386, 0x352ef607, + .word 0x3ff1c82f, 0x95281c6b, 0x3c900977, 0x8010f8c9, + .word 0x3ff1d487, 0x3168b9aa, 0x3c9e016e, 0x00a2643c, + .word 0x3ff1e0e7, 0x5eb44027, 0xbc96fdd8, 0x088cb6de, + .word 0x3ff1ed50, 0x22fcd91d, 0xbc91df98, 0x027bb78c, + .word 0x3ff1f9c1, 0x8438ce4d, 0xbc9bf524, 0xa097af5c, + .word 0x3ff2063b, 0x88628cd6, 0x3c8dc775, 0x814a8495, + .word 0x3ff212be, 0x3578a819, 0x3c93592d, 0x2cfcaac9, + .word 0x3ff21f49, 0x917ddc96, 0x3c82a97e, 0x9494a5ee, + .word 0x3ff22bdd, 0xa27912d1, 0x3c8d34fb, 0x5577d69f, + .word 0x3ff2387a, 0x6e756238, 0x3c99b07e, 0xb6c70573, + .word 0x3ff2451f, 0xfb82140a, 0x3c8acfcc, 0x911ca996, + .word 0x3ff251ce, 0x4fb2a63f, 0x3c8ac155, 0xbef4f4a4, + .word 0x3ff25e85, 0x711ece75, 0x3c93e1a2, 0x4ac31b2c, + .word 0x3ff26b45, 0x65e27cdd, 0x3c82bd33, 0x9940e9d9, + .word 0x3ff2780e, 0x341ddf29, 0x3c9e067c, 0x05f9e76c, + .word 0x3ff284df, 0xe1f56381, 0xbc9a4c3a, 0x8c3f0d7e, + .word 0x3ff291ba, 0x7591bb70, 0xbc82cc72, 0x28401cbd, + .word 0x3ff29e9d, 0xf51fdee1, 0x3c8612e8, 0xafad1255, + .word 0x3ff2ab8a, 0x66d10f13, 0xbc995743, 0x191690a7, + .word 0x3ff2b87f, 0xd0dad990, 0xbc410adc, 0xd6381aa4, + .word 0x3ff2c57e, 0x39771b2f, 0xbc950145, 0xa6eb5124, + .word 0x3ff2d285, 0xa6e4030b, 0x3c900247, 0x54db41d5, + .word 0x3ff2df96, 0x1f641589, 0x3c9d16cf, 0xfbbce198, + .word 0x3ff2ecaf, 0xa93e2f56, 0x3c71ca0f, 0x45d52383, + .word 0x3ff2f9d2, 0x4abd886b, 0xbc653c55, 0x532bda93, + .word 0x3ff306fe, 0x0a31b715, 0x3c86f46a, 0xd23182e4, + .word 0x3ff31432, 0xedeeb2fd, 0x3c8959a3, 0xf3f3fcd1, + .word 0x3ff32170, 0xfc4cd831, 0x3c8a9ce7, 0x8e18047c, + .word 0x3ff32eb8, 0x3ba8ea32, 0xbc9c45e8, 0x3cb4f318, + .word 0x3ff33c08, 0xb26416ff, 0x3c932721, 0x843659a6, + .word 0x3ff34962, 0x66e3fa2d, 0xbc835a75, 0x930881a4, + .word 
0x3ff356c5, 0x5f929ff1, 0xbc8b5cee, 0x5c4e4628, + .word 0x3ff36431, 0xa2de883b, 0xbc8c3144, 0xa06cb85e, + .word 0x3ff371a7, 0x373aa9cb, 0xbc963aea, 0xbf42eae2, + .word 0x3ff37f26, 0x231e754a, 0xbc99f5ca, 0x9eceb23c, + .word 0x3ff38cae, 0x6d05d866, 0xbc9e958d, 0x3c9904bd, + .word 0x3ff39a40, 0x1b7140ef, 0xbc99a9a5, 0xfc8e2934, + .word 0x3ff3a7db, 0x34e59ff7, 0xbc75e436, 0xd661f5e3, + .word 0x3ff3b57f, 0xbfec6cf4, 0x3c954c66, 0xe26fff18, + .word 0x3ff3c32d, 0xc313a8e5, 0xbc9efff8, 0x375d29c3, + .word 0x3ff3d0e5, 0x44ede173, 0x3c7fe8d0, 0x8c284c71, + .word 0x3ff3dea6, 0x4c123422, 0x3c8ada09, 0x11f09ebc, + .word 0x3ff3ec70, 0xdf1c5175, 0xbc8af663, 0x7b8c9bca, + .word 0x3ff3fa45, 0x04ac801c, 0xbc97d023, 0xf956f9f3, + .word 0x3ff40822, 0xc367a024, 0x3c8bddf8, 0xb6f4d048, + .word 0x3ff4160a, 0x21f72e2a, 0xbc5ef369, 0x1c309278, + .word 0x3ff423fb, 0x2709468a, 0xbc98462d, 0xc0b314dd, + .word 0x3ff431f5, 0xd950a897, 0xbc81c7dd, 0xe35f7999, + .word 0x3ff43ffa, 0x3f84b9d4, 0x3c8880be, 0x9704c003, + .word 0x3ff44e08, 0x6061892d, 0x3c489b7a, 0x04ef80d0, + .word 0x3ff45c20, 0x42a7d232, 0xbc686419, 0x82fb1f8e, + .word 0x3ff46a41, 0xed1d0057, 0x3c9c944b, 0xd1648a76, + .word 0x3ff4786d, 0x668b3237, 0xbc9c20f0, 0xed445733, + .word 0x3ff486a2, 0xb5c13cd0, 0x3c73c1a3, 0xb69062f0, + .word 0x3ff494e1, 0xe192aed2, 0xbc83b289, 0x5e499ea0, + .word 0x3ff4a32a, 0xf0d7d3de, 0x3c99cb62, 0xf3d1be56, + .word 0x3ff4b17d, 0xea6db7d7, 0xbc8125b8, 0x7f2897f0, + .word 0x3ff4bfda, 0xd5362a27, 0x3c7d4397, 0xafec42e2, + .word 0x3ff4ce41, 0xb817c114, 0x3c905e29, 0x690abd5d, + .word 0x3ff4dcb2, 0x99fddd0d, 0x3c98ecdb, 0xbc6a7833, + .word 0x3ff4eb2d, 0x81d8abff, 0xbc95257d, 0x2e5d7a52, + .word 0x3ff4f9b2, 0x769d2ca7, 0xbc94b309, 0xd25957e3, + .word 0x3ff50841, 0x7f4531ee, 0x3c7a249b, 0x49b7465f, + .word 0x3ff516da, 0xa2cf6642, 0xbc8f7685, 0x69bd93ef, + .word 0x3ff5257d, 0xe83f4eef, 0xbc7c998d, 0x43efef71, + .word 0x3ff5342b, 0x569d4f82, 0xbc807abe, 0x1db13cad, + .word 0x3ff542e2, 0xf4f6ad27, 0x3c87926d, 
0x192d5f7e, + .word 0x3ff551a4, 0xca5d920f, 0xbc8d689c, 0xefede59b, + .word 0x3ff56070, 0xdde910d2, 0xbc90fb6e, 0x168eebf0, + .word 0x3ff56f47, 0x36b527da, 0x3c99bb2c, 0x011d93ad, + .word 0x3ff57e27, 0xdbe2c4cf, 0xbc90b98c, 0x8a57b9c4, + .word 0x3ff58d12, 0xd497c7fd, 0x3c8295e1, 0x5b9a1de8, + .word 0x3ff59c08, 0x27ff07cc, 0xbc97e2ce, 0xe467e60f, + .word 0x3ff5ab07, 0xdd485429, 0x3c96324c, 0x054647ad, + .word 0x3ff5ba11, 0xfba87a03, 0xbc9b77a1, 0x4c233e1a, + .word 0x3ff5c926, 0x8a5946b7, 0x3c3c4b1b, 0x816986a2, + .word 0x3ff5d845, 0x90998b93, 0xbc9cd6a7, 0xa8b45643, + .word 0x3ff5e76f, 0x15ad2148, 0x3c9ba6f9, 0x3080e65e, + .word 0x3ff5f6a3, 0x20dceb71, 0xbc89eadd, 0xe3cdcf92, + .word 0x3ff605e1, 0xb976dc09, 0xbc93e242, 0x9b56de47, + .word 0x3ff6152a, 0xe6cdf6f4, 0x3c9e4b3e, 0x4ab84c27, + .word 0x3ff6247e, 0xb03a5585, 0xbc9383c1, 0x7e40b497, + .word 0x3ff633dd, 0x1d1929fd, 0x3c984710, 0xbeb964e5, + .word 0x3ff64346, 0x34ccc320, 0xbc8c483c, 0x759d8933, + .word 0x3ff652b9, 0xfebc8fb7, 0xbc9ae3d5, 0xc9a73e09, + .word 0x3ff66238, 0x82552225, 0xbc9bb609, 0x87591c34, + .word 0x3ff671c1, 0xc70833f6, 0xbc8e8732, 0x586c6134, + .word 0x3ff68155, 0xd44ca973, 0x3c6038ae, 0x44f73e65, + .word 0x3ff690f4, 0xb19e9538, 0x3c8804bd, 0x9aeb445d, + .word 0x3ff6a09e, 0x667f3bcd, 0xbc9bdd34, 0x13b26456, + .word 0x3ff6b052, 0xfa75173e, 0x3c7a38f5, 0x2c9a9d0e, + .word 0x3ff6c012, 0x750bdabf, 0xbc728956, 0x67ff0b0d, + .word 0x3ff6cfdc, 0xddd47645, 0x3c9c7aa9, 0xb6f17309, + .word 0x3ff6dfb2, 0x3c651a2f, 0xbc6bbe3a, 0x683c88ab, + .word 0x3ff6ef92, 0x98593ae5, 0xbc90b974, 0x9e1ac8b2, + .word 0x3ff6ff7d, 0xf9519484, 0xbc883c0f, 0x25860ef6, + .word 0x3ff70f74, 0x66f42e87, 0x3c59d644, 0xd45aa65f, + .word 0x3ff71f75, 0xe8ec5f74, 0xbc816e47, 0x86887a99, + .word 0x3ff72f82, 0x86ead08a, 0xbc920aa0, 0x2cd62c72, + .word 0x3ff73f9a, 0x48a58174, 0xbc90a8d9, 0x6c65d53c, + .word 0x3ff74fbd, 0x35d7cbfd, 0x3c9047fd, 0x618a6e1c, + .word 0x3ff75feb, 0x564267c9, 0xbc902459, 0x57316dd3, + .word 0x3ff77024, 
0xb1ab6e09, 0x3c9b7877, 0x169147f8, + .word 0x3ff78069, 0x4fde5d3f, 0x3c9866b8, 0x0a02162d, + .word 0x3ff790b9, 0x38ac1cf6, 0x3c9349a8, 0x62aadd3e, + .word 0x3ff7a114, 0x73eb0187, 0xbc841577, 0xee04992f, + .word 0x3ff7b17b, 0x0976cfdb, 0xbc9bebb5, 0x8468dc88, + .word 0x3ff7c1ed, 0x0130c132, 0x3c9f124c, 0xd1164dd6, + .word 0x3ff7d26a, 0x62ff86f0, 0x3c91bddb, 0xfb72b8b4, + .word 0x3ff7e2f3, 0x36cf4e62, 0x3c705d02, 0xba15797e, + .word 0x3ff7f387, 0x8491c491, 0xbc807f11, 0xcf9311ae, + .word 0x3ff80427, 0x543e1a12, 0xbc927c86, 0x626d972b, + .word 0x3ff814d2, 0xadd106d9, 0x3c946437, 0x0d151d4d, + .word 0x3ff82589, 0x994cce13, 0xbc9d4c1d, 0xd41532d8, + .word 0x3ff8364c, 0x1eb941f7, 0x3c999b9a, 0x31df2bd5, + .word 0x3ff8471a, 0x4623c7ad, 0xbc88d684, 0xa341cdfb, + .word 0x3ff857f4, 0x179f5b21, 0xbc5ba748, 0xf8b216d0, + .word 0x3ff868d9, 0x9b4492ed, 0xbc9fc6f8, 0x9bd4f6ba, + .word 0x3ff879ca, 0xd931a436, 0x3c85d2d7, 0xd2db47bd, + .word 0x3ff88ac7, 0xd98a6699, 0x3c9994c2, 0xf37cb53a, + .word 0x3ff89bd0, 0xa478580f, 0x3c9d5395, 0x4475202a, + .word 0x3ff8ace5, 0x422aa0db, 0x3c96e9f1, 0x56864b27, + .word 0x3ff8be05, 0xbad61778, 0x3c9ecb5e, 0xfc43446e, + .word 0x3ff8cf32, 0x16b5448c, 0xbc70d55e, 0x32e9e3aa, + .word 0x3ff8e06a, 0x5e0866d9, 0xbc97114a, 0x6fc9b2e6, + .word 0x3ff8f1ae, 0x99157736, 0x3c85cc13, 0xa2e3976c, + .word 0x3ff902fe, 0xd0282c8a, 0x3c9592ca, 0x85fe3fd2, + .word 0x3ff9145b, 0x0b91ffc6, 0xbc9dd679, 0x2e582524, + .word 0x3ff925c3, 0x53aa2fe2, 0xbc83455f, 0xa639db7f, + .word 0x3ff93737, 0xb0cdc5e5, 0xbc675fc7, 0x81b57ebc, + .word 0x3ff948b8, 0x2b5f98e5, 0xbc8dc3d6, 0x797d2d99, + .word 0x3ff95a44, 0xcbc8520f, 0xbc764b7c, 0x96a5f039, + .word 0x3ff96bdd, 0x9a7670b3, 0xbc5ba596, 0x7f19c896, + .word 0x3ff97d82, 0x9fde4e50, 0xbc9d185b, 0x7c1b85d1, + .word 0x3ff98f33, 0xe47a22a2, 0x3c7cabda, 0xa24c78ec, + .word 0x3ff9a0f1, 0x70ca07ba, 0xbc9173bd, 0x91cee632, + .word 0x3ff9b2bb, 0x4d53fe0d, 0xbc9dd84e, 0x4df6d518, + .word 0x3ff9c491, 0x82a3f090, 0x3c7c7c46, 0xb071f2be, + 
.word 0x3ff9d674, 0x194bb8d5, 0xbc9516be, 0xa3dd8233, + .word 0x3ff9e863, 0x19e32323, 0x3c7824ca, 0x78e64c6e, + .word 0x3ff9fa5e, 0x8d07f29e, 0xbc84a9ce, 0xaaf1face, + .word 0x3ffa0c66, 0x7b5de565, 0xbc935949, 0x5d1cd533, + .word 0x3ffa1e7a, 0xed8eb8bb, 0x3c9c6618, 0xee8be70e, + .word 0x3ffa309b, 0xec4a2d33, 0x3c96305c, 0x7ddc36ab, + .word 0x3ffa42c9, 0x80460ad8, 0xbc9aa780, 0x589fb120, + .word 0x3ffa5503, 0xb23e255d, 0xbc9d2f6e, 0xdb8d41e1, + .word 0x3ffa674a, 0x8af46052, 0x3c650f56, 0x30670366, + .word 0x3ffa799e, 0x1330b358, 0x3c9bcb7e, 0xcac563c7, + .word 0x3ffa8bfe, 0x53c12e59, 0xbc94f867, 0xb2ba15a9, + .word 0x3ffa9e6b, 0x5579fdbf, 0x3c90fac9, 0x0ef7fd31, + .word 0x3ffab0e5, 0x21356eba, 0x3c889c31, 0xdae94545, + .word 0x3ffac36b, 0xbfd3f37a, 0xbc8f9234, 0xcae76cd0, + .word 0x3ffad5ff, 0x3a3c2774, 0x3c97ef3b, 0xb6b1b8e5, + .word 0x3ffae89f, 0x995ad3ad, 0x3c97a1cd, 0x345dcc81, + .word 0x3ffafb4c, 0xe622f2ff, 0xbc94b2fc, 0x0f315ecd, + .word 0x3ffb0e07, 0x298db666, 0xbc9bdef5, 0x4c80e425, + .word 0x3ffb20ce, 0x6c9a8952, 0x3c94dd02, 0x4a0756cc, + .word 0x3ffb33a2, 0xb84f15fb, 0xbc62805e, 0x3084d708, + .word 0x3ffb4684, 0x15b749b1, 0xbc7f763d, 0xe9df7c90, + .word 0x3ffb5972, 0x8de5593a, 0xbc9c71df, 0xbbba6de3, + .word 0x3ffb6c6e, 0x29f1c52a, 0x3c92a8f3, 0x52883f6e, + .word 0x3ffb7f76, 0xf2fb5e47, 0xbc75584f, 0x7e54ac3b, + .word 0x3ffb928c, 0xf22749e4, 0xbc9b7216, 0x54cb65c6, + .word 0x3ffba5b0, 0x30a1064a, 0xbc9efcd3, 0x0e54292e, + .word 0x3ffbb8e0, 0xb79a6f1f, 0xbc3f52d1, 0xc9696205, + .word 0x3ffbcc1e, 0x904bc1d2, 0x3c823dd0, 0x7a2d9e84, + .word 0x3ffbdf69, 0xc3f3a207, 0xbc3c2623, 0x60ea5b52, + .word 0x3ffbf2c2, 0x5bd71e09, 0xbc9efdca, 0x3f6b9c73, + .word 0x3ffc0628, 0x6141b33d, 0xbc8d8a5a, 0xa1fbca34, + .word 0x3ffc199b, 0xdd85529c, 0x3c811065, 0x895048dd, + .word 0x3ffc2d1c, 0xd9fa652c, 0xbc96e516, 0x17c8a5d7, + .word 0x3ffc40ab, 0x5fffd07a, 0x3c9b4537, 0xe083c60a, + .word 0x3ffc5447, 0x78fafb22, 0x3c912f07, 0x2493b5af, + .word 0x3ffc67f1, 0x2e57d14b, 
0x3c92884d, 0xff483cad, + .word 0x3ffc7ba8, 0x8988c933, 0xbc8e76bb, 0xbe255559, + .word 0x3ffc8f6d, 0x9406e7b5, 0x3c71acbc, 0x48805c44, + .word 0x3ffca340, 0x5751c4db, 0xbc87f2be, 0xd10d08f5, + .word 0x3ffcb720, 0xdcef9069, 0x3c7503cb, 0xd1e949db, + .word 0x3ffccb0f, 0x2e6d1675, 0xbc7d220f, 0x86009092, + .word 0x3ffcdf0b, 0x555dc3fa, 0xbc8dd83b, 0x53829d72, + .word 0x3ffcf315, 0x5b5bab74, 0xbc9a08e9, 0xb86dff57, + .word 0x3ffd072d, 0x4a07897c, 0xbc9cbc37, 0x43797a9c, + .word 0x3ffd1b53, 0x2b08c968, 0x3c955636, 0x219a36ee, + .word 0x3ffd2f87, 0x080d89f2, 0xbc9d487b, 0x719d8578, + .word 0x3ffd43c8, 0xeacaa1d6, 0x3c93db53, 0xbf5a1614, + .word 0x3ffd5818, 0xdcfba487, 0x3c82ed02, 0xd75b3707, + .word 0x3ffd6c76, 0xe862e6d3, 0x3c5fe87a, 0x4a8165a0, + .word 0x3ffd80e3, 0x16c98398, 0xbc911ec1, 0x8beddfe8, + .word 0x3ffd955d, 0x71ff6075, 0x3c9a052d, 0xbb9af6be, + .word 0x3ffda9e6, 0x03db3285, 0x3c9c2300, 0x696db532, + .word 0x3ffdbe7c, 0xd63a8315, 0xbc9b76f1, 0x926b8be4, + .word 0x3ffdd321, 0xf301b460, 0x3c92da57, 0x78f018c3, + .word 0x3ffde7d5, 0x641c0658, 0xbc9ca552, 0x8e79ba8f, + .word 0x3ffdfc97, 0x337b9b5f, 0xbc91a5cd, 0x4f184b5c, + .word 0x3ffe1167, 0x6b197d17, 0xbc72b529, 0xbd5c7f44, + .word 0x3ffe2646, 0x14f5a129, 0xbc97b627, 0x817a1496, + .word 0x3ffe3b33, 0x3b16ee12, 0xbc99f4a4, 0x31fdc68b, + .word 0x3ffe502e, 0xe78b3ff6, 0x3c839e89, 0x80a9cc8f, + .word 0x3ffe6539, 0x24676d76, 0xbc863ff8, 0x7522b735, + .word 0x3ffe7a51, 0xfbc74c83, 0x3c92d522, 0xca0c8de2, + .word 0x3ffe8f79, 0x77cdb740, 0xbc910894, 0x80b054b1, + .word 0x3ffea4af, 0xa2a490da, 0xbc9e9c23, 0x179c2893, + .word 0x3ffeb9f4, 0x867cca6e, 0x3c94832f, 0x2293e4f2, + .word 0x3ffecf48, 0x2d8e67f1, 0xbc9c93f3, 0xb411ad8c, + .word 0x3ffee4aa, 0xa2188510, 0x3c91c68d, 0xa487568d, + .word 0x3ffefa1b, 0xee615a27, 0x3c9dc7f4, 0x86a4b6b0, + .word 0x3fff0f9c, 0x1cb6412a, 0xbc932200, 0x65181d45, + .word 0x3fff252b, 0x376bba97, 0x3c93a1a5, 0xbf0d8e43, + .word 0x3fff3ac9, 0x48dd7274, 0xbc795a5a, 0x3ed837de, + .word 
0x3fff5076, 0x5b6e4540, 0x3c99d3e1, 0x2dd8a18b, + .word 0x3fff6632, 0x798844f8, 0x3c9fa37b, 0x3539343e, + .word 0x3fff7bfd, 0xad9cbe14, 0xbc9dbb12, 0xd006350a, + .word 0x3fff91d8, 0x02243c89, 0xbc612ea8, 0xa779f689, + .word 0x3fffa7c1, 0x819e90d8, 0x3c874853, 0xf3a5931e, + .word 0x3fffbdba, 0x3692d514, 0xbc796773, 0x15098eb6, + .word 0x3fffd3c2, 0x2b8f71f1, 0x3c62eb74, 0x966579e7, + .word 0x3fffe9d9, 0x6b2a23d9, 0x3c74a603, 0x7442fde3, +! + .word 0x3c900000, 0x00000000, ! 2**(-54) = 5.551115123125782702e-17 + .word 0x3ff00000, 0x00000000, ! DONE = 1.0 + .word 0x43300000, 0x00000000, ! DVAIN52 = 2**52 = 4.503599627370496e15 + .word 0xffffffff, 0x00000000, ! MHI32 = 0xffffffff00000000 + .word 0x4062776d, 0x8ce329bd, ! KA5 = (5.77078604860893737986e-01*256) + .word 0x406ec709, 0xdc39fc99, ! KA3 = (9.61796693925765549423e-01*256) + .word 0x40871547, 0x652b82fe, ! KA1 = (2.885390081777926774e+00*256) + .word 0x41100000, 0x00000000, ! HTHRESH = 262144.0 + .word 0xc110cc00, 0x00000000, ! LTHRESH = -275200.0 + .word 0x3d83b2ab, 0xc07c93d0, ! KB4 = 2.23939573811855104311e-12 + .word 0x000fffff, 0xffffffff, ! MMANT + .word 0x00000800, 0x00000000, ! MROUND + .word 0xfffff000, 0x00000000, ! MHI20 + +! local storage indices +#define tmp0_lo STACK_BIAS-4 +#define tmp0_hi STACK_BIAS-8 +#define tmp1_lo STACK_BIAS-12 +#define tmp1_hi STACK_BIAS-16 +#define tmp2_lo STACK_BIAS-20 +#define tmp2_hi STACK_BIAS-24 +#define tmp3 STACK_BIAS-28 +#define tmp4 STACK_BIAS-32 +#define ind_buf STACK_BIAS-48 +#define tmp_counter STACK_BIAS-56 +#define tmp_px STACK_BIAS-64 +#define tmp_py STACK_BIAS-72 +#define tmp_mant STACK_BIAS-80 +#define tmp5 STACK_BIAS-88 +#define tmp6 STACK_BIAS-96 + +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 96 + +#define LOGTBL %g5 +#define EXPTBL %g1 +#define EXPTBL_P8 %l4 + +#define MASK_0x7fffffff %o4 +#define MASK_0x000fffff %o3 +#define MASK_0x3ff00000 %o1 + +#define counter %i0 +#define px %i1 +#define stridex %l5 +#define py %i3 +#define stridey %l6 +#define pz %i5 +#define stridez %l7 + +#define HTHRESH %f0 +#define LTHRESH %f2 + +#define MHI32 %f38 +#define KA1_LO %f40 +#define KA1_HI %f40 + +#define KB1 %f42 +#define KB2 %f42 +#define KB3 %f42 +#define KB4 %f44 +#define KB5 %f42 + +#define KA1 %f46 +#define KA3 %f28 +#define KA5 %f50 + +#define DZERO %f24 +#define DZERO_HI %f24 +#define DZERO_LO %f25 +#define DONE %f18 +#define DONE_HI %f18 +#define DONE_LO %f19 + +#define XKB1 %f42 +#define XKB2 %f40 +#define XKB3 %f32 +#define XKB4 %f36 +#define XKB5 %f34 + +#define s_h %f46 +#define yr %f30 + +#define ind_TINY 64 +#define ind_HUGE 56 +#define ind_LO 48 +#define ind_HI 40 +#define ind_KB5 32 +#define ind_KB3 24 +#define ind_KB2 16 +#define ind_KB1 8 + +!-------------------------------------------------------------------- +! !!!!! vpow algorithm !!!!! +! +! hx = ((unsigned*)px)[0]; +! lx = ((unsigned*)px)[1]; +! hy = ((unsigned*)py)[0]; +! ly = ((unsigned*)py)[1]; +! sx = hx >> 31; +! sy = hy >> 31; +! hx &= 0x7fffffff; +! hy &= 0x7fffffff; +! y0 = *px; +! +! if (hy < 0x3bf00000) { /* |Y| < 2^(-64) */ +! if ((hy | ly) == 0) { /* pow(X,0) */ +! *pz = DONE; +! goto next; +! } +! if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0)) { /* |X| = Nan */ +! *pz = y0 * y0; +! goto next; +! } +! else if ((hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0)) { /* X = 0 or Inf */ +! ((int*)pz)[0] = hx; +! ((int*)pz)[1] = lx; +! if (sy) *pz = DONE / *pz; +! goto next; +! } +! else *pz = (sx) ? DZERO / DZERO : DONE; +! goto next; +! } +! yisint = 0; /* Y - non-integer */ +! expy = hy >> 20; /* Y exponent */ +! +! if (hx >= 0x7ff00000 || expy >= 0x43e) { /* X=Inf,Nan or |Y|>2^63,Inf,Nan */ +! 
if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) || +! hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0)) { +! *pz = y0 * *py; /* |X| or |Y| = Nan */ +! goto next; } +! if (hy == 0x7ff00000 && (ly == 0)) { /* |Y| = Inf */ +! if (hx == 0x3ff00000 && (lx == 0)) +! *pz = *py - *py; /* +-1 ** +-Inf */ +! else if ((hx < 0x3ff00000) != sy) +! *pz = DZERO; +! else { +! ((int*)pz)[0] = hy; +! ((int*)pz)[1] = ly; +! } +! goto next; +! } +! if (expy < 0x43e) { /* |Y| < 2^63 */ +! if (sx) { /* X = -Inf */ +! if (expy >= 0x434) /* |Y| >= 2^53 */ +! yisint = 2; /* Y - even */ +! else { +! if (expy >= 0x3ff) { /* |Y| >= 1 */ +! if (expy > (20 + 0x3ff)) { +! i0 = ly >> (52 - (expy - 0x3ff)); +! if ((i0 << (52 - (expy - 0x3ff))) == ly) yisint = 2 - (i0 & 1); +! } +! else if (ly == 0) { +! i0 = hy >> (20 - (expy - 0x3ff)); +! if ((i0 << (20 - (expy - 0x3ff))) == hy) yisint = 2 - (i0 & 1); +! } +! } +! } +! } +! if (sy) hx = lx = 0; +! hx += yisint << 31; +! ((int*)pz)[0] = hx; +! ((int*)pz)[1] = lx; +! goto next; +! } +! else { /* |Y| >= 2^63 */ +! if (lx == 0 && /* |X| = 0, 1, Inf */ +! (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000)) { +! ((int*)pz)[0] = hx; +! ((int*)pz)[1] = lx; +! if (sy) *pz = DONE / *pz; +! } +! else { +! y0 = ((hx < 0x3ff00000) != sy) ? _TINY : _HUGE; +! *pz = y0 * y0; +! } +! goto next; +! } +! } +! if (sx || (hx | lx) == 0) { /* X <= 0 */ +! if (expy >= 0x434) /* |Y| >= 2^53 */ +! yisint = 2; /* Y - even */ +! else { +! if (expy >= 0x3ff) { /* |Y| >= 1 */ +! if (expy > (20 + 0x3ff)) { +! i0 = ly >> (52 - (expy - 0x3ff)); +! if ((i0 << (52 - (expy - 0x3ff))) == ly) yisint = 2 - (i0 & 1); +! } +! else if (ly == 0) { +! i0 = hy >> (20 - (expy - 0x3ff)); +! if ((i0 << (20 - (expy - 0x3ff))) == hy) yisint = 2 - (i0 & 1); +! } +! } +! } +! if ((hx | lx) == 0) { /* X == 0 */ +! y0 = DZERO; +! if (sy) y0 = DONE / y0; +! if (sx & yisint) y0 = -y0; +! *pz = y0; +! goto next; +! } +! if (yisint == 0) { /* pow(neg,non-integer) */ +! 
*pz = DZERO / DZERO; /* NaN */ +! goto next; +! } +! } +! +! *((int*)&x + 1) = ((unsigned*)px)[1]; +! *((int*)&ax + 1) = 0; +! exp = hx; +! hx &= 0xfffff; +! hx |= 0x3ff00000; +! *(int*)&x = hx; +! hx += 0x800; +! hx &= 0xfffff000; +! *(int*)&ax = hx; +! if (exp <= 0xfffff) { +! y0 = vis_fand(x, MMANT); +! ax = (double) ((long long *) & y0)[0]; +! x = vis_fand(ax, MMANT); +! x = vis_for(x, DONE); +! exp = ((unsigned int*) & ax)[0]; +! exp -= (1023 + 51) << 20; +! hx = exp & 0xfffff; +! hx |= 0x3ff00000; +! hx += 0x800; +! *(int*)&ax = hx; +! } +! exp = (exp >> 20); +! exp = exp - 2046; +! ux = x + ax; +! yd = DONE / ux; +! u = x - ax; +! s = u * yd; +! ux = vis_fand(ux, MHI32); +! y = s * s; +! s_h = vis_fand(s, MHI32); +! dtmp8 = KA5 * y; +! dtmp8 = dtmp8 + KA3; +! dtmp8 = dtmp8 * y; +! s = dtmp8 * s; +! dtmp0 = (ux - ax); +! s_l = (x - dtmp0); +! dtmp0 = s_h * ux; +! dtmp1 = s_h * s_l; +! s_l = u - dtmp0; +! s_l -= dtmp1; +! dtmp0 = KA1 * yd; +! s_l = dtmp0 * s_l; +! i = (hx >> 8); +! i = i & 0xff0; +! itmp0 = (hx >> 20); +! exp += itmp0; +! yd = KA1_HI * s_h; +! y = *(double *)((char*)__mt_constlog2 + i); +! itmp0 = exp << 8; +! y += (double)itmp0; +! m_h = y + yd; +! dtmp2 = m_h - y; +! dtmp2 -= yd; +! dtmp2 -= s_l; +! y = s - dtmp2; +! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); +! dtmp1 = KA1_LO * s_h; +! dtmp0 += dtmp1; +! y += dtmp0; +! dtmp0 = y + m_h; +! s_h = vis_fand(dtmp0, MHI32); +! dtmp0 = (s_h - m_h); +! y = y - dtmp0; +! yd = *py; +! s = vis_fand(yd, MHI32); +! dtmp0 = (yd - s); +! dtmp1 = yd * y; +! dtmp0 *= s_h; +! yd = dtmp0 + dtmp1; +! s = s_h * s; +! if (s > HTHRESH) {s = HTHRESH; yd = DZERO;} +! if (s < LTHRESH) {s = LTHRESH; yd = DZERO;} +! dtmp0 = (s + yd); +! ind = (int)dtmp0; +! i = ind & 0xff; +! i = i << 4; +! u = (double)(int)dtmp0; +! ind >>= 8; +! y = s - u; +! y = y + yd; +! u = *(double*)((char*)__mt_constexp2 + i); +! dtmp0 = KB5 * y; +! dtmp1 = dtmp0 + KB4; +! dtmp2 = dtmp1 * y; +! dtmp3 = dtmp2 + KB3; +! 
dtmp4 = dtmp3 * y; +! dtmp5 = dtmp4 + KB2; +! dtmp6 = dtmp5 * y; +! dtmp7 = dtmp6 + KB1; +! y = dtmp7 * y; +! eflag = (ind + 1021); +! eflag = eflag >> 31; +! gflag = (1022 - ind); +! gflag = gflag >> 31; +! dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); +! dtmp1 = u * y; +! dtmp2 = dtmp0 + dtmp1; +! u = dtmp2 + u; +! ind = yisint + ind; +! itmp0 = 54 & eflag; +! itmp1 = 52 & gflag; +! ind = ind + itmp0; +! ind = ind - itmp1; +! ind <<= 20; +! *(int*)&dtmp0 = ind; +! *((int*)&dtmp0 + 1) = 0; +! u = vis_fpadd32(u, dtmp0); +! ind = eflag - gflag; +! ind += 1; +! ind *= 8; +! dtmp1 = *(double*)((char*)lconst + ind); +! dtmp1 = u * dtmp1; +! *pz = dtmp1; +!-------------------------------------------------------------------- +! !!!!! vpowx algorithm !!!!! (x > 0 and x != Inf, NaN) +! +! /* perform s_h + yr = 256*log2(x) */ +! +! exp = ((unsigned*)px)[0]; +! y0 = px[0]; +! if (exp <= 0xfffff) { +! y0 = (double) ((long long *) & y0)[0]; +! exp = ((unsigned int*) & y0)[0]; +! exp -= (1023 + 51) << 20; +! } +! x = vis_fand(y0, MMANT); +! x = vis_for(x, DONE); +! ax = vis_fpadd32(x, MROUND); +! ax = vis_fand(ax, MHI20); +! hx = *(int*)&ax; +! exp = (exp >> 20); +! exp = exp - 2046; +! ux = x + ax; +! yd = DONE / ux; +! u = x - ax; +! s = u * yd; +! ux = vis_fand(ux, MHI32); +! y = s * s; +! s_h = vis_fand(s, MHI32); +! dtmp8 = KA5 * y; +! dtmp8 = dtmp8 + KA3; +! dtmp8 = dtmp8 * y; +! s = dtmp8 * s; +! dtmp0 = (ux - ax); +! s_l = (x - dtmp0); +! dtmp0 = s_h * ux; +! dtmp1 = s_h * s_l; +! s_l = u - dtmp0; +! s_l -= dtmp1; +! dtmp0 = KA1 * yd; +! s_l = dtmp0 * s_l; +! i = (hx >> 8); +! i = i & 0xff0; +! itmp0 = (hx >> 20); +! exp += itmp0; +! yd = KA1_HI * s_h; +! y = *(double *)((char*)__mt_constlog2 + i); +! itmp0 = exp << 8; +! y += (double)itmp0; +! m_h = y + yd; +! dtmp2 = m_h - y; +! dtmp2 -= yd; +! dtmp2 -= s_l; +! y = s - dtmp2; +! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); +! dtmp1 = KA1_LO * s_h; +! dtmp0 += dtmp1; +! y += dtmp0; +! dtmp0 = y + m_h; +! 
s_h = vis_fand(dtmp0, MHI32); +! dtmp0 = (s_h - m_h); +! yr = y - dtmp0; +! +! hy = ((unsigned*)py)[0]; +! ly = ((unsigned*)py)[1]; +! hx = ((unsigned*)px)[0]; +! lx = ((unsigned*)px)[1]; +! sy = hy >> 31; +! hy &= 0x7fffffff; +! +! if (hy < 0x3bf00000) {/* |Y| < 2^(-64) */ +! *pz = DONE; +! goto next; +! } +! +! if (hy >= 0x43e00000) { /* |Y|>2^63,Inf,Nan */ +! if (hy == 0x7ff00000 && (ly == 0)) { /* |Y| = Inf */ +! if (hx == 0x3ff00000 && (lx == 0)) +! *pz = *py - *py; /* 1 ** +-Inf */ +! else if ((hx < 0x3ff00000) != sy) +! *pz = DZERO; +! else { +! ((int*)pz)[0] = hy; +! ((int*)pz)[1] = ly; +! } +! goto next; +! } +! if (hy >= 0x7ff00000) { +! *pz = *px + *py; /* |Y| = Nan */ +! goto next; +! } +! /* |Y| >= 2^63 */ +! if (lx == 0 && (hx == 0x3ff00000)) { /* X = 1 */ +! *pz = DONE; +! } +! else { +! y0 = ((hx < 0x3ff00000) != sy) ? _TINY : _HUGE; +! *pz = y0 * y0; +! } +! goto next; +! } +! +! yd = *py; +! s = vis_fand(yd, MHI32); +! dtmp0 = (yd - s); +! dtmp1 = yd * yr; +! dtmp0 *= s_h; +! yd = dtmp0 + dtmp1; +! s = s_h * s; +! if (s > HTHRESH) {s = HTHRESH; yd = DZERO;} +! if (s < LTHRESH) {s = LTHRESH; yd = DZERO;} +! dtmp0 = (s + yd); +! ind = (int)dtmp0; +! i = ind & 0xff; +! i = i << 4; +! u = (double)(int)dtmp0; +! ind >>= 8; +! y = s - u; +! y = y + yd; +! u = *(double*)((char*)__mt_constexp2 + i); +! dtmp0 = XKB5 * y; +! dtmp1 = dtmp0 + XKB4; +! dtmp2 = dtmp1 * y; +! dtmp3 = dtmp2 + XKB3; +! dtmp4 = dtmp3 * y; +! dtmp5 = dtmp4 + XKB2; +! dtmp6 = dtmp5 * y; +! dtmp7 = dtmp6 + XKB1; +! y = dtmp7 * y; +! eflag = (ind + 1021); +! eflag = eflag >> 31; +! gflag = (1022 - ind); +! gflag = gflag >> 31; +! dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); +! dtmp1 = u * y; +! dtmp2 = dtmp0 + dtmp1; +! u = dtmp2 + u; +! itmp0 = 54 & eflag; +! itmp1 = 52 & gflag; +! ind = ind + itmp0; +! ind = ind - itmp1; +! ind <<= 20; +! *(int*)&dtmp0 = ind; +! *((int*)&dtmp0 + 1) = 0; +! u = vis_fpadd32(u, dtmp0); +! ind = eflag - gflag; +! ind += 1; +! ind *= 8; +! 
dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); +! dtmp1 = u * dtmp1; +! *pz = dtmp1; +!-------------------------------------------------------------------- + + ENTRY(__vpow) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,g5) + wr %g0,0x82,%asi ! set %asi for non-faulting loads + + cmp counter,0 + ble,pn %icc,.end + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + ld [px],%o0 + add LOGTBL,4095,EXPTBL + st counter,[%fp+tmp_counter] + add EXPTBL,65,EXPTBL + sra %i2,0,stridex + stx px,[%fp+tmp_px] + add EXPTBL,4095,%l0 + fzero DZERO + stx py,[%fp+tmp_py] + + cmp stridex,0 + bne,pt %icc,.common_case + add %l0,1,%l0 + + cmp %o0,0 + ble,pt %icc,.common_case + sethi %hi(0x7f800000),%o1 + + cmp %o0,%o1 + bl,pn %icc,.stridex_zero + nop + +.common_case: + sra stridez,0,stridez + ldd [%l0+8],DONE + ldd [%l0+24],MHI32 + sra %i4,0,stridey + ldd [%l0+32],KA5 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + ldd [%l0+40],KA3 + sethi %hi(0xffc00),MASK_0x000fffff + ldd [%l0+48],KA1 + sethi %hi(0x3ff00000),MASK_0x3ff00000 + ldd [%l0+56],HTHRESH + sllx stridex,3,stridex + add MASK_0x7fffffff,0x3ff,MASK_0x7fffffff + ldd [%l0+64],LTHRESH + sllx stridey,3,stridey + add MASK_0x000fffff,0x3ff,MASK_0x000fffff + ldd [%l0+72],KB4 + sllx stridez,3,stridez + st %g0,[%fp+tmp1_lo] ! *((int*)&ax + 1) = 0; + sub %g0,1,%o2 + st %g0,[%fp+tmp2_lo] ! (Y0_0) *((int*)&dtmp0 + 1) = 0; + st MASK_0x000fffff,[%fp+tmp_mant] + sub pz,stridez,pz + st %o2,[%fp+tmp_mant+4] + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],px + ldx [%fp+tmp_py],py + st %g0,[%fp+tmp_counter] +.begin1: + subcc counter,1,counter + bneg,pn %icc,.end + or %g0,ind_buf,%o7 + + lda [py]%asi,%o2 ! (Y0_1) hy = *py; + + and %o2,MASK_0x7fffffff,%l1 ! (Y0_3) hy &= 0x7fffffff; + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + + sra %l1,20,%o0 ! (Y0_3) expy = hy >> 20; + lda [px+4]%asi,%i2 ! 
(Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + + and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff; + + or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000; + + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + + add pz,stridez,pz + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + + sra %l3,20,%l2 ! (Y0_3) exp = (exp >> 20); + + cmp %o0,959 ! (Y0_3) if (expy < 0x3fb); + bl,pn %icc,.spec0 ! (Y0_3) if (expy < 0x3fb); + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + + cmp %o0,1086 ! (Y0_3) if (expy >= 0x43e); + bge,pn %icc,.spec1 ! (Y0_3) if (expy >= 0x43e); + nop + + cmp %l2,2047 ! (Y0_2) if (exp >= 0x7ff) + bge,pn %icc,.spec1 ! (Y0_2) if (exp >= 0x7ff) + nop + + cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff) + + ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx; + ble,pn %icc,.update0 ! (Y0_2) if (hx <= 0xfffff) + nop +.cont0: + sub %o7,ind_buf,%o7 ! stack buffer pointer update + sub pz,stridez,pz + ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx; + + add %o7,4,%o7 ! stack buffer pointer update + faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax; + + and %o7,15,%o7 ! stack buffer pointer update + + add %o7,ind_buf,%o7 ! stack buffer pointer update + add px,stridex,px ! px += stridex; + + lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0]; + + lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff; + + st %g0,[%fp+%o7] ! (Y1_2) yisint = 0; + or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000; + + st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx; + add %i4,2048,%i4 ! (Y1_2) hx += 0x800; + + st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000; + + st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx; + and %l0,MASK_0x7fffffff,%l2 ! 
(Y1_2) hx &= 0x7fffffff; + cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff) + + ble,pn %icc,.update1 ! (Y1_2) if (hx <= 0xfffff) + nop +.cont1: + sub %o7,ind_buf,%o7 ! stack buffer pointer update + + add %o7,4,%o7 ! stack buffer pointer update + fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux; + + and %o7,15,%o7 ! stack buffer pointer update + + sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20); + add %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx; + + ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx; + sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20); + sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046; + + add %o5,%l0,%o5 ! (Y0_2) exp += itmp0; + + sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8; + st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0; + faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax; + + fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32); + add px,stridex,px ! px += stridex; + + ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI; + fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax); + + ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0; + fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax; + + sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8); + + and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0; + + ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0); + + fitod %f16,%f54 ! (Y0_2) (double)itmp0; + add %l4,8,%o0 ! (Y0_2) i += 8; + + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32); + + faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0; + lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux; + + and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff; + fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s; + + or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000; + + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + fsubd %f58,%f32,%f32 ! 
(Y0_2) s_l = u - dtmp0; + + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y; + + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h; + + fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l; + ldd [EXPTBL-ind_LO],KA1_LO ! (y0_2) load KA1_LO; + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3; + + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd; + + fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1; + + cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff) + + sra %l2,20,%l2 ! (Y1_1) exp = (exp >> 20); + ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx; + ble,pn %icc,.update2 ! (Y0_2) if (hx <= 0xfffff) + fsubd %f36,%f54,%f30 ! (Y0_1) dtmp2 = m_h - y; +.cont2: + cmp %l2,2047 ! (Y1_1) if (exp >= 0x7ff) + sub %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx; + + sra %i4,20,%l0 ! (Y1_1) itmp0 = (hx >> 20); + sub %l2,2046,%o5 ! (Y1_1) exp = exp - 2046; + fmuld KA1,%f20,%f20 ! (Y0_1) dtmp0 = KA1 * yd; + + add %o5,%l0,%o5 ! (Y1_1) exp += itmp0; + fmuld %f62,%f12,%f62 ! (Y0_1) dtmp8 = dtmp8 * y; + + sll %o5,8,%l0 ! (Y1_1) itmp0 = exp << 8; + add %o7,4,%o7 ! stack buffer pointer update + st %l0,[%fp+tmp3] ! (Y1_1) (double)itmp0; + faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax; + + bge,pn %icc,.update3 ! (Y1_1) if (exp >= 0x7ff) + fsubd %f30,%f48,%f48 ! (Y0_1) dtmp2 -= yd; +.cont3: + and %o7,15,%o7 ! stack buffer pointer update + fmuld %f20,%f10,%f10 ! (Y0_1) s_l = dtmp0 * s_l; + + add %o7,ind_buf,%o7 ! stack buffer pointer update + fmuld KA1_LO,%f4,%f4 ! (Y0_1) dtmp1 = KA1_LO * s_h; + fand %f26,MHI32,%f26 ! (Y1_1) ux = vis_fand(ux, MHI32); + + fmuld %f62,%f52,%f62 ! 
(Y0_1) s = dtmp8 * s; + ldd [LOGTBL+%o0],%f52 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f20 ! (Y0_1) dtmp2 -= s_l; + + add px,stridex,px ! px += stridex; + fsubd %f26,%f14,%f10 ! (Y1_1) dtmp0 = (ux - ax); + + faddd %f52,%f4,%f52 ! (Y0_1) dtmp0 += dtmp1; + + ldd [EXPTBL-ind_HI],KA1_HI ! (Y1_1) load KA1_HI; + fsubd %f62,%f20,%f4 ! (Y0_1) y = s - dtmp2; + + ld [%fp+tmp3],%f16 ! (Y1_1) (double)itmp0; + fsubd %f8,%f14,%f58 ! (Y1_1) u = x - ax; + + sra %i4,8,%o0 ! (Y1_1) i = (hx >> 8); + + faddd %f4,%f52,%f48 ! (Y0_1) y += dtmp0; + and %o0,4080,%o0 ! (Y1_1) i = i & 0xff0; + + ldd [LOGTBL+%o0],%f62 ! (Y1_1) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f58,%f22,%f52 ! (Y1_1) s = u * yd; + fsubd %f8,%f10,%f10 ! (Y1_1) s_l = (x - dtmp0); + + lda [py]%asi,%f30 ! (Y0_1) yd = *py; + fitod %f16,%f14 ! (Y1_1) (double)itmp0; + + lda [py+4]%asi,%f31 ! (Y0_1) yd = *py; + faddd %f48,%f36,%f8 ! (Y0_1) dtmp0 = y + m_h; + + add %o0,8,%o0 ! (Y1_1) i += 8; + lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0]; + fand %f52,MHI32,%f4 ! (Y1_1) s_h = vis_fand(s, MHI32); + + faddd %f62,%f14,%f14 ! (Y1_1) y += (double)itmp0; + + lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + fand %f8,MHI32,%f20 ! (Y0_1) s_h = vis_fand(dtmp0, MHI32); + fmuld %f4,%f26,%f8 ! (Y1_1) dtmp0 = s_h * ux; + + fand %f30,MHI32,%f6 ! (Y0_1) s = vis_fand(yd, MHI32); + and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff; + fmuld %f52,%f52,%f26 ! (Y1_1) y = s * s; + + st %g0,[%fp+%o7] ! (Y1_2) yisint = 0; + or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000; + fsubd %f20,%f36,%f62 ! (Y0_1) dtmp0 = (s_h - m_h); + + st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx; + fsubd %f58,%f8,%f8 ! (Y1_1) s_l = u - dtmp0; + + add %i4,2048,%i4 ! (Y1_2) hx += 0x800; + fmuld %f20,%f6,%f34 ! (Y0_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y0_1) dtmp0 = (yd - s); + + st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %i4,-4096,%i4 ! 
(Y1_2) hx &= 0xfffff000; + fmuld KA5,%f26,%f36 ! (Y1_1) dtmp8 = KA5 * y; + + st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx; + fsubd %f48,%f62,%f62 ! (Y0_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y1_1) yd = KA1_HI * s_h; + + fmuld %f4,%f10,%f10 ! (Y1_1) dtmp1 = s_h * s_l; + + ldd [EXPTBL-ind_LO],KA1_LO ! (Y1_1) load KA1_LO; + and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff; + fmuld %f6,%f20,%f6 ! (Y0_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y0_1) s > HTHRESH + + cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff) + fmuld %f30,%f62,%f30 ! (Y0_1) dtmp1 = yd * y; + faddd %f36,KA3,%f62 ! (Y1_1) dtmp8 = dtmp8 + KA3; + + ble,pn %icc,.update4 ! (Y1_2) if (hx <= 0xfffff) + faddd %f14,%f48,%f36 ! (Y1_1) m_h = y + yd; +.cont4: + sub %o7,ind_buf,%o7 ! stack buffer pointer update + fmovdg %fcc0,HTHRESH,%f34 ! (Y0_1) s = HTHRESH + + add %o7,4,%o7 ! stack buffer pointer update + fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux; + fsubd %f8,%f10,%f10 ! (Y1_1) s_l -= dtmp1; + + and %o7,15,%o7 ! stack buffer pointer update + faddd %f6,%f30,%f6 ! (Y0_1) yd = dtmp0 + dtmp1; + + sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20); + add %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx; + fsubd %f36,%f14,%f30 ! (Y1_1) dtmp2 = m_h - y; + + cmp %l3,2047 ! (Y0_2) if (exp >= 0x7ff) + ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx; + fmuld KA1,%f22,%f22 ! (Y1_1) dtmp0 = KA1 * yd; + + sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20); + sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046; + fcmped %fcc1,%f34,LTHRESH ! (Y0_1) s < LTHRESH + + add %o5,%l0,%o5 ! (Y0_2) exp += itmp0; + add py,stridey,py ! py += stridey; + fmuld %f62,%f26,%f62 ! (Y1_1) dtmp8 = dtmp8 * y; + fmovdg %fcc0,DZERO,%f6 ! (Y0_1) yd = DZERO + + sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8; + st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0; + faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax; + + bge,pn %icc,.update5 ! (Y0_2) if (exp >= 0x7ff) + fsubd %f30,%f48,%f48 ! 
(Y1_1) dtmp2 -= yd; +.cont5: + lda [py]%asi,%l1 ! (Y1_1) hy = *py; + fmuld %f22,%f10,%f10 ! (Y1_1) s_l = dtmp0 * s_l; + fmovdl %fcc1,LTHRESH,%f34 ! (Y0_1) s = LTHRESH + + fmovdl %fcc1,DZERO,%f6 ! (Y0_1) yd = DZERO + + fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32); + fmuld KA1_LO,%f4,%f4 ! (Y1_1) dtmp1 = KA1_LO * s_h; + + fmuld %f62,%f52,%f62 ! (Y1_1) s = dtmp8 * s; + ldd [LOGTBL+%o0],%f52 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f22 ! (Y1_1) dtmp2 -= s_l; + + add px,stridex,px ! px += stridex; + faddd %f34,%f6,%f58 ! (Y0_1) dtmp0 = (s + yd); + + and %l1,MASK_0x7fffffff,%l1 ! (Y1_1) hy &= 0x7fffffff; + ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI; + fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax); + + faddd %f52,%f4,%f52 ! (Y1_1) dtmp0 += dtmp1; + + fsubd %f62,%f22,%f4 ! (Y1_1) y = s - dtmp2; + + fdtoi %f58,%f17 ! (Y0_1) (int)dtmp0; + + ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0; + fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax; + sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8); + + sra %l1,20,%l1 ! (Y1_1) expy = hy >> 20; + ldd [EXPTBL-ind_KB5],KB5 ! (Y0_1) load KB5; + faddd %f4,%f52,%f48 ! (Y1_1) y += dtmp0; + + and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0; + st %f17,[%fp+tmp4] ! (Y0_1) ind = (int)dtmp0; + fitod %f17,%f4 ! (Y0_1) u = (double)(int)dtmp0; + + ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0); + + lda [py]%asi,%f30 ! (Y1_1) yd = *py; + fitod %f16,%f54 ! (Y0_2) (double)itmp0; + + lda [py+4]%asi,%f31 ! (Y1_1) yd = *py; + faddd %f48,%f36,%f32 ! (Y1_1) dtmp0 = y + m_h; + + add %l4,8,%o0 ! (Y0_2) i += 8; + fsubd %f34,%f4,%f60 ! (Y0_1) y = s - u; + + cmp %l1,959 ! (Y1_1) if (expy < 0x3fb); + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32); + + bl,pn %icc,.update6 ! (Y1_1) if (expy < 0x3fb); + faddd %f62,%f54,%f54 ! 
(Y0_2) y += (double)itmp0; +.cont6: + cmp %l1,1086 ! (Y1_1) if (expy >= 0x43e); + lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + fand %f32,MHI32,%f22 ! (Y1_1) s_h = vis_fand(dtmp0, MHI32); + + fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux; + bge,pn %icc,.update7 ! (Y1_1) if (expy >= 0x43e); + faddd %f60,%f6,%f60 ! (Y0_1) y = y + yd; +.cont7: + ld [%fp+%o7],%o2 ! (Y0_1) load yisint + fand %f30,MHI32,%f6 ! (Y1_1) s = vis_fand(yd, MHI32); + + and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff; + fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s; + + or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000; + fsubd %f22,%f36,%f62 ! (Y1_1) dtmp0 = (s_h - m_h); + + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0; + fmuld KB5,%f60,%f58 ! (Y0_1) dtmp0 = KB5 * y; + + ldd [EXPTBL-ind_KB3],KB3 ! (Y0_1) load KB3; + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + fmuld %f22,%f6,%f34 ! (Y1_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y1_1) dtmp0 = (yd - s); + + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y; + + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + fsubd %f48,%f62,%f62 ! (Y1_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h; + + subcc counter,1,counter + fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l; + faddd %f58,KB4,%f58 ! (Y0_1) dtmp1 = dtmp0 + KB4; + + ldd [EXPTBL-ind_LO],KA1_LO ! (y0_2) load KA1_LO; + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + fmuld %f6,%f22,%f6 ! (Y1_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y1_1) s > HTHRESH; + + fmuld %f30,%f62,%f30 ! (Y1_1) dtmp1 = yd * y; + ba 1f + faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3; + + .align 16 +1: + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + fmuld %f58,%f60,%f58 ! (Y0_1) dtmp2 = dtmp1 * y; + bneg,pn %icc,.tail + faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd; + + nop + fmovdg %fcc0,HTHRESH,%f34 ! 
(Y1_1) s = HTHRESH; + + fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1; + +.main_loop: + cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff) + add py,stridey,py ! py += stridey; + faddd %f6,%f30,%f6 ! (Y1_0) yd = dtmp0 + dtmp1; + + sra %l2,20,%l2 ! (Y1_1) exp = (exp >> 20); + ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx; + ble,pn %icc,.update8 ! (Y0_2) if (hx <= 0xfffff) + fsubd %f36,%f54,%f30 ! (Y0_1) dtmp2 = m_h - y; +.cont8: + cmp %l2,2047 ! (Y1_1) if (exp >= 0x7ff) + sub %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx; + faddd %f58,KB3,%f58 ! (Y0_0) dtmp3 = dtmp2 + KB3; + + sra %i4,20,%l0 ! (Y1_1) itmp0 = (hx >> 20); + sub %l2,2046,%o5 ! (Y1_1) exp = exp - 2046; + fmuld KA1,%f20,%f20 ! (Y0_1) dtmp0 = KA1 * yd; + fcmped %fcc1,%f34,LTHRESH ! (Y1_0) s < LTHRESH; + + ldd [EXPTBL-ind_KB2],KB2 ! (Y0_0) load KB2; + add %o5,%l0,%o5 ! (Y1_1) exp += itmp0; + fmuld %f62,%f12,%f62 ! (Y0_1) dtmp8 = dtmp8 * y; + fmovdg %fcc0,DZERO,%f6 ! (Y1_0) yd = DZERO + + sll %o5,8,%l0 ! (Y1_1) itmp0 = exp << 8; + add %o7,4,%o7 ! stack buffer pointer update + st %l0,[%fp+tmp3] ! (Y1_1) (double)itmp0; + faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax; + + ld [%fp+tmp4],%i2 ! (Y0_0) ind = (int)dtmp0; + fsubd %f30,%f48,%f48 ! (Y0_1) dtmp2 -= yd; + bge,pn %icc,.update9 ! (Y1_1) if (exp >= 0x7ff) + fmuld %f58,%f60,%f58 ! (Y0_0) dtmp4 = dtmp3 * y; +.cont9: + lda [py]%asi,%l1 ! (Y0_1) hy = *py; + and %o7,15,%o7 ! stack buffer pointer update + fmuld %f20,%f10,%f10 ! (Y0_1) s_l = dtmp0 * s_l; + fmovdl %fcc1,LTHRESH,%f34 ! (Y1_0) s = LTHRESH; + + add %o7,ind_buf,%o7 ! stack buffer pointer update + fmovdl %fcc1,DZERO,%f6 ! (Y1_0) yd = DZERO + + fmuld KA1_LO,%f4,%f4 ! (Y0_1) dtmp1 = KA1_LO * s_h; + fand %f26,MHI32,%f26 ! (Y1_1) ux = vis_fand(ux, MHI32); + + fmuld %f62,%f52,%f62 ! (Y0_1) s = dtmp8 * s; + nop + faddd %f58,KB2,%f30 ! (Y0_0) dtmp5 = dtmp4 + KB2; + + nop + add pz,stridez,pz ! 
pz += stridez; + ldd [LOGTBL+%o0],%f52 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f20 ! (Y0_1) dtmp2 -= s_l; + + sra %i2,8,%l0 ! (Y0_0) ind >>= 8; + ldd [EXPTBL-ind_KB1],KB1 ! (Y0_0) load KB1; + add px,stridex,px ! px += stridex; + faddd %f34,%f6,%f58 ! (Y1_0) dtmp0 = (s + yd); + + add %l0,1021,%l2 ! (Y0_0) eflag = (ind + 1021); + sub %g0,%l0,%o5 ! (Y0_0) gflag = (1022 - ind); + fsubd %f26,%f14,%f10 ! (Y1_1) dtmp0 = (ux - ax); + + sra %l2,31,%l2 ! (Y0_0) eflag = eflag >> 31; + add %o5,1022,%o5 ! (Y0_0) gflag = (1022 - ind); + fmuld %f30,%f60,%f48 ! (Y0_0) dtmp6 = dtmp5 * y; + faddd %f52,%f4,%f52 ! (Y0_1) dtmp0 += dtmp1; + + sra %o5,31,%o5 ! (Y0_0) gflag = gflag >> 31; + and %l2,54,%o0 ! (Y0_0) itmp0 = 54 & eflag; + ldd [EXPTBL-ind_HI],KA1_HI ! (Y1_1) load KA1_HI; + fsubd %f62,%f20,%f4 ! (Y0_1) y = s - dtmp2; + + lda [py]%asi,%f30 ! (Y0_1) yd = *py; + sub %l2,%o5,%l2 ! (Y0_0) ind = eflag - gflag; + add %l0,%o0,%l0 ! (Y0_0) ind = ind + itmp0; + fdtoi %f58,%f20 ! (Y1_0) u = (double)(int)dtmp0; + + sra %i4,8,%o0 ! (Y1_1) i = (hx >> 8); + and %o5,52,%o5 ! (Y0_0) itmp1 = 52 & gflag; + ld [%fp+tmp3],%f16 ! (Y1_1) (double)itmp0; + fsubd %f8,%f14,%f58 ! (Y1_1) u = x - ax; + + and %o0,4080,%o0 ! (Y1_1) i = i & 0xff0; + sub %l0,%o5,%i4 ! (Y0_0) ind = ind - itmp1; + st %f20,[%fp+tmp4] ! (Y1_0) ind = (int)dtmp0; + faddd %f48,KB1,%f14 ! (Y0_0) dtmp7 = dtmp6 + KB1; + + add %o2,%i4,%i4 ! (Y0_0) ind = yisint + ind; + and %i2,255,%o5 ! (Y0_0) i = ind & 0xff; + lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0]; + faddd %f4,%f52,%f48 ! (Y0_1) y += dtmp0; + + sll %i4,20,%i4 ! (Y0_0) ind <<= 20; + ldd [LOGTBL+%o0],%f62 ! (Y1_1) y = *(double *)((char*)__mt_constlog2 + i); + and %l1,MASK_0x7fffffff,%l1 ! (Y0_1) hy &= 0x7fffffff; + fitod %f20,%f4 ! (Y1_0) u = (double)(int)dtmp0; + + lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + nop + fmuld %f58,%f22,%f52 ! (Y1_1) s = u * yd; + fsubd %f8,%f10,%f10 ! 
(Y1_1) s_l = (x - dtmp0); + + sll %o5,4,%o5 ! (Y0_0) i = i << 4; + st %i4,[%fp+tmp2_hi] ! (Y0_0) *(int*)&dtmp0 = ind; + fmuld %f14,%f60,%f20 ! (Y0_0) y = dtmp7 * y; + fitod %f16,%f14 ! (Y1_1) (double)itmp0; + + sra %l1,20,%l1 ! (Y0_1) expy = hy >> 20; + nop + ldd [EXPTBL+%o5],%f56 ! (Y0_0) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f48,%f36,%f8 ! (Y0_1) dtmp0 = y + m_h; + + add %o5,8,%o5 ! (Y0_0) i += 8; + add %o0,8,%o0 ! (Y1_1) i += 8; + lda [py+4]%asi,%f31 ! (Y0_1) yd = *py; + fsubd %f34,%f4,%f60 ! (Y1_0) y = s - u; + + cmp %l1,959 ! (Y0_1) if (expy < 0x3fb); + and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff; + ldd [EXPTBL-ind_KB5],KB5 ! (Y1_0) load KB5; + fand %f52,MHI32,%f4 ! (Y1_1) s_h = vis_fand(s, MHI32); + + ldd [EXPTBL+%o5],%f16 ! (Y0_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,%f20,%f34 ! (Y0_0) dtmp1 = u * y; + bl,pn %icc,.update10 ! (Y0_1) if (expy < 0x3fb); + faddd %f62,%f14,%f14 ! (Y1_1) y += (double)itmp0; +.cont10: + or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000; + cmp %l1,1086 ! (Y0_1) if (expy >= 0x43e); + fand %f8,MHI32,%f20 ! (Y0_1) s_h = vis_fand(dtmp0, MHI32); + + fmuld %f4,%f26,%f8 ! (Y1_1) dtmp0 = s_h * ux; + st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx; + bge,pn %icc,.update11 ! (Y0_1) if (expy >= 0x43e); + faddd %f60,%f6,%f60 ! (Y1_0) y = y + yd; +.cont11: + add %i4,2048,%i4 ! (Y1_2) hx += 0x800; + ld [%fp+%o7],%o2 ! (Y1_0) load yisint + fand %f30,MHI32,%f6 ! (Y0_1) s = vis_fand(yd, MHI32); + + st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1]; + and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000; + fmuld %f52,%f52,%f26 ! (Y1_1) y = s * s; + faddd %f16,%f34,%f16 ! (Y0_0) dtmp2 = dtmp0 + dtmp1; + + st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx; + fsubd %f20,%f36,%f62 ! (Y0_1) dtmp0 = (s_h - m_h); + + fsubd %f58,%f8,%f8 ! (Y1_1) s_l = u - dtmp0; + fmuld KB5,%f60,%f58 ! (Y1_0) dtmp0 = KB5 * y; + + ldd [EXPTBL-ind_KB3],KB3 ! (Y1_0) load KB3; + fmuld %f20,%f6,%f34 ! 
(Y0_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y0_1) dtmp0 = (yd - s); + + faddd %f16,%f56,%f56 ! (Y0_0) u = dtmp2 + u; + nop + fmuld KA5,%f26,%f36 ! (Y1_1) dtmp8 = KA5 * y; + + nop + add %l2,513,%l2 ! (Y0_0) ind += 513; + fsubd %f48,%f62,%f62 ! (Y0_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y1_1) yd = KA1_HI * s_h; + + sll %l2,3,%o5 ! (Y0_0) ind *= 8; + ldd [%fp+tmp2_hi],%f16 ! (Y0_0) ld dtmp0; + fmuld %f4,%f10,%f10 ! (Y1_1) dtmp1 = s_h * s_l; + faddd %f58,KB4,%f58 ! (Y1_0) dtmp1 = dtmp0 + KB4; + + ldd [EXPTBL-ind_LO],KA1_LO ! (Y1_1) load KA1_LO; + and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff; + fmuld %f6,%f20,%f6 ! (Y0_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y0_1) s > HTHRESH + + ldd [EXPTBL+%o5],%f20 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + nop + nop + fpadd32 %f56,%f16,%f56 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + nop + cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff) + fmuld %f30,%f62,%f30 ! (Y0_1) dtmp1 = yd * y; + faddd %f36,KA3,%f62 ! (Y1_1) dtmp8 = dtmp8 + KA3; + + fmuld %f58,%f60,%f58 ! (Y1_0) dtmp2 = dtmp1 * y; + st %g0,[%fp+%o7] ! (Y1_2) yisint = 0; + ble,pn %icc,.update12 ! (Y1_2) if (hx <= 0xfffff) + faddd %f14,%f48,%f36 ! (Y1_1) m_h = y + yd; +.cont12: + sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20); + sub %o7,ind_buf,%o7 ! stack buffer pointer update + fmuld %f56,%f20,%f16 ! (Y0_0) dtmp1 = u * dtmp1; + fmovdg %fcc0,HTHRESH,%f34 ! (Y0_1) s = HTHRESH + + cmp %l3,2047 ! (Y0_2) if (exp >= 0x7ff) + st %f16,[pz] ! (Y0_0) write into memory + fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux; + fsubd %f8,%f10,%f10 ! (Y1_1) s_l -= dtmp1; + + sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20); + sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046; + st %f17,[pz+4] ! (Y0_0) write into memory + faddd %f6,%f30,%f6 ! (Y0_1) yd = dtmp0 + dtmp1; + + add %o5,%l0,%o5 ! (Y0_2) exp += itmp0; + add py,stridey,py ! py += stridey; + ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx; + fsubd %f36,%f14,%f30 ! (Y1_1) dtmp2 = m_h - y; + + sll %o5,8,%l0 ! 
(Y0_2) itmp0 = exp << 8; + ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx; + fmuld KA1,%f22,%f22 ! (Y1_1) dtmp0 = KA1 * yd; + faddd %f58,KB3,%f58 ! (Y1_0) dtmp3 = dtmp2 + KB3; + + add %o7,4,%o7 ! stack buffer pointer update + st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0; + fcmped %fcc1,%f34,LTHRESH ! (Y0_1) s < LTHRESH + + and %o7,15,%o7 ! stack buffer pointer update + ld [%fp+tmp4],%l0 ! (Y1_0) ind = (int)dtmp0; + fmuld %f62,%f26,%f62 ! (Y1_1) dtmp8 = dtmp8 * y; + fmovdg %fcc0,DZERO,%f6 ! (Y0_1) yd = DZERO + + nop + add %o7,ind_buf,%o7 ! stack buffer pointer update + ldd [EXPTBL-ind_KB2],KB2 ! (Y1_0) load KB2; + faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax; + + fmuld %f58,%f60,%f58 ! (Y1_0) dtmp4 = dtmp3 * y; + nop + bge,pn %icc,.update13 ! (Y0_2) if (exp >= 0x7ff) + fsubd %f30,%f48,%f48 ! (Y1_1) dtmp2 -= yd; +.cont13: + lda [py]%asi,%l1 ! (Y1_1) hy = *py; + nop + fmuld %f22,%f10,%f10 ! (Y1_1) s_l = dtmp0 * s_l; + fmovdl %fcc1,LTHRESH,%f34 ! (Y0_1) s = LTHRESH + + nop + nop + fmovdl %fcc1,DZERO,%f6 ! (Y0_1) yd = DZERO + + fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32); + nop + nop + fmuld KA1_LO,%f4,%f4 ! (Y1_1) dtmp1 = KA1_LO * s_h; + + nop + add px,stridex,px ! px += stridex; + faddd %f58,KB2,%f30 ! (Y1_0) dtmp5 = dtmp4 + KB2; + fmuld %f62,%f52,%f62 ! (Y1_1) s = dtmp8 * s; + + sra %l0,8,%i2 ! (Y1_0) ind >>= 8; + add pz,stridez,pz ! pz += stridez; + ldd [LOGTBL+%o0],%f52 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + fsubd %f48,%f10,%f22 ! (Y1_1) dtmp2 -= s_l; + + add %i2,1021,%l3 ! (Y1_0) eflag = (ind + 1021); + sub %g0,%i2,%o5 ! (Y1_0) gflag = (1022 - ind); + ldd [EXPTBL-ind_KB1],KB1 ! (Y1_0) load KB1; + faddd %f34,%f6,%f58 ! (Y0_1) dtmp0 = (s + yd); + + sra %l3,31,%l3 ! (Y1_0) eflag = eflag >> 31; + add %o5,1022,%o5 ! (Y1_0) gflag = (1022 - ind); + ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI; + fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax); + + sra %o5,31,%o5 ! (Y1_0) gflag = gflag >> 31; + and %l3,54,%o0 ! 
(Y1_0) itmp0 = 54 & eflag; + fmuld %f30,%f60,%f48 ! (Y1_0) dtmp6 = dtmp5 * y; + faddd %f52,%f4,%f52 ! (Y1_1) dtmp0 += dtmp1; + + sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8); + add %i2,%o0,%i2 ! (Y1_0) ind = ind + itmp0; + fsubd %f62,%f22,%f4 ! (Y1_1) y = s - dtmp2; + + lda [py]%asi,%f30 ! (Y1_1) yd = *py; + and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0; + and %o5,52,%o0 ! (Y1_0) itmp1 = 52 & gflag; + fdtoi %f58,%f22 ! (Y0_1) (int)dtmp0; + + sub %l3,%o5,%l3 ! (Y1_0) ind = eflag - gflag; + sub %i2,%o0,%i2 ! (Y1_0) ind = ind - itmp1; + ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0; + fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax; + + add %o2,%i2,%i2 ! (Y1_0) ind = yisint + ind; + and %l0,255,%o5 ! (Y1_0) i = ind & 0xff; + st %f22,[%fp+tmp4] ! (Y0_1) ind = (int)dtmp0; + faddd %f48,KB1,%f54 ! (Y1_0) dtmp7 = dtmp6 + KB1; + + sll %i2,20,%o0 ! (Y1_0) ind <<= 20; + nop + lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0]; + faddd %f4,%f52,%f48 ! (Y1_1) y += dtmp0; + + and %l1,MASK_0x7fffffff,%l1 ! (Y1_1) hy &= 0x7fffffff; + nop + st %o0,[%fp+tmp2_hi] ! (Y1_0) *(int*)&dtmp0 = ind; + fitod %f22,%f4 ! (Y0_1) u = (double)(int)dtmp0; + + lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + nop + fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd; + fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0); + + sll %o5,4,%o5 ! (Y1_0) i = i << 4; + ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i); + fmuld %f54,%f60,%f22 ! (Y1_0) y = dtmp7 * y; + fitod %f16,%f54 ! (Y0_2) (double)itmp0; + + sra %l1,20,%l1 ! (Y1_1) expy = hy >> 20; + nop + ldd [EXPTBL+%o5],%f56 ! (Y1_0) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f48,%f36,%f32 ! (Y1_1) dtmp0 = y + m_h; + + add %o5,8,%o5 ! (Y1_0) i += 8; + add %l4,8,%o0 ! (Y0_2) i += 8; + lda [py+4]%asi,%f31 ! (Y1_1) yd = *py; + fsubd %f34,%f4,%f60 ! (Y0_1) y = s - u; + + cmp %l1,959 ! (Y1_1) if (expy < 0x3fb); + and MASK_0x000fffff,%l0,%l4 ! (Y0_3) hx &= 0xfffff; + fand %f52,MHI32,%f4 ! 
(Y0_2) s_h = vis_fand(s, MHI32); + + ldd [EXPTBL+%o5],%f16 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,%f22,%f34 ! (Y1_0) dtmp1 = u * y; + bl,pn %icc,.update14 ! (Y1_1) if (expy < 0x3fb); + faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0; +.cont14: + ldd [EXPTBL-ind_KB5],KB5 ! (Y0_1) load KB5; + or MASK_0x3ff00000,%l4,%o5 ! (Y0_3) hx |= 0x3ff00000; + cmp %l1,1086 ! (Y1_1) if (expy >= 0x43e); + fand %f32,MHI32,%f22 ! (Y1_1) s_h = vis_fand(dtmp0, MHI32); + + fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux; + st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx; + bge,pn %icc,.update15 ! (Y1_1) if (expy >= 0x43e); + faddd %f60,%f6,%f60 ! (Y0_1) y = y + yd; +.cont15: + add %o5,2048,%o5 ! (Y0_3) hx += 0x800; + nop + ld [%fp+%o7],%o2 ! (Y0_1) load yisint + fand %f30,MHI32,%f6 ! (Y1_1) s = vis_fand(yd, MHI32); + + and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000; + st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1]; + fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s; + faddd %f16,%f34,%f16 ! (Y1_0) dtmp2 = dtmp0 + dtmp1; + + nop + nop + st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx; + fsubd %f22,%f36,%f62 ! (Y1_1) dtmp0 = (s_h - m_h); + + fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0; + nop + nop + fmuld KB5,%f60,%f58 ! (Y0_1) dtmp0 = KB5 * y; + + ldd [EXPTBL-ind_KB3],KB3 ! (Y0_1) load KB3; + nop + fmuld %f22,%f6,%f34 ! (Y1_1) s = s_h * s; + fsubd %f30,%f6,%f6 ! (Y1_1) dtmp0 = (yd - s); + + fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y; + nop + faddd %f16,%f56,%f56 ! (Y1_0) u = dtmp2 + u; + + add %l3,513,%l3 ! (Y1_0) ind += 513; + fsubd %f48,%f62,%f62 ! (Y1_1) y = y - dtmp0; + fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h; + + sll %l3,3,%o5 ! (Y1_0) ind *= 8; + ldd [%fp+tmp2_hi],%f16 ! (Y1_0) ld dtmp0; + fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l; + faddd %f58,KB4,%f58 ! (Y0_1) dtmp1 = dtmp0 + KB4; + + ldd [EXPTBL-ind_LO],KA1_LO ! (Y0_2) load KA1_LO; + and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff; + fmuld %f6,%f22,%f6 !
(Y1_1) dtmp0 *= s_h; + fcmped %fcc0,%f34,HTHRESH ! (Y1_1) s > HTHRESH; + + nop + subcc counter,2,counter ! update cycle counter + ldd [EXPTBL+%o5],%f22 ! (Y1_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f56,%f16,%f56 ! (Y1_0) u = vis_fpadd32(u, dtmp0); + + fmuld %f30,%f62,%f30 ! (Y1_1) dtmp1 = yd * y; + nop + nop + faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3; + + nop + st %g0,[%fp+%o7] ! (Y0_3) yisint = 0; + fmuld %f58,%f60,%f58 ! (Y0_1) dtmp2 = dtmp1 * y; + faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd; + + fmuld %f56,%f22,%f16 ! (Y1_0) dtmp1 = u * dtmp1; + nop + st %f16,[pz] ! (Y1_0) write into memory + fmovdg %fcc0,HTHRESH,%f34 ! (Y1_1) s = HTHRESH; + + fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux; + st %f17,[pz+4] ! (Y1_0) write into memory + bpos,pt %icc,.main_loop + fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1; + +.tail: + addcc counter,1,counter + bneg,pn %icc,.end_loop + + faddd %f58,KB3,%f58 ! (Y0_0) dtmp3 = dtmp2 + KB3; + ldd [EXPTBL-ind_KB2],KB2 ! (Y0_0) load KB2; + + ld [%fp+tmp4],%i2 ! (Y0_0) ind = (int)dtmp0; + fmuld %f58,%f60,%f58 ! (Y0_0) dtmp4 = dtmp3 * y; + faddd %f58,KB2,%f30 ! (Y0_0) dtmp5 = dtmp4 + KB2; + + add pz,stridez,pz ! pz += stridez; + ldd [EXPTBL-ind_KB1],KB1 ! (Y0_0) load KB1; + sra %i2,8,%l0 ! (Y0_0) ind >>= 8; + + add %l0,1021,%l2 ! (Y0_0) eflag = (ind + 1021); + sub %g0,%l0,%o5 ! (Y0_0) gflag = (1022 - ind); + fmuld %f30,%f60,%f48 ! (Y0_0) dtmp6 = dtmp5 * y; + + sra %l2,31,%l2 ! (Y0_0) eflag = eflag >> 31; + add %o5,1022,%o5 ! (Y0_0) gflag = (1022 - ind); + + sra %o5,31,%o5 ! (Y0_0) gflag = gflag >> 31; + and %l2,54,%o0 ! (Y0_0) itmp0 = 54 & eflag; + + sub %l2,%o5,%l2 ! (Y0_0) ind = eflag - gflag; + add %l0,%o0,%l0 ! (Y0_0) ind = ind + itmp0; + + and %o5,52,%o5 ! (Y0_0) itmp1 = 52 & gflag; + faddd %f48,KB1,%f14 ! (Y0_0) dtmp7 = dtmp6 + KB1; + + sub %l0,%o5,%l0 ! (Y0_0) ind = ind - itmp1; + and %i2,255,%i4 ! (Y0_0) i = ind & 0xff; + + sll %i4,4,%o5 ! (Y0_0) i = i << 4; + + ldd [EXPTBL+%o5],%f56 ! 
(Y0_0) u = *(double*)((char*)__mt_constexp2 + i); + add %o2,%l0,%l0 ! (Y0_0) ind = yisint + ind; + fmuld %f14,%f60,%f20 ! (Y0_0) y = dtmp7 * y; + + sll %l0,20,%i2 ! (Y0_0) ind <<= 20; + + add %o5,8,%o5 ! (Y0_0) i += 8; + st %i2,[%fp+tmp2_hi] ! (Y0_0) *(int*)&dtmp0 = ind; + + ldd [EXPTBL+%o5],%f16 ! (Y0_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,%f20,%f34 ! (Y0_0) dtmp1 = u * y; + + faddd %f16,%f34,%f16 ! (Y0_0) dtmp2 = dtmp0 + dtmp1; + + faddd %f16,%f56,%f56 ! (Y0_0) u = dtmp2 + u; + add %l2,513,%l2 ! (Y0_0) ind += 513; + + sll %l2,3,%o5 ! (Y0_0) ind *= 8; + ldd [%fp+tmp2_hi],%f16 ! (Y0_0) ld dtmp0; + + ldd [EXPTBL+%o5],%f20 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f56,%f16,%f56 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + fmuld %f56,%f20,%f16 ! (Y0_0) dtmp1 = u * dtmp1; + st %f16,[pz] ! (Y0_0) write into memory + st %f17,[pz+4] ! (Y0_0) write into memory + +.end_loop: + ba .begin + nop +.end: + ret + restore %g0,0,%o0 + + .align 16 +.update0: + cmp %l0,%g0 ! if (x >= 0); + fzero %f30 + + lda [py+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos0 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + cmp %o0,1076 ! if (expy >= 0x434); + bge .neg0 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %o0,1023 ! if (expy < 0x3ff); + bl .neg0 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %o0,1043 ! if (expy <= (20 + 0x3ff)); + ble .small0 ! if (expy <= (20 + 0x3ff)); + sub %o0,1023,%o0 ! expy - 0x3ff; + + sub %g0,%o0,%o0 + add %o0,52,%o0 ! sh = (52 - (expy - 0x3ff); + srl %l0,%o0,%i4 ! i0 = (ly >> sh); + + sll %i4,%o0,%i4 ! (i0 << sh); + + srl %l0,%o0,%o0 ! i0 = (ly >> sh); + cmp %i4,%l0 ! if ((i0 << sh) == ly); + + and %o0,1,%o0 ! i0 &= 1; + + sub %g0,%o0,%o0 + add %o0,2,%o0 ! i0 = 2 - i0; + + move %icc,%o0,%o5 ! yisint = i0; + + ba .neg0 + nop +.small0: + sub %g0,%o0,%o0 + cmp %l0,%g0 ! if (ly != 0); + + add %o0,20,%o0 ! sh = (20 - (expy - 0x3ff); + bne .neg0 ! if (ly != 0); + or %g0,0,%o5 ! 
yisint = 0; + + srl %l1,%o0,%i4 ! i0 = (hy >> sh); + + sll %i4,%o0,%i4 ! (i0 << sh); + + srl %l1,%o0,%o0 ! i0 = (hy >> sh); + cmp %i4,%l1 ! if ((i0 << sh) == hy); + + and %o0,1,%o0 ! i0 &= 1; + + sub %g0,%o0,%o0 + add %o0,2,%o0 ! i0 = 2 - i0; + + move %icc,%o0,%o5 ! yisint = i0; +.neg0: + orcc %l3,%i2,%g0 ! if (x != 0); + + sra %o2,31,%i4 ! sy = (*((unsigned*)py)[0]) >> 31; + bne,pt %icc,3f ! if (x != 0); + nop + + cmp %i4,%g0 ! if (sy == 0); + be 1f ! if (sy == 0); + and %o5,1,%i4 ! yisint &= 1; + + fdivd DONE,%f30,%f30 ! y0 = DONE / y0; +1: + cmp %i4,%g0 ! if ((yisint & 1) == 0); + be 2f ! if ((yisint & 1) == 0); + nop + + fnegd %f30,%f30 ! y0 = -y0; +2: + st %f30,[pz] + ba .update_point + st %f31,[pz+4] +3: + cmp %o5,%g0 ! if (yisint != 0); + bne .pos0 ! if (yisint != 0); + nop + + fdivd DZERO,DZERO,%f30 ! y0 = DZERO / DZERO; + st %f30,[pz] + ba .update_point + st %f31,[pz+4] +.pos0: + orcc %l3,%i2,%g0 ! if (x != 0); + + sra %o2,31,%i4 ! sy = (*((unsigned*)py)[0]) >> 31; + bne,pt %icc,.nzero0 ! if (x != 0); + nop + + cmp %i4,%g0 ! if (sy == 0); + be 1f ! if (sy == 0); + nop + + fdivd DONE,%f30,%f30 ! y0 = DONE / y0; +1: + st %f30,[pz] + ba .update_point + st %f31,[pz+4] +.nzero0: + sll %o5,11,%o5 + cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont0 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]; + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%l4 ! hx = exp & 0xfffff; + or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000; + add %l4,2048,%l4 ! hx += 0x800; + and %l4,-4096,%l4 ! hx &= 0xfffff000; + + ba .cont0 + st %l4,[%fp+tmp1_hi] ! 
*(int*)&ax = hx; + + .align 16 +.update1: + cmp counter,0 + ble,pt %icc,.cont1 + add py,stridey,%o5 + + stx px,[%fp+tmp_px] + + orcc %l2,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero1 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u1: + st counter,[%fp+tmp_counter] + ba .cont1 + or %g0,0,counter +.nzero1: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos1 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg1 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg1 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small1 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg1 + nop +.small1: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg1 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg1: + cmp %o5,%g0 + be .u1 + nop +.pos1: + sll %o5,11,%o5 + cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont1 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + std %f32,[%fp+tmp5]; + std %f54,[%fp+tmp6]; + ldd [%fp+tmp0_hi],%f32 + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! 
y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]; + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + std %f32,[%fp+tmp0_hi]; + sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%i4 ! hx = exp & 0xfffff; + ldd [%fp+tmp5],%f32 + or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000; + add %i4,2048,%i4 ! hx += 0x800; + ldd [%fp+tmp6],%f54 + and %i4,-4096,%i4 ! hx &= 0xfffff000; + + ba .cont1 + st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx; + + .align 16 +.update2: + cmp counter,1 + ble,pt %icc,.cont2 + add py,stridey,%o5 + + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + orcc %l3,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero2 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u2: + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont2 + or %g0,1,counter +.nzero2: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos2 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg2 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg2 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small2 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg2 + nop +.small2: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! 
sh = (20 - (expy - 0x3ff); + bne .neg2 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg2: + cmp %o5,%g0 + be .u2 + nop +.pos2: + sll %o5,11,%o5 + cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont2 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%l4 ! hx = exp & 0xfffff; + or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000; + add %l4,2048,%l4 ! hx += 0x800; + and %l4,-4096,%l4 ! hx &= 0xfffff000; + + ba .cont2 + st %l4,[%fp+tmp1_hi] ! *(int*)&ax = hx; + + .align 16 +.update3: + cmp counter,0 + ble,pt %icc,.cont3 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .cont3 + or %g0,0,counter + + .align 16 +.update4: + cmp counter,2 + ble,pt %icc,.cont4 + add py,stridey,%o5 + + add %o5,stridey,%o5 + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + orcc %l2,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero4 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u4: + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont4 + or %g0,2,counter +.nzero4: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos4 ! if (x >= 0); + or %g0,%g0,%o5 ! 
yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg4 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg4 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small4 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 ! -(expy - 0x3ff); + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 ! -i0; + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg4 + nop +.small4: + sub %g0,%i2,%i2 ! -(expy - 0x3ff); + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg4 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 ! -i0; + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg4: + cmp %o5,%g0 ! if (yisint == 0); + be .u4 ! if (yisint == 0); + nop +.pos4: + sll %o5,11,%o5 ! yisint << 11; + cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont4 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] ! store (yisint << 11) into the slot at %fp+%o7; + + std %f32,[%fp+tmp5]; ! save %f32 (restored below); + std %f54,[%fp+tmp6]; ! save %f54 (restored below); + ldd [%fp+tmp0_hi],%f32 + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 ! 1023 + 51; + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 ! (1023 + 51) << 20; + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + std %f32,[%fp+tmp0_hi]; + sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%i4 !
hx = exp & 0xfffff; + ldd [%fp+tmp5],%f32 + or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000; + add %i4,2048,%i4 ! hx += 0x800; + ldd [%fp+tmp6],%f54 + and %i4,-4096,%i4 ! hx &= 0xfffff000; + + ba .cont4 + st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx; + + .align 16 +.update5: + cmp counter,1 + ble,pt %icc,.cont5 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont5 + or %g0,1,counter + + .align 16 +.update6: + cmp counter,0 + ble,pt %icc,.cont6 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont6 + or %g0,0,counter + + .align 16 +.update7: + cmp counter,0 + ble,pt %icc,.cont7 + fmovd DONE,%f30 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%o2 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont7 + or %g0,0,counter + + .align 16 +.update8: + cmp counter,2 + ble,pt %icc,.cont8 + add py,stridey,%o5 + + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + orcc %l3,%i2,%g0 ! if (x == 0); + bne,pt %icc,.nzero8 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u8: + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont8 + or %g0,2,counter +.nzero8: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos8 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .pos8 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg8 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! 
if (expy <= (20 + 0x3ff)); + ble .small8 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg8 + nop +.small8: + sub %g0,%i2,%i2 + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg8 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg8: + cmp %o5,%g0 + be .u8 + nop +.pos8: + sll %o5,11,%o5 + cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont8 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] + + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%l4 ! hx &= 0xfffff; + or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000; + add %l4,2048,%l4 ! hx += 0x800; + and %l4,-4096,%l4 ! hx &= 0xfffff000; + + ba .cont8 + st %l4,[%fp+tmp1_hi] ! 
*(int*)&ax = hx; + + .align 16 +.update9: + cmp counter,1 + ble,pt %icc,.cont9 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont9 + or %g0,1,counter + + .align 16 +.update10: + cmp counter,0 + ble,pt %icc,.cont10 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont10 + or %g0,0,counter + + .align 16 +.update11: + cmp counter,0 + ble,pt %icc,.cont11 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + ba .cont11 + or %g0,0,counter + + .align 16 +.update12: + cmp counter,3 + ble,pt %icc,.cont12 + add py,stridey,%o5 + + add %o5,stridey,%o5 + stx px,[%fp+tmp_px] + + add %o5,stridey,%o5 + orcc %l2,%i2,%g0 ! if (x == 0); + + bne,pt %icc,.nzero12 ! if (x == 0); + stx %o5,[%fp+tmp_py] +.u12: + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont12 + or %g0,3,counter +.nzero12: + lda [%o5]%asi,%l1 ! ld hy; + cmp %l0,%g0 ! if (x >= 0); + + lda [%o5+4]%asi,%l0 ! ld ly + bge,pt %icc,.pos12 ! if (x >= 0); + or %g0,%g0,%o5 ! yisint = 0; + + and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff; + + sra %i2,20,%i2 ! expy = hy >> 20; + + cmp %i2,1076 ! if (expy >= 0x434); + bge .neg12 ! if (expy >= 0x434); + or %g0,2,%o5 ! yisint = 2; + + cmp %i2,1023 ! if (expy < 0x3ff); + bl .neg12 ! if (expy < 0x3ff); + or %g0,0,%o5 ! yisint = 0; + + cmp %i2,1043 ! if (expy <= (20 + 0x3ff)); + ble .small12 ! if (expy <= (20 + 0x3ff)); + sub %i2,1023,%i2 ! expy - 0x3ff; + + sub %g0,%i2,%i2 + add %i2,52,%i2 ! sh = (52 - (expy - 0x3ff); + srl %l0,%i2,%l1 ! 
i0 = (ly >> sh); + + sll %l1,%i2,%l1 ! (i0 << sh); + + srl %l0,%i2,%i2 ! i0 = (ly >> sh); + cmp %l1,%l0 ! if ((i0 << sh) == ly); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 ! -i0; + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; + + ba .neg12 + nop +.small12: + sub %g0,%i2,%i2 ! -(expy - 0x3ff); + cmp %l0,%g0 ! if (ly != 0); + + add %i2,20,%i2 ! sh = (20 - (expy - 0x3ff); + bne .neg12 ! if (ly != 0); + or %g0,0,%o5 ! yisint = 0; + + srl %l1,%i2,%l0 ! i0 = (hy >> sh); + + sll %l0,%i2,%l0 ! (i0 << sh); + + srl %l1,%i2,%i2 ! i0 = (hy >> sh); + cmp %l0,%l1 ! if ((i0 << sh) == hy); + + and %i2,1,%i2 ! i0 &= 1; + + sub %g0,%i2,%i2 ! -i0; + add %i2,2,%i2 ! i0 = 2 - i0; + + move %icc,%i2,%o5 ! yisint = i0; +.neg12: + cmp %o5,%g0 ! if (yisint == 0); + be .u12 ! if (yisint == 0); + nop +.pos12: + sll %o5,11,%o5 ! yisint << 11; + cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff); + + bg,pt %icc,.cont12 ! if (exp > 0xfffff); + st %o5,[%fp+%o7] ! store (yisint << 11) into the slot at %fp+%o7; + + std %f32,[%fp+tmp5]; ! save %f32 (restored below); + std %f54,[%fp+tmp6]; ! save %f54 (restored below); + ldd [%fp+tmp0_hi],%f32 + ldd [%fp+tmp_mant],%f54 + + or %g0,1074,%o5 ! 1023 + 51; + fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT); + + sll %o5,20,%o5 ! (1023 + 51) << 20; + fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0] + + std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0]; + fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT); + + ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0]; + for %f32,DONE,%f32 ! x = vis_for(x, DONE); + + std %f32,[%fp+tmp0_hi]; + sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20; + and MASK_0x000fffff,%i2,%i4 ! hx &= 0xfffff; + ldd [%fp+tmp5],%f32 ! restore %f32; + or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000; + add %i4,2048,%i4 ! hx += 0x800; + ldd [%fp+tmp6],%f54 ! restore %f54; + and %i4,-4096,%i4 ! hx &= 0xfffff000; + + ba .cont12 + st %i4,[%fp+tmp1_hi] !
*(int*)&ax = hx; + + .align 16 +.update13: + cmp counter,2 + ble,pt %icc,.cont13 + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%l1 + + stx %o5,[%fp+tmp_px] + add py,stridey,%o5 + + add %l1,counter,counter + stx %o5,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont13 + or %g0,2,counter + + .align 16 +.update14: + cmp counter,1 + ble,pt %icc,.cont14 + fmovd DONE,%f30 + + ld [%fp+tmp_counter],%o2 + sub px,stridex,%o5 + + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont14 + or %g0,1,counter + + .align 16 +.update15: + cmp counter,1 + ble,pt %icc,.cont15 + fmovd DONE,%f30 + + sub px,stridex,%o5 + + ld [%fp+tmp_counter],%o2 + sub %o5,stridex,%o5 + stx py,[%fp+tmp_py] + + add %o2,counter,counter + sub %o5,stridex,%o5 + stx %o5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont15 + or %g0,1,counter + + .align 16 +.spec0: + lda [py+4]%asi,%o5 ! ld ly; + lda [px]%asi,%f16 ! y0 = *px; + lda [px+4]%asi,%f17 ! y0 = *px; + orcc %l1,%o5,%g0 ! if (hy | ly) != 0; + + bne,pn %icc,1f + sethi %hi(0x7ff00000),%o5 + + st DONE_HI,[pz] + ba .update_point + st DONE_LO,[pz+4] +1: + cmp %l3,%o5 ! if (hx > 0x7ff00000); + bgu,a,pn %icc,6f ! if (hx > 0x7ff00000); + fmuld %f16,%f16,%f16 ! *pz = y0 * y0; + + bne,pt %icc,2f ! if (hx != 0x7ff00000); + orcc %l3,%i2,%g0 ! if (hx | lx) != 0; + + cmp %i2,0 ! if (lx) != 0; + bne,pn %icc,5f ! if (lx) != 0; + srl %o2,31,%o5 ! sy; + + st %l3,[pz] ! ((int*)pz)[0] = hx; + ba 3f + cmp %o5,0 ! if (sy == 0); +2: + bne,pt %icc,4f ! if (hx | lx) != 0; + srl %l0,31,%o5 ! sx; + + st %l3,[pz] ! ((int*)pz)[0] = hx; + srl %o2,31,%o5 ! sy; + cmp %o5,0 ! if (sy == 0); +3: + be,pt %icc,.update_point ! if (sy == 0); + st %i2,[pz+4] ! ((int*)pz)[1] = lx; + + ld [pz],%f16 ! *pz; + ld [pz+4],%f17 ! *pz; + fdivd DONE,%f16,%f16 ! 
*pz = DONE / *pz; + + st %f16,[pz] + ba .update_point + st %f17,[pz+4] +4: + cmp %o5,0 ! if (sx == 0); + bne,a,pt %icc,1f + nop + + st DONE_HI,[pz] ! *pz = DONE; + ba .update_point + st DONE_LO,[pz+4] ! *pz = DONE; +1: + fdivd DZERO,DZERO,%f16 ! *pz = DZERO / DZERO; + st %f16,[pz] + ba .update_point + st %f17,[pz+4] +5: + fmuld %f16,%f16,%f16 ! *pz = y0 * y0; +6: + st %f16,[pz] + ba .update_point + st %f17,[pz+4] + + .align 16 +.spec1: + lda [px]%asi,%f14 ! y0 = *px; + lda [px+4]%asi,%f15 ! y0 = *px; + sethi %hi(0x7ff00000),%o5 + lda [py+4]%asi,%i4 ! ld ly; + srl %o2,31,%o2 ! sy + cmp %l3,%o5 ! if (hx >= 0x7ff00000); + bcc,pn %icc,3f + nop + + cmp %l1,%o5 ! if (hy > 0x7ff00000); + bgu,a,pt %icc,.spec1_nan_inf ! if (hy > 0x7ff00000); + lda [py]%asi,%f16 ! ld y + + bne,a,pt %icc,1f ! if (hy != 0x7ff00000); + cmp %i2,0 ! if (lx != 0); + + ba 2f ! if (hy == 0x7ff00000); + cmp %i4,0 ! if (ly != 0); +1: + bne,pt %icc,7f ! if (lx != 0); + nop + + cmp %l3,0 ! if (hx == 0); + be,a,pt %icc,6f ! if (hx == 0); + st %l3,[pz] ! ((int*)pz)[0] = hx; + + cmp %l3,MASK_0x3ff00000 ! if (hx == 0x3ff00000); + be,a,pn %icc,6f ! if (hx == 0x3ff00000); + st %l3,[pz] ! ((int*)pz)[0] = hx; + + ba 5f + cmp %l3,%o5 ! if (hx != 0x7ff00000); +3: + bgu,a,pt %icc,.spec1_nan_inf ! if (hx > 0x7ff00000); + lda [py]%asi,%f16 ! ld y + + bne,a,pn %icc,1f ! if (hx != 0x7ff00000); + cmp %l1,%o5 ! if (hy > 0x7ff00000); + + cmp %i2,0 ! if (lx != 0); + bne,a,pt %icc,.spec1_nan_inf ! if (lx != 0); + lda [py]%asi,%f16 ! ld y + + cmp %l1,%o5 ! if (hy > 0x7ff00000); +1: + bgu,a,pt %icc,.spec1_nan_inf ! if (hy > 0x7ff00000); + lda [py]%asi,%f16 ! ld y + + bne,pn %icc,3f ! if (hy != 0x7ff00000); + nop + + cmp %i4,0 ! if (ly != 0); +2: + bne,a,pn %icc,.spec1_nan_inf ! if (ly != 0); + lda [py]%asi,%f16 ! ld y + + cmp %l3,MASK_0x3ff00000 ! if (hx != 0x3ff00000); + bne,pn %icc,1f ! if (hx != 0x3ff00000); + cmp %i2,0 ! if (lx != 0); + + bne,pn %icc,1f ! if (lx != 0); + nop + + ld [py],%f16 ! ld y + ld [py+4],%f17 ! 
ld y + fzero %f14 + fmuld %f16,%f14,%f14 ! *pz = *py * 0.0; + st %f14,[pz] + ba .update_point + st %f15,[pz+4] +1: + sub %l3,MASK_0x3ff00000,%o7 ! (hx - 0x3ff00000); + srlx %o7,63,%l2 ! (hx - 0x3ff00000) >> 63; + + cmp %l2,%o2 ! if ((hx < 0x3ff00000) == sy) + be,a,pn %icc,1f ! if ((hx < 0x3ff00000) == sy) + st %l1,[pz] ! ((int*)pz)[0] = hy; + + st DZERO_HI,[pz] ! *pz = DZERO; + ba .update_point + st DZERO_LO,[pz+4] ! *pz = DZERO; +1: + ba .update_point + st %i4,[pz+4] ! ((int*)pz)[0] = ly; +3: + cmp %o0,1086 ! if (expy >= 0x43e); + bge,pn %icc,4f ! if (expy >= 0x43e) + nop + + srl %l0,31,%l0 ! sx; + cmp %l0,0 ! if (sx == 0); + be,pn %icc,2f + or %g0,0,%l4 + + cmp %o0,1076 ! if (expy >= 0x434); + + bge,pn %icc,2f ! if (expy >= 0x434); + or %g0,2,%l4 ! yisint = 2; + + cmp %o0,1023 ! if (expy < 0x3ff); + bl,a,pn %icc,2f ! if (expy < 0x3ff); + or %g0,0,%l4 ! yisint = 0; + + cmp %o0,1043 ! if (expy <= (20 + 0x3ff)); + ble,pn %icc,1f + sub %o0,1023,%l2 ! (expy - 0x3ff); + + sub %g0,%l2,%l2 ! 0 - (expy - 0x3ff); + add %l2,52,%l2 ! sh = 52 - (expy - 0x3ff); + srl %i4,%l2,%o0 ! i0 = ly >> sh; + sll %o0,%l2,%l2 ! i0 << sh; + cmp %l2,%i4 ! if ((i0 << sh) != ly); + bne,a,pn %icc,2f ! if ((i0 << sh) != ly); + or %g0,0,%l4 ! yisint = 0; + + and %o0,1,%o0 ! i0 &= 1; + sub %g0,%o0,%o0 + + ba 2f + add %o0,2,%l4 ! yisint = 2 - (i0 & 1); +1: + cmp %i4,0 ! if (ly != 0) + bne,a,pn %icc,2f ! if (ly != 0) + or %g0,0,%l4 ! yisint = 0; + + sub %o0,1023,%l2 ! (expy - 0x3ff); + sub %g0,%l2,%l2 ! 0 - (expy - 0x3ff); + add %l2,20,%l2 ! sh = 20 - (expy - 0x3ff); + srl %l1,%l2,%o0 ! i0 = hy >> sh; + sll %o0,%l2,%l2 ! i0 << sh; + cmp %l2,%l1 ! if ((i0 << sh) != hy); + bne,a,pn %icc,2f ! if ((i0 << sh) != hy); + or %g0,0,%l4 ! yisint = 0; + + and %o0,1,%o0 ! i0 &= 1; + sub %g0,%o0,%o0 + add %o0,2,%l4 ! yisint = 2 - (i0 & 1); +2: + cmp %o2,0 ! if (sy == 0); + sll %l4,31,%l4 ! yisint << 31; + be,pt %icc,1f ! if (sy == 0); + add %l3,%l4,%l3 ! hx += yisint << 31; + + or %g0,%l4,%l3 ! 
hx = yisint << 31; + or %g0,0,%i2 ! lx = 0; +1: + st %l3,[pz] ! ((int*)pz)[0] = hx; + ba .update_point + st %i2,[pz+4] ! ((int*)pz)[1] = lx; +4: + cmp %i2,0 ! if (lx != 0); + bne,pn %icc,7f ! if (lx != 0); + nop + + cmp %l3,%o5 ! if (hx != 0x7ff00000); +5: + bne,pn %icc,7f ! if (hx != 0x7ff00000); + nop + + st %l3,[pz] ! ((int*)pz)[0] = hx; +6: + cmp %o2,0 ! if (sy == 0); + be,pt %icc,.update_point + st %i2,[pz+4] ! ((int*)pz)[1] = lx; + + ld [pz],%f14 ! ld *pz; + ld [pz+4],%f15 ! ld *pz; + fdivd DONE,%f14,%f14 ! *pz = DONE / *pz; + st %f14,[pz] + ba .update_point + st %f15,[pz+4] +7: + sub %l3,MASK_0x3ff00000,%o7 ! hx - 0x3ff00000; + srlx %o7,63,%l2 ! (hx - 0x3ff00000) >> 63; + cmp %l2,%o2 ! if (hx < 0x3ff00000) == sy); + be,a,pn %icc,1f ! if (hx < 0x3ff00000) == sy); + ldd [EXPTBL-ind_HUGE],%f14 ! y0 = _HUGE; + + ldd [EXPTBL-ind_TINY],%f14 ! y0 = _TINY; +1: + fmuld %f14,%f14,%f14 ! *pz = y0 * y0 + + st %f14,[pz] + ba .update_point + st %f15,[pz+4] + + .align 16 +.spec1_nan_inf: + lda [py+4]%asi,%f17 ! ld y + fmuld %f14,%f16,%f16 ! *pz = *px * *py + st %f16,[pz] + ba .update_point + st %f17,[pz+4] + + + .align 16 +.update_point: + add px,stridex,px + ba .begin1 + add py,stridey,py + + .align 64 +.stridex_zero: + + sra stridez,0,stridez + ld [%i1],%f18 ! y0 = px[0]; + ld [%i1+4],%f19 ! y0 = px[0]; + + sra %i4,0,stridey + sethi %hi(0xffc00),MASK_0x000fffff + ldd [%l0+80],%f12 ! ld MMANT + + sllx stridez,3,stridez + add MASK_0x000fffff,0x3ff,MASK_0x000fffff + ldd [%l0+8],%f56 ! ld DONE + + sllx stridey,3,stridey + ldd [%l0+88],%f14 ! ld MROUND + + ldd [%l0+96],%f16 ! ld MHI20 + cmp %o0,MASK_0x000fffff ! if (exp <= 0xfffff) + + bg,pt %icc,1f + srl %o0,20,%o0 ! exp = (exp >> 20); + + fxtod %f18,%f18 ! y0 = (double) ((long long *) & y0)[0]; + std %f18,[%fp+tmp0_hi] ! exp = ((unsigned int*) & y0)[0]; + or %g0,1074,%i2 + ld [%fp+tmp0_hi],%o0 ! exp = ((unsigned int*) & y0)[0]; + srl %o0,20,%o0 ! exp = (exp >> 20); + sub %o0,%i2,%o0 ! 
exp -= (1023 + 51) << 20; +1: + ldd [%l0+24],MHI32 + sub %o0,2046,%l5 ! exp = exp - 2046; + fand %f18,%f12,%f18 ! x = vis_fand(y0, MMANT); + + ldd [%l0+48],%f10 ! ld KA1 + for %f18,%f56,%f18 ! x = vis_for(x, DONE); + + ldd [EXPTBL-ind_HI],%f28 ! ld KA1_HI + fpadd32 %f18,%f14,%f44 ! ax = vis_fpadd32(x, MROUND); + + ldd [%l0+32],%f46 ! ld KA5 + fand %f44,%f16,%f60 ! ax = vis_fand(ax, MHI20); + + std %f60,[%fp+tmp0_hi] ! itmp0 = (hx >> 20); + faddd %f18,%f60,%f50 ! ux = x + ax; + + ldd [EXPTBL-ind_LO],%f52 ! ld KA1_LO + fsubd %f18,%f60,%f30 ! u = x - ax; + + ld [%fp+tmp0_hi],%i2 ! itmp0 = (hx >> 20); + fdivd %f56,%f50,%f56 ! yd = DONE / ux; + fand %f50,MHI32,%f50 ! ux = vis_fand(ux, MHI32); + + srl %i2,20,%l3 ! itmp0 = (hx >> 20); + ldd [%l0+40],%f26 ! ld KA3 + + srl %i2,8,%i2 ! i = (hx >> 8); + add %l5,%l3,%l5 ! exp += itmp0; + + and %i2,4080,%o3 ! i = i & 0xff0; + sll %l5,8,%l3 ! itmp0 = exp << 8; + st %l3,[%fp+tmp1_hi] ! (double)itmp0; + fsubd %f50,%f60,%f60 ! dtmp0 = (ux - ax); + + add %o3,8,%i2 + ldd [%o3+LOGTBL],%f58 ! y = *(double *)((char*)__mt_constlog2 + i); + + ldd [%i2+LOGTBL],%f20 ! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8); + + ld [%fp+tmp1_hi],%f8 ! (double)itmp0; + + fitod %f8,%f62 ! (double)itmp0; + + faddd %f58,%f62,%f22 ! y += (double)itmp0; + + fsubd %f18,%f60,%f62 ! s_l = (x - dtmp0); + fmuld %f30,%f56,%f16 ! s = u * yd; + + fmuld %f10,%f56,%f8 ! dtmp0 = KA1 * yd; + fand %f16,MHI32,%f58 ! s_h = vis_fand(s, MHI32); + + ldd [%l0+56],HTHRESH + fmuld %f16,%f16,%f18 ! y = s * s; + + ldd [%l0+64],LTHRESH + fmuld %f58,%f50,%f60 ! dtmp0 = s_h * ux; + + ldd [%l0+72],XKB4 + fmuld %f28,%f58,%f50 ! yd = KA1_HI * s_h; + + ldd [EXPTBL-ind_KB1],XKB1 + fmuld %f46,%f18,%f56 ! dtmp8 = KA5 * y; + + ldd [EXPTBL-ind_KB2],XKB2 + fmuld %f58,%f62,%f46 ! dtmp1 = s_h * s_l; + fsubd %f30,%f60,%f62 ! s_l = u - dtmp0; + + ldd [EXPTBL-ind_KB3],XKB3 + fmuld %f52,%f58,%f10 ! dtmp1 = KA1_LO * s_h; + faddd %f22,%f50,%f28 ! 
m_h = y + yd; + + ldd [EXPTBL-ind_KB5],XKB5 + faddd %f56,%f26,%f58 ! dtmp8 = dtmp8 + KA3; + + add EXPTBL,8,EXPTBL_P8 + fsubd %f62,%f46,%f46 ! s_l -= dtmp1; + + fsubd %f28,%f22,%f60 ! dtmp2 = m_h - y; + + st %g0,[%fp+tmp0_lo] ! *((int*)&dtmp0 + 1) = 0; + faddd %f20,%f10,%f56 ! dtmp0 += dtmp1; + + st %g0,[%fp+tmp1_lo] ! *((int*)&dtmp0 + 1) = 0; + fmuld %f58,%f18,%f18 ! dtmp8 = dtmp8 * y; + + st %g0,[%fp+tmp2_lo] ! *((int*)&dtmp0 + 1) = 0; + fmuld %f8,%f46,%f62 ! s_l = dtmp0 * s_l; + + fsubd %f60,%f50,%f10 ! dtmp2 -= yd; + + fmuld %f18,%f16,%f58 ! s = dtmp8 * s; + + fsubd %f10,%f62,%f46 ! dtmp2 -= s_l; + + fsubd %f58,%f46,%f50 ! y = s - dtmp2; + + faddd %f50,%f56,%f60 ! y += dtmp0; + + faddd %f60,%f28,%f18 ! dtmp0 = y + m_h; + + fand %f18,MHI32,s_h ! s_h = vis_fand(dtmp0, MHI32); + + fsubd s_h,%f28,%f62 ! dtmp0 = (s_h - m_h); + + fsubd %f60,%f62,yr ! yr = y - dtmp0; + +.xbegin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_py],py + st %g0,[%fp+tmp_counter] +.xbegin1: + subcc counter,1,counter + bneg,pn %icc,.end + nop + + lda [py]0x82,%l2 ! (Y0_3) hy = *py; + + lda [py]0x82,%f18 ! (Y0_3) yd = *py; + lda [py+4]%asi,%f19 ! (Y0_3) yd = *py; + + sra %l2,20,%l5 ! (Y0_3) expy = hy >> 20; + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + + bl,pn %icc,.xspec0 ! (Y0_3) if (expy < 0x3fb); + nop + + cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e); + + bge,pn %icc,.xspec1 ! (Y0_2) if (expy >= 0x43e); + nop + + add py,stridey,py ! y += stridey; + fand %f18,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32); + + lda [py]0x82,%l5 ! (Y1_2) hy = *py; + + lda [py]0x82,%f10 ! (Y1_2) yd = *py; + lda [py+4]%asi,%f11 ! (Y1_2) yd = *py; + + sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20; + + and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff; + + cmp %l5,959 ! (Y1_2) if (expy < 0x3fb); + add py,stridey,py ! y += stridey; + fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s; + fsubd %f18,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s); + + fmuld %f18,yr,%f26 ! 
(Y0_2) dtmp1 = yd * yr; + bl,pn %icc,.xupdate0 ! (Y1_2) if (expy < 0x3fb); + nop +.xcont0: + cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e); + bge,pn %icc,.xupdate1 ! (Y0_2) if (expy >= 0x43e); + nop +.xcont1: + fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h; + fand %f10,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32); + + fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH); + + faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1; + + lda [py]0x82,%l5 ! (Y2_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH; + + fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO; + + fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH); + + lda [py]0x82,%f14 ! (Y2_2) yd = *py; + lda [py+4]%asi,%f15 ! (Y2_2) yd = *py; + + sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20; + + fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add py,stridey,py ! y += stridey; + and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH; + + cmp %l5,959 ! (Y2_2) if (expy < 0x3fb); + + fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s; + bl,pn %icc,.xupdate2 ! (Y2_2) if (expy < 0x3fb); + fsubd %f10,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s); +.xcont2: + cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e); + fmuld %f10,yr,%f8 ! (Y1_2) dtmp1 = yd * yr; + faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd); + + lda [py]0x82,%l5 ! (Y0_3) hy = *py; + bge,pn %icc,.xupdate3 ! (Y2_2) if (expy >= 0x43e); + nop +.xcont3: + fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h; + fand %f14,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32); + + fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH); + + fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0; + + st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0; + + faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1; + + lda [py]0x82,%f18 ! (Y0_3) yd = *py; + lda [py+4]%asi,%f19 ! (Y0_3) yd = *py; + fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH; + + fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0; + + fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO; + + sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20; + fcmped %fcc1,%f16,LTHRESH ! 
(Y1_2) if (s < LTHRESH); + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + + bl,pn %icc,.xupdate4 ! (Y0_3) if (expy < 0x3fb); + nop +.xcont4: + fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO; + + fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH; + + faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd; + + ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0; + + + fsubd %f14,%f44,%f50 ! (Y2_1) dtmp0 = (yd - s); + + cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e); + + fmuld s_h,%f44,%f44 ! (Y2_1) s = s_h * s; + bge,pn %icc,.xupdate5 ! (Y0_2) if (expy >= 0x43e); + faddd %f16,%f10,%f22 ! (Y1_1) dtmp0 = (s + yd); +.xcont5: + sra %o2,8,%o0 ! (Y0_1) ind >>= 8; + add py,stridey,py ! y += stridey; + fmuld %f14,yr,%f20 ! (Y2_1) dtmp1 = yd * yr; + + add %o0,1021,%i1 ! (Y0_1) eflag = (ind + 1021); + fmuld XKB5,%f54,%f48 ! (Y0_1) dtmp0 = XKB5 * y; + + sub %g0,%o0,%o3 ! (Y0_1) gflag = (1022 - ind); + fmuld %f50,s_h,%f52 ! (Y2_1) dtmp0 *= s_h; + fand %f18,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32); + + sra %i1,31,%o1 ! (Y0_1) eflag = eflag >> 31; + add %o3,1022,%l0 ! (Y0_1) gflag = (1022 - ind); + fcmped %fcc0,%f44,HTHRESH ! (Y2_1) if (s > HTHRESH); + + sra %l0,31,%o4 ! (Y0_1) gflag = gflag >> 31; + and %o1,54,%i4 ! (Y0_1) itmp0 = 54 & eflag; + fdtoi %f22,%f4 ! (Y1_1) u = (double)(int)dtmp0; + + add %o0,%i4,%i2 ! (Y0_1) ind = ind + itmp0; + and %o4,52,%l3 ! (Y0_1) itmp1 = 52 & gflag; + st %f4,[%fp+tmp4] ! (Y1_1) ind = (int)dtmp0; + faddd %f48,XKB4,%f60 ! (Y0_1) dtmp1 = dtmp0 + XKB4; + + sub %i2,%l3,%l2 ! (Y0_1) ind = ind - itmp1; + sub %o1,%o4,%o4 ! (Y0_1) ind = eflag - gflag; + faddd %f52,%f20,%f62 ! (Y2_1) yd = dtmp0 + dtmp1; + + sll %l2,20,%o3 ! (Y0_1) ind <<= 20; + lda [py]0x82,%l5 ! (Y1_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f44 ! (Y2_1) s = HTHRESH; + + st %o3,[%fp+tmp0_hi] ! (Y0_1) *(int*)&dtmp0 = ind; + fitod %f4,%f48 ! (Y1_1) u = (double)(int)dtmp0; + + fmuld %f60,%f54,%f60 ! 
(Y0_1) dtmp2 = dtmp1 * y; + + lda [py]0x82,%f20 ! (Y1_2) yd = *py; + lda [py+4]%asi,%f21 ! (Y1_2) yd = *py; + fmovdg %fcc0,DZERO,%f62 ! (Y2_1) yd = DZERO; + + fcmped %fcc1,%f44,LTHRESH ! (Y2_1) if (s < LTHRESH); + + fsubd %f16,%f48,%f50 ! (Y1_1) y = s - u; + + faddd %f60,XKB3,%f60 ! (Y0_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20; + + fmovdl %fcc1,DZERO,%f62 ! (Y2_1) yd = DZERO; + + and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f44 ! (Y2_1) s = LTHRESH; + + cmp %l5,959 ! (Y1_2) if (expy < 0x3fb); + fmuld %f60,%f54,%f48 ! (Y0_1) dtmp4 = dtmp3 * y; + faddd %f50,%f10,%f52 ! (Y1_1) y = y + yd; + + ld [%fp+tmp4],%o1 ! (Y1_1) ind = (int)dtmp0; + + add py,stridey,py ! y += stridey; + fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s; + fsubd %f18,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s); + + fmuld %f18,yr,%f26 ! (Y0_2) dtmp1 = yd * yr; + bl,pn %icc,.xupdate6 ! (Y1_2) if (expy < 0x3fb); + faddd %f44,%f62,%f28 ! (Y2_1) dtmp0 = (s + yd); +.xcont6: + sra %o1,8,%o3 ! (Y1_1) ind >>= 8; + cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e); + fmuld XKB5,%f52,%f22 ! (Y1_1) dtmp0 = XKB5 * y; + faddd %f48,XKB2,%f14 ! (Y0_1) dtmp5 = dtmp4 + XKB2; + + add %o3,1021,%o0 ! (Y1_1) eflag = (ind + 1021); + bge,pn %icc,.xupdate7 ! (Y0_2) if (expy >= 0x43e); + nop +.xcont7: + sub %g0,%o3,%i2 ! (Y1_1) gflag = (1022 - ind); + fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h; + fand %f20,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32); + + sra %o0,31,%l3 ! (Y1_1) eflag = eflag >> 31; + add %i2,1022,%l2 ! (Y1_1) gflag = (1022 - ind); + fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH); + + sra %l2,31,%o7 ! (Y1_1) gflag = gflag >> 31; + and %l3,54,%i1 ! (Y1_1) itmp0 = 54 & eflag; + fdtoi %f28,%f3 ! (Y2_1) u = (double)(int)dtmp0; + + add %o3,%i1,%l0 ! (Y1_1) ind = ind + itmp0; + and %o7,52,%l1 ! (Y1_1) itmp1 = 52 & gflag; + st %f3,[%fp+ind_buf] ! (Y2_1) ind = (int)dtmp0; + faddd %f22,XKB4,%f60 ! (Y1_1) dtmp1 = dtmp0 + XKB4; + + sub %l0,%l1,%i4 ! 
(Y1_1) ind = ind - itmp1; + sub %l3,%o7,%o7 ! (Y1_1) ind = eflag - gflag; + faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1; + + sll %i4,20,%i2 ! (Y1_1) ind <<= 20; + lda [py]0x82,%l5 ! (Y2_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH; + + st %i2,[%fp+tmp1_hi] ! (Y1_1) *(int*)&dtmp0 = ind; + fitod %f3,%f18 ! (Y2_1) u = (double)(int)dtmp0; + + fmuld %f60,%f52,%f60 ! (Y1_1) dtmp2 = dtmp1 * y; + + fmuld %f14,%f54,%f56 ! (Y0_1) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO; + + fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH); + + lda [py]0x82,%f26 ! (Y2_2) yd = *py; + lda [py+4]%asi,%f27 ! (Y2_2) yd = *py; + fsubd %f44,%f18,%f18 ! (Y2_1) y = s - u; + + faddd %f60,XKB3,%f44 ! (Y1_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20; + and %o2,255,%o2 ! (Y0_1) i = ind & 0xff; + faddd %f56,XKB1,%f58 ! (Y0_1) dtmp7 = dtmp6 + XKB1; + + sll %o2,4,%l2 ! (Y0_1) i = i << 4; + fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add py,stridey,py ! y += stridey; + and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH; + + cmp %l5,959 ! (Y2_2) if (expy < 0x3fb); + ldd [EXPTBL+%l2],%f22 ! (Y0_1) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f18,%f62,%f18 ! (Y2_1) y = y + yd; + fmuld %f44,%f52,%f62 ! (Y1_1) dtmp4 = dtmp3 * y; + + ld [%fp+ind_buf],%l1 ! (Y2_1) ind = (int)dtmp0; + fmuld %f58,%f54,%f54 ! (Y0_1) y = dtmp7 * y; + + fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s; + bl,pn %icc,.xupdate8 ! (Y2_2) if (expy < 0x3fb); + fsubd %f20,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s); +.xcont8: + cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e); + fmuld %f20,yr,%f8 ! (Y1_2) dtmp1 = yd * yr; + faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd); + + sra %l1,8,%o2 ! (Y2_1) ind >>= 8; + lda [py]0x82,%l5 ! (Y0_3) hy = *py; + fmuld XKB5,%f18,%f20 ! (Y2_1) dtmp0 = XKB5 * y; + faddd %f62,XKB2,%f12 ! (Y1_1) dtmp5 = dtmp4 + XKB2; + + add %o2,1021,%l0 ! (Y2_1) eflag = (ind + 1021); + bge,pn %icc,.xupdate9 ! 
(Y2_2) if (expy >= 0x43e); + nop +.xcont9: + sub %g0,%o2,%l3 ! (Y2_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l2],%f14 ! (Y0_1) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h; + fand %f26,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32); + + sra %l0,31,%o0 ! (Y2_1) eflag = eflag >> 31; + add %l3,1022,%i4 ! (Y2_1) gflag = (1022 - ind); + fmuld %f22,%f54,%f56 ! (Y0_1) dtmp1 = u * y; + fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH); + + sra %i4,31,%o5 ! (Y2_1) gflag = gflag >> 31; + and %o0,54,%i2 ! (Y2_1) itmp0 = 54 & eflag; + fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0; + + add %o2,%i2,%i1 ! (Y2_1) ind = ind + itmp0; + and %o5,52,%l2 ! (Y2_1) itmp1 = 52 & gflag; + st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0; + faddd %f20,XKB4,%f60 ! (Y2_1) dtmp1 = dtmp0 + XKB4; + + sub %i1,%l2,%o3 ! (Y2_1) ind = ind - itmp1; + sub %o0,%o5,%o5 ! (Y2_1) ind = eflag - gflag; + faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1; + + sll %o3,20,%l3 ! (Y2_1) ind <<= 20; + lda [py]0x82,%f28 ! (Y0_3) yd = *py; + lda [py+4]%asi,%f29 ! (Y0_3) yd = *py; + fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH; + + st %l3,[%fp+tmp2_hi] ! (Y2_1) *(int*)&dtmp0 = ind; + fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0; + + fmuld %f60,%f18,%f60 ! (Y2_1) dtmp2 = dtmp1 * y; + faddd %f14,%f56,%f20 ! (Y0_1) dtmp2 = dtmp0 + dtmp1; + + fmuld %f12,%f52,%f56 ! (Y1_1) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO; + + sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20; + fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH); + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + faddd %f60,XKB3,%f60 ! (Y2_1) dtmp3 = dtmp2 + XKB3; + + and %o1,255,%o1 ! (Y1_1) i = ind & 0xff; + bl,pn %icc,.xupdate10 ! (Y0_3) if (expy < 0x3fb); + faddd %f56,XKB1,%f8 ! (Y1_1) dtmp7 = dtmp6 + XKB1; +.xcont10: + sll %o1,4,%l0 ! (Y1_1) i = i << 4; + fmovdl %fcc1,DZERO,%f10 ! 
(Y1_2) yd = DZERO; + + nop + ba 1f + fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH; + + .align 16 +1: + subcc counter,2,counter + ldd [EXPTBL+%l0],%f56 ! (Y1_1) u = *(double*)((char*)__mt_constexp2 + i); + fmuld %f60,%f18,%f58 ! (Y2_1) dtmp4 = dtmp3 * y; + faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd; + + fmuld %f8,%f52,%f60 ! (Y1_1) y = dtmp7 * y; + ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0; + bneg,pn %icc,.xtail + faddd %f20,%f22,%f12 ! (Y0_1) u = dtmp2 + u; + +.xmain_loop: + cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e); + add %o4,513,%o4 ! (Y0_0) ind += 513; + ldd [%fp+tmp0_hi],%f52 ! (Y0_0) *(int*)&dtmp0 = ind; + fsubd %f26,%f44,%f50 ! (Y2_1) dtmp0 = (yd - s); + + fmuld s_h,%f44,%f44 ! (Y2_1) s = s_h * s; + sra %o2,8,%o0 ! (Y0_1) ind >>= 8; + bge,pn %icc,.xupdate11 ! (Y0_2) if (expy >= 0x43e); + faddd %f16,%f10,%f22 ! (Y1_1) dtmp0 = (s + yd); +.xcont11: + sll %o4,3,%l2 ! (Y0_0) ind *= 8; + add py,stridey,py ! y += stridey; + fmuld %f26,yr,%f20 ! (Y2_1) dtmp1 = yd * yr; + faddd %f58,XKB2,%f14 ! (Y2_0) dtmp5 = dtmp4 + XKB2; + + add %o0,1021,%i1 ! (Y0_1) eflag = (ind + 1021); + ldd [%l2+EXPTBL],%f62 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fmuld XKB5,%f54,%f48 ! (Y0_1) dtmp0 = XKB5 * y; + fpadd32 %f12,%f52,%f58 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + sub %g0,%o0,%o3 ! (Y0_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l0],%f8 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fand %f28,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32); + fmuld %f50,s_h,%f52 ! (Y2_1) dtmp0 *= s_h; + + sra %i1,31,%o1 ! (Y0_1) eflag = eflag >> 31; + add %o3,1022,%l0 ! (Y0_1) gflag = (1022 - ind); + fmuld %f56,%f60,%f26 ! (Y1_0) dtmp1 = u * y; + fcmped %fcc0,%f44,HTHRESH ! (Y2_1) if (s > HTHRESH); + + sra %l0,31,%o4 ! (Y0_1) gflag = gflag >> 31; + and %o1,54,%i4 ! (Y0_1) itmp0 = 54 & eflag; + fmuld %f58,%f62,%f6 ! (Y0_0) dtmp1 = u * dtmp1; + fdtoi %f22,%f4 ! (Y1_1) u = (double)(int)dtmp0; + + add %o0,%i4,%i2 ! (Y0_1) ind = ind + itmp0; + and %o4,52,%l3 ! 
(Y0_1) itmp1 = 52 & gflag; + st %f4,[%fp+tmp4] ! (Y1_1) ind = (int)dtmp0; + faddd %f48,XKB4,%f60 ! (Y0_1) dtmp1 = dtmp0 + XKB4; + + sub %i2,%l3,%l2 ! (Y0_1) ind = ind - itmp1; + sub %o1,%o4,%o4 ! (Y0_1) ind = eflag - gflag; + st %f6,[pz] ! (Y0_0) write into memory + faddd %f52,%f20,%f62 ! (Y2_1) yd = dtmp0 + dtmp1; + + sll %l2,20,%o3 ! (Y0_1) ind <<= 20; + nop + st %o3,[%fp+tmp0_hi] ! (Y0_1) *(int*)&dtmp0 = ind; + fmovdg %fcc0,HTHRESH,%f44 ! (Y2_1) s = HTHRESH; + + lda [py]0x82,%l5 ! (Y1_2) hy = *py; + nop + fitod %f4,%f48 ! (Y1_1) u = (double)(int)dtmp0; + + fmuld %f60,%f54,%f60 ! (Y0_1) dtmp2 = dtmp1 * y; + nop + st %f7,[pz+4] ! (Y0_0) write into memory + faddd %f8,%f26,%f26 ! (Y1_0) dtmp2 = dtmp0 + dtmp1; + + lda [py]0x82,%f8 ! (Y1_2) yd = *py; + nop + fmuld %f14,%f18,%f52 ! (Y2_0) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f62 ! (Y2_1) yd = DZERO; + + lda [py+4]%asi,%f9 ! (Y1_2) yd = *py; + add pz,stridez,pz ! z += stridez; + fcmped %fcc1,%f44,LTHRESH ! (Y2_1) if (s < LTHRESH); + + fsubd %f16,%f48,%f50 ! (Y1_1) y = s - u; + + faddd %f60,XKB3,%f60 ! (Y0_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20; + and %l1,255,%l1 ! (Y2_0) i = ind & 0xff; + faddd %f52,XKB1,%f58 ! (Y2_0) dtmp7 = dtmp6 + XKB1; + + sll %l1,4,%l0 ! (Y2_0) i = i << 4; + fmovdl %fcc1,DZERO,%f62 ! (Y2_1) yd = DZERO; + + and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff; + nop + fmovdl %fcc1,LTHRESH,%f44 ! (Y2_1) s = LTHRESH; + + cmp %l5,959 ! (Y1_2) if (expy < 0x3fb); + ldd [EXPTBL+%l0],%f20 ! (Y2_0) u = *(double*)((char*)__mt_constexp2 + i); + fmuld %f60,%f54,%f48 ! (Y0_1) dtmp4 = dtmp3 * y; + faddd %f50,%f10,%f52 ! (Y1_1) y = y + yd; + + add %o7,513,%o7 ! (Y1_0) ind += 513; + ld [%fp+tmp4],%o1 ! (Y1_1) ind = (int)dtmp0; + fmuld %f58,%f18,%f18 ! (Y2_0) y = dtmp7 * y; + faddd %f26,%f56,%f58 ! (Y1_0) u = dtmp2 + u; + + add py,stridey,py ! y += stridey; + ldd [%fp+tmp1_hi],%f60 ! (Y1_0) *(int*)&dtmp0 = ind; + fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s; + fsubd %f28,%f12,%f56 ! 
(Y0_2) dtmp0 = (yd - s); + + sll %o7,3,%l3 ! (Y1_0) ind *= 8; + fmuld %f28,yr,%f26 ! (Y0_2) dtmp1 = yd * yr; + bl,pn %icc,.xupdate12 ! (Y1_2) if (expy < 0x3fb); + faddd %f44,%f62,%f28 ! (Y2_1) dtmp0 = (s + yd); +.xcont12: + sra %o1,8,%o3 ! (Y1_1) ind >>= 8; + cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e); + fmuld XKB5,%f52,%f22 ! (Y1_1) dtmp0 = XKB5 * y; + faddd %f48,XKB2,%f14 ! (Y0_1) dtmp5 = dtmp4 + XKB2; + + add %o3,1021,%o0 ! (Y1_1) eflag = (ind + 1021); + ldd [%l3+EXPTBL],%f48 ! (Y1_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + bge,pn %icc,.xupdate13 ! (Y1_2) if (expy >= 0x43e); + fpadd32 %f58,%f60,%f60 ! (Y1_0) u = vis_fpadd32(u, dtmp0); +.xcont13: + sub %g0,%o3,%i2 ! (Y1_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l0],%f16 ! (Y2_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h; + fand %f8,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32); + + sra %o0,31,%l3 ! (Y1_1) eflag = eflag >> 31; + add %i2,1022,%l2 ! (Y1_1) gflag = (1022 - ind); + fmuld %f20,%f18,%f56 ! (Y2_0) dtmp1 = u * y; + fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH); + + sra %l2,31,%o7 ! (Y1_1) gflag = gflag >> 31; + and %l3,54,%i1 ! (Y1_1) itmp0 = 54 & eflag; + fmuld %f60,%f48,%f18 ! (Y1_0) dtmp1 = u * dtmp1; + fdtoi %f28,%f3 ! (Y2_1) u = (double)(int)dtmp0; + + add %o3,%i1,%l0 ! (Y1_1) ind = ind + itmp0; + and %o7,52,%l1 ! (Y1_1) itmp1 = 52 & gflag; + st %f3,[%fp+ind_buf] ! (Y2_1) ind = (int)dtmp0; + faddd %f22,XKB4,%f60 ! (Y1_1) dtmp1 = dtmp0 + XKB4; + + sub %l0,%l1,%i4 ! (Y1_1) ind = ind - itmp1; + sub %l3,%o7,%o7 ! (Y1_1) ind = eflag - gflag; + st %f18,[pz] ! (Y1_0) write into memory + faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1; + + sll %i4,20,%i2 ! (Y1_1) ind <<= 20; + lda [py]0x82,%l5 ! (Y2_2) hy = *py; + fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH; + + st %i2,[%fp+tmp1_hi] ! (Y1_1) *(int*)&dtmp0 = ind; + fitod %f3,%f10 ! (Y2_1) u = (double)(int)dtmp0; + + fmuld %f60,%f52,%f60 ! (Y1_1) dtmp2 = dtmp1 * y; + st %f19,[pz+4] ! 
(Y1_0) write into memory + faddd %f16,%f56,%f28 ! (Y2_0) dtmp2 = dtmp0 + dtmp1; + + fmuld %f14,%f54,%f56 ! (Y0_1) dtmp6 = dtmp5 * y; + fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add pz,stridez,pz ! z += stridez; + fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH); + + lda [py]0x82,%f26 ! (Y2_2) yd = *py; + fsubd %f44,%f10,%f18 ! (Y2_1) y = s - u; + + lda [py+4]%asi,%f27 ! (Y2_2) yd = *py; + faddd %f60,XKB3,%f44 ! (Y1_1) dtmp3 = dtmp2 + XKB3; + + sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20; + and %o2,255,%o2 ! (Y0_1) i = ind & 0xff; + faddd %f56,XKB1,%f58 ! (Y0_1) dtmp7 = dtmp6 + XKB1; + + sll %o2,4,%l2 ! (Y0_1) i = i << 4; + fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO; + + add py,stridey,py ! y += stridey; + and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff; + fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH; + + cmp %l5,959 ! (Y2_2) if (expy < 0x3fb); + ldd [EXPTBL+%l2],%f22 ! (Y0_1) u = *(double*)((char*)__mt_constexp2 + i); + faddd %f18,%f62,%f18 ! (Y2_1) y = y + yd; + fmuld %f44,%f52,%f62 ! (Y1_1) dtmp4 = dtmp3 * y; + + add %o5,513,%o5 ! (Y2_0) ind += 513; + ld [%fp+ind_buf],%l1 ! (Y2_1) ind = (int)dtmp0; + fmuld %f58,%f54,%f54 ! (Y0_1) y = dtmp7 * y; + faddd %f28,%f20,%f58 ! (Y2_0) u = dtmp2 + u; + + ldd [%fp+tmp2_hi],%f60 ! (Y2_0) *(int*)&dtmp0 = ind; + fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s; + bl,pn %icc,.xupdate14 ! (Y2_2) if (expy < 0x3fb); + fsubd %f8,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s); +.xcont14: + sll %o5,3,%i1 ! (Y2_0) ind *= 8; + cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e); + fmuld %f8,yr,%f8 ! (Y1_2) dtmp1 = yd * yr; + faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd); + + sra %l1,8,%o2 ! (Y2_1) ind >>= 8; + lda [py]0x82,%l5 ! (Y0_3) hy = *py; + fmuld XKB5,%f18,%f20 ! (Y2_1) dtmp0 = XKB5 * y; + faddd %f62,XKB2,%f12 ! (Y1_1) dtmp5 = dtmp4 + XKB2; + + add %o2,1021,%l0 ! (Y2_1) eflag = (ind + 1021); + ldd [%i1+EXPTBL],%f62 ! (Y2_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + bge,pn %icc,.xupdate15 ! (Y2_2) if (expy >= 0x43e); + fpadd32 %f58,%f60,%f60 ! 
(Y2_0) u = vis_fpadd32(u, dtmp0); +.xcont15: + sub %g0,%o2,%l3 ! (Y2_1) gflag = (1022 - ind); + ldd [EXPTBL_P8+%l2],%f14 ! (Y0_1) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h; + fand %f26,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32); + + sra %l0,31,%o0 ! (Y2_1) eflag = eflag >> 31; + add %l3,1022,%i4 ! (Y2_1) gflag = (1022 - ind); + fmuld %f22,%f54,%f56 ! (Y0_1) dtmp1 = u * y; + fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH); + + sra %i4,31,%o5 ! (Y2_1) gflag = gflag >> 31; + and %o0,54,%i2 ! (Y2_1) itmp0 = 54 & eflag; + fmuld %f60,%f62,%f6 ! (Y2_0) dtmp1 = u * dtmp1; + fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0; + + add %o2,%i2,%i1 ! (Y2_1) ind = ind + itmp0; + and %o5,52,%l2 ! (Y2_1) itmp1 = 52 & gflag; + st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0; + faddd %f20,XKB4,%f60 ! (Y2_1) dtmp1 = dtmp0 + XKB4; + + sub %i1,%l2,%o3 ! (Y2_1) ind = ind - itmp1; + sub %o0,%o5,%o5 ! (Y2_1) ind = eflag - gflag; + st %f6,[pz] ! (Y2_0) write into memory + faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1; + + sll %o3,20,%l3 ! (Y2_1) ind <<= 20; + lda [py]0x82,%f28 ! (Y0_3) yd = *py; + fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH; + + lda [py+4]%asi,%f29 ! (Y0_3) yd = *py; + fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0; + + fmuld %f60,%f18,%f60 ! (Y2_1) dtmp2 = dtmp1 * y; + st %l3,[%fp+tmp2_hi] ! (Y2_1) *(int*)&dtmp0 = ind; + faddd %f14,%f56,%f20 ! (Y0_1) dtmp2 = dtmp0 + dtmp1; + + fmuld %f12,%f52,%f56 ! (Y1_1) dtmp6 = dtmp5 * y; + st %f7,[pz+4] ! (Y2_0) write into memory + fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO; + + sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20; + add pz,stridez,pz ! z += stridez; + fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH); + + and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff; + fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u; + + cmp %l5,959 ! (Y0_3) if (expy < 0x3fb); + faddd %f60,XKB3,%f60 ! (Y2_1) dtmp3 = dtmp2 + XKB3; + + and %o1,255,%o1 ! (Y1_1) i = ind & 0xff; + bl,pn %icc,.xupdate16 ! 
(Y0_3) if (expy < 0x3fb); + faddd %f56,XKB1,%f8 ! (Y1_1) dtmp7 = dtmp6 + XKB1; +.xcont16: + sll %o1,4,%l0 ! (Y1_1) i = i << 4; + fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO; + + subcc counter,3,counter ! update cycle counter + fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH; + + ldd [EXPTBL+%l0],%f56 ! (Y1_1) u = *(double*)((char*)__mt_constexp2 + i); + fmuld %f60,%f18,%f58 ! (Y2_1) dtmp4 = dtmp3 * y; + faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd; + + fmuld %f8,%f52,%f60 ! (Y1_1) y = dtmp7 * y; + ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0; + bpos,pt %icc,.xmain_loop + faddd %f20,%f22,%f12 ! (Y0_1) u = dtmp2 + u; + +.xtail: + addcc counter,2,counter + ldd [%fp+tmp0_hi],%f52 ! (Y0_0) *(int*)&dtmp0 = ind; + + add %o4,513,%o4 ! (Y0_0) ind += 513; + bneg,pn %icc,.xend_loop + nop + + sll %o4,3,%l2 ! (Y0_0) ind *= 8; + + subcc counter,1,counter + ldd [%l2+EXPTBL],%f62 ! (Y0_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f12,%f52,%f58 ! (Y0_0) u = vis_fpadd32(u, dtmp0); + + ldd [EXPTBL_P8+%l0],%f8 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8); + + fmuld %f56,%f60,%f26 ! (Y1_0) dtmp1 = u * y; + + fmuld %f58,%f62,%f6 ! (Y0_0) dtmp1 = u * dtmp1; + + st %f6,[pz] ! (Y0_0) write into memory + st %f7,[pz+4] ! (Y0_0) write into memory + bneg,pn %icc,.xend_loop + add pz,stridez,pz ! z += stridez; + + faddd %f8,%f26,%f26 ! (Y1_0) dtmp2 = dtmp0 + dtmp1; + + add %o7,513,%o7 ! (Y1_0) ind += 513; + faddd %f26,%f56,%f58 ! (Y1_0) u = dtmp2 + u; + + ldd [%fp+tmp1_hi],%f60 ! (Y1_0) *(int*)&dtmp0 = ind; + + sll %o7,3,%l3 ! (Y1_0) ind *= 8; + + ldd [%l3+EXPTBL],%f48 ! (Y1_0) dtmp1 = (*(double*)((char*)__mt_constexp2 + ind); + fpadd32 %f58,%f60,%f60 ! (Y1_0) u = vis_fpadd32(u, dtmp0); + + fmuld %f60,%f48,%f18 ! (Y1_0) dtmp1 = u * dtmp1; + + st %f18,[pz] ! (Y1_0) write into memory + st %f19,[pz+4] ! (Y1_0) write into memory + add pz,stridez,pz ! 
z += stridez; + +.xend_loop: + ba .xbegin + nop + + .align 16 +.xupdate0: + cmp counter,0 + sub py,stridey,%i2 + ble,pt %icc,.xcont0 + fmovd DZERO,%f10 + + stx %i2,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont0 + or %g0,0,counter + + .align 16 +.xupdate1: + cmp counter,0 + sub py,stridey,%i2 + ble,pt %icc,.xcont1 + fmovd DZERO,%f10 + + stx %i2,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont1 + or %g0,0,counter + + .align 16 +.xupdate2: + cmp counter,1 + sub py,stridey,%l3 + ble,pt %icc,.xcont2 + fmovd DZERO,%f14 + + stx %l3,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .xcont2 + or %g0,1,counter + + .align 16 +.xupdate3: + cmp counter,1 + sub py,stridey,%l3 + ble,pt %icc,.xcont3 + fmovd DZERO,%f14 + + stx %l3,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .xcont3 + or %g0,1,counter + + .align 16 +.xupdate4: + cmp counter,2 + ble,pt %icc,.xcont4 + fmovd DZERO,%f18 + + stx py,[%fp+tmp_py] + sub counter,2,counter + + st counter,[%fp+tmp_counter] + ba .xcont4 + or %g0,2,counter + + .align 16 +.xupdate5: + cmp counter,2 + ble,pt %icc,.xcont5 + fmovd DZERO,%f18 + + stx py,[%fp+tmp_py] + sub counter,2,counter + + st counter,[%fp+tmp_counter] + ba .xcont5 + or %g0,2,counter + + .align 16 +.xupdate6: + cmp counter,3 + sub py,stridey,%i2 + ble,pt %icc,.xcont6 + fmovd DZERO,%f20 + + stx %i2,[%fp+tmp_py] + sub counter,3,counter + + st counter,[%fp+tmp_counter] + ba .xcont6 + or %g0,3,counter + + .align 16 +.xupdate7: + cmp counter,3 + sub py,stridey,%i2 + ble,pt %icc,.xcont7 + fmovd DZERO,%f20 + + stx %i2,[%fp+tmp_py] + sub counter,3,counter + + st counter,[%fp+tmp_counter] + ba .xcont7 + or %g0,3,counter + + .align 16 +.xupdate8: + cmp counter,4 + sub py,stridey,%l3 + ble,pt %icc,.xcont8 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont8 + or %g0,4,counter + + .align 16 +.xupdate9: + cmp counter,4 + sub py,stridey,%l3 + ble,pt 
%icc,.xcont9 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont9 + or %g0,4,counter + + .align 16 +.xupdate10: + cmp counter,5 + ble,pt %icc,.xcont10 + fmovd DZERO,%f28 + + stx py,[%fp+tmp_py] + sub counter,5,counter + + st counter,[%fp+tmp_counter] + ba .xcont10 + or %g0,5,counter + + .align 16 +.xupdate11: + cmp counter,3 + ble,pt %icc,.xcont11 + fmovd DZERO,%f28 + + stx py,[%fp+tmp_py] + sub counter,3,counter + + st counter,[%fp+tmp_counter] + ba .xcont11 + or %g0,3,counter + + .align 16 +.xupdate12: + cmp counter,4 + sub py,stridey,%i2 + ble,pt %icc,.xcont12 + fmovd DZERO,%f8 + + stx %i2,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont12 + or %g0,4,counter + + .align 16 +.xupdate13: + cmp counter,4 + sub py,stridey,%i2 + ble,pt %icc,.xcont13 + fmovd DZERO,%f8 + + stx %i2,[%fp+tmp_py] + sub counter,4,counter + + st counter,[%fp+tmp_counter] + ba .xcont13 + or %g0,4,counter + + .align 16 +.xupdate14: + cmp counter,5 + sub py,stridey,%l3 + ble,pt %icc,.xcont14 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,5,counter + + st counter,[%fp+tmp_counter] + ba .xcont14 + or %g0,5,counter + + .align 16 +.xupdate15: + cmp counter,5 + sub py,stridey,%l3 + ble,pt %icc,.xcont15 + fmovd DZERO,%f26 + + stx %l3,[%fp+tmp_py] + sub counter,5,counter + + st counter,[%fp+tmp_counter] + ba .xcont15 + or %g0,5,counter + + .align 16 +.xupdate16: + cmp counter,6 + ble,pt %icc,.xcont16 + fmovd DZERO,%f28 + + stx py,[%fp+tmp_py] + sub counter,6,counter + + st counter,[%fp+tmp_counter] + ba .xcont16 + or %g0,6,counter + + .align 16 +.xspec0: + add EXPTBL,4095,%l0 + add %l0,1,%l0 + ldd [%l0+8],%f20 ! ld DONE + st %f20,[pz] ! *pz = DONE; + ba .xupdate_point + st %f21,[pz+4] ! *pz = DONE; + + .align 16 +.xspec1: + ldx [%fp+tmp_px],%l1 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + + sethi %hi(0x7ff00000),%o3 + add MASK_0x7fffffff,0x3ff,MASK_0x7fffffff + + and %l2,MASK_0x7fffffff,%o2 ! 
if (hy &= 0x7fffffff); + sethi %hi(0x3ff00000),MASK_0x3ff00000 + + cmp %o2,%o3 ! if (hy != 0x7ff00000); + bne,pn %icc,2f ! if (hy != 0x7ff00000); + nop + + ld [py+4],%l3 ! ld ly; + cmp %l3,0 ! if (ly != 0); + bne,a,pt %icc,3f ! if (ly != 0); + nop + + ld [%l1],%i1 ! ld hx; + cmp %i1,MASK_0x3ff00000 ! if (hx != 0x3ff00000); + bne,a,pn %icc,1f ! if (hx != 0x3ff00000); + srl %l2,31,%o7 ! sy = hy >> 31; + + ld [%l1+4],%i2 ! ld lx; + cmp %i2,0 ! if (lx != 0); + bne,pn %icc,1f ! if (lx != 0); + srl %l2,31,%o7 ! sy = hy >> 31; + + fzero %f28 + fmuld %f18,%f28,%f28 ! *pz = *py * 0.0; + st %f28,[pz] + ba .xupdate_point + st %f29,[pz+4] +1: + sub %i1,MASK_0x3ff00000,%o0 ! hx - 0x3ff00000; + srlx %o0,63,%o0 ! (hx - 0x3ff00000) >> 63; + + cmp %o0,%o7 ! if ((hx < 0x3ff00000) == sy); + be,pn %icc,1f ! if ((hx < 0x3ff00000) == sy); + + st DZERO_HI,[pz] + ba .xupdate_point + st DZERO_LO,[pz+4] +1: + st %o2,[pz] ! ((int*)pz)[0] = hy; + ba .xupdate_point + st %l3,[pz+4] ! ((int*)pz)[1] = ly; +2: + bl,a,pn %icc,1f ! if (hy < 0x7ff00000); + ld [%l1+4],%i2 ! ld lx; +3: + ld [%l1],%f20 ! x = *px; + ld [%l1+4],%f21 ! x = *px; + fmuld %f20,%f18,%f28 ! *pz = *px * *py; + st %f28,[pz] + ba .xupdate_point + st %f29,[pz+4] +1: + ld [%l1],%i1 ! ld hx; + cmp %i2,0 ! if (lx != 0); + bne,pn %icc,1f ! if (lx != 0); + nop + + cmp %i1,MASK_0x3ff00000 ! if (hx != 0x3ff00000); + add EXPTBL,4095,%l0 + bne,pn %icc,1f ! if (hx != 0x3ff00000); + add %l0,1,%l0 + + ldd [%l0+8],%f20 ! ld DONE + st %f20,[pz] ! *pz = DONE; + ba .xupdate_point + st %f21,[pz+4] ! *pz = DONE; +1: + srl %l2,31,%o7 ! sy = hy >> 31; + sub %i1,MASK_0x3ff00000,%o0 ! hx - 0x3ff00000; + + srlx %o0,63,%o0 ! (hx - 0x3ff00000) >> 63; + + cmp %o0,%o7 ! if (hx < 0x3ff00000) == sy); + be,a,pn %icc,1f ! if (hx < 0x3ff00000) == sy); + ldd [EXPTBL-ind_HUGE],%f20 ! y0 = _HUGE; + + ldd [EXPTBL-ind_TINY],%f20 ! y0 = _TINY; +1: + fmuld %f20,%f20,%f20 ! 
*pz = y0 * y0 + st %f20,[pz] + ba .xupdate_point + st %f21,[pz+4] + +.xupdate_point: + add py,stridey,py + ba .xbegin1 + add pz,stridez,pz + + SET_SIZE(__vpow) + diff --git a/usr/src/libm/src/mvec/vis/__vpowf.S b/usr/src/libm/src/mvec/vis/__vpowf.S new file mode 100644 index 0000000..f6e7722 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vpowf.S @@ -0,0 +1,3138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vpowf.S 1.7 06/01/23 SMI" + + .file "__vpowf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +! 
__mt_constexp2fa: + .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf + .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 + .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc + .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 + .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 + .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 + .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 + .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 + .word 0x3ff0b558, 0x6cf9890f, 0x3ff0c0f1, 0x45e46c85 + .word 0x3ff0cc92, 0x2b7247f7, 0x3ff0d83b, 0x23395dec + .word 0x3ff0e3ec, 0x32d3d1a2, 0x3ff0efa5, 0x5fdfa9c5 + .word 0x3ff0fb66, 0xaffed31b, 0x3ff10730, 0x28d7233e + .word 0x3ff11301, 0xd0125b51, 0x3ff11edb, 0xab5e2ab6 + .word 0x3ff12abd, 0xc06c31cc, 0x3ff136a8, 0x14f204ab + .word 0x3ff1429a, 0xaea92de0, 0x3ff14e95, 0x934f312e + .word 0x3ff15a98, 0xc8a58e51, 0x3ff166a4, 0x5471c3c2 + .word 0x3ff172b8, 0x3c7d517b, 0x3ff17ed4, 0x8695bbc0 + .word 0x3ff18af9, 0x388c8dea, 0x3ff19726, 0x58375d2f + .word 0x3ff1a35b, 0xeb6fcb75, 0x3ff1af99, 0xf8138a1c + .word 0x3ff1bbe0, 0x84045cd4, 0x3ff1c82f, 0x95281c6b + .word 0x3ff1d487, 0x3168b9aa, 0x3ff1e0e7, 0x5eb44027 + .word 0x3ff1ed50, 0x22fcd91d, 0x3ff1f9c1, 0x8438ce4d + .word 0x3ff2063b, 0x88628cd6, 0x3ff212be, 0x3578a819 + .word 0x3ff21f49, 0x917ddc96, 0x3ff22bdd, 0xa27912d1 + .word 0x3ff2387a, 0x6e756238, 0x3ff2451f, 0xfb82140a + .word 0x3ff251ce, 0x4fb2a63f, 0x3ff25e85, 0x711ece75 + .word 0x3ff26b45, 0x65e27cdd, 0x3ff2780e, 0x341ddf29 + .word 0x3ff284df, 0xe1f56381, 0x3ff291ba, 0x7591bb70 + .word 0x3ff29e9d, 0xf51fdee1, 0x3ff2ab8a, 0x66d10f13 + .word 0x3ff2b87f, 0xd0dad990, 0x3ff2c57e, 0x39771b2f + .word 0x3ff2d285, 0xa6e4030b, 0x3ff2df96, 0x1f641589 + .word 0x3ff2ecaf, 0xa93e2f56, 0x3ff2f9d2, 0x4abd886b + .word 0x3ff306fe, 0x0a31b715, 0x3ff31432, 0xedeeb2fd + .word 0x3ff32170, 0xfc4cd831, 0x3ff32eb8, 0x3ba8ea32 + .word 0x3ff33c08, 0xb26416ff, 0x3ff34962, 0x66e3fa2d + .word 0x3ff356c5, 0x5f929ff1, 0x3ff36431, 0xa2de883b + 
.word 0x3ff371a7, 0x373aa9cb, 0x3ff37f26, 0x231e754a + .word 0x3ff38cae, 0x6d05d866, 0x3ff39a40, 0x1b7140ef + .word 0x3ff3a7db, 0x34e59ff7, 0x3ff3b57f, 0xbfec6cf4 + .word 0x3ff3c32d, 0xc313a8e5, 0x3ff3d0e5, 0x44ede173 + .word 0x3ff3dea6, 0x4c123422, 0x3ff3ec70, 0xdf1c5175 + .word 0x3ff3fa45, 0x04ac801c, 0x3ff40822, 0xc367a024 + .word 0x3ff4160a, 0x21f72e2a, 0x3ff423fb, 0x2709468a + .word 0x3ff431f5, 0xd950a897, 0x3ff43ffa, 0x3f84b9d4 + .word 0x3ff44e08, 0x6061892d, 0x3ff45c20, 0x42a7d232 + .word 0x3ff46a41, 0xed1d0057, 0x3ff4786d, 0x668b3237 + .word 0x3ff486a2, 0xb5c13cd0, 0x3ff494e1, 0xe192aed2 + .word 0x3ff4a32a, 0xf0d7d3de, 0x3ff4b17d, 0xea6db7d7 + .word 0x3ff4bfda, 0xd5362a27, 0x3ff4ce41, 0xb817c114 + .word 0x3ff4dcb2, 0x99fddd0d, 0x3ff4eb2d, 0x81d8abff + .word 0x3ff4f9b2, 0x769d2ca7, 0x3ff50841, 0x7f4531ee + .word 0x3ff516da, 0xa2cf6642, 0x3ff5257d, 0xe83f4eef + .word 0x3ff5342b, 0x569d4f82, 0x3ff542e2, 0xf4f6ad27 + .word 0x3ff551a4, 0xca5d920f, 0x3ff56070, 0xdde910d2 + .word 0x3ff56f47, 0x36b527da, 0x3ff57e27, 0xdbe2c4cf + .word 0x3ff58d12, 0xd497c7fd, 0x3ff59c08, 0x27ff07cc + .word 0x3ff5ab07, 0xdd485429, 0x3ff5ba11, 0xfba87a03 + .word 0x3ff5c926, 0x8a5946b7, 0x3ff5d845, 0x90998b93 + .word 0x3ff5e76f, 0x15ad2148, 0x3ff5f6a3, 0x20dceb71 + .word 0x3ff605e1, 0xb976dc09, 0x3ff6152a, 0xe6cdf6f4 + .word 0x3ff6247e, 0xb03a5585, 0x3ff633dd, 0x1d1929fd + .word 0x3ff64346, 0x34ccc320, 0x3ff652b9, 0xfebc8fb7 + .word 0x3ff66238, 0x82552225, 0x3ff671c1, 0xc70833f6 + .word 0x3ff68155, 0xd44ca973, 0x3ff690f4, 0xb19e9538 + .word 0x3ff6a09e, 0x667f3bcd, 0x3ff6b052, 0xfa75173e + .word 0x3ff6c012, 0x750bdabf, 0x3ff6cfdc, 0xddd47645 + .word 0x3ff6dfb2, 0x3c651a2f, 0x3ff6ef92, 0x98593ae5 + .word 0x3ff6ff7d, 0xf9519484, 0x3ff70f74, 0x66f42e87 + .word 0x3ff71f75, 0xe8ec5f74, 0x3ff72f82, 0x86ead08a + .word 0x3ff73f9a, 0x48a58174, 0x3ff74fbd, 0x35d7cbfd + .word 0x3ff75feb, 0x564267c9, 0x3ff77024, 0xb1ab6e09 + .word 0x3ff78069, 0x4fde5d3f, 0x3ff790b9, 0x38ac1cf6 + .word 0x3ff7a114, 
0x73eb0187, 0x3ff7b17b, 0x0976cfdb + .word 0x3ff7c1ed, 0x0130c132, 0x3ff7d26a, 0x62ff86f0 + .word 0x3ff7e2f3, 0x36cf4e62, 0x3ff7f387, 0x8491c491 + .word 0x3ff80427, 0x543e1a12, 0x3ff814d2, 0xadd106d9 + .word 0x3ff82589, 0x994cce13, 0x3ff8364c, 0x1eb941f7 + .word 0x3ff8471a, 0x4623c7ad, 0x3ff857f4, 0x179f5b21 + .word 0x3ff868d9, 0x9b4492ed, 0x3ff879ca, 0xd931a436 + .word 0x3ff88ac7, 0xd98a6699, 0x3ff89bd0, 0xa478580f + .word 0x3ff8ace5, 0x422aa0db, 0x3ff8be05, 0xbad61778 + .word 0x3ff8cf32, 0x16b5448c, 0x3ff8e06a, 0x5e0866d9 + .word 0x3ff8f1ae, 0x99157736, 0x3ff902fe, 0xd0282c8a + .word 0x3ff9145b, 0x0b91ffc6, 0x3ff925c3, 0x53aa2fe2 + .word 0x3ff93737, 0xb0cdc5e5, 0x3ff948b8, 0x2b5f98e5 + .word 0x3ff95a44, 0xcbc8520f, 0x3ff96bdd, 0x9a7670b3 + .word 0x3ff97d82, 0x9fde4e50, 0x3ff98f33, 0xe47a22a2 + .word 0x3ff9a0f1, 0x70ca07ba, 0x3ff9b2bb, 0x4d53fe0d + .word 0x3ff9c491, 0x82a3f090, 0x3ff9d674, 0x194bb8d5 + .word 0x3ff9e863, 0x19e32323, 0x3ff9fa5e, 0x8d07f29e + .word 0x3ffa0c66, 0x7b5de565, 0x3ffa1e7a, 0xed8eb8bb + .word 0x3ffa309b, 0xec4a2d33, 0x3ffa42c9, 0x80460ad8 + .word 0x3ffa5503, 0xb23e255d, 0x3ffa674a, 0x8af46052 + .word 0x3ffa799e, 0x1330b358, 0x3ffa8bfe, 0x53c12e59 + .word 0x3ffa9e6b, 0x5579fdbf, 0x3ffab0e5, 0x21356eba + .word 0x3ffac36b, 0xbfd3f37a, 0x3ffad5ff, 0x3a3c2774 + .word 0x3ffae89f, 0x995ad3ad, 0x3ffafb4c, 0xe622f2ff + .word 0x3ffb0e07, 0x298db666, 0x3ffb20ce, 0x6c9a8952 + .word 0x3ffb33a2, 0xb84f15fb, 0x3ffb4684, 0x15b749b1 + .word 0x3ffb5972, 0x8de5593a, 0x3ffb6c6e, 0x29f1c52a + .word 0x3ffb7f76, 0xf2fb5e47, 0x3ffb928c, 0xf22749e4 + .word 0x3ffba5b0, 0x30a1064a, 0x3ffbb8e0, 0xb79a6f1f + .word 0x3ffbcc1e, 0x904bc1d2, 0x3ffbdf69, 0xc3f3a207 + .word 0x3ffbf2c2, 0x5bd71e09, 0x3ffc0628, 0x6141b33d + .word 0x3ffc199b, 0xdd85529c, 0x3ffc2d1c, 0xd9fa652c + .word 0x3ffc40ab, 0x5fffd07a, 0x3ffc5447, 0x78fafb22 + .word 0x3ffc67f1, 0x2e57d14b, 0x3ffc7ba8, 0x8988c933 + .word 0x3ffc8f6d, 0x9406e7b5, 0x3ffca340, 0x5751c4db + .word 0x3ffcb720, 0xdcef9069, 
0x3ffccb0f, 0x2e6d1675 + .word 0x3ffcdf0b, 0x555dc3fa, 0x3ffcf315, 0x5b5bab74 + .word 0x3ffd072d, 0x4a07897c, 0x3ffd1b53, 0x2b08c968 + .word 0x3ffd2f87, 0x080d89f2, 0x3ffd43c8, 0xeacaa1d6 + .word 0x3ffd5818, 0xdcfba487, 0x3ffd6c76, 0xe862e6d3 + .word 0x3ffd80e3, 0x16c98398, 0x3ffd955d, 0x71ff6075 + .word 0x3ffda9e6, 0x03db3285, 0x3ffdbe7c, 0xd63a8315 + .word 0x3ffdd321, 0xf301b460, 0x3ffde7d5, 0x641c0658 + .word 0x3ffdfc97, 0x337b9b5f, 0x3ffe1167, 0x6b197d17 + .word 0x3ffe2646, 0x14f5a129, 0x3ffe3b33, 0x3b16ee12 + .word 0x3ffe502e, 0xe78b3ff6, 0x3ffe6539, 0x24676d76 + .word 0x3ffe7a51, 0xfbc74c83, 0x3ffe8f79, 0x77cdb740 + .word 0x3ffea4af, 0xa2a490da, 0x3ffeb9f4, 0x867cca6e + .word 0x3ffecf48, 0x2d8e67f1, 0x3ffee4aa, 0xa2188510 + .word 0x3ffefa1b, 0xee615a27, 0x3fff0f9c, 0x1cb6412a + .word 0x3fff252b, 0x376bba97, 0x3fff3ac9, 0x48dd7274 + .word 0x3fff5076, 0x5b6e4540, 0x3fff6632, 0x798844f8 + .word 0x3fff7bfd, 0xad9cbe14, 0x3fff91d8, 0x02243c89 + .word 0x3fffa7c1, 0x819e90d8, 0x3fffbdba, 0x3692d514 + .word 0x3fffd3c2, 0x2b8f71f1, 0x3fffe9d9, 0x6b2a23d9 + +! 
__mt_constexp2fb: + .word 0x36900000, 0x36a00000, 0x36b00000, 0x36c00000 + .word 0x36d00000, 0x36e00000, 0x36f00000, 0x37000000 + .word 0x37100000, 0x37200000, 0x37300000, 0x37400000 + .word 0x37500000, 0x37600000, 0x37700000, 0x37800000 + .word 0x37900000, 0x37a00000, 0x37b00000, 0x37c00000 + .word 0x37d00000, 0x37e00000, 0x37f00000, 0x38000000 + .word 0x38100000, 0x38200000, 0x38300000, 0x38400000 + .word 0x38500000, 0x38600000, 0x38700000, 0x38800000 + .word 0x38900000, 0x38a00000, 0x38b00000, 0x38c00000 + .word 0x38d00000, 0x38e00000, 0x38f00000, 0x39000000 + .word 0x39100000, 0x39200000, 0x39300000, 0x39400000 + .word 0x39500000, 0x39600000, 0x39700000, 0x39800000 + .word 0x39900000, 0x39a00000, 0x39b00000, 0x39c00000 + .word 0x39d00000, 0x39e00000, 0x39f00000, 0x3a000000 + .word 0x3a100000, 0x3a200000, 0x3a300000, 0x3a400000 + .word 0x3a500000, 0x3a600000, 0x3a700000, 0x3a800000 + .word 0x3a900000, 0x3aa00000, 0x3ab00000, 0x3ac00000 + .word 0x3ad00000, 0x3ae00000, 0x3af00000, 0x3b000000 + .word 0x3b100000, 0x3b200000, 0x3b300000, 0x3b400000 + .word 0x3b500000, 0x3b600000, 0x3b700000, 0x3b800000 + .word 0x3b900000, 0x3ba00000, 0x3bb00000, 0x3bc00000 + .word 0x3bd00000, 0x3be00000, 0x3bf00000, 0x3c000000 + .word 0x3c100000, 0x3c200000, 0x3c300000, 0x3c400000 + .word 0x3c500000, 0x3c600000, 0x3c700000, 0x3c800000 + .word 0x3c900000, 0x3ca00000, 0x3cb00000, 0x3cc00000 + .word 0x3cd00000, 0x3ce00000, 0x3cf00000, 0x3d000000 + .word 0x3d100000, 0x3d200000, 0x3d300000, 0x3d400000 + .word 0x3d500000, 0x3d600000, 0x3d700000, 0x3d800000 + .word 0x3d900000, 0x3da00000, 0x3db00000, 0x3dc00000 + .word 0x3dd00000, 0x3de00000, 0x3df00000, 0x3e000000 + .word 0x3e100000, 0x3e200000, 0x3e300000, 0x3e400000 + .word 0x3e500000, 0x3e600000, 0x3e700000, 0x3e800000 + .word 0x3e900000, 0x3ea00000, 0x3eb00000, 0x3ec00000 + .word 0x3ed00000, 0x3ee00000, 0x3ef00000, 0x3f000000 + .word 0x3f100000, 0x3f200000, 0x3f300000, 0x3f400000 + .word 0x3f500000, 0x3f600000, 0x3f700000, 0x3f800000 + 
.word 0x3f900000, 0x3fa00000, 0x3fb00000, 0x3fc00000 + .word 0x3fd00000, 0x3fe00000, 0x3ff00000, 0x40000000 + .word 0x40100000, 0x40200000, 0x40300000, 0x40400000 + .word 0x40500000, 0x40600000, 0x40700000, 0x40800000 + .word 0x40900000, 0x40a00000, 0x40b00000, 0x40c00000 + .word 0x40d00000, 0x40e00000, 0x40f00000, 0x41000000 + .word 0x41100000, 0x41200000, 0x41300000, 0x41400000 + .word 0x41500000, 0x41600000, 0x41700000, 0x41800000 + .word 0x41900000, 0x41a00000, 0x41b00000, 0x41c00000 + .word 0x41d00000, 0x41e00000, 0x41f00000, 0x42000000 + .word 0x42100000, 0x42200000, 0x42300000, 0x42400000 + .word 0x42500000, 0x42600000, 0x42700000, 0x42800000 + .word 0x42900000, 0x42a00000, 0x42b00000, 0x42c00000 + .word 0x42d00000, 0x42e00000, 0x42f00000, 0x43000000 + .word 0x43100000, 0x43200000, 0x43300000, 0x43400000 + .word 0x43500000, 0x43600000, 0x43700000, 0x43800000 + .word 0x43900000, 0x43a00000, 0x43b00000, 0x43c00000 + .word 0x43d00000, 0x43e00000, 0x43f00000, 0x44000000 + .word 0x44100000, 0x44200000, 0x44300000, 0x44400000 + .word 0x44500000, 0x44600000, 0x44700000, 0x44800000 + .word 0x44900000, 0x44a00000, 0x44b00000, 0x44c00000 + .word 0x44d00000, 0x44e00000, 0x44f00000, 0x45000000 + .word 0x45100000, 0x45200000, 0x45300000, 0x45400000 + .word 0x45500000, 0x45600000, 0x45700000, 0x45800000 + .word 0x45900000, 0x45a00000, 0x45b00000, 0x45c00000 + .word 0x45d00000, 0x45e00000, 0x45f00000, 0x46000000 + .word 0x46100000, 0x46200000, 0x46300000, 0x46400000 + .word 0x46500000, 0x46600000, 0x46700000, 0x46800000 + .word 0x46900000, 0x46a00000, 0x46b00000, 0x46c00000 + .word 0x46d00000, 0x46e00000, 0x46f00000, 0x47000000 + .word 0x47100000, 0x47200000, 0x47300000, 0x47400000 + .word 0x47500000, 0x47600000, 0x47700000, 0x47800000 + .word 0x47900000, 0x47a00000, 0x47b00000, 0x47c00000 + .word 0x47d00000, 0x47e00000, 0x47f00000, 0x00000000 + + .word 0,0,0,0 + .word 0,0,0,0 + +.CONST_TBL: +! 
__mt_constlog4f: + .word 0x00000000, 0x00000000, 0x3e800000, 0x00000000 + .word 0x4006fe50, 0xb6ef0851, 0x3e7fc07f, 0x01fc07f0 + .word 0x4016e796, 0x85c2d22a, 0x3e7f81f8, 0x1f81f820 + .word 0x40211cd1, 0xd5133413, 0x3e7f4465, 0x9e4a4271 + .word 0x4026bad3, 0x758efd87, 0x3e7f07c1, 0xf07c1f08 + .word 0x402c4dfa, 0xb90aab5f, 0x3e7ecc07, 0xb301ecc0 + .word 0x4030eb38, 0x9fa29f9b, 0x3e7e9131, 0xabf0b767 + .word 0x4033aa2f, 0xdd27f1c3, 0x3e7e573a, 0xc901e574 + .word 0x403663f6, 0xfac91316, 0x3e7e1e1e, 0x1e1e1e1e + .word 0x403918a1, 0x6e46335b, 0x3e7de5d6, 0xe3f8868a + .word 0x403bc842, 0x40adabba, 0x3e7dae60, 0x76b981db + .word 0x403e72ec, 0x117fa5b2, 0x3e7d77b6, 0x54b82c34 + .word 0x40408c58, 0x8cda79e4, 0x3e7d41d4, 0x1d41d41d + .word 0x4041dcd1, 0x97552b7b, 0x3e7d0cb5, 0x8f6ec074 + .word 0x40432ae9, 0xe278ae1a, 0x3e7cd856, 0x89039b0b + .word 0x404476a9, 0xf983f74d, 0x3e7ca4b3, 0x055ee191 + .word 0x4045c01a, 0x39fbd688, 0x3e7c71c7, 0x1c71c71c + .word 0x40470742, 0xd4ef027f, 0x3e7c3f8f, 0x01c3f8f0 + .word 0x40484c2b, 0xd02f03b3, 0x3e7c0e07, 0x0381c0e0 + .word 0x40498edd, 0x077e70df, 0x3e7bdd2b, 0x899406f7 + .word 0x404acf5e, 0x2db4ec94, 0x3e7bacf9, 0x14c1bad0 + .word 0x404c0db6, 0xcdd94dee, 0x3e7b7d6c, 0x3dda338b + .word 0x404d49ee, 0x4c325970, 0x3e7b4e81, 0xb4e81b4f + .word 0x404e840b, 0xe74e6a4d, 0x3e7b2036, 0x406c80d9 + .word 0x404fbc16, 0xb902680a, 0x3e7af286, 0xbca1af28 + .word 0x4050790a, 0xdbb03009, 0x3e7ac570, 0x1ac5701b + .word 0x40511307, 0xdad30b76, 0x3e7a98ef, 0x606a63be + .word 0x4051ac05, 0xb291f070, 0x3e7a6d01, 0xa6d01a6d + .word 0x40524407, 0xab0e073a, 0x3e7a41a4, 0x1a41a41a + .word 0x4052db10, 0xfc4d9aaf, 0x3e7a16d3, 0xf97a4b02 + .word 0x40537124, 0xcea4cded, 0x3e79ec8e, 0x951033d9 + .word 0x40540646, 0x3b1b0449, 0x3e79c2d1, 0x4ee4a102 + .word 0x40549a78, 0x4bcd1b8b, 0x3e799999, 0x9999999a + .word 0x40552dbd, 0xfc4c96b3, 0x3e7970e4, 0xf80cb872 + .word 0x4055c01a, 0x39fbd688, 0x3e7948b0, 0xfcd6e9e0 + .word 0x4056518f, 0xe4677ba7, 0x3e7920fb, 0x49d0e229 + 
.word 0x4056e221, 0xcd9d0cde, 0x3e78f9c1, 0x8f9c18fa + .word 0x405771d2, 0xba7efb3c, 0x3e78d301, 0x8d3018d3 + .word 0x405800a5, 0x63161c54, 0x3e78acb9, 0x0f6bf3aa + .word 0x40588e9c, 0x72e0b226, 0x3e7886e5, 0xf0abb04a + .word 0x40591bba, 0x891f1709, 0x3e786186, 0x18618618 + .word 0x4059a802, 0x391e232f, 0x3e783c97, 0x7ab2bedd + .word 0x405a3376, 0x0a7f6051, 0x3e781818, 0x18181818 + .word 0x405abe18, 0x797f1f49, 0x3e77f405, 0xfd017f40 + .word 0x405b47eb, 0xf73882a1, 0x3e77d05f, 0x417d05f4 + .word 0x405bd0f2, 0xe9e79031, 0x3e77ad22, 0x08e0ecc3 + .word 0x405c592f, 0xad295b56, 0x3e778a4c, 0x8178a4c8 + .word 0x405ce0a4, 0x923a587d, 0x3e7767dc, 0xe434a9b1 + .word 0x405d6753, 0xe032ea0f, 0x3e7745d1, 0x745d1746 + .word 0x405ded3f, 0xd442364c, 0x3e772428, 0x7f46debc + .word 0x405e726a, 0xa1e754d2, 0x3e7702e0, 0x5c0b8170 + .word 0x405ef6d6, 0x7328e220, 0x3e76e1f7, 0x6b4337c7 + .word 0x405f7a85, 0x68cb06cf, 0x3e76c16c, 0x16c16c17 + .word 0x405ffd79, 0x9a83ff9b, 0x3e76a13c, 0xd1537290 + .word 0x40603fda, 0x8b97997f, 0x3e768168, 0x16816817 + .word 0x4060809c, 0xf27f703d, 0x3e7661ec, 0x6a5122f9 + .word 0x4060c105, 0x00d63aa6, 0x3e7642c8, 0x590b2164 + .word 0x40610113, 0xb153c8ea, 0x3e7623fa, 0x77016240 + .word 0x406140c9, 0xfaa1e544, 0x3e760581, 0x60581606 + .word 0x40618028, 0xcf72976a, 0x3e75e75b, 0xb8d015e7 + .word 0x4061bf31, 0x1e95d00e, 0x3e75c988, 0x2b931057 + .word 0x4061fde3, 0xd30e8126, 0x3e75ac05, 0x6b015ac0 + .word 0x40623c41, 0xd42727c8, 0x3e758ed2, 0x308158ed + .word 0x40627a4c, 0x0585cbf8, 0x3e7571ed, 0x3c506b3a + .word 0x4062b803, 0x473f7ad1, 0x3e755555, 0x55555555 + .word 0x4062f568, 0x75eb3f26, 0x3e753909, 0x48f40feb + .word 0x4063327c, 0x6ab49ca7, 0x3e751d07, 0xeae2f815 + .word 0x40636f3f, 0xfb6d9162, 0x3e750150, 0x15015015 + .word 0x4063abb3, 0xfaa02167, 0x3e74e5e0, 0xa72f0539 + .word 0x4063e7d9, 0x379f7016, 0x3e74cab8, 0x8725af6e + .word 0x406423b0, 0x7e986aa9, 0x3e74afd6, 0xa052bf5b + .word 0x40645f3a, 0x98a20739, 0x3e749539, 0xe3b2d067 + .word 0x40649a78, 
0x4bcd1b8b, 0x3e747ae1, 0x47ae147b + .word 0x4064d56a, 0x5b33cec4, 0x3e7460cb, 0xc7f5cf9a + .word 0x40651011, 0x8708a8f9, 0x3e7446f8, 0x6562d9fb + .word 0x40654a6e, 0x8ca5438e, 0x3e742d66, 0x25d51f87 + .word 0x40658482, 0x26989d34, 0x3e741414, 0x14141414 + .word 0x4065be4d, 0x0cb51435, 0x3e73fb01, 0x3fb013fb + .word 0x4065f7cf, 0xf41e09af, 0x3e73e22c, 0xbce4a902 + .word 0x4066310b, 0x8f553048, 0x3e73c995, 0xa47babe7 + .word 0x40666a00, 0x8e4788cc, 0x3e73b13b, 0x13b13b14 + .word 0x4066a2af, 0x9e5a0f0a, 0x3e73991c, 0x2c187f63 + .word 0x4066db19, 0x6a76194a, 0x3e738138, 0x13813814 + .word 0x4067133e, 0x9b156c7c, 0x3e73698d, 0xf3de0748 + .word 0x40674b1f, 0xd64e0754, 0x3e73521c, 0xfb2b78c1 + .word 0x406782bd, 0xbfdda657, 0x3e733ae4, 0x5b57bcb2 + .word 0x4067ba18, 0xf93502e4, 0x3e7323e3, 0x4a2b10bf + .word 0x4067f132, 0x2182cf16, 0x3e730d19, 0x0130d190 + .word 0x40682809, 0xd5be7073, 0x3e72f684, 0xbda12f68 + .word 0x40685ea0, 0xb0b27b26, 0x3e72e025, 0xc04b8097 + .word 0x406894f7, 0x4b06ef8b, 0x3e72c9fb, 0x4d812ca0 + .word 0x4068cb0e, 0x3b4b3bbe, 0x3e72b404, 0xad012b40 + .word 0x406900e6, 0x160002cd, 0x3e729e41, 0x29e4129e + .word 0x4069367f, 0x6da0ab2f, 0x3e7288b0, 0x1288b013 + .word 0x40696bda, 0xd2acb5f6, 0x3e727350, 0xb8812735 + .word 0x4069a0f8, 0xd3b0e050, 0x3e725e22, 0x708092f1 + .word 0x4069d5d9, 0xfd5010b3, 0x3e724924, 0x92492492 + .word 0x406a0a7e, 0xda4c112d, 0x3e723456, 0x789abcdf + .word 0x406a3ee7, 0xf38e181f, 0x3e721fb7, 0x8121fb78 + .word 0x406a7315, 0xd02f20c8, 0x3e720b47, 0x0c67c0d9 + .word 0x406aa708, 0xf58014d3, 0x3e71f704, 0x7dc11f70 + .word 0x406adac1, 0xe711c833, 0x3e71e2ef, 0x3b3fb874 + .word 0x406b0e41, 0x26bcc86c, 0x3e71cf06, 0xada2811d + .word 0x406b4187, 0x34a9008c, 0x3e71bb4a, 0x4046ed29 + .word 0x406b7494, 0x8f5532da, 0x3e71a7b9, 0x611a7b96 + .word 0x406ba769, 0xb39e4964, 0x3e719453, 0x808ca29c + .word 0x406bda07, 0x1cc67e6e, 0x3e718118, 0x11811812 + .word 0x406c0c6d, 0x447c5dd3, 0x3e716e06, 0x89427379 + .word 0x406c3e9c, 0xa2e1a055, 
0x3e715b1e, 0x5f75270d + .word 0x406c7095, 0xae91e1c7, 0x3e71485f, 0x0e0acd3b + .word 0x406ca258, 0xdca93316, 0x3e7135c8, 0x1135c811 + .word 0x406cd3e6, 0xa0ca8907, 0x3e712358, 0xe75d3033 + .word 0x406d053f, 0x6d260896, 0x3e711111, 0x11111111 + .word 0x406d3663, 0xb27f31d5, 0x3e70fef0, 0x10fef011 + .word 0x406d6753, 0xe032ea0f, 0x3e70ecf5, 0x6be69c90 + .word 0x406d9810, 0x643d6615, 0x3e70db20, 0xa88f4696 + .word 0x406dc899, 0xab3ff56c, 0x3e70c971, 0x4fbcda3b + .word 0x406df8f0, 0x2086af2c, 0x3e70b7e6, 0xec259dc8 + .word 0x406e2914, 0x2e0e0140, 0x3e70a681, 0x0a6810a7 + .word 0x406e5906, 0x3c8822ce, 0x3e70953f, 0x39010954 + .word 0x406e88c6, 0xb3626a73, 0x3e708421, 0x08421084 + .word 0x406eb855, 0xf8ca88fb, 0x3e707326, 0x0a47f7c6 + .word 0x406ee7b4, 0x71b3a950, 0x3e70624d, 0xd2f1a9fc + .word 0x406f16e2, 0x81db7630, 0x3e705197, 0xf7d73404 + .word 0x406f45e0, 0x8bcf0655, 0x3e704104, 0x10410410 + .word 0x406f74ae, 0xf0efafae, 0x3e703091, 0xb51f5e1a + .word 0x406fa34e, 0x1177c233, 0x3e702040, 0x81020408 + .word 0x406fd1be, 0x4c7f2af9, 0x3e701010, 0x10101010 + .word 0x40700000, 0x00000000, 0x3e700000, 0x00000000 + +! 
__mt_constexp2f: + .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf + .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 + .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc + .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 + .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 + .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 + .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 + .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 + .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85 + .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec + .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5 + .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e + .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6 + .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab + .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e + .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2 + .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0 + .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f + .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c + .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b + .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027 + .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d + .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819 + .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1 + .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a + .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75 + .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29 + .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70 + .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13 + .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f + .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589 + .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b + .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd + .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32 + .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d + .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b + 
.word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a + .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef + .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4 + .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173 + .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175 + .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024 + .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a + .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4 + .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232 + .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237 + .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2 + .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7 + .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114 + .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff + .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee + .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef + .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27 + .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2 + .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf + .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc + .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03 + .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93 + .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71 + .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4 + .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd + .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7 + .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6 + .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538 + .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e + .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645 + .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5 + .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87 + .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a + .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd + .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09 + .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6 + .word 0x3feea114, 
0x73eb0187, 0x3feeb17b, 0x0976cfdb + .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0 + .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491 + .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9 + .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7 + .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21 + .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436 + .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f + .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778 + .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9 + .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a + .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2 + .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5 + .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3 + .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2 + .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d + .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5 + .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e + .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb + .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8 + .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052 + .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59 + .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba + .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774 + .word 0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff + .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952 + .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1 + .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a + .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4 + .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f + .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207 + .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d + .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c + .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22 + .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933 + .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db + .word 0x3fefb720, 0xdcef9069, 
0x3fefcb0f, 0x2e6d1675 + .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74 + .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968 + .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6 + .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3 + .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075 + .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315 + .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658 + .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17 + .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12 + .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76 + .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740 + .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e + .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510 + .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a + .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274 + .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8 + .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89 + .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514 + .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9 + + .word 0xc057150d, 0x5f6e1c54 ! KA3 = -3.60659926599003171364e-01*256.0 + .word 0x405ec71c, 0x2e92efda ! KA2 = 4.80902715189356683026e-01*256.0 + .word 0xc0671547, 0x653cbec4 ! KA1 = -7.21347520569871841065e-01*256.0 + .word 0x40771547, 0x652af190 ! KA0 = 1.44269504088069658645e+00*256.0 + .word 0x3ecebfbe, 0x9d182250 ! KB2 = 3.66556671660783833261e-06 + .word 0x3f662e43, 0xe2528362 ! KB1 = 2.70760782821392980564e-03 + .word 0x40e00000, 0x00000000 ! HTHRESH = 32768.0 + .word 0xc0e2c000, 0x00000000 ! LTHRESH = -38400.0 ; 0.0f + .word 0x3f800000, 0x00000000 ! 
1.0f ; free + +#define tmp_px STACK_BIAS-48 +#define tmp_py STACK_BIAS-40 +#define tmp_counter STACK_BIAS-32 +#define tmp0 STACK_BIAS-28 +#define tmp1 STACK_BIAS-24 +#define tmp2 STACK_BIAS-20 +#define tmp3 STACK_BIAS-16 +#define tmp4 STACK_BIAS-12 +#define tmp5 STACK_BIAS-8 +#define tmp6 STACK_BIAS-4 + + +#define KA3 %f34 +#define KA2 %f36 +#define KA1 %f38 +#define KA0 %f40 +#define KB2 %f42 +#define KB1 %f44 +#define HTHRESHOLD %f30 +#define LTHRESHOLD %f32 + +#define counter %o7 +#define stridex %i0 +#define stridey %i4 +#define stridez %l3 + +#define CONST_0x8000 %l1 +#define MASK_0x007fffff %l4 +#define MASK_0x7fffffff %l5 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +!-------------------------------------------------------------------- +! !!!!! vpowf algorithm !!!!! +! uy = *(unsigned int*)py; +! ux = *(unsigned int*)px; +! ay = uy & 0x7fffffff; +! ax0 = ux & 0x7fffffff; +! sx = ux >> 31; +! yisint0 = 0; /* Y - non-integer */ +! if (ax0 >= 0x7f800000 || ay >= 0x7f800000) { /* |X| or |Y| = Inf,Nan */ +! if (ax0 > 0x7f800000 || ay > 0x7f800000) /* |X| or |Y| = Nan */ +! pz[0] = *px * *py; +! goto next; +! if (ay == 0x7f800000) { /* |Y| = Inf */ +! float fy; +! if (ax0 == 0x3f800000) fy = *py - *py; /* +-1 ** +-Inf = NaN */ +! else fy = ((ax0 < 0x3f800000) != (uy >> 31)) ? ZERO : *(float*) &ay; +! pz[0] = fy; +! goto next; +! } +! if (sx) { /* X = -Inf */ +! exp = ay >> 23; +! if (exp >= 0x97) /* |Y| >= 2^24 */ +! yisint0 = 2; /* Y - even */ +! else { +! if (exp >= 0x7f) { /* |Y| >= 1 */ +! i0 = ay >> ((0x7f + 23) - exp); +! if ((i0 << ((0x7f + 23) - exp)) == ay) yisint0 = 2 - (i0 & 1); +! } +! } +! } +! if (uy >> 31) ax0 = 0; +! ax0 += yisint0 << 31; +! pz[0] = *(float*)&ax0; +! goto next; +! } +! exp0 = (ax0 >> 23) - 127; +! if ((int)ux < 0x00800000) { /* X = denormal or negative */ +! if ((int)ax0 < 0x00800000) { /* X = denormal */ +! *((float*) &ax0) = (float) (int)ax0; +! exp0 = (ax0 >> 23) - (127 + 149); +! } +! 
if ((int)ux <= 0) { /* X <= 0 */ +! exp = ay >> 23; +! if (exp >= 0x97) /* |Y| >= 2^24 */ +! yisint0 = 2; /* Y - even */ +! else { +! if (exp >= 0x7f) { /* |Y| >= 1 */ +! i0 = ay >> ((0x7f + 23) - exp); +! if ((i0 << ((0x7f + 23) - exp)) == ay) yisint0 = 2 - (i0 & 1); +! } +! } +! if (ax0 == 0) { /* pow(0,Y) */ +! float fy; +! fy = (uy >> 31) ? ONE / ZERO : ZERO; +! if (sx & yisint0) fy = -fy; +! pz[0] = fy; +! goto next; +! } +! if (yisint0 == 0) { /* pow(neg,non-integer) */ +! pz[0] = ZERO / ZERO; /* NaN */ +! goto next; +! } +! } +! } +! +! ax0 = *px; +! exp0 = ax0 & 0x7fffffff; +! exp0 >>= 23; +! exp0 -= 127; +! exp0 <<= 8; +! ax0 &= 0x007fffff; +! i0 = ax0 + 0x8000; +! i0 &= 0xffff0000; +! ind0 = i0 >> 12; +! ind0 &= -8; +! i0 = ax0 - i0; +! dtmp0 = (double) i0; +! dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); +! y0 = dtmp0 * dtmp1; +! dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); +! dtmp1 = (double) exp0; +! yy0 = dtmp0 + dtmp1; +! dtmp0 = KA3 * y0; +! dtmp0 += KA2; +! dtmp0 *= y0; +! dtmp0 += KA1; +! dtmp0 *= y0; +! dtmp0 += KA0; +! dtmp0 *= y0; +! yy0 += dtmp0; +! ftmp0 = *py0; +! dtmp0 = (double)ftmp0; +! yy0 *= dtmp0; +! if (yy0 >= HTHRESH) +! yy0 = HTHRESH; +! if (yy0 <= LTHRESH) +! yy0 = LTHRESH; +! ind0 = (int) yy0; +! ((int*)&dtmp1)[0] = ind0; +! ((int*)&dtmp1)[1] = 0; +! dtmp1 = vis_fpackfix(dtmp1); +! dtmp0 = (double)ind0; +! y0 = yy0 - dtmp0; +! dtmp0 = KB2 * y0; +! dtmp0 += KB1; +! yy0 = dtmp0 * y0; +! ind0 &= 255; +! ind0 <<= 3; +! di0 = *(double*)((char*)__mt_constexp2f + ind0); +! di0 = vis_fpadd32(di0,dtmp1); +! yy0 *= di0; +! yy0 += di0; +! ftmp0 = (float)yy0; +! *pz0 = ftmp0; +!-------------------------------------------------------------------- +! !!!!! vpowf algorithm,stridex=0 !!!!! +! +! ax = ax0 = *px; +! exp0 = ax0 & 0x7fffffff; +! exp0 >>= 23; +! exp0 -= 127; +! exp0 <<= 8; +! ax0 &= 0x007fffff; +! i0 = ax0 + 0x8000; +! i0 &= 0xffff0000; +! ind0 = i0 >> 12; +! ind0 &= -8; +! i0 = ax0 - i0; +! dtmp0 = (double) i0; +! 
dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); +! y0 = dtmp0 * dtmp1; +! dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); +! dtmp1 = (double) exp0; +! yy0 = dtmp0 + dtmp1; +! dtmp0 = KA3 * y0; +! dtmp0 += KA2; +! dtmp0 *= y0; +! dtmp0 += KA1; +! dtmp0 *= y0; +! dtmp0 += KA0; +! dtmp0 *= y0; +! yy = yy0 + dtmp0; +! +! uy = ((int*)py)[0]; +! ay = uy & 0x7fffffff; +! if (ay >= 0x7f800000) { /* |Y| = Inf or Nan */ +! float fy; +! if (ay > 0x7f800000) fy = *py + *py; /* |Y| = Nan */ +! else fy = ((ax < 0x3f800000) != (uy >> 31)) ? ZERO : *(float*)&ay; +! pz[0] = fy; +! goto next; +! } +! +! +! ftmp0 = py[0]; +! dtmp0 = (double)ftmp0; +! yy0 = dtmp0 * yy; +! if (yy0 >= HTHRESH) +! yy0 = HTHRESH; +! if (yy0 <= LTHRESH) +! yy0 = LTHRESH; +! ii0 = (int) yy0; +! dtmp0 = (double)ii0; +! i0 = ii0 >> 5; +! i0 &= -8; +! di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0]; +! y0 = yy0 - dtmp0; +! dtmp0 = KB2 * y0; +! dtmp0 += KB1; +! yy0 = dtmp0 * y0; +! ii0 &= 255; +! ii0 <<= 3; +! dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; +! di0 *= dtmp0; +! dtmp0 = yy0 * di0; +! dtmp0 += di0; +! ftmp0 = (float)dtmp0; +! 
pz[0] = ftmp0; +!-------------------------------------------------------------------- + ENTRY(__vpowf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + wr %g0,0x60,%gsr + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + ld [%i1],%o3 + add %l2,2064,%l0 + st %i0,[%fp+tmp_counter] + add %l0,2048,%l6 + ldd [%l6],KA3 + ldd [%l6+8],KA2 + sll stridey,2,stridey + ldd [%l6+16],KA1 + sll stridez,2,stridez + ldd [%l6+24],KA0 + sll %i2,2,stridex + ldd [%l6+32],KB2 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + fzero %f2 + ldd [%l6+40],KB1 + add MASK_0x7fffffff,1023,MASK_0x7fffffff + fzero %f10 + ldd [%l6+48],HTHRESHOLD + sethi %hi(0x7ffc00),MASK_0x007fffff + fzero %f20 + ldd [%l6+56],LTHRESHOLD + sethi %hi(0x8000),CONST_0x8000 + add MASK_0x007fffff,1023,MASK_0x007fffff + + cmp stridex,0 + bne,pt %icc,.common_case + sethi %hi(0x00800000),%l6 + + cmp %o3,%l6 + bl,pn %icc,.common_case + sethi %hi(0x7f800000),%o1 + + cmp %o3,%o1 + bge,pn %icc,.common_case + sethi %hi(0x3f800000),%l6 + + cmp %o3,%l6 + bne,pt %icc,.stridex_zero + nop + +.common_case: + stx %i1,[%fp+tmp_px] + stx %i3,[%fp+tmp_py] +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%o2 + ldx [%fp+tmp_py],%i2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px; + + lda [%i2]0x82,%l7 + sethi %hi(0xffff0000),%l6 + sethi %hi(0x7f800000),%o5 + + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + + cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000 + bge,pn %icc,.spec1 ! (Y0_2) if( ax0 >= 0x7f800000 ) + and %l7,MASK_0x7fffffff,%o4 + + cmp %o4,%o5 ! (Y0_2) ay0 ? 0x7f800000 + bge,pn %icc,.spec1 ! (Y0_2) if( ay0 >= 0x7f800000 ) + nop + + cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000 + ble,pn %icc,.spec2 ! (Y0_2) if(ux0 < 0x800000) + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + + sub %o3,127,%o3 ! 
(Y0_2) exp0 -= 127; + + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + add %o2,stridex,%o2 ! px += stridex + + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px; + + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + + and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff; + and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff; + + cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000 + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + + srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23; + add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000; + + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + sub %i3,127,%i3 ! (Y1_2) exp0 -= 127; + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + + sll %i3,8,%i3 ! (Y1_2) exp0 <<= 8; + and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000; + st %i3,[%fp+tmp4] ! (Y1_2) STORE exp0 + + sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0; + st %o0,[%fp+tmp5] ! (Y1_2) STORE i0 + bge,pn %icc,.update0 ! (Y1_2) if(ax0 >= 0x7f800000) + nop +.cont0: + cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000 + + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update1 ! (Y1_2) if(ux0 < 0x800000) + nop +.cont1: + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + + sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12; + add %o2,stridex,%i3 ! px += stridex + lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px; + + and %o1,-8,%o0 ! (Y1_2) ind0 &= -8; + ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0 + + and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff; + and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff; + lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0; + + srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23; + cmp %i1,%o5 ! (Y2_2) ax0 ? 
0x7f800000 + + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0 + sub %o3,127,%l7 ! (Y2_2) exp0 -= 127; + + add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000; + ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0; + + sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8; + and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000; + st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0 + + sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0; + st %i1,[%fp+tmp2] ! (Y2_2) STORE i0 + bge,pn %icc,.update2 ! (Y2_2) if(ax0 >= 0x7f800000) + nop +.cont2: + cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000 + + fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update3 ! (Y2_2) if(ux0 < 0x800000) + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; +.cont3: + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + + fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0; + + fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0; + + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + + faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2; + + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + add %i3,stridex,%o2 ! px += stridex + + lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px; + sra %o1,12,%g5 ! (Y2_1) ind0 = i0 >> 12; + + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + and %g5,-8,%o1 ! (Y2_1) ind0 &= -8; + ld [%fp+tmp2],%f6 ! (Y2_1) dtmp0 = (double) i0; + + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + add %l2,%o1,%g1 ! (Y2_1) (char*)__mt_constlog4f + ind0 + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + + fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0; + sub %o3,127,%o3 ! (Y0_2) exp0 -= 127; + cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000 + + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + ldd [%g1+8],%f58 ! 
(Y2_1) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f6,%f54 ! (Y2_1) dtmp0 = (double) i0; + + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + bge,pn %icc,.update4 ! (Y0_2) if( ax0 >= 0x7f800000 ) + nop +.cont4: + lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0 + add %i2,stridey,%o4 ! py += stridey + cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000 + + fmuld %f54,%f58,%f28 ! (Y2_1) y0 = dtmp0 * dtmp1; + lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0; + ble,pn %icc,.update5 ! (Y0_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1; +.cont5: + and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff; + ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0 + faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0; + + cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000 + bge,pn %icc,.update6 ! (Y1_1) if(ay0 >= 0x7f800000) + nop +.cont6: + fmuld KA3,%f28,%f62 ! (Y2_1) dtmp0 = KA3 * y0; + fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0; + + fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0; + + fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0; + + fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0; + + faddd %f62,KA2,%f26 ! (Y2_1) dtmp0 += KA2; + + add %o2,stridex,%o2 ! px += stridex + ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH) + + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px; + + faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0; + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + + and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff; + and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff; + + cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000 + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1; + + fmuld %f26,%f28,%f50 ! 
(Y2_1) dtmp0 *= y0; + srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23; + add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000; + fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH) + + fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0; + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + sub %i3,127,%i3 ! (Y1_2) exp0 -= 127; + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + + sll %i3,8,%i2 ! (Y1_2) exp0 <<= 8; + and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000; + st %i2,[%fp+tmp4] ! (Y1_2) STORE exp0 + + sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0; + st %o0,[%fp+tmp5] ! (Y1_2) STORE i0 + bge,pn %icc,.update7 ! (Y1_2) if(ax0 >= 0x7f800000) + nop +.cont7: + lda [stridey+%o4]0x82,%i3 ! Y(2_1) ay0 = *py0 + cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000 + add %o4,stridey,%i2 ! py += stridey; + fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + lda [stridey+%o4]0x82,%f16 ! (Y2_1) ftmp0 = *py0; + ble,pn %icc,.update8 ! (Y1_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f52 ! (Y2_1) dtmp0 += KA1; +.cont8: + and %i3,MASK_0x7fffffff,%i3 ! (Y2_1) ay0 &= 0x7fffffff + ld [%fp+tmp6],%f17 ! (Y2_1) dtmp1 = (double) exp0; + faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0; + + cmp %i3,%o5 ! (Y2_1) ay0 ? 0x7f800000 + bge,pn %icc,.update9 ! (Y2_1) if(ay0 >= 0x7f800000) + nop + +.cont9: + fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH; + + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + fstod %f16,%f54 ! (Y2_1) dtmp0 = (double)ftmp0; + + fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0; + + fitod %f17,%f24 ! (Y2_1) dtmp1 = (double) exp0; + + fmuld %f52,%f28,%f52 ! (Y2_1) dtmp0 *= y0; + fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0; + + st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0 + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + + fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH) + ldd [%l2+%o1],%f60 ! (Y2_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + + sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12; + add %o2,stridex,%i3 ! 
px += stridex + lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px; + + and %o1,-8,%o0 ! (Y1_2) ind0 &= -8; + add %i2,stridey,%i2 ! py += stridey + ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0 + faddd %f52,KA0,%f4 ! (Y2_1) dtmp0 += KA0; + + and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff; + and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff; + lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0; + fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0; + + srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23; + cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000 + faddd %f60,%f24,%f18 ! (Y2_1) yy0 = dtmp0 + dtmp1; + + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0 + sub %o3,127,%l7 ! (Y2_2) exp0 -= 127; + fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH) + + fmuld %f4,%f28,%f24 ! (Y2_1) dtmp0 *= y0; + add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000; + ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0; + + sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8; + and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000; + st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0 + fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0; + + + sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0; + st %i1,[%fp+tmp2] ! (Y2_2) STORE i0 + bge,pn %icc,.update10 ! (Y2_2) if(ax0 >= 0x7f800000) + nop +.cont10: + lda [%i2]0x82,%o2 ! (Y0_2) ay0 = *(int*)py0; + cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000 + fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update11 ! (Y2_2) if(ux0 < 0x800000) + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; +.cont11: + fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0; + and %o2,MASK_0x7fffffff,%o2 ! (Y0_2) ay0 &= 0x7fffffff + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + faddd %f18,%f24,%f52 ! (Y2_1) yy0 += dtmp0; + + ld [%fp+tmp0],%g1 ! (Y0_1) LAOD ind0 + cmp %o2,%o5 ! (Y0_2) ay0 ? 0x7f800000 + bge,pn %icc,.update12 ! 
(Y0_2) if( ay0 >= 0x7f800000) + nop +.cont12: + fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0; + + cmp counter,6 ! counter + bl,pn %icc,.tail + sub %i5,stridez,%o4 + + ba .main_loop + nop + + .align 16 +.main_loop: + fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0; + and %g1,255,%o2 ! (Y0_0) ind0 &= 255; + sub counter,3,counter ! counter + fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH; + + fmuld %f54,%f52,%f18 ! (Y2_0) yy0 *= dtmp0; + sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3; + add %o4,stridez,%l7 ! pz += stridez + faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1; + + fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0; + st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0 + + faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2; + + fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0; + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + add %i3,stridex,%o2 ! px += stridex + fcmped %fcc0,HTHRESHOLD,%f18 ! (Y2_0) if (yy0 >= HTHRESH) + + lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px; + sra %o1,12,%g5 ! (Y2_1) ind0 = i0 >> 12; + fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1); + + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + and %g5,-8,%o1 ! (Y2_1) ind0 &= -8; + ld [%fp+tmp2],%f6 ! (Y2_1) dtmp0 = (double) i0; + + fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0; + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + + fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0; + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + add %l2,%o1,%g1 ! (Y2_1) (char*)__mt_constlog4f + ind0 + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + + fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0; + sub %o3,127,%o3 ! (Y0_2) exp0 -= 127; + cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000 + fcmped %fcc1,LTHRESHOLD,%f18 ! 
(Y2_0) if (yy0 <= LTHRESH) + + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + ldd [%g1+8],%f58 ! (Y2_1) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f6,%f54 ! (Y2_1) dtmp0 = (double) i0; + + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0; + + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + bge,pn %icc,.update13 ! (Y0_2) if( ax0 >= 0x7f800000 ) + faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0; +.cont13: + lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0 + add %i2,stridey,%o4 ! py += stridey + cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000 + fmovdl %fcc0,HTHRESHOLD,%f18 ! (Y2_0) yy0 = HTHRESH; + + fmuld %f54,%f58,%f28 ! (Y2_1) y0 = dtmp0 * dtmp1; + lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0; + ble,pn %icc,.update14 ! (Y0_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1; +.cont14: + fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0; + and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff; + ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0 + faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0; + + ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0; + cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000 + bge,pn %icc,.update15 ! (Y1_1) if(ay0 >= 0x7f800000) + fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0; +.cont15: + st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0; + fmovdg %fcc1,LTHRESHOLD,%f18 ! (Y2_0) yy0 = LTHRESH; + + add %l7,stridez,%l7 ! pz += stridez + fmuld KA3,%f28,%f62 ! (Y2_1) dtmp0 = KA3 * y0; + and %g5,255,%g5 ! (Y1_0) ind0 &= 255; + fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0; + + fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0; + sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3; + faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1; + + fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0; + ldd [%l0+%i2],%f56 ! 
(Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0; + fdtoi %f18,%f2 ! (Y2_0) ind0 = (int) yy0; + st %f2,[%fp+tmp1] ! (Y2_0) STORE ind0 + + faddd %f62,KA2,%f26 ! (Y2_1) dtmp0 += KA2; + + fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0; + add %o2,stridex,%o2 ! px += stridex + ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH) + + fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1); + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px; + + faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0; + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + + fitod %f2,%f54 ! (Y2_0) dtmp0 = (double)ind0; + and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff; + and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff; + + fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0; + cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000 + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1; + + fmuld %f26,%f28,%f50 ! (Y2_1) dtmp0 *= y0; + srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23; + add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000; + fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH) + + fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0; + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + sub %i3,127,%i3 ! (Y1_2) exp0 -= 127; + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + + sll %i3,8,%i2 ! (Y1_2) exp0 <<= 8; + and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000; + st %i2,[%fp+tmp4] ! (Y1_2) STORE exp0 + fsubd %f18,%f54,%f26 ! (Y2_0) y0 = yy0 - dtmp0; + + sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0; + st %o0,[%fp+tmp5] ! (Y1_2) STORE i0 + bge,pn %icc,.update16 ! (Y1_2) if(ax0 >= 0x7f800000) + faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0; +.cont16: + lda [stridey+%o4]0x82,%i3 ! Y(2_1) ay0 = *py0 + cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 
0x800000 + add %o4,stridey,%i2 ! py += stridey; + fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + lda [stridey+%o4]0x82,%f16 ! (Y2_1) ftmp0 = *py0; + ble,pn %icc,.update17 ! (Y1_2) if(ux0 < 0x800000) + faddd %f50,KA1,%f52 ! (Y2_1) dtmp0 += KA1; +.cont17: + fmuld KB2,%f26,%f4 ! (Y2_0) dtmp0 = KB2 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y2_1) ay0 &= 0x7fffffff + ld [%fp+tmp6],%f17 ! (Y2_1) dtmp1 = (double) exp0; + faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0; + + ld [%fp+tmp1],%o0 + cmp %i3,%o5 ! (Y2_1) ay0 ? 0x7f800000 + bge,pn %icc,.update18 ! (Y2_1) if(ay0 >= 0x7f800000) + fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0; +.cont18: + st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0; + add %l7,stridez,%o4 ! pz += stridez + fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH; + + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + and %o0,255,%o0 ! (Y2_0) ind0 &= 255; + fstod %f16,%f54 ! (Y2_1) dtmp0 = (double)ftmp0; + + fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0; + sll %o0,3,%l7 ! (Y2_0) ind0 <<= 3; + faddd %f4,KB1,%f60 ! (Y2_0) dtmp0 += KB1; + + fpackfix %f2,%f2 ! (Y2_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f17,%f24 ! (Y2_1) dtmp1 = (double) exp0; + ldd [%l0+%l7],%f4 ! (Y2_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f52,%f28,%f52 ! (Y2_1) dtmp0 *= y0; + fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0; + + st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0 + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + + fmuld %f60,%f26,%f62 ! (Y2_0) yy0 = dtmp0 * y0; + fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH) + ldd [%l2+%o1],%f60 ! (Y2_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + + sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12; + add %o2,stridex,%i3 ! px += stridex + lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px; + fpadd32 %f2,%f4,%f46 ! (Y2_0) di0 = vis_fpadd32(di0,dtmp1); + + and %o1,-8,%o0 ! (Y1_2) ind0 &= -8; + add %i2,stridey,%i2 ! py += stridey + ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0 + faddd %f52,KA0,%f4 ! 
(Y2_1) dtmp0 += KA0; + + and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff; + and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff; + lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0; + fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0; + + fmuld %f62,%f46,%f62 ! (Y2_0) yy0 *= di0; + srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23; + cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000 + faddd %f60,%f24,%f18 ! (Y2_1) yy0 = dtmp0 + dtmp1; + + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0 + sub %o3,127,%l7 ! (Y2_2) exp0 -= 127; + fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH) + + fmuld %f4,%f28,%f24 ! (Y2_1) dtmp0 *= y0; + add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000; + ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0; + + sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8; + and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000; + st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0 + fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0; + + sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0; + st %i1,[%fp+tmp2] ! (Y2_2) STORE i0 + bge,pn %icc,.update19 ! (Y2_2) if(ax0 >= 0x7f800000) + faddd %f62,%f46,%f22 ! (Y2_0) yy0 += di0; +.cont19: + lda [%i2]0x82,%o2 ! (Y0_2) ay0 = *(int*)py0; + cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000 + fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1; + ble,pn %icc,.update20 ! (Y2_2) if(ux0 < 0x800000) + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; +.cont20: + fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0; + and %o2,MASK_0x7fffffff,%o2 ! (Y0_2) ay0 &= 0x7fffffff + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + faddd %f18,%f24,%f52 ! (Y2_1) yy0 += dtmp0; + + ld [%fp+tmp0],%g1 ! (Y0_1) LAOD ind0 + cmp %o2,%o5 ! (Y0_2) ay0 ? 0x7f800000 + bge,pn %icc,.update21 ! (Y0_2) if( ay0 >= 0x7f800000) + fdtos %f22,%f12 ! (Y2_0) ftmp0 = (float)yy0; +.cont21: + st %f12,[%o4] ! (Y2_0) *pz0 = ftmp0; + cmp counter,6 ! 
counter + bge,pt %icc,.main_loop + fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0; + +.tail: + subcc counter,1,counter + bneg,pn %icc,.begin + add %o4,stridez,%i5 + + fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0; + and %g1,255,%o2 ! (Y0_0) ind0 &= 255; + fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH; + + fmuld %f54,%f52,%f18 ! (Y2_0) yy0 *= dtmp0; + sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3; + add %o4,stridez,%l7 ! pz += stridez + faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1; + + fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0; + st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0 + + faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2; + + fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0; + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f18 ! (Y2_0) if (yy0 >= HTHRESH) + + fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1); + + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + + fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0; + + fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0; + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + + fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0; + fcmped %fcc1,LTHRESHOLD,%f18 ! (Y2_0) if (yy0 <= LTHRESH) + + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + + fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0; + + faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0; + + lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0 + add %i2,stridey,%o4 ! py += stridey + fmovdl %fcc0,HTHRESHOLD,%f18 ! (Y2_0) yy0 = HTHRESH; + + lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0; + faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1; + + fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0; + and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff; + ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0 + faddd %f26,%f48,%f58 ! 
(Y0_1) yy0 += dtmp0; + + ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0; + cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000 + bge,pn %icc,.update22 ! (Y1_1) if(ay0 >= 0x7f800000) + fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0; +.cont22: + st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0; + fmovdg %fcc1,LTHRESHOLD,%f18 ! (Y2_0) yy0 = LTHRESH; + + subcc counter,1,counter + bneg,pn %icc,.begin + add %l7,stridez,%i5 + + add %l7,stridez,%l7 ! pz += stridez + and %g5,255,%g5 ! (Y1_0) ind0 &= 255; + fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0; + + fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0; + sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3; + faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1; + + fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1); + fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0; + ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0; + fdtoi %f18,%f2 ! (Y2_0) ind0 = (int) yy0; + st %f2,[%fp+tmp1] ! (Y2_0) STORE ind0 + + + fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0; + ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH) + + fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1); + + faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0; + + fitod %f2,%f54 ! (Y2_0) dtmp0 = (double)ind0; + + fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0; + faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1; + + fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH) + + fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0; + + fsubd %f18,%f54,%f26 ! (Y2_0) y0 = yy0 - dtmp0; + + faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0; + + fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH; + + + fmuld KB2,%f26,%f4 ! (Y2_0) dtmp0 = KB2 * y0; + faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0; + + ld [%fp+tmp1],%o0 + fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0; + + st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0; + add %l7,stridez,%o4 ! pz += stridez + fmovdg %fcc1,LTHRESHOLD,%f58 ! 
(Y0_1) yy0 = LTHRESH; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o4,%i5 + + and %o0,255,%o0 ! (Y2_0) ind0 &= 255; + + fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0; + sll %o0,3,%l7 ! (Y2_0) ind0 <<= 3; + faddd %f4,KB1,%f60 ! (Y2_0) dtmp0 += KB1; + + fpackfix %f2,%f2 ! (Y2_0) dtmp1 = vis_fpackfix(dtmp1); + ldd [%l0+%l7],%f4 ! (Y2_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0; + + st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0 + + fmuld %f60,%f26,%f62 ! (Y2_0) yy0 = dtmp0 * y0; + fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH) + + fpadd32 %f2,%f4,%f46 ! (Y2_0) di0 = vis_fpadd32(di0,dtmp1); + + add %i2,stridey,%i2 ! py += stridey + + fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0; + + fmuld %f62,%f46,%f62 ! (Y2_0) yy0 *= di0; + + fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH) + + + fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0; + + faddd %f62,%f46,%f22 ! (Y2_0) yy0 += di0; + + fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH; + + fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0; + + ld [%fp+tmp0],%g1 ! (Y0_1) LAOD ind0 + fdtos %f22,%f12 ! (Y2_0) ftmp0 = (float)yy0; + + st %f12,[%o4] ! (Y2_0) *pz0 = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.begin + add %o4,stridez,%i5 + + and %g1,255,%o2 ! (Y0_0) ind0 &= 255; + fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH; + + sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3; + add %o4,stridez,%l7 ! pz += stridez + faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1; + + fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1); + ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0; + st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0 + + fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0; + + fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1); + + fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0; + + fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0; + + fsubd %f56,%f52,%f52 ! 
(Y1_0) y0 = yy0 - dtmp0; + + faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0; + + fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0; + + ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0; + fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0; + st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0; + + subcc counter,1,counter + bneg .begin + add %l7,stridez,%i5 + + add %l7,stridez,%l7 ! pz += stridez + and %g5,255,%g5 ! (Y1_0) ind0 &= 255; + + sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3; + faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1; + + fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1); + ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0); + + fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0; + + fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1); + + fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0; + + faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0; + + fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0; + + st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0; + ba .begin + add %l7,stridez,%i5 ! pz += stridez + +.exit: + ret + restore + + .align 16 +.specs_exit: + add %i1,stridex,%o2 + add %i3,stridey,%i2 + st %f4,[%i5] + + sub counter,1,counter + ba .begin1 + add %i5,stridez,%i5 + +.spec1: + ld [%l0+2048+64],%f0 ! LOAD 1.0f + or %g0,%i1,%o1 + or %g0,%i3,%o3 + + ld [%o2],%f4 ! *px + or %g0,%o2,%i1 + or %g0,%i2,%i3 + + ld [%i3],%f6 ! *py + or %g0,%l7,%o2 + fsubs %f0,%f0,%f5 ! 0.0f + + sethi %hi(0x7f800000),%l6 + cmp %o4,0 ! ay ? 0 + be,a,pn %icc,.specs_exit ! if(ay == 0) + fmovs %f0,%f4 ! return 1.0f + + cmp %o3,%l6 ! ax0 ? 0x7f800000 + bgu,a %icc,.specs_exit ! ax0 > 0x7f800000 + fmuls %f4,%f6,%f4 ! return *px * *py; /* |X| or |Y| = Nan */ + + cmp %o4,%l6 ! ay ? 0x7f800000 + bgu,a .specs_exit ! ay > 0x7f800000 + fmuls %f4,%f6,%f4 ! return *px * *py; /* |X| or |Y| = Nan */ + + sethi %hi(0x3f800000),%o5 + bne,a %icc,1f ! if (ay != 0x7f800000) { /* |Y| = Inf */ + srl %o1,31,%o1 ! sx = ux >> 31 + + cmp %o3,%o5 ! ax0 ? 0x3f800000 + be,a .specs_exit ! if (ax0 == 0x3f800000) + fmuls %f6,%f5,%f4 ! 
return *py * 0.0f; /* +-1 ** +-Inf = NaN */ + + sub %o3,%o5,%o3 ! ax0 - 0x3f800000 + srl %o2,31,%o2 ! uy >> 31 + + srlx %o3,63,%o3 ! (ax0 - 0x3f800000) >> 63 + + cmp %o3,%o2 ! ((ax0 - 0x3f800000) >> 63) ? (uy >> 31) + bne,a .specs_exit + fzeros %f4 ! return 0.f; + + ba .specs_exit + fabss %f6,%f4 ! return fabss(*py) +1: + cmp %o1,0 ! sx ? 0 + be,pn %icc,.spec1_exit ! if (sx == 0) + or %g0,%g0,%o5 ! yisint0 = 0; + + srl %o4,23,%l7 ! exp = ay >> 23; + cmp %l7,0x97 ! exp ? 0x97 + bge,a,pn %icc,.spec1_exit ! if (exp >= 0x97) /* |Y| >= 2^24 */ + add %g0,2,%o5 ! yisint = 2; + + cmp %l7,0x7f ! exp ? 0x7f + bl,pn %icc,.spec1_exit ! if (exp < 0x7f) + sub %g0,%l7,%l7 ! exp = -exp; + + add %l7,(0x7f + 23),%l7 ! exp += (0x07f + 23); + srl %o4,%l7,%l6 ! i0 = ay >> exp + sll %l6,%l7,%l7 ! i0 << exp + + cmp %l7,%o4 ! (i0 << exp) ? ay + bne,pn %icc,.spec1_exit ! if((i0 << exp) != ay) + and %l6,1,%l6 ! i0 &= 1 + + sub %g0,%l6,%l6 ! i0 = -i0; + add %l6,2,%o5 ! yisint0 = 2 + i0; + +.spec1_exit: + srl %o2,31,%o2 ! uy >> 31 + cmp %o2,0 ! (uy >> 31) ? 0 + movne %icc,%g0,%o3 ! if (uy >> 31) ax0 = 0; + + sll %o5,31,%o5 ! yisint0 <<= 31; + add %o5,%o3,%o5 ! ax0 += yisint0; + + add %i1,stridex,%o2 ! px += stridex; + add %i3,stridey,%i2 ! py += stridey; + st %o5,[%i5] ! return *(float*)&ax0; + + sub counter,1,counter ! counter--; + ba .begin1 + add %i5,stridez,%i5 ! pz += stridez; + +.spec2: + or %g0,%i1,%o1 + or %g0,%i3,%o3 + ld [%l0+2048+64],%f0 ! LOAD 1.0f + or %g0,%o2,%i1 + or %g0,%i2,%i3 + + or %g0,%l7,%o2 + cmp %o4,0 ! ay ? 0 + be,a,pn %icc,.specs_exit ! if(ay == 0) + fmovs %f0,%f4 ! return 1.0f + + srl %o3,23,%l7 ! exp0 = (ax0 >> 23); + sub %l7,127,%l7 ! exp = exp0 = exp0 - 127; + + or %g0,%g0,%o5 ! yisint = 0; + cmp %o3,MASK_0x007fffff ! (int)ax0 ? 0x00800000 + bg,pn %icc,1f ! if ((int)ax0 >= 0x00800000) + nop + + ! X = denormal or negative + st %o3,[%fp+tmp0] ! 
*((float*) &ax0) = (float) (int)ax0; + ld [%fp+tmp0],%f4 + fitos %f4,%f4 + st %f4,[%fp+tmp0] + ld [%fp+tmp0],%o3 + + srl %o3,23,%l7 ! exp = (ax0 >> 23) + sub %l7,127+149,%l7 ! exp -= (127+149) +1: + cmp %o1,0 ! ux ? 0 + bg,a %icc,.spec_proc ! if((int)ux > 0) + sethi %hi(0xffff0000),%l6 + + srl %o4,23,%o0 ! exp = ay >> 23; + cmp %o0,0x97 ! exp ? 0x97 + bge,a,pn %icc,2f ! if (exp >= 0x97) /* |Y| >= 2^24 */ + add %g0,2,%o5 ! yisint0 = 2; /* Y - even */ + + cmp %o0,0x7f ! exp ? 0x7f + bl,pn %icc,2f ! if(exp < 0x7f) + nop + + sub %g0,%o0,%o0 ! exp = -exp; + add %o0,(0x7f + 23),%o0 ! exp += (0x7f + 23) + srl %o4,%o0,%l6 ! i0 = ay >> ((0x7f + 23) - exp); + sll %l6,%o0,%o0 ! i0 << ((0x7f + 23) - exp) + cmp %o0,%o4 ! (i0 << ((0x7f + 23) - exp)) ? ay + bne,pn %icc,2f ! if((i0 << ((0x7f + 23) - exp)) != ay) + nop + + and %l6,1,%l6 ! i0 &= 1; + sub %g0,%l6,%l6 ! i0 = -i0; + add %l6,2,%o5 ! yisint = i0 + 2; +2: + cmp %o3,0 ! ax0 ? 0 + bne,pn %icc,4f ! if(ax0 != 0) + nop + + srl %o1,31,%o1 ! sx = ux >> 31 + srl %o2,31,%o2 ! uy >> 31 + + cmp %o2,0 ! (uy >> 31) ? 0 + be,a,pn %icc,3f ! if((uy >> 31) == 0) + fzeros %f4 ! return ZERO + + fdivs %f0,%f3,%f4 ! fy = ONE/ZERO +3: + andcc %o1,%o5,%g0 ! sx & yisint0 + be,pn %icc,.specs_exit ! if( (sx & yisint0) == 0 ) + nop + + ba .specs_exit + fnegs %f4,%f4 ! fy = -fy; +4: + cmp %o5,0 ! yisint0 ? 0 + be,a %icc,.specs_exit ! if(yisint0 == 0) + fdivs %f3,%f3,%f4 ! return ZERO/ZERO + + sethi %hi(0xffff0000),%l6 + +.spec_proc: + sll %l7,8,%l7 ! exp0 = exp0 << 8; + st %l7,[%fp+tmp1] ! STORE exp0 + and %o3,MASK_0x007fffff,%g5 ! ax0 &= 0x007fffff; + ld [%i3],%f14 ! ftmp0 = py[0] + sllx %o5,63,%o5 ! yisint0 <<= 63; + add %g5,CONST_0x8000,%o3 ! i0 = ax0 + 0x8000; + stx %o5,[%fp+tmp5] ! STORE yisint0 + and %o3,%l6,%l7 ! i0 &= 0xffff0000; + sub %g5,%l7,%o1 ! i0 = ax0 - i0; + sra %l7,12,%g5 ! ind0 = i0 >> 12; + st %o1,[%fp+tmp2] ! STORE i0 + fstod %f14,%f54 ! dtmp1 = (double)ftmp0 + and %g5,-8,%g5 ! ind0 &= -8; + add %l2,%g5,%l7 ! 
(char*)__mt_constlog4f + ind0 + ld [%fp+tmp1],%f18 ! LOAD exp0 + ld [%fp+tmp2],%f16 ! LOAD i0 + ldd [%l7+8],%f62 ! dtmp2 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + ldd [%l2+%g5],%f56 ! dtmp3 = *(double *)((char*)__mt_constlog4f + ind0); + fitod %f18,%f58 ! dtmp4 = (double)exp0 + fitod %f16,%f60 ! dtmp5 = (double)i0 + fmuld %f60,%f62,%f60 ! y0 = dtmp5 * dtmp2; + faddd %f56,%f58,%f58 ! yy0 = dtmp3 + dtmp4; + fmuld KA3,%f60,%f52 ! dtmp0 = KA3 * y0; + faddd %f52,KA2,%f50 ! dtmp0 += KA2; + fmuld %f50,%f60,%f48 ! dtmp0 *= y0; + faddd %f48,KA1,%f46 ! dtmp0 += KA1; + fmuld %f46,%f60,%f62 ! dtmp0 *= y0; + ldd [%fp+tmp5],%f24 ! LOAD yisint0 + faddd %f62,KA0,%f56 ! dtmp0 += KA0; + fmuld %f56,%f60,%f52 ! dtmp0 *= y0; + faddd %f58,%f52,%f50 ! yy0 += dtmp0; + fmuld %f54,%f50,%f52 ! yy0 *= dtmp1; + fcmped %fcc0,HTHRESHOLD,%f52 ! if (yy0 >= HTHRESH) + fcmped %fcc1,LTHRESHOLD,%f52 ! if (yy0 <= LTHRESH) + fmovdl %fcc0,HTHRESHOLD,%f52 ! yy0 = HTHRESH; + fmovdg %fcc1,LTHRESHOLD,%f52 ! yy0 = LTHRESH; + fdtoi %f52,%f20 ! ind0 = (int) yy0; + st %f20,[%fp+tmp3] ! STORE ind0 + fitod %f20,%f58 ! dtmp0 = (double) ind0; + fpackfix %f20,%f20 ! dtmp1 = vis_fpackfix(dtmp1) + ld [%fp+tmp3],%g1 ! LOAD ind0 + fsubd %f52,%f58,%f46 ! y0 = yy0 - dtmp0; + fpadd32 %f20,%f24,%f56 ! dtmp1 += yisint0 + and %g1,255,%o4 ! ind0 &= 255; + sll %o4,3,%o3 ! ind0 <<= 3; + ldd [%l0+%o3],%f54 ! di0 = *(double*)((char*)__mt_constexp2f + ind0); + fmuld KB2,%f46,%f48 ! dtmp0 = KB2 * y0; + fpadd32 %f56,%f54,%f56 ! di0 = vis_fpadd32(di0,dtmp1); + faddd %f48,KB1,%f62 ! dtmp0 += KB1; + fmuld %f62,%f46,%f60 ! yy0 = dtmp0 * y0; + fmuld %f60,%f56,%f52 ! yy0 *= di0; + faddd %f52,%f56,%f58 ! yy0 += di0; + ba .specs_exit + fdtos %f58,%f4 ! 
ftmp0 = (float)yy0; + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + add %i2,stridey,%o1 + stx %o2,[%fp+tmp_px] + + stx %o1,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + nop + + add %i2,stridey,%o1 + stx %o2,[%fp+tmp_px] + + stx %o1,[%fp+tmp_py] + sub counter,1,counter + + st counter,[%fp+tmp_counter] + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + nop + + add %i2,stridey,%o2 + stx %i3,[%fp+tmp_px] + + add %o2,stridey,%o2 + stx %o2,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + nop + + add %i2,stridey,%o2 + stx %i3,[%fp+tmp_px] + + add %o2,stridey,%o2 + stx %o2,[%fp+tmp_py] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + fzeros %f2 + cmp counter,1 + ble .cont6 + nop + + ld [%fp+tmp_counter],%g1 + + sub %o2,stridex,%o3 + stx %o4,[%fp+tmp_py] + + sub %o3,stridex,%o3 + add %g1,counter,counter + stx %o3,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont6 + or %g0,1,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + 
ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,4 + ble .cont8 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + ba .cont8 + or %g0,4,counter + + .align 16 +.update9: + cmp counter,2 + ble .cont9 + fzeros %f16 + + ld [%fp+tmp_counter],%i3 + + sub %o2,stridex,%g1 + stx %i2,[%fp+tmp_py] + + sub %g1,stridex,%g1 + add %i3,counter,counter + stx %g1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont9 + or %g0,2,counter + + .align 16 +.update10: + cmp counter,5 + ble .cont10 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont10 + or %g0,5,counter + + .align 16 +.update11: + cmp counter,5 + ble .cont11 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont11 + or %g0,5,counter + + .align 16 +.update12: + fzeros %f0 + cmp counter,3 + ble .cont12 + nop + + ld [%fp+tmp_counter],%o2 + + sub %i3,stridex,%i1 + stx %i2,[%fp+tmp_py] + + sub %i1,stridex,%i1 + add %o2,counter,counter + stx %i1,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont12 + or %g0,3,counter + + .align 16 +.update13: + cmp counter,3 + ble .cont13 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont13 + or %g0,3,counter + + .align 16 +.update14: + cmp counter,3 + ble .cont14 + nop + + sll stridey,1,%g5 + add %i2,stridey,%o3 + stx %o2,[%fp+tmp_px] + + add %o3,%g5,%o3 + stx %o3,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont14 + or %g0,3,counter + + .align 16 +.update15: + cmp counter,1 + ble .cont15 + fzeros %f2 + + ld 
[%fp+tmp_counter],%g1 + + sub %o2,stridex,%o3 + stx %o4,[%fp+tmp_py] + + sub %o3,stridex,%o3 + add %g1,counter,counter + stx %o3,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + ba .cont15 + or %g0,1,counter + + .align 16 +.update16: + cmp counter,4 + ble .cont16 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + ba .cont16 + or %g0,4,counter + + .align 16 +.update17: + cmp counter,4 + ble .cont17 + nop + + sll stridey,1,%g1 + add %o4,stridey,%o0 + stx %o2,[%fp+tmp_px] + + add %o0,%g1,%o0 + stx %o0,[%fp+tmp_py] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + ba .cont17 + or %g0,4,counter + + .align 16 +.update18: + fzeros %f16 + cmp counter,2 + ble .cont18 + nop + + ld [%fp+tmp_counter],%i3 + + sub %o2,stridex,%g1 + stx %i2,[%fp+tmp_py] + + sub %g1,stridex,%g1 + add %i3,counter,counter + stx %g1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + ba .cont18 + or %g0,2,counter + + .align 16 +.update19: + cmp counter,5 + ble .cont19 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont19 + or %g0,5,counter + + .align 16 +.update20: + cmp counter,5 + ble .cont20 + nop + + add %i2,stridey,%i1 + stx %i3,[%fp+tmp_px] + + add %i1,stridey,%i1 + stx %i1,[%fp+tmp_py] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + ba .cont20 + or %g0,5,counter + + .align 16 +.update21: + cmp counter,3 + ble .cont21 + fzeros %f0 + + ld [%fp+tmp_counter],%o2 + + sub %i3,stridex,%i1 + stx %i2,[%fp+tmp_py] + + sub %i1,stridex,%i1 + add %o2,counter,counter + stx %i1,[%fp+tmp_px] + + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont21 + or %g0,3,counter + + .align 16 +.update22: + cmp counter,3 + ble .cont22 + fzeros %f2 + + ld [%fp+tmp_counter],%g1 + + sub %i3,stridex,%i2 + stx 
%i2,[%fp+tmp_px] + + add %g1,counter,counter + stx %o4,[%fp+tmp_py] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + ba .cont22 + or %g0,3,counter + +.stridex_zero: + ld [%fp+tmp_counter],counter + + stx %i3,[%fp+tmp_py] + + cmp counter,0 + ble,pn %icc,.exit + lda [%i1]0x82,%i1 ! (Y0_2) ax0 = *px; + + and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff; + sub %i3,%l6,%l6 + and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff; + srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23; + srl %l6,31,%l6 + st %l6,[%fp+tmp5] + add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000; + sethi %hi(0xffff0000),%l6 + sub %o3,127,%o3 ! (Y0_2) exp0 -= 127; + and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000; + sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8; + st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0 + sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12; + sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0; + st %o4,[%fp+tmp2] ! (Y0_2) STORE i0 + and %o0,-8,%g5 ! (Y0_2) ind0 &= -8; + ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0; + add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0 + ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8); + fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0; + fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1; + fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0; + faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2; + fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0; + faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1; + ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0; + fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0; + fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0; + ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0); + faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0; + faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1; + fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0; + sub %l2,3200,%o4 + sub %l2,1152-600,%o3 + faddd %f26,%f48,%f46 ! 
(Y0_1) yy0 += dtmp0; + or %g0,%i5,%g1 + sethi %hi(0x7f800000),%o1 + +.xbegin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_py],%o5 + st %g0,[%fp+tmp_counter] +.xbegin1: + subcc counter,1,counter + bneg,pn %icc,.exit + nop + + lda [%o5]0x82,%i5 ! (Y0_0) ay = py[0]; + + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + and %i5,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + + cmp %i3,%o1 + bge,pn %icc,.xspec + nop + + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + add %o5,stridey,%o5 ! py += stridey + + lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0]; + + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + + and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + cmp %i5,%o1 + bge,pn %icc,.xupdate0 + nop + +.xcont0: + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + + add %o5,stridey,%o5 ! py += stridey + fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy; + + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + cmp %i3,%o1 + bge,pn %icc,.xupdate1 + fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont1: + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH; + + fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH) + + fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy; + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0]; + + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH; + + and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f22 ! 
(Y0_0) if (yy0 >= HTHRESH) + + cmp %l7,%o1 + bge,pn %icc,.xupdate2 + nop +.xcont2: + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH; + + fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH) + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + + add %o5,stridey,%o5 ! py += stridey + fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH; + + cmp %i3,%o1 + bge,pn %icc,.xupdate3 + fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont3: + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH; + + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH; + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + + fmuld %f10,%f62,%f62 ! 
(Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + cmp %i5,%o1 + bge,pn %icc,.xupdate4 +.xcont4: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate5 + fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont5: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %g1,stridez,%i3 ! pz += stridez + st %f1,[%g1] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! 
(Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + add %i3,stridez,%i5 ! pz += stridez + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + cmp %l7,%o1 + bge,pn %icc,.xupdate6 + +.xcont6: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + cmp counter,8 + bl,pn %icc,.xtail + nop + + ba .xmain_loop + nop + + .align 16 +.xmain_loop: + fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! 
(Y0_2) dtmp0 = (double)ii0; + sub counter,4,counter + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate7 + fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont7: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + add %o5,stridey,%o5 ! 
py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH; + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + cmp %i5,%o1 + bge,pn %icc,.xupdate8 + +.xcont8: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + add %i3,stridez,%i5 ! pz += stridez + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! 
(Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate9 + fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont9: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + add %o5,stridey,%o5 ! py += stridey + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0]; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + add %i3,stridez,%i5 ! pz += stridez + lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0]; + fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + cmp %l7,%o1 + bge,pn %icc,.xupdate10 +.xcont10: + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + cmp counter,4 + bge,pt %icc,.xmain_loop + fstod %f7,%f48 ! 
(Y1_0) dtmp0 = (double)ftmp0; + +.xtail: + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + add %o5,stridey,%o5 ! py += stridey + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy; + + fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0; + lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0]; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff + fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + cmp %i3,%o1 + bge,pn %icc,.xupdate11 + fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH) +.xcont11: + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0; + + fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH) + + fitod %f3,%f56 ! 
(Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH) + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + add %i3,stridez,%i5 ! pz += stridez + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH; + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH) + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0 + + fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH; + + fmuld %f10,%f18,%f50 ! 
(Y1_3) di0 *= dtmp0; + + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + + fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + + fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0; + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0]; + + fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0; + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0 + + fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + add %i3,stridez,%i5 ! pz += stridez + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0 + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0; + + sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6; + and %g5,255,%i1 ! (Y1_3) ii0 &= 255; + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3; + and %i0,-4,%i0 ! (Y1_3) i0 &= -4; + + fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0; + ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]; + + faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1; + ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + + fsubd %f26,%f52,%f40 ! 
(Y0_2) y0 = yy0 - dtmp0; + + fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0; + + fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0; + + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i3,%g1 + + ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0 + fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0; + + fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0; + sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6; + and %i2,255,%l7 ! (Y0_2) ii0 &= 255; + + sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3; + and %l6,-4,%g5 ! (Y0_2) i0 &= -4; + + faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1; + ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0] + + ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0]; + + faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0; + + fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0; + + fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0; + + fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0; + add %i3,stridez,%i5 ! pz += stridez + st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0; + + subcc counter,1,counter + bneg,pn %icc,.xbegin + or %g0,%i5,%g1 + + fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0; + + faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0; + + fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0; + add %i5,stridez,%i3 ! pz += stridez + st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0; + + ba .xbegin + or %g0,%i3,%g1 + +.xspec: + bg,a,pn %icc,.yisnan ! if (ay > 0x7f800000) /* |Y| = Nan */ + ld [%o5],%f8 ! fy = *py; + + ld [%fp+tmp5],%l6 ! LOAD (ax-0x3f800000)<<63 + srl %i5,31,%i5 ! uy >> 31 + + cmp %l6,%i5 ! if((ax < 0x3f800000) != (uy >> 31)) + be,a,pn %icc,.xspec_exit ! if((ax < 0x3f800000) != (uy >> 31)) + st %i3,[%g1] ! fy = *(float*)&ay; + + st %g0,[%g1] ! fy = ZERO + add %g1,stridez,%g1 + ba .xbegin1 + add %o5,stridey,%o5 + +.yisnan: + fmuls %f8,%f8,%f8 ! 
fy = *py * *py; /* |Y| = Nan */ + st %f8,[%g1] + +.xspec_exit: + add %g1,stridez,%g1 + ba .xbegin1 + add %o5,stridey,%o5 + + .align 16 +.xupdate0: + cmp counter,0 + ble .xcont0 + fzeros %f7 + + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont0 + or %g0,0,counter + + .align 16 +.xupdate1: + cmp counter,1 + ble .xcont1 + fzeros %f5 + + sub counter,1,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont1 + or %g0,1,counter + + .align 16 +.xupdate2: + cmp counter,2 + ble .xcont2 + fzeros %f7 + + sub counter,2,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont2 + or %g0,2,counter + + .align 16 +.xupdate3: + cmp counter,3 + ble .xcont3 + fzeros %f5 + + sub counter,3,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont3 + or %g0,3,counter + + .align 16 +.xupdate4: + cmp counter,4 + ble .xcont4 + fzeros %f7 + + sub counter,4,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont4 + or %g0,4,counter + + .align 16 +.xupdate5: + cmp counter,5 + ble .xcont5 + fzeros %f5 + + sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont5 + or %g0,5,counter + + .align 16 +.xupdate6: + cmp counter,5 + ble .xcont6 + fzeros %f7 + + sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont6 + or %g0,5,counter + + .align 16 +.xupdate7: + cmp counter,2 + ble .xcont7 + fzeros %f5 + + sub counter,2,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont7 + or %g0,2,counter + + .align 16 +.xupdate8: + cmp counter,3 + ble .xcont8 + fzeros %f7 + + sub counter,3,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont8 + or %g0,3,counter + + .align 16 +.xupdate9: + cmp counter,4 + ble .xcont9 + fzeros %f5 + + sub counter,4,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont9 + or %g0,4,counter + + .align 16 +.xupdate10: + cmp counter,5 + ble .xcont10 + fzeros %f7 + 
+ sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont10 + or %g0,5,counter + + .align 16 +.xupdate11: + cmp counter,5 + ble .xcont11 + fzeros %f5 + + sub counter,5,counter + stx %o5,[%fp+tmp_py] + + st counter,[%fp+tmp_counter] + ba .xcont11 + or %g0,5,counter + + SET_SIZE(__vpowf) + diff --git a/usr/src/libm/src/mvec/vis/__vrhypot.S b/usr/src/libm/src/mvec/vis/__vrhypot.S new file mode 100644 index 0000000..07954d6 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vrhypot.S @@ -0,0 +1,3878 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vrhypot.S 1.7 06/01/23 SMI" + + .file "__vrhypot.S" + +#include "libm.h" + + RO_DATA + .align 64 + +/* Reciprocal seed table (called dll1 in the algorithm notes): 128 words, entry i holding the high 32 bits of the double 2**1023 / (1 + i/128). It is indexed with (hi(dres) >> 11) & 0x1fc and the exponent of dres is then removed with fpsub32. The 8-byte constants that follow the table start at offset TBL_SHIFT = 128 entries of 4 bytes. */ +.CONST_TBL: + .word 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, + .word 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, + .word 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, + .word 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, + .word 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, + .word 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, + .word 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, + .word 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, + .word 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, + .word 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, + .word 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, + .word 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, + .word 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, + .word 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, + .word 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, + .word 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, + .word 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, + .word 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539, + .word 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, + .word 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, + .word 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, + .word 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, + .word 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, + .word 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, + .word 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, + .word 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, + .word 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, + .word 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, + .word 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, + .word 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, + .word 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, + .word 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, + + .word 0x42300000, 0 ! D2ON36 = 2**36 + .word 0xffffff00, 0 !
DA0 + .word 0xfff00000, 0 ! DA1 + .word 0x3ff00000, 0 ! DONE = 1.0 + .word 0x40000000, 0 ! DTWO = 2.0 + .word 0x7fd00000, 0 ! D2ON1022 + .word 0x3cb00000, 0 ! D2ONM52 + .word 0x43200000, 0 ! D2ON51 + .word 0x0007ffff, 0xffffffff ! 0x0007ffffffffffff + +#define stridex %l2 +#define stridey %l3 +#define stridez %l5 + +#define TBL_SHIFT 512 + +#define TBL %l1 +#define counter %l4 + +#define _0x7ff00000 %l0 +#define _0x00100000 %o5 +#define _0x7fffffff %l6 + +#define D2ON36 %f4 +#define DTWO %f6 +#define DONE %f8 +#define DA0 %f58 +#define DA1 %f56 + +#define dtmp0 STACK_BIAS-0x80 +#define dtmp1 STACK_BIAS-0x78 +#define dtmp2 STACK_BIAS-0x70 +#define dtmp3 STACK_BIAS-0x68 +#define dtmp4 STACK_BIAS-0x60 +#define dtmp5 STACK_BIAS-0x58 +#define dtmp6 STACK_BIAS-0x50 +#define dtmp7 STACK_BIAS-0x48 +#define dtmp8 STACK_BIAS-0x40 +#define dtmp9 STACK_BIAS-0x38 +#define dtmp10 STACK_BIAS-0x30 +#define dtmp11 STACK_BIAS-0x28 +#define dtmp12 STACK_BIAS-0x20 +#define dtmp13 STACK_BIAS-0x18 +#define dtmp14 STACK_BIAS-0x10 +#define dtmp15 STACK_BIAS-0x08 + +#define ftmp0 STACK_BIAS-0x100 +#define tmp_px STACK_BIAS-0x98 +#define tmp_py STACK_BIAS-0x90 +#define tmp_counter STACK_BIAS-0x88 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x100 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! hx0 = *(int*)px; +! hy0 = *(int*)py; +! +! ((float*)&x0)[0] = ((float*)px)[0]; +! ((float*)&x0)[1] = ((float*)px)[1]; +! ((float*)&y0)[0] = ((float*)py)[0]; +! ((float*)&y0)[1] = ((float*)py)[1]; +! +! hx0 &= 0x7fffffff; +! hy0 &= 0x7fffffff; +! +! diff0 = hy0 - hx0; +! j0 = diff0 >> 31; +! j0 &= diff0; +! j0 = hy0 - j0; +! j0 &= 0x7ff00000; +! +! j0 = 0x7ff00000 - j0; +! ll = (long long)j0 << 32; +! *(long long*)&scl0 = ll; +! +! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! +! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0; +!
else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0; +! else res0 = fabs(x0) * fabs(y0); +! +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! if ( hx0 < 0x00100000 && hy0 < 0x00100000 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! ii = hx0 | hy0; +! ii |= lx; +! ii |= ly; +! if ( ii == 0 ) +! { +! res0 = 1.0 / 0.0; +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! x0 = fabs(x0); +! y0 = fabs(y0); +! if ( hx0 < 0x00080000 ) +! { +! x0 = *(long long*)&x0; +! } +! else +! { +! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; +! x0 = vis_fand(x0, dtmp0); +! x0 = *(long long*)&x0; +! x0 += D2ON51; +! } +! x0 *= D2ONM52; +! if ( hy0 < 0x00080000 ) +! { +! y0 = *(long long*)&y0; +! } +! else +! { +! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; +! y0 = vis_fand(y0, dtmp0); +! y0 = *(long long*)&y0; +! y0 += D2ON51; +! } +! y0 *= D2ONM52; +! *(long long*)&scl0 = 0x7fd0000000000000ULL; +! } +! else +! { +! x0 *= scl0; +! y0 *= scl0; +! } +! +! x_hi0 = x0 + D2ON36; +! y_hi0 = y0 + D2ON36; +! x_hi0 -= D2ON36; +! y_hi0 -= D2ON36; +! x_lo0 = x0 - x_hi0; +! y_lo0 = y0 - y_hi0; +! res0_hi = x_hi0 * x_hi0; +! dtmp0 = y_hi0 * y_hi0; +! res0_hi += dtmp0; +! res0_lo = x0 + x_hi0; +! res0_lo *= x_lo0; +! dtmp1 = y0 + y_hi0; +! dtmp1 *= y_lo0; +! res0_lo += dtmp1; +! +! dres = res0_hi + res0_lo; +! dexp0 = vis_fand(dres,DA1); +! iarr = ((int*)&dres)[0]; +! +! iarr >>= 11; +! iarr &= 0x1fc; +! dtmp0 = ((double*)((char*)dll1 + iarr))[0]; +! dd = vis_fpsub32(dtmp0, dexp0); +! +! dtmp0 = dd * dres; +! dtmp0 = DTWO - dtmp0; +! dd *= dtmp0; +! dtmp1 = dd * dres; +! dtmp1 = DTWO - dtmp1; +! dd *= dtmp1; +! dtmp2 = dd * dres; +! dtmp2 = DTWO - dtmp2; +! dres = dd * dtmp2; +! +! res0 = vis_fand(dres,DA0); +! +! dtmp0 = res0_hi * res0; +! dtmp0 = DONE - dtmp0; +! 
dtmp1 = res0_lo * res0; +! dtmp0 -= dtmp1; +! dtmp0 *= dres; +! res0 += dtmp0; +! +! res0 = sqrt ( res0 ); +! +! res0 = scl0 * res0; +! +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrhypot) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l1) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + sll %i2,3,stridex + sethi %hi(0x7ff00000),_0x7ff00000 + st %i0,[%fp+tmp_counter] + + sll %i4,3,stridey + sethi %hi(0x00100000),_0x00100000 + stx %i1,[%fp+tmp_px] + + sll stridez,3,stridez + sethi %hi(0x7ffffc00),_0x7fffffff + stx %i3,[%fp+tmp_py] + + ldd [TBL+TBL_SHIFT],D2ON36 + add _0x7fffffff,1023,_0x7fffffff + + ldd [TBL+TBL_SHIFT+8],DA0 + + ldd [TBL+TBL_SHIFT+16],DA1 + + ldd [TBL+TBL_SHIFT+24],DONE + + ldd [TBL+TBL_SHIFT+32],DTWO + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i4 + ldx [%fp+tmp_py],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + add %i4,stridex,%i1 + + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + + and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 ) + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; +.cont_spec0: + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + + sub %l0,%o4,%g1 ! 
(7_0) j0 = 0x7ff00000 - j0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; +.cont_spec1: + lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; + mov %i1,%i2 + + lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; + + and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; + mov %i0,%o0 + + cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) + and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; + + cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; + bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) + sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; + + cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 + + and %o1,%o3,%o1 ! (0_0) j0 &= diff0; + bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) + sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; +.cont0: + and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; + + sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; +.cont1: + sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; + + ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; + + lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; + + lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; + add %i4,stridex,%i1 ! px += stridex + + fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; + + lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; + + add %i0,stridey,%i3 ! py += stridey + faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! 
(1_0) hx0 ? 0x7ff00000 + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + + and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; + bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; + bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (1_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; + bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; +.cont4: + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; + ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; + + lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; + + lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; + faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; + + lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; + mov %i1,%i2 + + faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%g1 ! 
(2_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; + + faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 + bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + + and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (2_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; + bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; +.cont7: + sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; +.cont8: + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; + ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 !
(1_0) ((float*)&y0)[1] = ((float*)py)[1]; + + ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; + add %i4,stridex,%i1 ! px += stridex + fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; + sra %o2,11,%i3 ! (7_1) iarr >>= 11; + faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; + + add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; + + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; + + faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) + fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (3_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; + bl,pn %icc,.update11 ! 
(3_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; +.cont11: + sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; +.cont12: + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; + ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; + + lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; + + lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; + + lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; + sra %o2,11,%o4 ! (0_0) iarr >>= 11; + faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; + + add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr + mov %i1,%i2 + lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; + + ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; + faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fpsub32 %f28,%f2,%f28 ! 
(0_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; + bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 ) + st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (4_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; + bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; +.cont15: + sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; +.cont16: + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; + ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; + + lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; + add %i1,stridex,%i4 ! 
px += stridex + + fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; + add %i4,stridex,%i1 ! px += stridex + fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; + sra %o2,11,%i3 ! (1_0) iarr >>= 11; + faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; + fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; + + add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; + + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; + faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (5_0) j0 &= diff0; + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (5_0) hx0 ? 
0x00100000 + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; + bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; +.cont19a: + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; +.cont19b: + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; +.cont20: + fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; + ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; + lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; + + lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; + + lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; + fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; + sra %o2,11,%o4 ! (2_0) iarr >>= 11; + faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; + fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; + + add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr + mov %i1,%i2 + lda [%i1]0x82,%o1 ! 
(6_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; + ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; + faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (6_0) j0 &= diff0; + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; + bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; +.cont23a: + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; +.cont23b: + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! 
(6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; +.cont24: + fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; + ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; + faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; + + fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; + fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; + sra %o2,11,%i3 ! (3_0) iarr >>= 11; + faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; + fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; + + fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f52,D2ON36,%f12 ! 
(5_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; + faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; + + fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; +.cont27a: + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; +.cont27b: + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; +.cont28: + fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; + ldd [%fp+dtmp13],%f62 ! 
(6_0) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; + + fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; + lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; + + lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; + fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; + ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; + sra %o2,11,%o4 ! (4_0) iarr >>= 11; + faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; + fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; + + cmp counter,8 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,8,counter + + .align 16 +.main_loop: + fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; + + faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; + st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; + fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; + + fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 
0x7ff00000 + st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; + st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; + bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; + bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (0_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 + bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); +.cont31: + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; + nop + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; +.cont32: + fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; + sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; + faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; + nop + bn,pn %icc,.exit + fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f30 ! 
(6_1) res0_hi += dtmp0; + + nop + nop + lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + nop + lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; + fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; + nop + ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; + sra %o2,11,%i3 ! (5_1) iarr >>= 11; + nop + faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; + + fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); + lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; + add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; + + faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; + st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; + fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f14 ! 
(4_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 + st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; + bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (1_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; + bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; +.cont35a: + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; +.cont35b: + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; +.cont36: + fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp1],%f62 ! 
(0_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; + fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; + nop + ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; + sra %o2,11,%g1 ! (6_1) iarr >>= 11; + nop + faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; + + nop + and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; + bn,pn %icc,.exit + fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); + lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; + add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr + fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; + + faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 
0x7ff00000 + st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; + bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (2_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; + bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; +.cont39a: + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; +.cont39b: + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; +.cont40: + fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f32 !
(0_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; + nop + lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + nop + lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; + nop + ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; + fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; + sra %o2,11,%i3 ! (7_1) iarr >>= 11; + nop + faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; + fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; + + faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fmuld %f50,%f24,%f0 ! 
(2_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 + st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; + bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (3_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; + bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; +.cont43a: + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; +.cont43b: + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 +.cont44: + fmuld %f62,%f0,%f0 ! 
(1_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; + nop + ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; + sra %o2,11,%o4 ! (0_0) iarr >>= 11; + nop + faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; + + fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; + nop + and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; + faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 !
(4_0) hx0 ? 0x7ff00000 + st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; + fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (4_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 + bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); +.cont47a: + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; +.cont47b: + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; +.cont48: + fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp7],%f62 ! 
(3_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0; + nop + lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; + + lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; + add %i1,stridex,%i4 ! px += stridex + nop + bn,pn %icc,.exit + + fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; + fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; + nop + ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; + sra %o2,11,%i3 ! (1_0) iarr >>= 11; + nop + faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; + + fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; + nop + faddd %f30,%f40,%f14 ! 
(2_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 + st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; + fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (5_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 + bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); +.cont51a: + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; +.cont51b: + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! 
(3_0) y_lo0 = y0 - y_hi0; +.cont52: + fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; + nop + ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; + sra %o2,11,%o4 ! (2_0) iarr >>= 11; + nop + faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; + + fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; + nop + faddd %f44,%f38,%f22 ! 
(3_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 + st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (6_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 + bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); +.cont55a: + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; +.cont55b: + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; +.cont56: + fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; + faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; + nop + nop + fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; + nop + nop + fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; + + fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; + + nop + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; + nop + ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; + fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; + sra %o2,11,%i3 ! (3_0) iarr >>= 11; + nop + faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; + + fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; + + fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! 
(7_0) hx0 &= 0x7fffffff; + nop + faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; + + fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); +.cont59a: + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; +.cont59b: + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! 
(1_0) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; +.cont60: + fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; + + fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; + nop + lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; + bn,pn %icc,.exit + + lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; + nop + nop + fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; + fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; + nop + ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; + sra %o2,11,%o4 ! (4_0) iarr >>= 11; + nop + faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; + subcc counter,8,counter ! counter -= 8; + bpos,pt %icc,.main_loop + fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; + + add counter,8,counter + +.tail: + subcc counter,1,counter + bneg .begin + nop + + fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; + ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + + fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; + st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; + faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; + + subcc counter,1,counter + st %f1,[%i5+4] ! 
(7_2) ((float*)pz)[1] = ((float*)&res0)[1]; + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; + st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; + + fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; + + fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; + + fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; + + fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; + fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); + + ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; + + sra %o2,11,%i3 ! (5_1) iarr >>= 11; + + and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; + fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; + + fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); + add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; + ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + + fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; + fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); + + st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fand %f28,DA0,%f48 ! 
(3_1) res0 = vis_fand(dres,DA0); + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; + fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; + + fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; + + fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; + + ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; + + fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); + fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; + + fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; + + st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + subcc counter,1,counter + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; + + fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; + + ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; + fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; + + fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); + + fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; + + fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; + + st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! 
(4_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; + + ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; + + fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); + + fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; + + fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; + st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; + + fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); + + fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; + st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; + + fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; + st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + + ba .begin + add %i5,stridez,%i5 + + .align 16 +.spec0: + cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000 + bne 1f ! if ( hx0 != 0x7ff00000 ) + ld [%i4+4],%i2 ! lx = ((int*)px)[1]; + + cmp %i2,0 ! lx ? 0 + be 3f ! if ( lx == 0 ) + nop +1: + cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000 + bne 2f ! if ( hy0 != 0x7ff00000 ) + ld [%i3+4],%o2 ! ly = ((int*)py)[1]; + + cmp %o2,0 ! ly ? 0 + be 3f ! if ( ly == 0 ) +2: + ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; + ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; + + ld [%i3],%f2 !
((float*)&y0)[0] = ((float*)py)[0]; + add %i4,stridex,%i4 ! px += stridex + ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; + + fabsd %f0,%f0 + + fabsd %f2,%f2 + + fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0); + add %i3,stridey,%i3 ! py += stridey; + st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; + + st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; + add %i5,stridez,%i5 ! pz += stridez + ba .begin1 + sub counter,1,counter +3: + add %i4,stridex,%i4 ! px += stridex + add %i3,stridey,%i3 ! py += stridey + st %g0,[%i5] ! ((int*)pz)[0] = 0; + + st %g0,[%i5+4] ! ((int*)pz)[1] = 0; (both words are stored before pz advances) + add %i5,stridez,%i5 ! pz += stridez; + ba .begin1 + sub counter,1,counter + + .align 16 +.spec1: + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + + cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000 + bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 < 0x00100000 ) + + ld [%i4+4],%i2 ! lx = ((int*)px)[1]; + or %o7,%l7,%g5 ! ii = hx0 | hy0; + fzero %f0 + + ld [%i3+4],%o2 ! ly = ((int*)py)[1]; + or %i2,%g5,%g5 ! ii |= lx; + + orcc %o2,%g5,%g5 ! ii |= ly; + bnz,a,pn %icc,1f ! if ( ii != 0 ) + sethi %hi(0x00080000),%i2 + + fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0; + + st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; + + add %i4,stridex,%i4 ! px += stridex; + add %i3,stridey,%i3 ! py += stridey; + st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; + + add %i5,stridez,%i5 ! pz += stridez; + ba .begin1 + sub counter,1,counter +1: + ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; + + ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; + + ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; + + fabsd %f0,%f0 ! x0 = fabs(x0); + ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; + + ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; + add %fp,dtmp2,%i4 + add %fp,dtmp3,%i3 + + fabsd %f2,%f2 ! y0 = fabs(y0); + ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51 + + ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52 + cmp %o7,%i2 ! hx0 ? 0x00080000 + bl,a 1f ! if ( hx0 < 0x00080000 ) + fxtod %f0,%f0 !
x0 = *(long long*)&x0; + + fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0); + fxtod %f0,%f0 ! x0 = *(long long*)&x0; + faddd %f0,%f10,%f0 ! x0 += D2ON51; +1: + std %f0,[%i4] + + ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022 + cmp %l7,%i2 ! hy0 ? 0x00080000 + bl,a 1f ! if ( hy0 < 0x00080000 ) + fxtod %f2,%f2 ! y0 = *(long long*)&y0; + + fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0); + fxtod %f2,%f2 ! y0 = *(long long*)&y0; + faddd %f2,%f10,%f2 ! y0 += D2ON51; +1: + std %f2,[%i3] + + stx %g5,[%fp+dtmp15] ! D2ONM52 + + ba .cont_spec1 + stx %g1,[%fp+dtmp0] ! D2ON1022 + + .align 16 +.update0: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + ba .cont1 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update1: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont0 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,1 + ble,a 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 1,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + ba .cont1 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update2: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update3: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + fmuld %f20,%f20,%f2 ! 
(7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update4: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 < 0x00100000 ) + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; + + cmp counter,2 + ble,a 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 2,counter + stx %i3,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update5: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update6: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update7: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 
0x00100000 + bge,pn %icc,.cont7 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,3 + ble,a 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 3,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update9: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update10: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update11: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont11 ! 
(0_0) if ( hy0 < 0x00100000 ) + + cmp counter,4 + ble,a 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 4,counter + stx %i3,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update13: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update14: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update15: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont15 ! 
(0_0) if ( hy0 < 0x00100000 ) + + cmp counter,5 + ble,a 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 5,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update17: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont20 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update18: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! 
(7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont20 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update19: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont19a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,6 + ble,a 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 6,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + ba .cont19b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update21: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont24 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update22: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont24 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update23: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont23a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,7 + ble,a 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 7,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + ba .cont23b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update25: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fsubd %f12,D2ON36,%f54 ! 
(5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont28 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update26: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! 
(7_0) *(long long*)&scl0 = ll; + ba .cont28 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update27: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont27a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,8 + ble,a 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 8,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + ba .cont27b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update29: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update30: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! 
(6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update31: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,1 + ble,a 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 1,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update33: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! 
(4_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + ba .cont36 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update34: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + ba .cont36 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update35: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,2 + ble,a 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 2,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(5_1) dtmp0 = DTWO - dtmp0; + + ba .cont35b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update37: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + ba .cont40 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update38: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! 
(6_1) dtmp0 = DTWO - dtmp0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + ba .cont40 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update39: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont39a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,3 + ble,a 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 3,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; + + ba .cont39b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update41: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! 
(5_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + ba .cont44 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update42: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + ba .cont44 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update43: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,4 + ble,a 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 4,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(7_1) dtmp0 = DTWO - dtmp0; + + ba .cont43b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update45: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + ba .cont48 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update46: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! 
(2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + ba .cont48 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update47: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont47a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,5 + ble,a 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 5,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + ba .cont47b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update49: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! 
(4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont52 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update50: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! 
(5_0) *(long long*)&scl0 = ll; + ba .cont52 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update51: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont51a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,6 + ble,a 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 6,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + ba .cont51b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update53: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont56 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update54: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont56 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update55: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont55a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,7 + ble,a 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 7,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! 
(4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + ba .cont55b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update57: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont60 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update58: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! 
(5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont60 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update59: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont59a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,8 + ble,a 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 8,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(3_0) dtmp0 = DTWO - dtmp0; + + ba .cont59b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.exit: + ret + restore + SET_SIZE(__vrhypot) + diff --git a/usr/src/libm/src/mvec/vis/__vrhypotf.S b/usr/src/libm/src/mvec/vis/__vrhypotf.S new file mode 100644 index 0000000..8db59bc --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vrhypotf.S @@ -0,0 +1,1518 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vrhypotf.S 1.5 06/01/23 SMI" + + .file "__vrhypotf.S" + +#include "libm.h" + + RO_DATA + .align 64 +.CONST_TBL: +! i = [0,63] +! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); +! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); +! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); +! 
TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); + + .word 0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd, + .word 0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03, + .word 0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2, + .word 0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671, + .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911, + .word 0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342, + .word 0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a, + .word 0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9, + .word 0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555, + .word 0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54, + .word 0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70, + .word 0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032, + .word 0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74, + .word 0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92, + .word 0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f, + .word 0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3, + .word 0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f, + .word 0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199, + .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577, + .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58, + .word 0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03, + .word 0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37, + .word 0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e, + .word 0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92, + .word 0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826, + .word 0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0, + .word 0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91, + .word 0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50, + .word 0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e, + .word 0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428, + .word 0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4, + .word 0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5, + .word 0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c, + .word 0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55, + 
.word 0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492, + .word 0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a, + .word 0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a, + .word 0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d, + .word 0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9, + .word 0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3, + .word 0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896, + .word 0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f, + .word 0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9, + .word 0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee, + .word 0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4, + .word 0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62, + .word 0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db, + .word 0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253, + .word 0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a, + .word 0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26, + .word 0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad, + .word 0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c, + .word 0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc, + .word 0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412, + .word 0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488, + .word 0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499, + .word 0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db, + .word 0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438, + .word 0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a, + .word 0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa, + .word 0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d, + .word 0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72, + .word 0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a, + .word 0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9, + .word 0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000, + .word 0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9, + .word 0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b, + .word 0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc, + .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c, + .word 0x3fedae60, 0x76b981db, 
0x3fced19b, 0x75e78957, + .word 0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2, + .word 0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc, + .word 0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66, + .word 0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350, + .word 0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549, + .word 0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d, + .word 0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937, + .word 0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86, + .word 0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213, + .word 0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358, + .word 0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9, + .word 0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c, + .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2, + .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b, + .word 0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39, + .word 0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118, + .word 0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347, + .word 0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11, + .word 0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550, + .word 0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e, + .word 0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169, + .word 0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394, + .word 0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a, + .word 0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c, + .word 0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7, + .word 0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899, + .word 0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e, + .word 0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee, + .word 0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458, + .word 0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588, + .word 0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a, + .word 0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54, + .word 0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44, + .word 0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31, + .word 0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c, + .word 
0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96, + .word 0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009, + .word 0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3, + .word 0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426, + .word 0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6, + .word 0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d, + .word 0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2, + .word 0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7, + .word 0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d, + .word 0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1, + .word 0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5, + .word 0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88, + .word 0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72, + .word 0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729, + .word 0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea, + .word 0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098, + .word 0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746, + .word 0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5, + .word 0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f, + .word 0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467, + .word 0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1, + .word 0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d, + .word 0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6, + + .word 0x000fffff, 0xffffffff ! DC0 + .word 0x3ff00000, 0 ! DC1 + .word 0x7fffc000, 0 ! DC2 + .word 0x7fe00000, 0 ! DA0 + .word 0x60000000, 0 ! DA1 + .word 0x80808080, 0x3f800000 ! SCALE , FONE = 1.0f + .word 0x3fefffff, 0xfee7f18f ! KA0 = 9.99999997962321453275e-01 + .word 0xbfdfffff, 0xfe07e52f ! KA1 = -4.99999998166077580600e-01 + .word 0x3fd80118, 0x0ca296d9 ! KA2 = 3.75066768969515586277e-01 + .word 0xbfd400fc, 0x0bbb8e78 ! 
KA3 = -3.12560092408808548438e-01 + +#define _0x7f800000 %o0 +#define _0x7fffffff %o7 +#define TBL %l2 + +#define TBL_SHIFT 2048 + +#define stridex %l3 +#define stridey %l4 +#define stridez %l5 +#define counter %i0 + +#define DA0 %f52 +#define DA1 %f44 +#define SCALE %f6 + +#define DC0 %f46 +#define DC1 %f8 +#define FZERO %f9 +#define DC2 %f50 + +#define KA3 %f56 +#define KA2 %f58 +#define KA1 %f60 +#define KA0 %f54 + +#define tmp_counter STACK_BIAS-0x04 +#define tmp_px STACK_BIAS-0x20 +#define tmp_py STACK_BIAS-0x18 + +#define ftmp0 STACK_BIAS-0x10 +#define ftmp1 STACK_BIAS-0x0c +#define ftmp2 STACK_BIAS-0x10 +#define ftmp3 STACK_BIAS-0x0c +#define ftmp4 STACK_BIAS-0x08 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! x0 = *px; +! ax = *(int*)px; +! +! y0 = *py; +! ay = *(int*)py; +! +! ax &= 0x7fffffff; +! ay &= 0x7fffffff; +! +! px += stridex; +! py += stridey; +! +! if ( ax >= 0x7f800000 || ay >= 0x7f800000 ) +! { +! *pz = fabsf(x0) * fabsf(y0); +! if( ax == 0x7f800000 ) *pz = 0.0f; +! else if( ay == 0x7f800000 ) *pz = 0.0f; +! pz += stridez; +! continue; +! } +! +! if ( ay == 0 ) +! { +! if ( ax == 0 ) +! { +! *pz = 1.0f / 0.0f; +! pz += stridez; +! continue; +! } +! } +! +! hyp0 = x0 * (double)x0; +! dtmp0 = y0 * (double)y0; +! hyp0 += dtmp0; +! +! ibase0 = ((int*)&hyp0)[0]; +! +! dbase0 = vis_fand(hyp0,DA0); +! dbase0 = vis_fmul8x16(SCALE, dbase0); +! dbase0 = vis_fpsub32(DA1,dbase0); +! +! hyp0 = vis_fand(hyp0,DC0); +! hyp0 = vis_for(hyp0,DC1); +! h_hi0 = vis_fand(hyp0,DC2); +! +! ibase0 >>= 10; +! si0 = ibase0 & 0x7f0; +! xx0 = ((double*)((char*)TBL + si0))[0]; +! +! dtmp1 = hyp0 - h_hi0; +! xx0 = dtmp1 * xx0; +! res0 = ((double*)((char*)TBL + si0))[1]; +! dtmp2 = KA3 * xx0; +! dtmp2 += KA2; +! dtmp2 *= xx0; +! dtmp2 += KA1; +! dtmp2 *= xx0; +! dtmp2 += KA0; +! res0 *= dtmp2; +! res0 *= dbase0; +! ftmp0 = (float)res0; +!
*pz = ftmp0; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrhypotf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + stx %i1,[%fp+tmp_px] + sll %i2,2,stridex + + stx %i3,[%fp+tmp_py] + sll %i4,2,stridey + + st %i0,[%fp+tmp_counter] + sll stridez,2,stridez + mov %i5,%o1 + + ldd [TBL+TBL_SHIFT],DC0 + ldd [TBL+TBL_SHIFT+8],DC1 + ldd [TBL+TBL_SHIFT+16],DC2 + ldd [TBL+TBL_SHIFT+24],DA0 + ldd [TBL+TBL_SHIFT+32],DA1 + ldd [TBL+TBL_SHIFT+40],SCALE + ldd [TBL+TBL_SHIFT+48],KA0 + + ldd [TBL+TBL_SHIFT+56],KA1 + sethi %hi(0x7f800000),%o0 + + ldd [TBL+TBL_SHIFT+64],KA2 + sethi %hi(0x7ffffc00),%o7 + + ldd [TBL+TBL_SHIFT+72],KA3 + add %o7,1023,%o7 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%o4 + ldx [%fp+tmp_py],%i2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i2]0x82,%l6 ! (3_0) ay = *(int*)py; + + lda [%o4]0x82,%i5 ! (3_0) ax = *(int*)px; + + lda [%i2]0x82,%f2 ! (3_0) y0 = *py; + and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; + + and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; + cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 + bge,pn %icc,.spec0 ! (3_0) if ( ay >= 0x7f800000 ) + lda [%o4]0x82,%f4 ! (3_0) x0 = *px; + + cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.spec0 ! (3_0) if ( ax >= 0x7f800000 ) + nop + + cmp %l6,0 ! (3_0) + be,pn %icc,.spec1 ! (3_0) if ( ay == 0 ) + fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; +.cont_spec1: + lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; + lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; + + add %o4,stridex,%l0 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; + + and %i5,_0x7fffffff,%i5 ! 
(4_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (4_0) y0 = *py; + + faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 + + bge,pn %icc,.update0 ! (4_0) if ( ay >= 0x7f800000 ) + lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; +.cont0: + cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 + bge,pn %icc,.update1 ! (4_0) if ( ax >= 0x7f800000 ) + st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; +.cont1: + cmp %l6,0 ! (4_1) ay ? 0 + be,pn %icc,.update2 ! (4_1) if ( ay == 0 ) + fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; +.cont2: + lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0; + lda [%l0+stridex]0x82,%i5 ! (0_0) ax = *(int*)px; + + add %l0,stridex,%i1 ! px += stridex + + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; + + and %i5,_0x7fffffff,%i5 ! (0_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (0_0) y0 = *py; + + cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 + bge,pn %icc,.update3 ! (0_0) if ( ay >= 0x7f800000 ) + faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; +.cont3: + lda [%i1]0x82,%f4 ! (0_0) x0 = *px; + + cmp %i5,_0x7f800000 ! (0_0) ax ? 0x7f800000 + bge,pn %icc,.update4 ! (0_0) if ( ax >= 0x7f800000 ) + st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; +.cont4: + cmp %l6,0 ! (0_0) ay ? 0 + be,pn %icc,.update5 ! (0_0) if ( ay == 0 ) + fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0; +.cont5: + lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; + lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; + + add %i1,stridex,%g5 ! px += stridex + + add %i2,stridey,%o3 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; + fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; + lda [%o3]0x82,%f2 ! (1_0) y0 = *py; + + faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (1_0) ay ? 
0x7f800000 + + lda [%g5]0x82,%f4 ! (1_0) x0 = *px; + bge,pn %icc,.update6 ! (1_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); +.cont6: + cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 + bge,pn %icc,.update7 ! (1_0) if ( ax >= 0x7f800000 ) + ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; +.cont7: + st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; + + cmp %l6,0 ! (1_0) ay ? 0 + be,pn %icc,.update8 ! (1_0) if ( ay == 0 ) + fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2); +.cont8: + fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; + sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; + + and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; + lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; + add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 + lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; + + add %g5,stridex,%i4 ! px += stridex + ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0]; + + and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; + add %o3,stridey,%i2 ! py += stridey + fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (2_0) y0 = *py; + + faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 + fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; + + lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; + bge,pn %icc,.update9 ! (2_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); +.cont9: + cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 + bge,pn %icc,.update10 ! (2_0) if ( ax >= 0x7f800000 ) + ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; +.cont10: + st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; + + fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (2_0) ay ? 0 + be,pn %icc,.update11 ! (2_0) if ( ay == 0 ) + fand %f28,DC2,%f30 !
(4_1) h_hi0 = vis_fand(hyp0,DC2); +.cont11: + fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; + sra %i3,10,%i3 ! (4_1) ibase0 >>= 10; + + and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; + add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0 + lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; + + add %i4,stridex,%o4 ! px += stridex + ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; + + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; + fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (3_0) y0 = *py; + + faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 + fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; + lda [%o4]0x82,%f4 ! (3_0) x0 = *px; + bge,pn %icc,.update12 ! (3_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); +.cont12: + cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 + bge,pn %icc,.update13 ! (3_0) if ( ax >= 0x7f800000 ) + ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0]; +.cont13: + st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; + + fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (3_0) + be,pn %icc,.update14 ! (3_0) if ( ay == 0 ) + fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); +.cont14: + fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; + sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; + faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1; + + and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; + + fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; + add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 + lda [stridex+%o4]0x82,%i5 ! 
(4_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; + + add %o4,stridex,%l0 ! px += stridex + ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; + fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); + + and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (4_0) y0 = *py; + fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 + ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; + + fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; + lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; + bge,pn %icc,.update15 ! (4_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); +.cont15: + fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 + ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; + + bge,pn %icc,.update16 ! (4_0) if ( ax >= 0x7f800000 ) + st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; +.cont16: + fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; + fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2); + + mov %o1,%i4 + cmp counter,5 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,5,counter + + .align 16 +.main_loop: + fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; + sra %i1,10,%o2 ! (1_1) ibase0 >>= 10; + cmp %l6,0 ! (4_1) ay ? 0 + faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1; + + fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2; + and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! 
(4_1) dtmp0 = y0 * (double)y0; + add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0 + lda [%l0+stridex]0x82,%o1 ! (0_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0; + + add %l0,stridex,%i1 ! px += stridex + ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0]; + be,pn %icc,.update17 ! (4_1) if ( ay == 0 ) + faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2; +.cont17: + fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; + fand %f18,DC0,%f30 ! (2_1) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0; + and %o1,_0x7fffffff,%o1 ! (0_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (0_0) y0 = *py; + fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0); + + faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 + ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0; + lda [%i1]0x82,%f4 ! (0_0) x0 = *px; + bge,pn %icc,.update18 ! (0_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (2_1) hyp0 = vis_for(hyp0,DC1); +.cont18: + fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %o1,_0x7f800000 ! (0_0) ax ? 0x7f800000 + ld [%fp+ftmp3],%l0 ! (2_1) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0; + + add %i4,stridez,%i3 ! pz += stridez + st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update19 ! (0_0) if ( ax >= 0x7f800000 ) + fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0; +.cont19: + fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (0_0) ay ? 0 + st %f1,[%i4] ! (3_2) *pz = ftmp0; + fand %f28,DC2,%f30 ! (2_1) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0; + sra %l0,10,%i4 ! (2_1) ibase0 >>= 10; + be,pn %icc,.update20 ! (0_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! 
(0_1) dtmp2 += KA1; +.cont20: + fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2; + and %i4,2032,%g1 ! (2_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; + add %g1,TBL,%l0 ! (2_1) (char*)TBL + si0 + lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (2_1) dtmp1 = hyp0 - h_hi0; + + nop + add %i1,stridex,%g5 ! px += stridex + ldd [TBL+%g1],%f42 ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0; + add %i2,stridey,%o3 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; + fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; + lda [%o3]0x82,%f2 ! (1_0) y0 = *py; + fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000 + ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f32 ! (2_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0; + lda [%g5]0x82,%f4 ! (1_0) x0 = *px; + bge,pn %icc,.update21 ! (1_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); +.cont21: + fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 + ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0 + + add %i3,stridez,%o1 ! pz += stridez + st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update22 ! (1_0) if ( ax >= 0x7f800000 ) + fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0; +.cont22: + fmuld KA3,%f32,%f34 ! (2_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (1_0) ay ? 0 + st %f1,[%i3] ! (4_2) *pz = ftmp0; + fand %f28,DC2,%f30 ! 
(3_1) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; + sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; + be,pn %icc,.update23 ! (1_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1; +.cont23: + fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2; + and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; + lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; + add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 + lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; + + nop + add %g5,stridex,%i4 ! px += stridex + ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (2_1) dtmp2 += KA2; + + fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0; + and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; + add %o3,stridey,%i2 ! py += stridey + fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (2_0) y0 = *py; + fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 + ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f32,%f10 ! (2_1) dtmp2 *= xx0; + lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; + bge,pn %icc,.update24 ! (2_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); +.cont24: + fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 + ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0; + + add %o1,stridez,%g1 ! pz += stridez + st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update25 !
(2_0) if ( ax >= 0x7f800000 ) + fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0; +.cont25: + fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (2_0) ay ? 0 + st %f1,[%o1] ! (0_1) *pz = ftmp0; + fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; + sra %i3,10,%i3 ! (4_1) ibase0 >>= 10; + be,pn %icc,.update26 ! (2_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! (2_1) dtmp2 += KA1; +.cont26: + fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2; + and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; + add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0 + lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; + + nop + add %i4,stridex,%o4 ! px += stridex + ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; + + fmuld %f40,%f32,%f40 ! (2_1) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; + fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (3_0) y0 = *py; + fand %f18,DA0,%f24 ! (2_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 + ldd [%l0+8],%f62 ! (2_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; + lda [%o4]0x82,%f4 ! (3_0) x0 = *px; + bge,pn %icc,.update27 ! (3_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); +.cont27: + fmul8x16 SCALE,%f24,%f24 ! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 + ld [%fp+ftmp1],%i1 ! 
(0_0) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f42 ! (2_1) dtmp2 += KA0; + + add %g1,stridez,%o3 ! pz += stridez + st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; + bge,pn %icc,.update28 ! (3_0) if ( ax >= 0x7f800000 ) + fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0; +.cont28: + fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; + cmp %l6,0 ! (3_0) + st %f1,[%g1] ! (1_1) *pz = ftmp0; + fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); + + fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; + sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; + be,pn %icc,.update29 ! (3_0) if ( ay == 0 ) + faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1; +.cont29: + fmuld %f62,%f42,%f38 ! (2_1) res0 *= dtmp2; + and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; + lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; + fpsub32 DA1,%f24,%f24 ! (2_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; + add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 + lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; + fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; + + add %o3,stridez,%i4 ! pz += stridez + add %o4,stridex,%l0 ! px += stridex + ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; + add %i2,stridey,%i2 ! py += stridey + and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; + fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); + + fmuld %f38,%f24,%f38 ! (2_1) res0 *= dbase0; + and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; + lda [%i2]0x82,%f2 ! (4_0) y0 = *py; + fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); + + faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; + cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 + ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; + + fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; + lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; + bge,pn %icc,.update30 ! 
(4_0) if ( ay >= 0x7f800000 ) + for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); +.cont30: + fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 + ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0]; + faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; + + bge,pn %icc,.update31 ! (4_0) if ( ax >= 0x7f800000 ) + st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; +.cont31: + subcc counter,5,counter ! counter -= 5; + fdtos %f38,%f1 ! (2_1) ftmp0 = (float)res0; + + fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; + st %f1,[%o3] ! (2_1) *pz = ftmp0; + bpos,pt %icc,.main_loop + fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2); + + add counter,5,counter + +.tail: + subcc counter,1,counter + bneg .begin + mov %i4,%o1 + + sra %i1,10,%o2 ! (1_1) ibase0 >>= 10; + faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1; + + fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2; + and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0; + fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0); + + add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0 + fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0; + + ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0]; + faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2; + + fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0; + + fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0; + fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0); + + ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1]; + fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0; + + fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0; + + fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0); + faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0; + + add %i4,stridez,%i3 ! pz += stridez + fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0; + + fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0; + st %f1,[%i4] ! (3_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + mov %i3,%o1 + + faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1; + + fmuld %f62,%f42,%f32 ! 
(4_2) res0 *= dtmp2; + fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0); + + + faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2; + + fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0; + + fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0; + fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0); + + ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1]; + + fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0; + + fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0 + + add %i3,stridez,%o1 ! pz += stridez + fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0; + + st %f1,[%i3] ! (4_2) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + nop + + faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1; + + fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2; + fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0; + + fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0; + fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0); + + ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1]; + + fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0); + faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0; + + add %o1,stridez,%g1 ! pz += stridez + fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0; + + st %f1,[%o1] ! (0_1) *pz = ftmp0; + + subcc counter,1,counter + bneg .begin + mov %g1,%o1 + + fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2; + fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0); + + fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0; + + fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0; + st %f1,[%g1] ! (1_1) *pz = ftmp0; + + ba .begin + add %g1,stridez,%o1 ! pz += stridez + + .align 16 +.spec0: + fabss %f2,%f2 ! fabsf(y0); + + fabss %f4,%f4 ! fabsf(x0); + + fcmps %f2,%f4 + + cmp %l6,_0x7f800000 ! ay ? 0x7f800000 + be,a 1f ! if( ay == 0x7f800000 ) + st %g0,[%o1] ! *pz = 0.0f; + + cmp %i5,_0x7f800000 ! ax ? 0x7f800000 + be,a 1f ! if( ax == 0x7f800000 ) + st %g0,[%o1] ! 
*pz = 0.0f; + + fmuls %f2,%f4,%f2 ! fabsf(x0) * fabsf(y0); + st %f2,[%o1] ! *pz = fabsf(x0) * fabsf(y0); +1: + add %o4,stridex,%o4 ! px += stridex; + add %i2,stridey,%i2 ! py += stridey; + + add %o1,stridez,%o1 ! pz += stridez; + ba .begin1 + sub counter,1,counter ! counter--; + + .align 16 +.spec1: + cmp %i5,0 ! ax ? 0 + bne,pt %icc,.cont_spec1 ! if ( ax != 0 ) + nop + + add %o4,stridex,%o4 ! px += stridex; + add %i2,stridey,%i2 ! py += stridey; + + fdivs %f7,%f9,%f2 ! 1.0f / 0.0f (NOTE(review): %f7 and %f9 are never written by name here; presumably halves of the constant pairs loaded at entry, confirm) + st %f2,[%o1] ! *pz = 1.0f / 0.0f; + + add %o1,stridez,%o1 ! pz += stridez; + ba .begin1 + sub counter,1,counter ! counter--; + + .align 16 +.update0: + cmp counter,1 ! is the flagged element (index 1 of this pass) within counter? + ble .cont0 ! no: it lies past the requested count, do not defer + ld [TBL+TBL_SHIFT+44],%f2 ! (delay slot, always executed) y0 = word at TBL+TBL_SHIFT+44 + + sub counter,1,counter + st counter,[%fp+tmp_counter] ! tmp_counter = elements left for the next pass + + stx %l0,[%fp+tmp_px] ! next pass restarts px at the flagged element + + stx %i2,[%fp+tmp_py] ! next pass restarts py at the flagged element + ba .cont0 + mov 1,counter ! (delay slot) this pass completes 1 element, then .tail returns to .begin + + .align 16 +.update1: + cmp counter,1 ! same protocol as .update0, entered on the ax test + ble .cont1 + ld [TBL+TBL_SHIFT+44],%f4 ! (delay slot, always executed) x0 = word at TBL+TBL_SHIFT+44 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont1 + mov 1,counter + + .align 16 +.update2: + cmp %i5,0 ! entered with ay == 0: ax ? 0 + bne .cont2 ! only ax == 0 together with ay == 0 is deferred + + cmp counter,1 + ble .cont2 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont2 + mov 1,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont3 + mov 2,counter + + .align 16 +.update4: + cmp counter,2 + ble .cont4 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont4 + mov 2,counter + + .align 16 +.update5: + cmp %i5,0 + bne .cont5 + + cmp counter,2 + ble .cont5 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont5 + mov 2,counter + + .align 16 +.update6: +
cmp counter,3 + ble .cont6 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont6 + mov 3,counter + + .align 16 +.update7: + cmp counter,3 + ble .cont7 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont7 + mov 3,counter + + .align 16 +.update8: + cmp %i5,0 + bne .cont8 + + cmp counter,3 + ble .cont8 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont8 + mov 3,counter + + .align 16 +.update9: + cmp counter,4 + ble .cont9 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont9 + mov 4,counter + + .align 16 +.update10: + cmp counter,4 + ble .cont10 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont10 + mov 4,counter + + .align 16 +.update11: + cmp %i5,0 + bne .cont11 + + cmp counter,4 + ble .cont11 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont11 + mov 4,counter + + .align 16 +.update12: + cmp counter,5 + ble .cont12 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont12 + mov 5,counter + + .align 16 +.update13: + cmp counter,5 + ble .cont13 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont13 + mov 5,counter + + .align 16 +.update14: + cmp %i5,0 + bne .cont14 + + cmp counter,5 + ble .cont14 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba 
.cont14 + mov 5,counter + + .align 16 +.update15: + cmp counter,6 + ble .cont15 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont15 + mov 6,counter + + .align 16 +.update16: + cmp counter,6 + ble .cont16 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont16 + mov 6,counter + + .align 16 +.update17: + cmp %i5,0 + bne .cont17 + + cmp counter,1 + ble .cont17 + fmovd DC1,%f62 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont17 + mov 1,counter + + .align 16 +.update18: + cmp counter,2 + ble .cont18 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont18 + mov 2,counter + + .align 16 +.update19: + cmp counter,2 + ble .cont19 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont19 + mov 2,counter + + .align 16 +.update20: + cmp %o1,0 + bne .cont20 + + cmp counter,2 + ble .cont20 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i1,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont20 + mov 2,counter + + .align 16 +.update21: + cmp counter,3 + ble .cont21 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont21 + mov 3,counter + + .align 16 +.update22: + cmp counter,3 + ble .cont22 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont22 + mov 3,counter + + .align 16 +.update23: + cmp %i5,0 + bne .cont23 + + cmp counter,3 + ble .cont23 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + 
+ stx %g5,[%fp+tmp_px] + + stx %o3,[%fp+tmp_py] + ba .cont23 + mov 3,counter + + .align 16 +.update24: + cmp counter,4 + ble .cont24 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont24 + mov 4,counter + + .align 16 +.update25: + cmp counter,4 + ble .cont25 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont25 + mov 4,counter + + .align 16 +.update26: + cmp %i5,0 + bne .cont26 + + cmp counter,4 + ble .cont26 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont26 + mov 4,counter + + .align 16 +.update27: + cmp counter,5 + ble .cont27 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont27 + mov 5,counter + + .align 16 +.update28: + cmp counter,5 + ble .cont28 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont28 + mov 5,counter + + .align 16 +.update29: + cmp %i5,0 + bne .cont29 + + cmp counter,5 + ble .cont29 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %o4,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont29 + mov 5,counter + + .align 16 +.update30: + cmp counter,6 + ble .cont30 + ld [TBL+TBL_SHIFT+44],%f2 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont30 + mov 6,counter + + .align 16 +.update31: + cmp counter,6 + ble .cont31 + ld [TBL+TBL_SHIFT+44],%f4 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l0,[%fp+tmp_px] + + stx %i2,[%fp+tmp_py] + ba .cont31 + mov 6,counter + + .align 16 +.exit: + ret + restore + SET_SIZE(__vrhypotf) + diff --git 
a/usr/src/libm/src/mvec/vis/__vrsqrt.S b/usr/src/libm/src/mvec/vis/__vrsqrt.S new file mode 100644 index 0000000..08c9146 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vrsqrt.S @@ -0,0 +1,2156 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vrsqrt.S 1.4 06/01/23 SMI" + + .file "__vrsqrt.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01; + .word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01; + .word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01; + .word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01; + .word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01; + .word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01; + + .word 0x001fffff, 0xffffffff ! DC0 + .word 0x3fe00000, 0x00000000 ! DC1 + .word 0x00002000, 0x00000000 ! DC2 + .word 0x7fffc000, 0x00000000 ! DC3 + .word 0x0007ffff, 0xffffffff ! DC4 + + .word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51) + .word 0x3ff00000, 0x00000000 ! 
DONE = 1.0 + +#define stridex %l5 +#define stridey %l7 +#define counter %l0 +#define TBL %l3 +#define _0x7ff00000 %o0 +#define _0x00100000 %o1 + +#define DC0 %f56 +#define DC1 %f54 +#define DC2 %f48 +#define DC3 %f46 +#define K6 %f42 +#define K5 %f20 +#define K4 %f52 +#define K3 %f50 +#define K2 %f14 +#define K1 %f12 +#define DONE %f4 + +#define tmp_counter %g5 +#define tmp_px %o5 + +#define tmp0 STACK_BIAS-0x40 +#define tmp1 STACK_BIAS-0x38 +#define tmp2 STACK_BIAS-0x30 +#define tmp3 STACK_BIAS-0x28 +#define tmp4 STACK_BIAS-0x20 +#define tmp5 STACK_BIAS-0x18 +#define tmp6 STACK_BIAS-0x10 +#define tmp7 STACK_BIAS-0x08 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&res)[0] = ((float*)px)[0]; +! ((float*)&res)[1] = ((float*)px)[1]; +! hx = *(int*)px; +! if ( hx >= 0x7ff00000 ) +! { +! res = DONE / res; +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! continue; +! } +! if ( hx < 0x00100000 ) +! { +! ax = hx & 0x7fffffff; +! lx = ((int*)px)[1]; +! +! if ( (ax | lx) == 0 ) +! { +! res = DONE / res; +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! continue; +! } +! else if ( hx >= 0 ) +! { +! if ( hx < 0x00080000 ) +! { +! res = *(long long*)&res; +! hx = *(int*)&res - (537 << 21); +! } +! else +! { +! res = vis_fand(res,DC4); +! res = *(long long*)&res; +! res += D2ON51; +! hx = *(int*)&res - (537 << 21); +! } +! } +! else +! { +! res = sqrt(res); +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! px += stridex; +! py += stridey; +! continue; +! } +! } +! +! iexp = hx >> 21; +! iexp = -iexp; +! iexp += 0x5fe; +! lexp = iexp << 52; +! dlexp = *(double*)&lexp; +! hx >>= 10; +! hx &= 0x7f8; +! hx += 8; +! hx &= -16; +! +! res = vis_fand(res,DC0); +! 
res = vis_for(res,DC1); +! res_c = vis_fpadd32(res,DC2); +! res_c = vis_fand(res_c,DC3); +! +! addr = (char*)arr + hx; +! dexp_hi = ((double*)addr)[0]; +! dexp_lo = ((double*)addr)[1]; +! dtmp0 = dexp_hi * dexp_hi; +! xx = res - res_c; +! xx *= dtmp0; +! res = K6 * xx; +! res += K5; +! res *= xx; +! res += K4; +! res *= xx; +! res += K3; +! res *= xx; +! res += K2; +! res *= xx; +! res += K1; +! res *= xx; +! res = dexp_hi * res; +! res += dexp_lo; +! res += dexp_hi; +! +! res *= dlexp; +! +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrsqrt) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,o3) + PIC_SET(l7,__vlibm_TBL_rsqrt,l3) + wr %g0,0x82,%asi + + ldd [%o3],K1 + sethi %hi(0x7ff00000),%o0 + mov %i3,%o4 + + ldd [%o3+0x08],K2 + sethi %hi(0x00100000),%o1 + mov %i1,tmp_px + + ldd [%o3+0x10],K3 + sll %i2,3,stridex + mov %i0,tmp_counter + + ldd [%o3+0x18],K4 + sll %i4,3,stridey + + ldd [%o3+0x20],K5 + ldd [%o3+0x28],K6 + ldd [%o3+0x30],DC0 + ldd [%o3+0x38],DC1 + ldd [%o3+0x40],DC2 + ldd [%o3+0x48],DC3 + +.begin: + mov tmp_counter,counter + mov tmp_px,%i1 + clr tmp_counter +.begin1: + cmp counter,0 + ble,pn %icc,.exit + ldd [%o3+0x60],DONE + + lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; + sethi %hi(0x7ffffc00),%i0 + + lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; + add %i0,1023,%i0 + + fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + + lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; + sethi %hi(0x00080000),%i4 + + lda [%i1+4]%asi,%l4 + add %i1,stridex,%l6 ! px += stridex + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + and %g1,%i0,%i2 + + cmp %g1,_0x7ff00000 ! (6_1) hx ? 
0x7ff00000 + bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 ) + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + + cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 + bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; +.cont_spec: + fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); + + add %o2,8,%l4 ! (6_1) hx += 8; + + add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; + + lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (6_1) iexp << 52; + and %l4,-16,%l4 ! (6_1) hx = -16; + + add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; + stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; + + add %l6,stridex,%l6 ! px += stridex + ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (0_0) hx >>= 10; + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; + + cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 ) + fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3); +.cont0: + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi; + + cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 + bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 ) + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; +.cont1: + fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); + + add %o2,8,%l2 ! (0_0) hx += 8; + fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c; + + lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (0_0) iexp << 52; + and %l2,-16,%l2 ! (0_0) hx = -16; + + add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; + add %l6,stridex,%l6 ! px += stridex + stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; + + fmuld %f6,%f10,%f26 ! 
(6_1) xx *= dtmp0; + ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0]; + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (1_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 ) + lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; +.cont2: + fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); + + fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 + bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; +.cont3: + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + + add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; + fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; + add %o2,8,%i2 ! (1_0) hx += 8; + fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c; + + lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (1_0) iexp << 52; + and %i2,-16,%i2 ! (1_0) hx = -16; + + add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; + + fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0; + add %l6,stridex,%l6 ! px += stridex + ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (6_1) res += K5; + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (2_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 ) + lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; +.cont4: + fmuld %f62,%f26,%f40 ! (6_1) res *= xx; + fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); + + fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x00100000 ! 
(2_0) hx ? 0x00100000 + bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; +.cont5: + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + + add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; + fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; + add %o2,8,%i4 ! (2_0) hx += 8; + fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c; + + faddd %f40,K4,%f40 ! (6_1) res += K4; + + lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (2_0) iexp << 52; + and %i4,-16,%i4 ! (2_0) hx = -16; + + add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; + + fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0; + ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (0_0) res += K5; + + fmuld %f40,%f26,%f34 ! (6_1) res *= xx; + add %l6,stridex,%l6 ! px += stridex + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (3_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 ) + lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1]; +.cont6: + fmuld %f62,%f32,%f60 ! (0_0) res *= xx; + cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 + fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3); + + fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; + bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + faddd %f34,K3,%f6 ! (6_1) res += K3; +.cont7: + sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; + fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); + + add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; + fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; + add %o2,8,%i5 ! (3_0) hx += 8; + fsubd %f28,%f22,%f28 ! 
(2_0) xx = res - res_c; + + fmuld %f6,%f26,%f22 ! (6_1) res *= xx; + faddd %f60,K4,%f60 ! (0_0) res += K4; + + lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (3_0) iexp << 52; + and %i5,-16,%i5 ! (3_0) hx = -16; + + add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; + + fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; + add %l6,stridex,%i0 ! px += stridex + ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (1_0) res += K5; + + faddd %f22,K2,%f10 ! (6_1) res += K2; + fmuld %f60,%f32,%f34 ! (0_0) res *= xx; + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (4_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 ) + lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; +.cont8: + fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3); + fmuld %f62,%f38,%f62 ! (1_0) res *= xx; + + fmuld %f10,%f26,%f58 ! (6_1) res *= xx; + cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + faddd %f34,K3,%f60 ! (0_0) res += K3; + + fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; + bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); +.cont9: + add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; + fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx; + add %o2,8,%l1 ! (4_0) hx += 8; + fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c; + + fmuld %f60,%f32,%f60 ! (0_0) res *= xx; + faddd %f62,K4,%f6 ! (1_0) res += K4; + + lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (4_0) iexp << 52; + and %l1,-16,%l1 ! (4_0) hx = -16; + faddd %f58,K1,%f58 ! (6_1) res += K1; + + add %i0,stridex,%i1 ! px += stridex + add %l1,TBL,%l1 ! 
(4_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; + + fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; + ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; + faddd %f10,K5,%f62 ! (2_0) res += K5; + + fmuld %f6,%f38,%f34 ! (1_0) res *= xx; + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + nop + faddd %f60,K2,%f60 ! (0_0) res += K2; + + for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; + fmuld %f58,%f26,%f26 ! (6_1) res *= xx; + + sra %g1,10,%o2 ! (5_0) hx >>= 10; + cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 + bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 ) + lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; +.cont10: + fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); + fmuld %f62,%f36,%f62 ! (2_0) res *= xx; + + fmuld %f60,%f32,%f58 ! (0_0) res *= xx; + cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (1_0) res += K3; + + fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; + bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 ) + nop + fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); +.cont11: + ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; + fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; + fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; + add %o2,8,%i3 ! (5_0) hx += 8; + fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; + + fmuld %f34,%f38,%f24 ! (1_0) res *= xx; + or %g0,%o4,%i0 + + cmp counter,7 + bl,pn %icc,.tail + faddd %f62,K4,%f34 ! (2_0) res += K4; + + ba .main_loop + sub counter,7,counter ! counter + + .align 16 +.main_loop: + add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; + and %i3,-16,%i3 ! (5_1) hx = -16; + lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; + faddd %f58,K1,%f58 ! (0_1) res += K1; + + add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx; + sllx %o7,52,%o7 ! 
(5_1) iexp << 52; + stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp; + faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo; + + faddd %f22,K5,%f62 ! (3_1) res += K5; + add %i1,stridex,%l6 ! px += stridex + ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0]; + fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; + + faddd %f24,K2,%f26 ! (1_1) res += K2; + add %i0,stridey,%i1 ! px += stridey + ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; + fmuld %f34,%f36,%f34 ! (2_1) res *= xx; + + fmuld %f58,%f32,%f58 ! (0_1) res *= xx; + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi; + faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi; + + fmuld %f62,%f40,%f32 ! (3_1) res *= xx; + cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000 + ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3); + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 ) + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (2_1) res += K3; +.cont12: + fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; + cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + + fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; + bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 ) + ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); +.cont13: + fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; + add %o2,8,%l4 ! (6_1) hx += 8; + st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c; + + fmuld %f34,%f36,%f28 ! (2_1) res *= xx; + add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; + st %f3,[%i0+4] ! 
(6_2) ((float*)py)[1] = ((float*)res)[1]; + faddd %f32,K4,%f32 ! (3_1) res += K4; + + lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (6_1) iexp << 52; + and %l4,-16,%l4 ! (6_1) hx = -16; + faddd %f26,K1,%f26 ! (1_1) res += K1; + + add %i1,stridey,%i0 ! px += stridey + add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; + stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; + faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; + + fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0; + add %l6,stridex,%l6 ! px += stridex + ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (4_1) res += K5; + + fmuld %f32,%f40,%f34 ! (3_1) res *= xx; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f32 ! (2_1) res += K2; + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi; + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; + + fmuld %f62,%f60,%f38 ! (4_1) res *= xx; + cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3); + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 ) + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (3_1) res += K3; +.cont14: + fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res; + cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; + fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + + fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; + bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 ) + ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f28,DC2,%f18 ! 
(0_0) res_c = vis_fpadd32(res,DC2); +.cont15: + fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx; + add %o2,8,%l2 ! (0_0) hx += 8; + st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c; + + fmuld %f34,%f40,%f44 ! (3_1) res *= xx; + nop + st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f38,K4,%f38 ! (4_1) res += K4; + + lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (0_0) iexp << 52; + and %l2,-16,%l2 ! (0_0) hx = -16; + faddd %f32,K1,%f32 ! (2_1) res += K1; + + add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; + add %l6,stridex,%l6 ! px += stridex + stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; + faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; + + fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0; + add %i0,stridey,%i1 ! px += stridey + ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (5_1) res += K5; + + fmuld %f38,%f60,%f34 ! (4_1) res *= xx; + sra %g1,10,%o2 ! (1_0) hx >>= 10; + ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0]; + faddd %f44,K2,%f38 ! (3_1) res += K2; + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi; + + fmuld %f62,%f58,%f36 ! (5_1) res *= xx; + bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); +.cont16: + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (4_1) res += K3; + + fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; + bl,pn %icc,.update17 ! 
(1_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); +.cont17: + fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp; + add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; + ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; + add %o2,8,%i2 ! (1_0) hx += 8; + st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c; + + fmuld %f34,%f60,%f28 ! (4_1) res *= xx; + nop + st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f36,K4,%f36 ! (5_1) res += K4; + + lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (1_0) iexp << 52; + and %i2,-16,%i2 ! (1_0) hx = -16; + faddd %f38,K1,%f38 ! (3_1) res += K1; + + add %i1,stridey,%i0 ! px += stridey + add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; + faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo; + + fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0; + add %l6,stridex,%l6 ! px += stridex + ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (6_1) res += K5; + + fmuld %f36,%f58,%f34 ! (5_1) res *= xx; + sra %g1,10,%o2 ! (2_0) hx >>= 10; + ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f36 ! (4_1) res += K2; + + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); + + fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; + + fmuld %f62,%f26,%f40 ! (6_1) res *= xx; + bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f8 ! 
(1_0) res_c = vis_fand(res_c,DC3); +.cont18: + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + faddd %f34,K3,%f34 ! (5_1) res += K3; + + fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res; + bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); +.cont19: + fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp; + add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; + ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; + add %o2,8,%i4 ! (2_0) hx += 8; + st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c; + + fmuld %f34,%f58,%f44 ! (5_1) res *= xx; + nop + st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f40,K4,%f40 ! (6_1) res += K4; + + lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (2_0) iexp << 52; + and %i4,-16,%i4 ! (2_0) hx = -16; + faddd %f36,K1,%f36 ! (4_1) res += K1; + + add %l6,stridex,%l6 ! px += stridex + add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; + faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; + + fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0; + add %i0,stridey,%i1 ! px += stridey + ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (0_0) res += K5; + + fmuld %f40,%f26,%f34 ! (6_1) res *= xx; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; + faddd %f44,K2,%f40 ! (5_1) res += K2; + + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); + + fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + lda [%l6+4]%asi,%f1 ! 
(4_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi; + + fmuld %f62,%f32,%f60 ! (0_0) res *= xx; + bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp; + fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); +.cont20: + fmuld %f40,%f58,%f40 ! (5_1) res *= xx; + cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + faddd %f34,K3,%f10 ! (6_1) res += K3; + + fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; + bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; + fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); +.cont21: + fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; + add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; + ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; + add %o2,8,%i5 ! (3_0) hx += 8; + st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c; + + fmuld %f10,%f26,%f4 ! (6_1) res *= xx; + nop + st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f60,K4,%f60 ! (0_0) res += K4; + + lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; + sllx %o7,52,%o7 ! (3_0) iexp << 52; + and %i5,-16,%i5 ! (3_0) hx = -16; + faddd %f40,K1,%f40 ! (5_1) res += K1; + + add %l6,stridex,%i0 ! px += stridex + add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; + stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; + faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; + + fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; + add %i1,stridey,%l6 ! px += stridey + ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; + faddd %f62,K5,%f62 ! (1_0) res += K5; + + faddd %f4,K2,%f10 ! (6_1) res += K2; + sra %g1,10,%o2 ! (4_0) hx >>= 10; + nop + fmuld %f60,%f32,%f34 ! (0_0) res *= xx; + + fmuld %f40,%f58,%f40 ! (5_1) res *= xx; + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + lda [%i0]%asi,%f6 ! 
(5_0) ((float*)res)[0] = ((float*)px)[0]; + for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); + + fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; + + fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3); + bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; + fmuld %f62,%f38,%f62 ! (1_0) res *= xx; +.cont22: + fmuld %f10,%f26,%f58 ! (6_1) res *= xx; + cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + faddd %f34,K3,%f60 ! (0_0) res += K3; + + fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res; + bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 ) + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); +.cont23: + fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; + add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; + ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx; + add %o2,8,%l1 ! (4_0) hx += 8; + st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c; + + fmuld %f60,%f32,%f60 ! (0_0) res *= xx; + sllx %o7,52,%o7 ! (4_0) iexp << 52; + st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f62,K4,%f6 ! (1_0) res += K4; + + lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; + add %i0,stridex,%i1 ! px += stridex + and %l1,-16,%l1 ! (4_0) hx = -16; + faddd %f58,K1,%f58 ! (6_1) res += K1; + + add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx; + add %l6,stridey,%i0 ! px += stridey + stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; + faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo; + + fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; + nop + ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; + faddd %f30,K5,%f62 ! 
(2_0) res += K5; + + fmuld %f6,%f38,%f34 ! (1_0) res *= xx; + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0]; + faddd %f60,K2,%f60 ! (0_0) res += K2; + + for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; + fmuld %f58,%f26,%f26 ! (6_1) res *= xx; + + fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; + cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 + lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; + faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi; + + fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); + bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 ) + ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp; + fmuld %f62,%f36,%f62 ! (2_0) res *= xx; +.cont24: + fmuld %f60,%f32,%f58 ! (0_0) res *= xx; + sra %g1,10,%o2 ! (5_0) hx >>= 10; + cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 + faddd %f34,K3,%f34 ! (1_0) res += K3; + + fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; + bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 ) + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); +.cont25: + fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp; + subcc counter,7,counter ! counter -= 7; + ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; + fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); + + fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; + add %o2,8,%i3 ! (5_0) hx += 8; + st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0]; + fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; + + fmuld %f34,%f38,%f24 ! (1_0) res *= xx; + st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1]; + bpos,pt %icc,.main_loop + faddd %f62,K4,%f34 ! (2_0) res += K4; + + add counter,7,counter +.tail: + add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; + subcc counter,1,counter + bneg,a .begin + mov %i0,%o4 + + faddd %f58,K1,%f58 ! (0_1) res += K1; + + faddd %f26,%f60,%f8 ! 
(6_2) res += dexp_lo; + + faddd %f22,K5,%f62 ! (3_1) res += K5; + fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; + + faddd %f24,K2,%f26 ! (1_1) res += K2; + add %i1,stridex,%l6 ! px += stridex + ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; + fmuld %f34,%f36,%f34 ! (2_1) res *= xx; + + fmuld %f58,%f32,%f58 ! (0_1) res *= xx; + + add %i0,stridey,%i1 ! px += stridey + faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi; + + fmuld %f62,%f40,%f32 ! (3_1) res *= xx; + ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + faddd %f34,K3,%f34 ! (2_1) res += K3; + + fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; + + fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; + ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; + + fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; + st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; + + fmuld %f34,%f36,%f28 ! (2_1) res *= xx; + st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1]; + faddd %f32,K4,%f32 ! (3_1) res += K4; + + subcc counter,1,counter + bneg,a .begin + mov %i1,%o4 + + faddd %f26,K1,%f26 ! (1_1) res += K1; + + faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; + + add %l6,stridex,%l6 ! px += stridex + faddd %f62,K5,%f62 ! (4_1) res += K5; + + fmuld %f32,%f40,%f34 ! (3_1) res *= xx; + add %i1,stridey,%i0 ! px += stridey + ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f32 ! (2_1) res += K2; + + fmuld %f26,%f38,%f26 ! (1_1) res *= xx; + + faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; + + fmuld %f62,%f60,%f38 ! (4_1) res *= xx; + ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + faddd %f34,K3,%f34 ! (3_1) res += K3; + + fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res; + + fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; + ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; + + st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; + + fmuld %f34,%f40,%f44 ! (3_1) res *= xx; + st %f3,[%i1+4] ! 
(0_1) ((float*)py)[1] = ((float*)res)[1]; + faddd %f38,K4,%f38 ! (4_1) res += K4; + + subcc counter,1,counter + bneg,a .begin + mov %i0,%o4 + + faddd %f32,K1,%f32 ! (2_1) res += K1; + + add %l6,stridex,%l6 ! px += stridex + faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; + + add %i0,stridey,%i1 ! px += stridey + + fmuld %f38,%f60,%f34 ! (4_1) res *= xx; + ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0]; + faddd %f44,K2,%f38 ! (3_1) res += K2; + + fmuld %f32,%f36,%f32 ! (2_1) res *= xx; + + faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi; + + ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; + + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + faddd %f34,K3,%f34 ! (4_1) res += K3; + + fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; + + fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp; + ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1]; + + st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; + + fmuld %f34,%f60,%f28 ! (4_1) res *= xx; + st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; + + subcc counter,1,counter + bneg,a .begin + mov %i1,%o4 + + faddd %f38,K1,%f38 ! (3_1) res += K1; + + faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo; + + add %l6,stridex,%l6 ! px += stridex + + add %i1,stridey,%i0 ! px += stridey + ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0]; + faddd %f28,K2,%f36 ! (4_1) res += K2; + + fmuld %f38,%f40,%f38 ! (3_1) res *= xx; + + faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; + + ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; + + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + + fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res; + + fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp; + ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; + + st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0]; + + st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; + + subcc counter,1,counter + bneg,a .begin + mov %i0,%o4 + + faddd %f36,K1,%f36 ! (4_1) res += K1; + + faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; + + add %i0,stridey,%i1 ! 
px += stridey + + add %l6,stridex,%l6 ! px += stridex + ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; + + fmuld %f36,%f60,%f36 ! (4_1) res *= xx; + + faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi; + + ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp; + + fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; + + fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; + ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; + + st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; + + st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; + + subcc counter,1,counter + bneg,a .begin + mov %i1,%o4 + + faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; + + add %l6,stridex,%i0 ! px += stridex + + add %i1,stridey,%l6 ! px += stridey + + faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; + + ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; + + fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; + + st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; + + st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; + + ba .begin + add %i1,stridey,%o4 + + .align 16 +.spec0: + fdivd DONE,%f0,%f0 ! res = DONE / res; + add %i1,stridex,%i1 ! px += stridex + st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0]; + st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1]; + add %o4,stridey,%o4 ! py += stridey + ba .begin1 + sub counter,1,counter + + .align 16 +.spec1: + orcc %i2,%l4,%g0 + bz,a 2f + fdivd DONE,%f0,%f0 ! res = DONE / res; + + cmp %g1,0 + bl,a 2f + fsqrtd %f0,%f0 ! res = sqrt(res); + + cmp %g1,%i4 + bge,a 1f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp0] + + fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp0],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (6_1) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + ba .cont_spec + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + +1: + fand %f0,%f18,%f0 ! 
res = vis_fand(res,DC4); + + ldd [%o3+0x58],%f28 + fxtod %f0,%f0 ! res = *(long long*)&res; + + faddd %f0,%f28,%f0 ! res += D2ON51; + st %f0,[%fp+tmp0] + + fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp0],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (6_1) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + ba .cont_spec + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + +2: + add %i1,stridex,%i1 ! px += stridex + st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0]; + st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1]; + add %o4,stridey,%o4 ! py += stridey + ba .begin1 + sub counter,1,counter + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont0 + mov 1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + sub %l6,stridex,%i1 + + ld [%i1+4],%i2 + cmp %g1,0 + bl 1f + + orcc %g1,%i2,%g0 + bz 1f + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f8,%f8 ! res = *(long long*)&res; + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + ba .cont1 + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; +2: + fand %f8,%f18,%f8 + fxtod %f8,%f8 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f8,%f18,%f8 + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + ba .cont1 + add %o7,1534,%o7 ! 
(0_0) iexp += 0x5fe; +1: + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont1 + mov 1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + nop + + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont2 + mov 2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + sub %l6,stridex,%i1 + + ld [%i1+4],%i2 + cmp %g1,0 + bl 1f + + orcc %g1,%i2,%g0 + bz 1f + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (1_0) hx >>= 10; + sub %o7,537,%o7 + ba .cont3 + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; +2: + fand %f0,%f18,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f0,%f18,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); + + sra %g1,10,%o2 ! (1_0) hx >>= 10; + sub %o7,537,%o7 + ba .cont3 + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; +1: + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont3 + mov 2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + nop + + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont4 + mov 3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + sub %l6,stridex,%i1 + + ld [%i1+4],%i3 + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i4 + + cmp %g1,%i4 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (2_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + ba .cont5 + for %f16,DC1,%f28 ! 
(2_0) res = vis_for(res,DC1); +2: + fand %f6,%f18,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f6,%f18,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (2_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + ba .cont5 + for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont5 + mov 3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + nop + + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont6 + mov 4,counter + + .align 16 +.update7: + sub %l6,stridex,%i1 + cmp counter,4 + ble .cont7 + faddd %f34,K3,%f6 ! (6_1) res += K3; + + ld [%i1+4],%i3 + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i5 + + cmp %g1,%i5 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + ba .cont7 + for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); +2: + fand %f0,%f18,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f0,%f18,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + ba .cont7 + for %f16,DC1,%f44 ! 
(3_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont7 + mov 4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + nop + + mov %l6,tmp_px + sub counter,5,tmp_counter + + ba .cont8 + mov 5,counter + + .align 16 +.update9: + ld [%l6+4],%i3 + cmp counter,5 + ble .cont9 + fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); + + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i1 + + cmp %g1,%i1 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f8,%f8 ! res = *(long long*)&res; + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (4_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont9 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +2: + fand %f8,%f18,%f8 + fxtod %f8,%f8 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f8,%f18,%f8 + st %f8,[%fp+tmp7] + + fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (4_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont9 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +1: + mov %l6,tmp_px + sub counter,5,tmp_counter + + ba .cont9 + mov 5,counter + + .align 16 +.update10: + cmp counter,6 + ble .cont10 + nop + + mov %i0,tmp_px + sub counter,6,tmp_counter + + ba .cont10 + mov 6,counter + + .align 16 +.update11: + ld [%i0+4],%i3 + cmp counter,6 + ble .cont11 + fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); + + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f18 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! 
(5_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (5_0) hx >>= 10; + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + ba .cont11 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +2: + fand %f0,%f18,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f18 + faddd %f0,%f18,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (5_0) hx >>= 10; + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + ba .cont11 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +1: + mov %i0,tmp_px + sub counter,6,tmp_counter + + ba .cont11 + mov 6,counter + + .align 16 +.update12: + cmp counter,0 + ble .cont12 + faddd %f34,K3,%f34 ! (2_1) res += K3; + + sub %l6,stridex,tmp_px + sub counter,0,tmp_counter + + ba .cont12 + mov 0,counter + + .align 16 +.update13: + sub %l6,stridex,%l4 + cmp counter,0 + ble .cont13 + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); + + ld [%l4+4],%l4 + cmp %g1,0 + bl 1f + + orcc %g1,%l4,%g0 + bz 1f + sethi %hi(0x00080000),%l4 + + cmp %g1,%l4 + bge,a 2f + ldd [%o3+0x50],%f62 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1); + + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + ba .cont13 + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); +2: + fand %f6,%f62,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f62 + faddd %f6,%f62,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; + sra %g1,10,%o2 ! (6_1) hx >>= 10; + for %f44,DC1,%f44 ! 
(6_1) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; + ba .cont13 + fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); +1: + sub %l6,stridex,tmp_px + sub counter,0,tmp_counter + + ba .cont13 + mov 0,counter + + .align 16 +.update14: + cmp counter,1 + ble .cont14 + faddd %f34,K3,%f34 ! (3_1) res += K3; + + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont14 + mov 1,counter + + .align 16 +.update15: + sub %l6,stridex,%l2 + cmp counter,1 + ble .cont15 + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); + + ld [%l2+4],%l2 + cmp %g1,0 + bl 1f + + orcc %g1,%l2,%g0 + bz 1f + sethi %hi(0x00080000),%l2 + + cmp %g1,%l2 + bge,a 2f + ldd [%o3+0x50],%f62 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + + sub %o7,537,%o7 + for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; + ba .cont15 + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); +2: + fand %f0,%f62,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f62 + faddd %f0,%f62,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (0_0) hx >>= 10; + for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1); + + sub %o7,537,%o7 + + sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; + + and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; + add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; + ba .cont15 + fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); +1: + sub %l6,stridex,tmp_px + sub counter,1,tmp_counter + + ba .cont15 + mov 1,counter + + .align 16 +.update16: + cmp counter,2 + ble .cont16 + fand %f18,DC3,%f8 ! 
(0_0) res_c = vis_fand(res_c,DC3); + + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont16 + mov 2,counter + + .align 16 +.update17: + sub %l6,stridex,%i2 + cmp counter,2 + ble .cont17 + fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); + + ld [%i2+4],%i2 + cmp %g1,0 + bl 1f + + orcc %g1,%i2,%g0 + bz 1f + sethi %hi(0x00080000),%i2 + + cmp %g1,%i2 + bge,a 2f + ldd [%o3+0x50],%f2 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (1_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + ba .cont17 + for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1); +2: + fand %f6,%f2,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f2 + faddd %f6,%f2,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (1_0) hx >>= 10; + + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; + ba .cont17 + for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,2,tmp_counter + + ba .cont17 + mov 2,counter + + .align 16 +.update18: + cmp counter,3 + ble .cont18 + fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); + + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont18 + mov 3,counter + + .align 16 +.update19: + sub %l6,stridex,%i4 + cmp counter,3 + ble .cont19 + fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); + + ld [%i4+4],%i4 + cmp %g1,0 + bl 1f + + orcc %g1,%i4,%g0 + bz 1f + sethi %hi(0x00080000),%i4 + + cmp %g1,%i4 + bge,a 2f + ldd [%o3+0x50],%f2 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! 
(2_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + ba .cont19 + for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1); +2: + fand %f0,%f2,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f2 + faddd %f0,%f2,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (2_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; + ba .cont19 + for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,3,tmp_counter + + ba .cont19 + mov 3,counter + + .align 16 +.update20: + cmp counter,4 + ble .cont20 + fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); + + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont20 + mov 4,counter + + .align 16 +.update21: + sub %l6,stridex,%i5 + cmp counter,4 + ble .cont21 + fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); + + ld [%i5+4],%i5 + cmp %g1,0 + bl 1f + + orcc %g1,%i5,%g0 + bz 1f + sethi %hi(0x00080000),%i5 + + cmp %g1,%i5 + bge,a 2f + ldd [%o3+0x50],%f34 + + fxtod %f6,%f6 ! res = *(long long*)&res; + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + + sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; + ba .cont21 + for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1); +2: + fand %f6,%f34,%f6 + fxtod %f6,%f6 ! res = *(long long*)&res; + ldd [%o3+0x58],%f34 + faddd %f6,%f34,%f6 + st %f6,[%fp+tmp7] + + fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; + sra %g1,10,%o2 ! (3_0) hx >>= 10; + + sub %o7,537,%o7 + and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; + + sub %g0,%o7,%o7 ! 
(3_0) iexp = -iexp; + ba .cont21 + for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1); +1: + sub %l6,stridex,tmp_px + sub counter,4,tmp_counter + + ba .cont21 + mov 4,counter + + .align 16 +.update22: + cmp counter,5 + ble .cont22 + fmuld %f62,%f38,%f62 ! (1_0) res *= xx; + + sub %i0,stridex,tmp_px + sub counter,5,tmp_counter + + ba .cont22 + mov 5,counter + + .align 16 +.update23: + sub %i0,stridex,%l1 + cmp counter,5 + ble .cont23 + fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); + + ld [%l1+4],%l1 + cmp %g1,0 + bl 1f + + orcc %g1,%l1,%g0 + bz 1f + sethi %hi(0x00080000),%l1 + + cmp %g1,%l1 + bge,a 2f + ldd [%o3+0x50],%f34 + + fxtod %f0,%f0 ! res = *(long long*)&res; + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (4_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont23 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +2: + fand %f0,%f34,%f0 + fxtod %f0,%f0 ! res = *(long long*)&res; + ldd [%o3+0x58],%f34 + faddd %f0,%f34,%f0 + st %f0,[%fp+tmp7] + + fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (4_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; + ba .cont23 + for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); +1: + sub %i0,stridex,tmp_px + sub counter,5,tmp_counter + + ba .cont23 + mov 5,counter + + .align 16 +.update24: + cmp counter,6 + ble .cont24 + fmuld %f62,%f36,%f62 ! (2_0) res *= xx; + + sub %i1,stridex,tmp_px + sub counter,6,tmp_counter + + ba .cont24 + mov 6,counter + + .align 16 +.update25: + sub %i1,stridex,%i3 + cmp counter,6 + ble .cont25 + fand %f6,DC0,%f16 ! 
(6_0) res = vis_fand(res,DC0); + + ld [%i3+4],%i3 + cmp %g1,0 + bl 1f + + orcc %g1,%i3,%g0 + bz 1f + nop + + sub %i1,stridex,%i3 + ld [%i3],%f10 + ld [%i3+4],%f11 + + sethi %hi(0x00080000),%i3 + + cmp %g1,%i3 + bge,a 2f + ldd [%o3+0x50],%f60 + + fxtod %f10,%f10 ! res = *(long long*)&res; + st %f10,[%fp+tmp7] + + fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (5_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + ba .cont25 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +2: + fand %f10,%f60,%f10 + fxtod %f10,%f10 ! res = *(long long*)&res; + ldd [%o3+0x58],%f60 + faddd %f10,%f60,%f10 + st %f10,[%fp+tmp7] + + fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); + ld [%fp+tmp7],%g1 + + sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; + + sra %g1,10,%o2 ! (5_0) hx >>= 10; + sub %o7,537,%o7 + + and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; + sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; + + ba .cont25 + for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); +1: + sub %i1,stridex,tmp_px + sub counter,6,tmp_counter + + ba .cont25 + mov 6,counter + +.exit: + ret + restore + SET_SIZE(__vrsqrt) + diff --git a/usr/src/libm/src/mvec/vis/__vrsqrtf.S b/usr/src/libm/src/mvec/vis/__vrsqrtf.S new file mode 100644 index 0000000..beb56c1 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vrsqrtf.S @@ -0,0 +1,1718 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vrsqrtf.S 1.4 06/01/23 SMI" + + .file "__vrsqrtf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +! i = [0,63] +! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24; +! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); +! i = [64,127] +! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23; +! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); + +.CONST_TBL: + .word 0x3e800000, 0x00000000, 0x3ff6a09e, 0x667f3bcd, + .word 0x3e7f81f8, 0x1f81f820, 0x3ff673e3, 0x2ef63a03, + .word 0x3e7f07c1, 0xf07c1f08, 0x3ff6482d, 0x37a5a3d2, + .word 0x3e7e9131, 0xabf0b767, 0x3ff61d72, 0xb7978671, + .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3ff5f3aa, 0x673fa911, + .word 0x3e7dae60, 0x76b981db, 0x3ff5cacb, 0x7802f342, + .word 0x3e7d41d4, 0x1d41d41d, 0x3ff5a2cd, 0x8c69d61a, + .word 0x3e7cd856, 0x89039b0b, 0x3ff57ba8, 0xb0ee01b9, + .word 0x3e7c71c7, 0x1c71c71c, 0x3ff55555, 0x55555555, + .word 0x3e7c0e07, 0x0381c0e0, 0x3ff52fcc, 0x468d6b54, + .word 0x3e7bacf9, 0x14c1bad0, 0x3ff50b06, 0xa8fc6b70, + .word 0x3e7b4e81, 0xb4e81b4f, 0x3ff4e6fd, 0xf33cf032, + .word 0x3e7af286, 0xbca1af28, 0x3ff4c3ab, 0xe93bcf74, + .word 0x3e7a98ef, 0x606a63be, 0x3ff4a10a, 0x97af7b92, + .word 0x3e7a41a4, 0x1a41a41a, 0x3ff47f14, 0x4fe17f9f, + .word 0x3e79ec8e, 0x951033d9, 0x3ff45dc3, 0xa3c34fa3, + .word 0x3e799999, 0x9999999a, 0x3ff43d13, 0x6248490f, + .word 0x3e7948b0, 0xfcd6e9e0, 0x3ff41cfe, 0x93ff5199, + .word 0x3e78f9c1, 0x8f9c18fa, 
0x3ff3fd80, 0x77e70577, + .word 0x3e78acb9, 0x0f6bf3aa, 0x3ff3de94, 0x8077db58, + .word 0x3e786186, 0x18618618, 0x3ff3c036, 0x50e00e03, + .word 0x3e781818, 0x18181818, 0x3ff3a261, 0xba6d7a37, + .word 0x3e77d05f, 0x417d05f4, 0x3ff38512, 0xba21f51e, + .word 0x3e778a4c, 0x8178a4c8, 0x3ff36845, 0x766eec92, + .word 0x3e7745d1, 0x745d1746, 0x3ff34bf6, 0x3d156826, + .word 0x3e7702e0, 0x5c0b8170, 0x3ff33021, 0x8127c0e0, + .word 0x3e76c16c, 0x16c16c17, 0x3ff314c3, 0xd92a9e91, + .word 0x3e768168, 0x16816817, 0x3ff2f9d9, 0xfd52fd50, + .word 0x3e7642c8, 0x590b2164, 0x3ff2df60, 0xc5df2c9e, + .word 0x3e760581, 0x60581606, 0x3ff2c555, 0x2988e428, + .word 0x3e75c988, 0x2b931057, 0x3ff2abb4, 0x3c0eb0f4, + .word 0x3e758ed2, 0x308158ed, 0x3ff2927b, 0x2cd320f5, + .word 0x3e755555, 0x55555555, 0x3ff279a7, 0x4590331c, + .word 0x3e751d07, 0xeae2f815, 0x3ff26135, 0xe91daf55, + .word 0x3e74e5e0, 0xa72f0539, 0x3ff24924, 0x92492492, + .word 0x3e74afd6, 0xa052bf5b, 0x3ff23170, 0xd2be638a, + .word 0x3e747ae1, 0x47ae147b, 0x3ff21a18, 0x51ff630a, + .word 0x3e7446f8, 0x6562d9fb, 0x3ff20318, 0xcc6a8f5d, + .word 0x3e741414, 0x14141414, 0x3ff1ec70, 0x124e98f9, + .word 0x3e73e22c, 0xbce4a902, 0x3ff1d61c, 0x070ae7d3, + .word 0x3e73b13b, 0x13b13b14, 0x3ff1c01a, 0xa03be896, + .word 0x3e738138, 0x13813814, 0x3ff1aa69, 0xe4f2777f, + .word 0x3e73521c, 0xfb2b78c1, 0x3ff19507, 0xecf5b9e9, + .word 0x3e7323e3, 0x4a2b10bf, 0x3ff17ff2, 0xe00ec3ee, + .word 0x3e72f684, 0xbda12f68, 0x3ff16b28, 0xf55d72d4, + .word 0x3e72c9fb, 0x4d812ca0, 0x3ff156a8, 0x72b5ef62, + .word 0x3e729e41, 0x29e4129e, 0x3ff1426f, 0xac0654db, + .word 0x3e727350, 0xb8812735, 0x3ff12e7d, 0x02c40253, + .word 0x3e724924, 0x92492492, 0x3ff11ace, 0xe560242a, + .word 0x3e721fb7, 0x8121fb78, 0x3ff10763, 0xcec30b26, + .word 0x3e71f704, 0x7dc11f70, 0x3ff0f43a, 0x45cdedad, + .word 0x3e71cf06, 0xada2811d, 0x3ff0e150, 0xdce2b60c, + .word 0x3e71a7b9, 0x611a7b96, 0x3ff0cea6, 0x317186dc, + .word 0x3e718118, 0x11811812, 0x3ff0bc38, 0xeb8ba412, + .word 
0x3e715b1e, 0x5f75270d, 0x3ff0aa07, 0xbd7b7488, + .word 0x3e7135c8, 0x1135c811, 0x3ff09811, 0x63615499, + .word 0x3e711111, 0x11111111, 0x3ff08654, 0xa2d4f6db, + .word 0x3e70ecf5, 0x6be69c90, 0x3ff074d0, 0x4a8b1438, + .word 0x3e70c971, 0x4fbcda3b, 0x3ff06383, 0x31ff307a, + .word 0x3e70a681, 0x0a6810a7, 0x3ff0526c, 0x39213bfa, + .word 0x3e708421, 0x08421084, 0x3ff0418a, 0x4806de7d, + .word 0x3e70624d, 0xd2f1a9fc, 0x3ff030dc, 0x4ea03a72, + .word 0x3e704104, 0x10410410, 0x3ff02061, 0x446ffa9a, + .word 0x3e702040, 0x81020408, 0x3ff01018, 0x28467ee9, + .word 0x3e800000, 0x00000000, 0x3ff00000, 0x00000000, + .word 0x3e7f81f8, 0x1f81f820, 0x3fefc0bd, 0x88a0f1d9, + .word 0x3e7f07c1, 0xf07c1f08, 0x3fef82ec, 0x882c0f9b, + .word 0x3e7e9131, 0xabf0b767, 0x3fef467f, 0x2814b0cc, + .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3fef0b68, 0x48d2af1c, + .word 0x3e7dae60, 0x76b981db, 0x3feed19b, 0x75e78957, + .word 0x3e7d41d4, 0x1d41d41d, 0x3fee990c, 0xdad55ed2, + .word 0x3e7cd856, 0x89039b0b, 0x3fee61b1, 0x38f18adc, + .word 0x3e7c71c7, 0x1c71c71c, 0x3fee2b7d, 0xddfefa66, + .word 0x3e7c0e07, 0x0381c0e0, 0x3fedf668, 0x9b7e6350, + .word 0x3e7bacf9, 0x14c1bad0, 0x3fedc267, 0xbea45549, + .word 0x3e7b4e81, 0xb4e81b4f, 0x3fed8f72, 0x08e6b82d, + .word 0x3e7af286, 0xbca1af28, 0x3fed5d7e, 0xa914b937, + .word 0x3e7a98ef, 0x606a63be, 0x3fed2c85, 0x34ed6d86, + .word 0x3e7a41a4, 0x1a41a41a, 0x3fecfc7d, 0xa32a9213, + .word 0x3e79ec8e, 0x951033d9, 0x3feccd60, 0x45f5d358, + .word 0x3e799999, 0x9999999a, 0x3fec9f25, 0xc5bfedd9, + .word 0x3e7948b0, 0xfcd6e9e0, 0x3fec71c7, 0x1c71c71c, + .word 0x3e78f9c1, 0x8f9c18fa, 0x3fec453d, 0x90f057a2, + .word 0x3e78acb9, 0x0f6bf3aa, 0x3fec1982, 0xb2ece47b, + .word 0x3e786186, 0x18618618, 0x3febee90, 0x56fb9c39, + .word 0x3e781818, 0x18181818, 0x3febc460, 0x92eb3118, + .word 0x3e77d05f, 0x417d05f4, 0x3feb9aed, 0xba588347, + .word 0x3e778a4c, 0x8178a4c8, 0x3feb7232, 0x5b79db11, + .word 0x3e7745d1, 0x745d1746, 0x3feb4a29, 0x3c1d9550, + .word 0x3e7702e0, 0x5c0b8170, 0x3feb22cd, 
0x56d87d7e, + .word 0x3e76c16c, 0x16c16c17, 0x3feafc19, 0xd8606169, + .word 0x3e768168, 0x16816817, 0x3fead60a, 0x1d0fb394, + .word 0x3e7642c8, 0x590b2164, 0x3feab099, 0xae8f539a, + .word 0x3e760581, 0x60581606, 0x3fea8bc4, 0x41a3d02c, + .word 0x3e75c988, 0x2b931057, 0x3fea6785, 0xb41bacf7, + .word 0x3e758ed2, 0x308158ed, 0x3fea43da, 0x0adc6899, + .word 0x3e755555, 0x55555555, 0x3fea20bd, 0x700c2c3e, + .word 0x3e751d07, 0xeae2f815, 0x3fe9fe2c, 0x315637ee, + .word 0x3e74e5e0, 0xa72f0539, 0x3fe9dc22, 0xbe484458, + .word 0x3e74afd6, 0xa052bf5b, 0x3fe9ba9d, 0xa6c73588, + .word 0x3e747ae1, 0x47ae147b, 0x3fe99999, 0x9999999a, + .word 0x3e7446f8, 0x6562d9fb, 0x3fe97913, 0x63068b54, + .word 0x3e741414, 0x14141414, 0x3fe95907, 0xeb87ab44, + .word 0x3e73e22c, 0xbce4a902, 0x3fe93974, 0x368cfa31, + .word 0x3e73b13b, 0x13b13b14, 0x3fe91a55, 0x6151761c, + .word 0x3e738138, 0x13813814, 0x3fe8fba8, 0xa1bf6f96, + .word 0x3e73521c, 0xfb2b78c1, 0x3fe8dd6b, 0x4563a009, + .word 0x3e7323e3, 0x4a2b10bf, 0x3fe8bf9a, 0xb06e1af3, + .word 0x3e72f684, 0xbda12f68, 0x3fe8a234, 0x5cc04426, + .word 0x3e72c9fb, 0x4d812ca0, 0x3fe88535, 0xd90703c6, + .word 0x3e729e41, 0x29e4129e, 0x3fe8689c, 0xc7e07e7d, + .word 0x3e727350, 0xb8812735, 0x3fe84c66, 0xdf0ca4c2, + .word 0x3e724924, 0x92492492, 0x3fe83091, 0xe6a7f7e7, + .word 0x3e721fb7, 0x8121fb78, 0x3fe8151b, 0xb86fee1d, + .word 0x3e71f704, 0x7dc11f70, 0x3fe7fa02, 0x3f1068d1, + .word 0x3e71cf06, 0xada2811d, 0x3fe7df43, 0x7579b9b5, + .word 0x3e71a7b9, 0x611a7b96, 0x3fe7c4dd, 0x663ebb88, + .word 0x3e718118, 0x11811812, 0x3fe7aace, 0x2afa8b72, + .word 0x3e715b1e, 0x5f75270d, 0x3fe79113, 0xebbd7729, + .word 0x3e7135c8, 0x1135c811, 0x3fe777ac, 0xde80baea, + .word 0x3e711111, 0x11111111, 0x3fe75e97, 0x46a0b098, + .word 0x3e70ecf5, 0x6be69c90, 0x3fe745d1, 0x745d1746, + .word 0x3e70c971, 0x4fbcda3b, 0x3fe72d59, 0xc45f1fc5, + .word 0x3e70a681, 0x0a6810a7, 0x3fe7152e, 0x9f44f01f, + .word 0x3e708421, 0x08421084, 0x3fe6fd4e, 0x79325467, + .word 0x3e70624d, 
0xd2f1a9fc, 0x3fe6e5b7, 0xd16657e1, + .word 0x3e704104, 0x10410410, 0x3fe6ce69, 0x31d5858d, + .word 0x3e702040, 0x81020408, 0x3fe6b761, 0x2ec892f6, + + .word 0x3fefffff, 0xfee7f18f ! K0 = 9.99999997962321453275e-01 + .word 0xbfdfffff, 0xfe07e52f ! K1 = -4.99999998166077580600e-01 + .word 0x3fd80118, 0x0ca296d9 ! K2 = 3.75066768969515586277e-01 + .word 0xbfd400fc, 0x0bbb8e78 ! K3 = -3.12560092408808548438e-01 + .word 0x7ffe0000, 0x7ffe0000 ! DC0 = 0x7ffe0000 in both 32-bit lanes (the "ax & 0x7ffe0000" mask) + .word 0x3f800000, 0x40000000 ! FONE = 1.0f, FTWO = 2.0f (loaded together as the %f2/%f3 pair) + +#define stridex %l4 +#define stridex2 %l1 +#define stridey %l3 +#define stridey2 %i2 +#define TBL %l2 +#define counter %i5 + +#define K3 %f38 +#define K2 %f36 +#define K1 %f34 +#define K0 %f32 +#define DC0 %f4 +#define FONE %f2 +#define FTWO %f3 + +#define _0x00800000 %o2 +#define _0x7f800000 %o4 + +#define tmp0 STACK_BIAS-0x30 +#define tmp1 STACK_BIAS-0x28 +#define tmp2 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x18 +#define tmp_counter STACK_BIAS-0x10 +#define tmp_px STACK_BIAS-0x08 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! ((float*)&ddx0)[0] = *px; +! ax0 = *(int*)px; +! +! ((float*)&ddx0)[1] = *(px + stridex); +! ax1 = *(int*)(px + stridex); +! +! px += stridex2; +! +! if ( ax0 >= 0x7f800000 ) +! { +! RETURN ( FONE / ((float*)&dres0)[0] ); +! } +! if ( ax0 < 0x00800000 ) +! { +! float res = ((float*)&dres0)[0]; +! +! if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */ +! { +! RETURN ( FONE / res ) +! } +! else if ( ax0 >= 0 ) /* X = denormal */ +! { +! double res0, xx0, tbl_div0, tbl_sqrt0; +! float fres0; +! int iax0, si0, iexp0; +! +! res = *(int*)&res; +! res *= FTWO; +! ax0 = *(int*)&res; +! iexp0 = ax0 >> 24; +! iexp0 = 0x3f + 0x4b - iexp0; +! iexp0 = iexp0 << 23; +! +! si0 = (ax0 >> 13) & 0x7f0; +! +! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; +! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; +! iax0 = ax0 & 0x7ffe0000; +! 
iax0 = ax0 - iax0; +! xx0 = iax0 * tbl_div0; +! res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); +! +! fres0 = res0; +! iexp0 += *(int*)&fres0; +! RETURN(*(float*)&iexp0) +! } +! else /* X = negative */ +! { +! RETURN ( sqrtf(res) ) +! } +! } +! if ( ax1 >= 0x7f800000 ) +! { +! RETURN ( FONE / ((float*)&dres0)[1] ) +! } +! if ( ax1 < 0x00800000 ) +! { +! float res = ((float*)&dres0)[1]; +! if ( (ax1 & 0x7fffffff) == 0 ) /* |X| = zero */ +! { +! RETURN ( FONE / res ) +! } +! else if ( ax1 >= 0 ) /* X = denormal */ +! { +! double res0, xx0, tbl_div0, tbl_sqrt0; +! float fres0; +! int iax1, si0, iexp0; +! +! res = *(int*)&res; +! res *= FTWO; +! ax1 = *(int*)&res; +! iexp0 = ax1 >> 24; +! iexp0 = 0x3f + 0x4b - iexp0; +! iexp0 = iexp0 << 23; +! +! si0 = (ax1 >> 13) & 0x7f0; +! +! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; +! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; +! iax1 = ax1 & 0x7ffe0000; +! iax1 = ax1 - iax1; +! xx0 = iax1 * tbl_div0; +! res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0); +! +! fres0 = res0; +! iexp0 += *(int*)&fres0; +! RETURN(*(float*)&iexp0) +! } +! else /* X = negative */ +! { +! RETURN ( sqrtf(res) ) +! } +! } +! +! iexp0 = ax0 >> 24; +! iexp1 = ax1 >> 24; +! iexp0 = 0x3f - iexp0; +! iexp1 = 0x3f - iexp1; +! iexp1 &= 0x1ff; +! lexp0 = iexp0 << 55; +! lexp1 = iexp1 << 23; +! +! lexp0 |= lexp1; +! +! fdx0 = *((double*)&lexp0); +! +! si0 = ax0 >> 13; +! si1 = ax1 >> 13; +! si0 &= 0x7f0; +! si1 &= 0x7f0; +! +! addr0 = (char*)TBL + si0; +! addr1 = (char*)TBL + si1; +! tbl_div0 = ((double*)((char*)TBL + si0))[0]; +! tbl_div1 = ((double*)((char*)TBL + si1))[0]; +! tbl_sqrt0 = ((double*)addr0)[1]; +! tbl_sqrt1 = ((double*)addr1)[1]; +! dfx0 = vis_fand(ddx0,DC0); +! dfx0 = vis_fpsub32(ddx0,dfx0); +! dtmp0 = (double)(((int*)&dfx0)[0]); +! dtmp1 = (double)(((int*)&dfx0)[1]); +! xx0 = dtmp0 * tbl_div0; +! xx1 = dtmp1 * tbl_div1; +! res0 = K3 * xx0; +! res1 = K3 * xx1; +! res0 += K2; +! res1 += K2; +! 
res0 *= xx0; +! res1 *= xx1; +! res0 += K1; +! res1 += K1; +! res0 *= xx0; +! res1 *= xx1; +! res0 += K0; +! res1 += K0; +! res0 = tbl_sqrt0 * res0; +! res1 = tbl_sqrt1 * res1; +! ((float*)&dres0)[0] = (float)res0; +! ((float*)&dres0)[1] = (float)res1; +! dres0 = vis_fpadd32(dres0,fdx0); +! *py = ((float*)&dres0)[0]; +! *(py + stridey) = ((float*)&dres0)[1]; +! py += stridey2; +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrsqrtf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + + st %i0,[%fp+tmp_counter] + stx %i1,[%fp+tmp_px] + + ldd [TBL+2048],K0 + sll %i2,2,stridex + + ldd [TBL+2048+8],K1 + sll %i4,2,stridey + mov %i3,%i2 + + ldd [TBL+2048+16],K2 + sethi %hi(0x7f800000),_0x7f800000 + sll stridex,1,stridex2 + + ldd [TBL+2048+24],K3 + sethi %hi(0x00800000),_0x00800000 + + ldd [TBL+2048+32],DC0 + add %g0,0x3f,%l0 + + ldd [TBL+2048+40],FONE +! ld [TBL+2048+44],FTWO +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%l7 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + + lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; + + lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); + sethi %hi(0x7ffffc00),%o0 + + lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; + add %l7,stridex2,%i1 ! px += stridex2 + add %o0,0x3ff,%o0 + + lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); + fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + add %i1,stridex2,%o5 ! px += stridex2 + + cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000 + bge,pn %icc,.spec0 ! (4_1) if ( ax0 >= 0x7f800000 ) + nop + + cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000 + bl,pn %icc,.spec1 ! (4_1) if ( ax0 < 0x00800000 ) + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; +.cont_spec: + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %g5,24,%l7 ! 
(5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; + + and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; + + stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; + + lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px; + fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; + + lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex); + + lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px; + + lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex); + cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000 + bge,pn %icc,.update0 ! (5_1) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0; +.cont0: + fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; + cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000 + bl,pn %icc,.update1 ! (5_1) if ( ax1 < 0x00800000 ) + fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); +.cont1: + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000 + + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! 
(0_0) iexp0 = ax0 >> 24; + sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f62 ! (4_1) res0 += K2; + + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + bge,pn %icc,.update2 ! (0_0) if ( ax0 >= 0x7f800000 ) + faddd %f50,K2,%f60 ! (5_1) res1 += K2; +.cont2: + cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000 + and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff; + fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + bl,pn %icc,.update3 ! (0_0) if ( ax0 < 0x00800000 ) + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); +.cont3: + fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0; + sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55; + + fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1; + or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1; + stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0); + + fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0; + sll stridex,1,stridex2 ! stridex2 = stridex * 2; + + lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px; + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex); + add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0; + faddd %f30,K1,%f62 ! (4_1) res0 += K1; + + lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px; + add %o5,stridex2,%l7 ! px += stridex2 + faddd %f48,K1,%f42 ! (5_1) res1 += K1; + + lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex); + cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000 + bge,pn %icc,.update4 ! (1_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0; +.cont4: + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; + cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000 + bl,pn %icc,.update5 ! (1_0) if ( ax1 < 0x00800000 ) + fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0); +.cont5: + fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0; + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000 + + fmuld %f42,%f46,%f58 ! 
(5_1) res1 *= xx1; + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f40 ! (0_0) res0 += K2; + + ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1]; + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff; + faddd %f50,K2,%f60 ! (1_0) res1 += K2; + + ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0; + or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1; + faddd %f48,K0,%f62 ! (4_1) res0 += K0; + + fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1; + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + faddd %f58,K0,%f60 ! (5_1) res1 += K0; + + fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; + bge,pn %icc,.update6 ! (2_0) if ( ax0 >= 0x7f800000 ) + lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; +.cont6: + cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000 + bl,pn %icc,.update7 ! (2_0) if ( ax0 < 0x00800000 ) + nop +.cont7: + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); + cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000 + fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0; + faddd %f40,K1,%f46 ! (0_0) res0 += K1; + + lda [%l7]0x82,%g1 !
(4_0) ax0 = *(int*)px; + add %l7,stridex2,%i1 ! px += stridex2 + fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1; + faddd %f48,K1,%f62 ! (1_0) res1 += K1; + + lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); + add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0; + bge,pn %icc,.update8 ! (3_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0; +.cont8: + fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; + cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000 + bl,pn %icc,.update9 ! (3_0) if ( ax1 < 0x00800000 ) + fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); +.cont9: + fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0; + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + add %i1,stridex2,%o5 ! px += stridex2 + fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1; + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f58 ! (2_0) res0 += K2; + + ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1]; + and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + faddd %f50,K2,%f60 ! (3_0) res1 += K2; + + ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0); + sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; + fitod %f17,%f44 ! 
(5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0; + or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; + faddd %f48,K0,%f22 ! (0_0) res0 += K0; + + fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1; + stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + faddd %f40,K0,%f26 ! (1_0) res1 += K0; + + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; + fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0); + + or %g0,%i2,%l7 + add stridey,stridey,stridey2 + + cmp counter,6 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,6,counter ! counter + + .align 16 +.main_loop: + lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px; + cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000 + bge,pn %icc,.update10 ! (4_1) if ( ax0 >= 0x7f800000 ) + fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; +.cont10: + lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex); + cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000 + fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0; + faddd %f62,K1,%f42 ! (2_1) res0 += K1; + + lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px; + fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1; + bl,pn %icc,.update11 ! (4_1) if ( ax0 < 0x00800000 ) + faddd %f58,K1,%f62 ! (3_1) res1 += K1; +.cont11: + lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex); + cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000 + bge,pn %icc,.update12 ! (5_1) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0; +.cont12: + fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; + cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000 + bl,pn %icc,.update13 ! (5_1) if ( ax1 < 0x00800000 ) + fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); +.cont13: + fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0; + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000 + fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f62,%f24,%f58 ! (3_1) res1 *= xx1; + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + and %o0,2032,%o0 ! 
(0_0) si0 &= 0x7f0; + fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f62 ! (4_1) res0 += K2; + + ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1]; + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + bge,pn %icc,.update14 ! (0_0) if ( ax0 >= 0x7f800000 ) + faddd %f50,K2,%f60 ! (5_1) res1 += K2; +.cont14: + ldd [%o1+8],%f28 ! (3_1) tbl_sqrt1 = ((double*)addr0)[1]; + cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000 + and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff; + fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0); + sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + bl,pn %icc,.update15 ! (0_0) if ( ax0 < 0x00800000 ) + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); +.cont15: + fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0; + sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55; + st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0]; + faddd %f48,K0,%f62 ! (2_1) res0 += K0; + + fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1; + or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1; + stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0); + faddd %f58,K0,%f60 ! (3_1) res1 += K0; + + fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0; + sll stridex,1,stridex2 ! stridex2 = stridex * 2; + st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1]; + fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0); + + lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px; + add %l7,stridey2,%i1 ! py += stridey2 + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! 
(1_0) xx0 = dtmp0 * tbl_div0; + + lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex); + add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0; + fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0; + faddd %f30,K1,%f62 ! (4_1) res0 += K1; + + lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px; + add %o5,stridex2,%l7 ! px += stridex2 + fmuld %f28,%f60,%f56 ! (3_1) res1 = tbl_sqrt1 * res1; + faddd %f48,K1,%f42 ! (5_1) res1 += K1; + + lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex); + cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000 + bge,pn %icc,.update16 ! (1_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0; +.cont16: + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; + cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000 + bl,pn %icc,.update17 ! (1_0) if ( ax1 < 0x00800000 ) + fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0); +.cont17: + fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0; + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000 + fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1; + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + fdtos %f56,%f21 ! (3_1) ((float*)&dres0)[0] = (float)res0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f40 ! (0_0) res0 += K2; + + ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1]; + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff; + faddd %f50,K2,%f60 ! (1_0) res0 += K2; + + ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %g5,55,%g5 ! 
(2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0); + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + add %i1,stridey2,%o3 ! py += stridey2 + fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0; + or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1; + st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0]; + faddd %f48,K0,%f62 ! (4_1) res0 += K0; + + fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1; + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + faddd %f58,K0,%f60 ! (5_1) res1 += K0; + + fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; + bge,pn %icc,.update18 ! (2_0) if ( ax0 >= 0x7f800000 ) + st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1]; + fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); +.cont18: + cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000 + bl,pn %icc,.update19 ! (2_0) if ( ax0 < 0x00800000 ) + lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; +.cont19: + lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); + cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000 + fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0; + faddd %f40,K1,%f46 ! (0_0) res0 += K1; + + lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; + add %l7,stridex2,%i1 ! px += stridex2 + fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1; + faddd %f48,K1,%f62 ! (1_0) res1 += K1; + + lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); + add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0; + bge,pn %icc,.update20 ! (3_0) if ( ax1 >= 0x7f800000 ) + fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0; +.cont20: + fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; + cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000 + bl,pn %icc,.update21 ! 
(3_0) if ( ax1 < 0x00800000 ) + fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); +.cont21: + fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0; + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + add %i1,stridex2,%o5 ! px += stridex2 + fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0; + + fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1; + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; + faddd %f52,K2,%f58 ! (2_0) res0 += K2; + + ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1]; + and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + faddd %f50,K2,%f60 ! (3_0) res1 += K2; + + ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1]; + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0); + sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; + add %o3,stridey2,%l7 ! py += stridey2 + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0; + or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; + st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0]; + faddd %f48,K0,%f22 ! (0_0) res0 += K0; + + fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1; + subcc counter,6,counter ! counter -= 6; + stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + faddd %f40,K0,%f26 ! (1_0) res1 += K0; + + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; + st %f1,[stridey+%o3] ! 
(3_1) *(py + stridey) = ((float*)&dres0)[1]; + bpos,pt %icc,.main_loop + fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0); + + add counter,6,counter +.tail: + sll stridex,1,stridex2 + subcc counter,1,counter + bneg,a .begin + mov %l7,%i2 + + fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0; + faddd %f62,K1,%f42 ! (2_1) res0 += K1; + + fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1; + + fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0; + fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0; + + fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1; + + ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1]; + + ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0); + + st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0]; + subcc counter,1,counter + bneg,a .begin + add %l7,stridey,%i2 + + faddd %f48,K0,%f62 ! (2_1) res0 += K0; + st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1]; + subcc counter,1,counter + bneg,a .begin + add %l7,stridey2,%i2 + fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0); + + add %l7,stridey2,%i1 ! py += stridey2 + + fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0; + + fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0; + + ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0); + add %i1,stridey2,%o3 ! py += stridey2 + + st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0]; + subcc counter,1,counter + bneg,a .begin + add %i1,stridey,%i2 + + st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1]; + subcc counter,1,counter + bneg,a .begin + mov %o3,%i2 + fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); + + st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0]; + ba .begin + add %o3,stridey,%i2 + + .align 16 +.spec0: + fdivs FONE,%f14,%f14 ! x0 = FONE / x0; + add %l7,stridex,%l7 ! px += stridex + st %f14,[%i2] ! *py = x0; + sub counter,1,counter + ba .begin1 + add %i2,stridey,%i2 ! py += stridey + + .align 16 +.spec1: + andcc %g1,%o0,%g0 + bz,a 1f + fdivs FONE,%f14,%f14 ! 
x0 = FONE / x0; + + cmp %g1,0 + bl,a 1f + fsqrts %f14,%f14 ! x0 = sqrtf(x0); + + fitod %f14,%f0 + fdtos %f0,%f14 + fmuls %f14,FTWO,%f14 + st %f14,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o0 + sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; + fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + ba .cont_spec + sub %g1,%o0,%g1 +1: + add %l7,stridex,%l7 ! px += stridex + sub counter,1,counter + st %f14,[%i2] ! *py = x0; + ba .begin1 + add %i2,stridey,%i2 ! py += stridey + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + sub %i1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + mov 1,counter + + .align 16 +.update1: + sethi %hi(0x7ffffc00),%o0 + cmp counter,1 + ble .cont1 + + add %o0,0x3ff,%o0 + + andcc %g5,%o0,%g0 + bz,a 1f + nop + + cmp %g5,0 + bl,a 1f + nop + + fitod %f15,%f0 + fdtos %f0,%f15 + fmuls %f15,FTWO,%f15 + st %f15,[%fp+tmp3] + ld [%fp+tmp3],%g5 + sethi %hi(0x4b000000),%o0 + sub %g5,%o0,%g5 + + fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + + sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; + + sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + + fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%l7,%l1 ! (5_0) iexp1 = 0x3f - iexp1; + + sll %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0); + fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; + + ba .cont1 + fmuld K3,%f46,%f50 !
(5_1) res1 = K3 * xx1; +1: + sub %i1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + mov 1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + sub %o5,stridex,%o1 + + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + mov 2,counter + + .align 16 +.update3: + sethi %hi(0x7ffffc00),%o1 + cmp counter,2 + ble .cont3 + + add %o1,0x3ff,%o1 + + andcc %g1,%o1,%g0 + bz,a 1f + sub %o5,stridex,%o1 + + cmp %g1,0 + bl,a 1f + sub %o5,stridex,%o1 + + fitod %f18,%f0 + fdtos %f0,%f18 + fmuls %f18,FTWO,%f18 + st %f18,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o1 + sub %g1,%o1,%g1 + + fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + ba .cont3 + fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); +1: + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + mov 2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + sub %l7,stridex2,%o1 + + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + mov 3,counter + + .align 16 +.update5: + sethi %hi(0x7ffffc00),%o1 + cmp counter,3 + ble .cont5 + + add %o1,0x3ff,%o1 + + andcc %i4,%o1,%g0 + bz,a 1f + sub %l7,stridex2,%o1 + + cmp %i4,0 + bl,a 1f + sub %l7,stridex2,%o1 + + fitod %f19,%f0 + fdtos %f0,%f19 + fmuls %f19,FTWO,%f19 + st %f19,[%fp+tmp3] + ld [%fp+tmp3],%i4 + sethi %hi(0x4b000000),%o1 + sub %i4,%o1,%i4 + + fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + + sra %i4,24,%i1 ! 
(1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%i1,%i0 ! (1_0) iexp1 = 0x3f - iexp1; + + sll %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0); + + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + ba .cont5 + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; +1: + sub %o1,stridex,%o1 + stx %o1,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + mov 3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + sub %l7,stridex,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + mov 4,counter + + .align 16 +.update7: + sethi %hi(0x7ffffc00),%o3 + cmp counter,4 + ble .cont7 + + add %o3,0x3ff,%o3 + + andcc %g1,%o3,%g0 + bz,a 1f + sub %l7,stridex,%o3 + + cmp %g1,0 + bl,a 1f + sub %l7,stridex,%o3 + + fitod %f24,%f0 + fdtos %f0,%f24 + fmuls %f24,FTWO,%f24 + st %f24,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o3 + sub %g1,%o3,%g1 + + fands %f24,DC0,%f0 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f24,%f0,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + + sll %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + ba .cont7 + fmuld %f56,%f30,%f30 ! 
(2_0) xx0 = dtmp0 * tbl_div0; +1: + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + mov 4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + nop + + sub %l7,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + mov 5,counter + + .align 16 +.update9: + sethi %hi(0x7ffffc00),%o3 + cmp counter,5 + ble .cont9 + sub %l7,stridex,%i3 + + add %o3,0x3ff,%o3 + + andcc %o5,%o3,%g0 + bz 1f + ld [%i3],%f0 + + cmp %o5,0 + bl,a 1f + nop + + fitod %f0,%f0 + fdtos %f0,%f0 + fmuls %f0,FTWO,%f0 + st %f0,[%fp+tmp3] + ld [%fp+tmp3],%o5 + sethi %hi(0x4b000000),%o3 + sub %o5,%o3,%o5 + + fands %f0,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + + sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32s %f0,%f8,%f0 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%o3,%i3 ! (3_0) iexp1 = 0x3f - iexp1; + + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + fitod %f0,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0); + + fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + ba .cont9 + fmuld K3,%f24,%f50 ! 
(3_0) res1 = K3 * xx1; +1: + stx %i3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + mov 5,counter + + .align 16 +.update10: + cmp counter,0 + ble .cont10 + sub %i1,stridex,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + + ba .cont10 + mov 0,counter + + .align 16 +.update11: + sethi %hi(0x7ffffc00),%i4 + cmp counter,0 + ble .cont11 + sub %i1,stridex,%o3 + + sub %o3,stridex,%o3 + add %i4,0x3ff,%i4 + ld [%o3],%i3 + + andcc %i3,%i4,%g0 + bz 1f + + cmp %i3,0 + bl,a 1f + nop + + fitod %f14,%f0 + fdtos %f0,%f14 + fmuls %f14,FTWO,%f14 + st %f14,[%fp+tmp3] + ld [%fp+tmp3],%i3 + sethi %hi(0x4b000000),%o3 + sub %i3,%o3,%i3 + + fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + sra %i3,13,%l5 ! (4_0) si0 = ax0 >> 13; + + and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; + + ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %i3,24,%i3 ! (4_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; + fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); + + sllx %o0,23,%o0 ! (4_0) lexp0 = iexp0 << 55; + + st %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); + + ba .cont11 + fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; +1: + stx %o3,[%fp+tmp_px] + + st counter,[%fp+tmp_counter] + + ba .cont11 + mov 0,counter + + .align 16 +.update12: + cmp counter,1 + ble .cont12 + nop + + sub %i1,stridex,%i1 + stx %i1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + mov 1,counter + + .align 16 +.update13: + sethi %hi(0x7ffffc00),%o3 + cmp counter,1 + ble .cont13 + + add %o3,0x3ff,%o3 + + andcc %g5,%o3,%g0 + bz 1f + + cmp %g5,0 + bl,a 1f + nop + + fitod %f15,%f0 + fdtos %f0,%f15 + fmuls %f15,FTWO,%f15 + st %f15,[%fp+tmp3] + ld [%fp+tmp3],%g5 + sethi %hi(0x4b000000),%o3 + sub %g5,%o3,%g5 + + fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0); + + sra %g5,13,%l6 ! 
(5_0) si1 = ax1 >> 13; + sra %g5,24,%o3 ! (5_0) iexp1 = ax1 >> 24; + and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; + fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%o3,%l1 ! (5_0) iexp1 = 0x3f - iexp1; + + add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; + + sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; + st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0); + + fitod %f17,%f0 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); + + fmuld %f0,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; + ba .cont13 + fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; +1: + sub %i1,stridex,%i1 + stx %i1,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + mov 1,counter + + .align 16 +.update14: + cmp counter,2 + ble .cont14 + sub %o5,stridex,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + mov 2,counter + + .align 16 +.update15: + sethi %hi(0x7ffffc00),%i3 + cmp counter,2 + ble .cont15 + sub %o5,stridex,%o3 + + add %i3,0x3ff,%i3 + + andcc %g1,%i3,%g0 + bz 1f + sub %o3,stridex,%o3 + + cmp %g1,0 + bl,a 1f + nop + + fitod %f18,%f0 + fdtos %f0,%f18 + fmuls %f18,FTWO,%f18 + st %f18,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%o3 + sub %g1,%o3,%g1 + + fands %f18,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; + and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; + + ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f18,%f0,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; + + ba .cont15 + fitod %f30,%f56 ! 
(0_0) dtmp0 = (double)(((int*)dfx0)[0]); +1: + stx %o3,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + mov 2,counter + + .align 16 +.update16: + cmp counter,3 + ble .cont16 + sub %l7,stridex2,%o3 + + sub %o3,stridex,%o3 + stx %o3,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + mov 3,counter + + .align 16 +.update17: + sethi %hi(0x7ffffc00),%i3 + cmp counter,3 + ble .cont17 + sub %l7,stridex2,%o3 + + add %i3,0x3ff,%i3 + + andcc %i4,%i3,%g0 + bz 1f + sub %o3,stridex,%o3 + + cmp %i4,0 + bl,a 1f + nop + + fitod %f19,%f0 + fdtos %f0,%f19 + fmuls %f19,FTWO,%f19 + st %f19,[%fp+tmp3] + ld [%fp+tmp3],%i4 + sethi %hi(0x4b000000),%o3 + sub %i4,%o3,%i4 + + fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); + + sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; + + sra %i4,24,%i0 ! (1_0) iexp1 = ax1 >> 24; + and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; + fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%i0,%i0 ! (1_0) iexp1 = 0x3f - iexp1; + + sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; + fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0); + + add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; + fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; + + ba .cont17 + fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; +1: + stx %o3,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + mov 3,counter + + .align 16 +.update18: + cmp counter,4 + ble .cont18 + fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); + + sub %l7,stridex2,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + mov 4,counter + + .align 16 +.update19: + sethi %hi(0x7ffffc00),%i3 + cmp counter,4 + ble,a .cont19 + fmuld %f50,%f46,%f24 ! 
(3_0) xx1 = dtmp1 * tbl_div1; + + add %i3,0x3ff,%i3 + + andcc %g1,%i3,%g0 + bz 1f + nop + + cmp %g1,0 + bl,a 1f + nop + + fitod %f24,%f24 + fdtos %f24,%f24 + fmuls %f24,FTWO,%f24 + st %f24,[%fp+tmp3] + ld [%fp+tmp3],%g1 + sethi %hi(0x4b000000),%i3 + sub %g1,%i3,%g1 + + fands %f24,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; + + and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; + + ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; + fpsub32s %f24,%f8,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; + + sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; + + sllx %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55; + add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; + fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); + + st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); + fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; + + ba .cont19 + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; +1: + sub %l7,stridex2,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + mov 4,counter + ba .cont19 + fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + .align 16 +.update20: + cmp counter,5 + ble .cont20 + nop + + sub %l7,stridex,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + mov 5,counter + + .align 16 +.update21: + sethi %hi(0x7ffffc00),%i3 + cmp counter,5 + ble,a .cont21 + nop + + sub %l7,stridex,%i4 + add %i3,0x3ff,%i3 + + andcc %o5,%i3,%g0 + bz 1f + ld [%i4],%f8 + + cmp %o5,0 + bl,a 1f + nop + + fitod %f8,%f8 + fdtos %f8,%f8 + fmuls %f8,FTWO,%f8 + st %f8,[%fp+tmp3] + ld [%fp+tmp3],%o5 + sethi %hi(0x4b000000),%i3 + sub %o5,%i3,%o5 + + fands %f8,DC0,%f24 ! (2_0) dfx0 = vis_fand(ddx0,DC0); + + sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; + + sra %o5,24,%i3 ! (3_0) iexp1 = ax1 >> 24; + and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; + fpsub32s %f8,%f24,%f24 ! 
(2_0) dfx0 = vis_fpsub32(ddx0,dfx0); + + ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; + sub %l0,%i3,%i3 ! (3_0) iexp1 = 0x3f - iexp1; + + sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; + fitod %f24,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); + + add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; + st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0); + + fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; + + ba .cont21 + fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; +1: + sub %l7,stridex,%i3 + stx %i3,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont21 + mov 5,counter + + .align 16 +.exit: + ret + restore + + SET_SIZE(__vrsqrtf) + diff --git a/usr/src/libm/src/mvec/vis/__vsin.S b/usr/src/libm/src/mvec/vis/__vsin.S new file mode 100644 index 0000000..3f93d4c --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsin.S @@ -0,0 +1,3002 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vsin.S 1.9 06/01/23 SMI" + + .file "__vsin.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0x3ec718e3,0xa6972785 + .word 0x3ef9fd39,0x94293940 + .word 0xbf2a019f,0x75ee4be1 + .word 0xbf56c16b,0xba552569 + .word 0x3f811111,0x1108c703 + .word 0x3fa55555,0x554f5b35 + .word 0xbfc55555,0x555554d0 + .word 0xbfdfffff,0xffffff85 + .word 0x3ff00000,0x00000000 + .word 0xbfc55555,0x5551fc28 + .word 0x3f811107,0x62eacc9d + .word 0xbfdfffff,0xffff6328 + .word 0x3fa55551,0x5f7acf0c + .word 0x3fe45f30,0x6dc9c883 + .word 0x43380000,0x00000000 + .word 0x3ff921fb,0x54400000 + .word 0x3dd0b461,0x1a600000 + .word 0x3ba3198a,0x2e000000 + .word 0x397b839a,0x252049c1 + .word 0x80000000,0x00004000 + .word 0xffff8000,0x00000000 ! N.B.: low-order words used + .word 0x3fc90000,0x80000000 ! for sign bit hacking; see + .word 0x3fc40000,0x00000000 ! references to "thresh" below + +#define p4 0x0 +#define q4 0x08 +#define p3 0x10 +#define q3 0x18 +#define p2 0x20 +#define q2 0x28 +#define p1 0x30 +#define q1 0x38 +#define one 0x40 +#define pp1 0x48 +#define pp2 0x50 +#define qq1 0x58 +#define qq2 0x60 +#define invpio2 0x68 +#define round 0x70 +#define pio2_1 0x78 +#define pio2_2 0x80 +#define pio2_3 0x88 +#define pio2_3t 0x90 +#define f30val 0x98 +#define mask 0xa0 +#define thresh 0xa8 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define n2 STACK_BIAS-0x24 +#define n1 STACK_BIAS-0x28 +#define n0 STACK_BIAS-0x2c +#define x2_1 STACK_BIAS-0x40 +#define x1_1 STACK_BIAS-0x50 +#define x0_1 STACK_BIAS-0x60 +#define y2_0 STACK_BIAS-0x70 +#define y1_0 STACK_BIAS-0x80 +#define y0_0 STACK_BIAS-0x90 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x90 + +!-------------------------------------------------------------- +! 
Some defines to keep code more readable +#define LIM_l6 %l6 +! in primary range, contains |x| upper limit when cos(x)=1. +! in transferring to medium range, denotes what loop was active. +!-------------------------------------------------------------- + + ENTRY(__vsin) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(g5) + PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) + PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) + PIC_SET(g5,constants,l5) + mov %l5,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads + +! ========== primary range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 0x3fc90000 +! l6 0x3e400000 +! l7 0x3fe921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 oy0 +! o4 oy1 +! o5 oy2 +! o7 scratch + +! f0 x0 +! f2 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 +! f42 +! f44 0xffff800000000000 +! f46 p1 +! f48 p2 +! f50 p3 +! f52 p4 +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + sethi %hi(0x80000000),%i5 ! load/set up constants + sethi %hi(0x3fc90000),%l5 + sethi %hi(0x3e400000),LIM_l6 + sethi %hi(0x3fe921fb),%l7 + or %l7,%lo(0x3fe921fb),%l7 + ldd [%g1+f30val],%f30 + ldd [%g1+mask],%f44 + ldd [%g1+p1],%f46 + ldd [%g1+p2],%f48 + ldd [%g1+p3],%f50 + ldd [%g1+p4],%f52 + ldd [%g1+one],%f54 + ldd [%g1+pp1],%f56 + ldd [%g1+pp2],%f58 + ldd [%g1+qq1],%f60 + ldd [%g1+qq2],%f62 + sll %i2,3,%i2 ! 
scale strides + sll %i4,3,%i4 + add %fp,x0_1,%o3 ! precondition loop + add %fp,x0_1,%o4 + add %fp,x0_1,%o5 + ld [%i1],%l0 ! hx = *x + ld [%i1],%f0 + ld [%i1+4],%f1 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + add %i1,%i2,%i1 ! x += stridex + + ba,pt %icc,.loop0 +! delay slot + nop + + .align 32 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,LIM_l6,%g1 + sub %l7,%l0,%o7 + fands %f0,%f30,%f9 ! save signbit + + lda [%i1]%asi,%f10 + orcc %o7,%g1,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + fabsd %f0,%f0 + fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,LIM_l6,%g1 + sub %l7,%l1,%o7 + fands %f10,%f30,%f19 ! save signbit + + lda [%i1]%asi,%f20 + orcc %o7,%g1,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.endloop2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + fabsd %f10,%f10 + fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only + +.loop2: + st %f6,[%o3] + sub %l2,LIM_l6,%g1 + sub %l7,%l2,%o7 + fands %f20,%f30,%f29 ! save signbit + + st %f7,[%o3+4] + orcc %g1,%o7,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb + +! delay slot + add %i3,%i4,%i3 ! y += stridey + cmp %l0,%l5 + fabsd %f20,%f20 + bl,pn %icc,.case4 + +! delay slot + st %f16,[%o4] + cmp %l1,%l5 + fpadd32s %f0,%f31,%f8 + bl,pn %icc,.case2 + +! delay slot + st %f17,[%o4+4] + cmp %l2,%l5 + fpadd32s %f10,%f31,%f18 + bl,pn %icc,.case1 + +! 
delay slot + st %f26,[%o5] + mov %o0,%o3 + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f20,%f31,%f28 + + st %f27,[%o5+4] + fand %f8,%f44,%f2 + mov %o1,%o4 + + fand %f18,%f44,%f12 + mov %o2,%o5 + sub %l0,%o7,%l0 + + fand %f28,%f44,%f22 + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + add %l3,8,%g1 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%f0 + + fmuld %f24,%f40,%f24 + lda [%i1+4]%asi,%f1 + + fmuld %f6,%f34,%f6 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f16,%f38,%f16 + + fmuld %f26,%f42,%f26 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f6,%f2,%f6 + + faddd %f16,%f12,%f16 + + faddd %f26,%f22,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f36,%f16 + + faddd %f26,%f40,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f6,%f9,%f6 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case1: + st %f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f8,%f44,%f2 + + sub %l0,%o7,%l0 + sub %l1,%o7,%l1 + fand %f18,%f44,%f12 + fmuld %f20,%f20,%f22 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmuld %f22,%f52,%f24 + mov %o2,%o5 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + faddd %f24,%f50,%f24 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f24,%f48,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f24,%f46,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%f0 + + fmuld %f6,%f34,%f6 + lda [%i1+4]%asi,%f1 + + fmuld %f16,%f38,%f16 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f22,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f2,%f6 + + faddd %f16,%f12,%f16 + + faddd %f20,%f24,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f36,%f16 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f26,%f29,%f26 + addcc %i0,-1,%i0 + + fors %f6,%f9,%f6 + bg,pt %icc,.loop0 + +! delay slot + fors %f16,%f19,%f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case2: + st %f26,[%o5] + cmp %l2,%l5 + fpadd32s %f20,%f31,%f28 + bl,pn %icc,.case3 + +! 
delay slot + st %f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f8,%f44,%f2 + + sub %l0,%o7,%l0 + sub %l2,%o7,%l2 + fand %f28,%f44,%f22 + fmuld %f10,%f10,%f12 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmuld %f12,%f52,%f14 + mov %o1,%o4 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + faddd %f14,%f50,%f14 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + fmuld %f12,%f14,%f14 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + faddd %f14,%f48,%f14 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f12,%f14,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + faddd %f14,%f46,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f24,%f40,%f24 + lda [%i1]%asi,%f0 + + fmuld %f6,%f34,%f6 + lda [%i1+4]%asi,%f1 + + fmuld %f26,%f42,%f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f12,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f24,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f2,%f6 + + faddd %f26,%f22,%f26 + + faddd %f10,%f14,%f16 + + faddd %f6,%f32,%f6 + + faddd %f26,%f40,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f16,%f19,%f16 + addcc %i0,-1,%i0 + + fors %f6,%f9,%f6 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case3: + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f8,%f44,%f2 + fmuld %f10,%f10,%f12 + + sub %l0,%o7,%l0 + fmuld %f20,%f20,%f22 + + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + mov %o0,%o3 + + fmuld %f12,%f52,%f14 + mov %o1,%o4 + + fmuld %f22,%f52,%f24 + mov %o2,%o5 + + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + + faddd %f14,%f50,%f14 + + faddd %f24,%f50,%f24 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + + faddd %f14,%f48,%f14 + + faddd %f24,%f48,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f14,%f46,%f14 + + faddd %f24,%f46,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f2 + + fmuld %f4,%f32,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f12,%f14,%f14 + lda [%i1]%asi,%f0 + + fmuld %f6,%f34,%f6 + lda [%i1+4]%asi,%f1 + + fmuld %f22,%f24,%f24 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + fmuld %f20,%f24,%f24 + + faddd %f10,%f14,%f16 + + faddd %f6,%f2,%f6 + + faddd %f20,%f24,%f26 + + fors %f16,%f19,%f16 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + faddd %f6,%f32,%f6 + addcc %i0,-1,%i0 + + fors %f26,%f29,%f26 + bg,pt %icc,.loop0 + +! delay slot + fors %f6,%f9,%f6 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case4: + st %f17,[%o4+4] + cmp %l1,%l5 + fpadd32s %f10,%f31,%f18 + bl,pn %icc,.case6 + +! delay slot + st %f26,[%o5] + cmp %l2,%l5 + fpadd32s %f20,%f31,%f28 + bl,pn %icc,.case5 + +! 
delay slot + st %f27,[%o5+4] + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f18,%f44,%f12 + + sub %l1,%o7,%l1 + sub %l2,%o7,%l2 + fand %f28,%f44,%f22 + fmuld %f0,%f0,%f2 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd %f0,%f6 + fmuld %f2,%f52,%f4 + mov %o0,%o3 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + faddd %f4,%f50,%f4 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + fmuld %f2,%f4,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + faddd %f4,%f48,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + faddd %f4,%f46,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f24,%f40,%f24 + lda [%i1]%asi,%f0 + + fmuld %f16,%f38,%f16 + lda [%i1+4]%asi,%f1 + + fmuld %f26,%f42,%f26 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f2,%f4,%f4 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + fmuld %f6,%f4,%f4 + + faddd %f16,%f12,%f16 + + faddd %f26,%f22,%f26 + + faddd %f6,%f4,%f6 + + faddd %f16,%f36,%f16 + + faddd %f26,%f40,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f6,%f9,%f6 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! 
delay slot + nop + + .align 32 +.case5: + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f18,%f44,%f12 + fmuld %f0,%f0,%f2 + + sub %l1,%o7,%l1 + fmuld %f20,%f20,%f22 + + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + mov %o1,%o4 + + fmovd %f0,%f6 + fmuld %f2,%f52,%f4 + mov %o0,%o3 + + fmuld %f22,%f52,%f24 + mov %o2,%o5 + + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + + faddd %f4,%f50,%f4 + + faddd %f24,%f50,%f24 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f36 + + fmuld %f2,%f4,%f4 + + fmuld %f22,%f24,%f24 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + + faddd %f4,%f48,%f4 + + faddd %f24,%f48,%f24 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f2,%f4,%f4 + + fmuld %f22,%f24,%f24 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f4,%f46,%f4 + + faddd %f24,%f46,%f24 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f12 + + fmuld %f14,%f36,%f14 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f2,%f4,%f4 + lda [%i1]%asi,%f0 + + fmuld %f16,%f38,%f16 + lda [%i1+4]%asi,%f1 + + fmuld %f22,%f24,%f24 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f6,%f4,%f4 + + faddd %f16,%f14,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f12,%f16 + + faddd %f20,%f24,%f26 + + fors %f6,%f9,%f6 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + faddd %f16,%f36,%f16 + addcc %i0,-1,%i0 + + fors %f26,%f29,%f26 + bg,pt %icc,.loop0 + +! delay slot + fors %f16,%f19,%f16 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case6: + st %f27,[%o5+4] + cmp %l2,%l5 + fpadd32s %f20,%f31,%f28 + bl,pn %icc,.case7 + +! 
delay slot + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fand %f28,%f44,%f22 + fmuld %f0,%f0,%f2 + + sub %l2,%o7,%l2 + fmuld %f10,%f10,%f12 + + fsubd %f20,%f22,%f20 + srl %l2,10,%l2 + mov %o2,%o5 + + fmovd %f0,%f6 + fmuld %f2,%f52,%f4 + mov %o0,%o3 + + fmuld %f12,%f52,%f14 + mov %o1,%o4 + + fmuld %f20,%f20,%f22 + andn %l2,0x1f,%l2 + + faddd %f4,%f50,%f4 + + faddd %f14,%f50,%f14 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f40 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + ldd [%g1+%l2],%f42 + + faddd %f4,%f48,%f4 + + faddd %f14,%f48,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + faddd %f4,%f46,%f4 + + faddd %f14,%f46,%f14 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f22 + + fmuld %f24,%f40,%f24 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f2,%f4,%f4 + lda [%i1]%asi,%f0 + + fmuld %f26,%f42,%f26 + lda [%i1+4]%asi,%f1 + + fmuld %f12,%f14,%f14 + add %i1,%i2,%i1 ! x += stridex + + fmuld %f6,%f4,%f4 + + faddd %f26,%f24,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f22,%f26 + + faddd %f10,%f14,%f16 + + fors %f6,%f9,%f6 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + faddd %f26,%f40,%f26 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + .align 32 +.case7: + fmuld %f0,%f0,%f2 + fmovd %f0,%f6 + mov %o0,%o3 + + fmuld %f10,%f10,%f12 + mov %o1,%o4 + + fmuld %f20,%f20,%f22 + mov %o2,%o5 + + fmuld %f2,%f52,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fmuld %f12,%f52,%f14 + lda [%i1]%asi,%f0 + + fmuld %f22,%f52,%f24 + lda [%i1+4]%asi,%f1 + + faddd %f4,%f50,%f4 + add %i1,%i2,%i1 ! 
x += stridex + + faddd %f14,%f50,%f14 + + faddd %f24,%f50,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f4,%f48,%f4 + + faddd %f14,%f48,%f14 + + faddd %f24,%f48,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + faddd %f4,%f46,%f4 + + faddd %f14,%f46,%f14 + + faddd %f24,%f46,%f24 + + fmuld %f2,%f4,%f4 + + fmuld %f12,%f14,%f14 + + fmuld %f22,%f24,%f24 + + fmuld %f6,%f4,%f4 + + fmuld %f10,%f14,%f14 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f10,%f14,%f16 + + faddd %f20,%f24,%f26 + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + + fors %f6,%f9,%f6 + addcc %i0,-1,%i0 + + fors %f16,%f19,%f16 + bg,pt %icc,.loop0 + +! delay slot + fors %f26,%f29,%f26 + + ba,pt %icc,.endloop0 +! delay slot + nop + + + .align 32 +.endloop2: + cmp %l1,%l5 + bl,pn %icc,1f +! delay slot + fabsd %f10,%f10 + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f10,%f31,%f18 + add %l3,8,%g1 + fand %f18,%f44,%f12 + sub %l1,%o7,%l1 + fsubd %f10,%f12,%f10 + srl %l1,10,%l1 + fmuld %f10,%f10,%f12 + andn %l1,0x1f,%l1 + fmuld %f12,%f58,%f20 + ldd [%l3+%l1],%f36 + faddd %f20,%f56,%f20 + fmuld %f12,%f62,%f14 + ldd [%g1+%l1],%f38 + fmuld %f12,%f20,%f20 + faddd %f14,%f60,%f14 + faddd %f20,%f54,%f20 + fmuld %f12,%f14,%f14 + fmuld %f10,%f20,%f20 + ldd [%l4+%l1],%f12 + fmuld %f14,%f36,%f14 + fmuld %f20,%f38,%f20 + faddd %f20,%f14,%f20 + faddd %f20,%f12,%f20 + ba,pt %icc,2f +! delay slot + faddd %f20,%f36,%f20 +1: + fmuld %f10,%f10,%f12 + fmuld %f12,%f52,%f14 + faddd %f14,%f50,%f14 + fmuld %f12,%f14,%f14 + faddd %f14,%f48,%f14 + fmuld %f12,%f14,%f14 + faddd %f14,%f46,%f14 + fmuld %f12,%f14,%f14 + fmuld %f10,%f14,%f14 + faddd %f10,%f14,%f20 +2: + fors %f20,%f19,%f20 + st %f20,[%o1] + st %f21,[%o1+4] + +.endloop1: + cmp %l0,%l5 + bl,pn %icc,1f +! 
delay slot + fabsd %f0,%f0 + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f0,%f31,%f8 + add %l3,8,%g1 + fand %f8,%f44,%f2 + sub %l0,%o7,%l0 + fsubd %f0,%f2,%f0 + srl %l0,10,%l0 + fmuld %f0,%f0,%f2 + andn %l0,0x1f,%l0 + fmuld %f2,%f58,%f20 + ldd [%l3+%l0],%f32 + faddd %f20,%f56,%f20 + fmuld %f2,%f62,%f4 + ldd [%g1+%l0],%f34 + fmuld %f2,%f20,%f20 + faddd %f4,%f60,%f4 + faddd %f20,%f54,%f20 + fmuld %f2,%f4,%f4 + fmuld %f0,%f20,%f20 + ldd [%l4+%l0],%f2 + fmuld %f4,%f32,%f4 + fmuld %f20,%f34,%f20 + faddd %f20,%f4,%f20 + faddd %f20,%f2,%f20 + ba,pt %icc,2f +! delay slot + faddd %f20,%f32,%f20 +1: + fmuld %f0,%f0,%f2 + fmuld %f2,%f52,%f4 + faddd %f4,%f50,%f4 + fmuld %f2,%f4,%f4 + faddd %f4,%f48,%f4 + fmuld %f2,%f4,%f4 + faddd %f4,%f46,%f4 + fmuld %f2,%f4,%f4 + fmuld %f0,%f4,%f4 + faddd %f0,%f4,%f20 +2: + fors %f20,%f9,%f20 + st %f20,[%o0] + st %f21,[%o0+4] + +.endloop0: + st %f6,[%o3] + st %f7,[%o3+4] + st %f16,[%o4] + st %f17,[%o4+4] + st %f26,[%o5] + st %f27,[%o5+4] + +! return. finished off with only primary range arguments. + + ret + restore + + + .align 32 +.range0: + cmp %l0,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch if x is not tiny +! delay slot, annulled if branch not taken + mov 0x1,LIM_l6 ! set "processing loop0" + st %f0,[%o0] ! *y = *x with inexact if x nonzero + st %f1,[%o0+4] + fdtoi %f0,%f2 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovd %f10,%f0 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range1: + cmp %l1,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch if x is not tiny +! delay slot, annulled if branch not taken + mov 0x2,LIM_l6 ! set "processing loop1" + st %f10,[%o1] ! *y = *x with inexact if x nonzero + st %f11,[%o1+4] + fdtoi %f10,%f12 + addcc %i0,-1,%i0 + ble,pn %icc,.endloop1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! 
hx &= ~0x80000000 + fmovd %f20,%f10 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.range2: + cmp %l2,LIM_l6 + bg,a,pt %icc,.MEDIUM ! branch if x is not tiny +! delay slot, annulled if branch not taken + mov 0x3,LIM_l6 ! set "processing loop2" + st %f20,[%o2] ! *y = *x with inexact if x nonzero + st %f21,[%o2+4] + fdtoi %f20,%f22 +1: + addcc %i0,-1,%i0 + ble,pn %icc,.endloop2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.MEDIUM: + +! ========== medium range ========== + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 __vlibm_TBL_sincos_hi +! l4 __vlibm_TBL_sincos_lo +! l5 constants +! l6 in transition from pri-range and here, use for biguns +! l7 0x413921fb + +! the following are 64-bit registers in both V8+ and V9 + +! g1 scratch +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 n0 +! o4 n1 +! o5 n2 +! o7 scratch + +! f0 x0 +! f2 n0,y0 +! f4 +! f6 +! f8 scratch for table base +! f9 signbit0 +! f10 x1 +! f12 n1,y1 +! f14 +! f16 +! f18 scratch for table base +! f19 signbit1 +! f20 x2 +! f22 n2,y2 +! f24 +! f26 +! f28 scratch for table base +! f29 signbit2 +! f30 0x80000000 +! f31 0x4000 +! f32 +! f34 +! f36 +! f38 +! f40 invpio2 +! f42 round +! f44 0xffff800000000000 +! f46 pio2_1 +! f48 pio2_2 +! f50 pio2_3 +! f52 pio2_3t +! f54 one +! f56 pp1 +! f58 pp2 +! f60 qq1 +! f62 qq2 + + PIC_SET(g5,constants,l5) + + ! %o3,%o4,%o5 need to be stored + st %f6,[%o3] + sethi %hi(0x413921fb),%l7 + st %f7,[%o3+4] + or %l7,%lo(0x413921fb),%l7 + st %f16,[%o4] + st %f17,[%o4+4] + st %f26,[%o5] + st %f27,[%o5+4] + ldd [%l5+invpio2],%f40 + ldd [%l5+round],%f42 + ldd [%l5+pio2_1],%f46 + ldd [%l5+pio2_2],%f48 + ldd [%l5+pio2_3],%f50 + ldd [%l5+pio2_3t],%f52 + std %f54,[%fp+x0_1+8] ! 
set up stack data + std %f54,[%fp+x1_1+8] + std %f54,[%fp+x2_1+8] + stx %g0,[%fp+y0_0+8] + stx %g0,[%fp+y1_0+8] + stx %g0,[%fp+y2_0+8] + +! branched here in the middle of the array. Need to adjust +! for the members of the triple that were selected in the primary +! loop. + +! no adjustment since all three selected here + subcc LIM_l6,0x1,%g0 ! continue in LOOP0? + bz,a %icc,.LOOP0 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! adjust 1st triple since 2nd and 3rd done here + subcc LIM_l6,0x2,%g0 ! continue in LOOP1? + fors %f0,%f9,%f0 ! restore sign bit + fmuld %f0,%f40,%f2 ! adj LOOP0 + bz,a %icc,.LOOP1 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + +! adjust 1st and 2nd triple since 3rd done here + subcc LIM_l6,0x3,%g0 ! continue in LOOP2? + !done fmuld %f0,%f40,%f2 ! adj LOOP0 + sub %i3,%i4,%i3 ! adjust to not double increment + fors %f10,%f19,%f10 ! restore sign bit + fmuld %f10,%f40,%f12 ! adj LOOP1 + faddd %f2,%f42,%f2 ! adj LOOP1 + bz,a %icc,.LOOP2 + mov 0x0,LIM_l6 ! delay slot set biguns=0 + + .align 32 +.LOOP0: + lda [%i1]%asi,%l1 ! preload next argument + mov %i3,%o0 ! py0 = y + lda [%i1]%asi,%f10 + cmp %l0,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG0 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP1 + +! delay slot + andn %l1,%i5,%l1 + nop + fmuld %f0,%f40,%f2 + fabsd %f54,%f54 ! a nop for alignment only + +.LOOP1: + lda [%i1]%asi,%l2 ! preload next argument + mov %i3,%o1 ! py1 = y + + lda [%i1]%asi,%f20 + cmp %l1,%l7 + add %i3,%i4,%i3 ! y += stridey + bg,pn %icc,.BIG1 ! if hx > 0x413921fb + +! delay slot + lda [%i1+4]%asi,%f21 + addcc %i0,-1,%i0 + add %i1,%i2,%i1 ! x += stridex + ble,pn %icc,.ENDLOOP2 + +! delay slot + andn %l2,%i5,%l2 + nop + fmuld %f10,%f40,%f12 + faddd %f2,%f42,%f2 + +.LOOP2: + st %f3,[%fp+n0] + mov %i3,%o2 ! py2 = y + + cmp %l2,%l7 + add %i3,%i4,%i3 ! y += stridey + fmuld %f20,%f40,%f22 + bg,pn %icc,.BIG2 ! if hx > 0x413921fb + +!
delay slot + add %l5,thresh+4,%o7 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + +! - + + add %l5,thresh,%g1 + faddd %f22,%f42,%f22 + st %f23,[%fp+n2] + + fsubd %f2,%f42,%f2 ! n + + fsubd %f12,%f42,%f12 ! n + + fsubd %f22,%f42,%f22 ! n + + fmuld %f2,%f46,%f4 + + fmuld %f12,%f46,%f14 + + fmuld %f22,%f46,%f24 + + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + + fsubd %f20,%f24,%f24 + fmuld %f22,%f48,%f26 + + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 + + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 + + fsubd %f24,%f26,%f20 + ld [%fp+n2],%o5 + + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + + fsubd %f24,%f20,%f36 + and %o5,1,%o5 + + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + + fsubd %f36,%f26,%f36 + fmuld %f22,%f50,%f28 + sll %o5,3,%o5 + + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + + fsubd %f28,%f36,%f28 + ld [%g1+%o5],%f26 + + fsubd %f0,%f8,%f4 + + fsubd %f10,%f18,%f14 + + fsubd %f20,%f28,%f24 + + fsubd %f0,%f4,%f32 + + fsubd %f10,%f14,%f34 + + fsubd %f20,%f24,%f36 + + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + + fsubd %f36,%f28,%f36 + fmuld %f22,%f52,%f22 + + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + + fsubd %f22,%f36,%f22 + ld [%o7+%o5],%f28 + + fsubd %f4,%f2,%f0 ! x + + fsubd %f14,%f12,%f10 ! x + + fsubd %f24,%f22,%f20 ! x + + fsubd %f4,%f0,%f4 + + fsubd %f14,%f10,%f14 + + fsubd %f24,%f20,%f24 + + fands %f0,%f30,%f9 ! save signbit + + fands %f10,%f30,%f19 ! save signbit + + fands %f20,%f30,%f29 ! save signbit + + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + + fabsd %f20,%f20 + std %f20,[%fp+x2_1] + + fsubd %f4,%f2,%f2 ! y + + fsubd %f14,%f12,%f12 ! y + + fsubd %f24,%f22,%f22 ! y + + fcmpgt32 %f6,%f0,%l0 + + fcmpgt32 %f16,%f10,%l1 + + fcmpgt32 %f26,%f20,%l2 + +! 
-- 16 byte aligned + fxors %f2,%f9,%f2 + + fxors %f12,%f19,%f12 + + fxors %f22,%f29,%f22 + + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,.CASE4 + +! delay slot + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,.CASE2 + +! delay slot + fands %f29,%f28,%f29 ! if (n & 1) clear sign bit + andcc %l2,2,%g0 + bne,pn %icc,.CASE1 + +! delay slot + fpadd32s %f0,%f31,%f8 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fpadd32s %f10,%f31,%f18 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + + fmuld %f0,%f6,%f6 + ldd [%g1+%l0],%f2 + + fmuld %f10,%f16,%f16 + ldd [%g1+%l1],%f12 + + fmuld %f20,%f26,%f26 + ldd [%g1+%l2],%f22 + + fmuld %f4,%f32,%f4 + ldd [%l4+%l0],%f0 + + fmuld %f14,%f34,%f14 + ldd [%l4+%l1],%f10 + + fmuld %f24,%f36,%f24 + ldd [%l4+%l2],%f20 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + faddd 
%f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 + + faddd %f26,%f36,%f26 + +.FIXSIGN: + ld [%fp+n0],%o3 + add %l5,thresh-4,%g1 + + ld [%fp+n1],%o4 + + ld [%fp+n2],%o5 + and %o3,2,%o3 + + sll %o3,2,%o3 + and %o4,2,%o4 + lda [%i1]%asi,%l0 ! preload next argument + + sll %o4,2,%o4 + and %o5,2,%o5 + ld [%g1+%o3],%f8 + + sll %o5,2,%o5 + ld [%g1+%o4],%f18 + + ld [%g1+%o5],%f28 + fxors %f9,%f8,%f9 + + lda [%i1]%asi,%f0 + fxors %f29,%f28,%f29 + + lda [%i1+4]%asi,%f1 + fxors %f19,%f18,%f19 + + fors %f6,%f9,%f6 ! tack on sign + add %i1,%i2,%i1 ! x += stridex + st %f6,[%o0] + + fors %f26,%f29,%f26 ! tack on sign + st %f7,[%o0+4] + + fors %f16,%f19,%f16 ! tack on sign + st %f26,[%o2] + + st %f27,[%o2+4] + addcc %i0,-1,%i0 + + st %f16,[%o1] + andn %l0,%i5,%l0 ! hx &= ~0x80000000 + bg,pt %icc,.LOOP0 + +! delay slot + st %f17,[%o1+4] + + ba,pt %icc,.ENDLOOP0 +! delay slot + nop + + .align 32 +.CASE1: + fpadd32s %f10,%f31,%f18 + sethi %hi(0x3fc3c000),%o7 + ld [%fp+x0_1],%l0 + + fand %f8,%f44,%f4 + add %l3,8,%g1 + ld [%fp+x1_1],%l1 + + fand %f18,%f44,%f14 + sub %l0,%o7,%l0 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + sub %l1,%o7,%l1 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f12,%f16,%f16 + 
faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f4,%f32,%f4 + std %f22,[%fp+y2_0] + + fmuld %f14,%f34,%f14 + + fmuld %f6,%f2,%f6 + + fmuld %f16,%f12,%f16 + + fmuld %f20,%f24,%f24 + + faddd %f6,%f4,%f6 + + faddd %f16,%f14,%f16 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f0,%f6 + + faddd %f16,%f10,%f16 + + faddd %f24,%f22,%f24 + + faddd %f6,%f32,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + .align 32 +.CASE2: + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + andcc %l2,2,%g0 + bne,pn %icc,.CASE3 + +! delay slot + sethi %hi(0x3fc3c000),%o7 + fpadd32s %f20,%f31,%f28 + ld [%fp+x2_1],%l2 + + fand %f8,%f44,%f4 + sub %l0,%o7,%l0 + add %l3,8,%g1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f6,%f6 
+ ldd [%l4+%l0],%f0 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f4,%f32,%f4 + std %f12,[%fp+y1_0] + + fmuld %f24,%f36,%f24 + + fmuld %f6,%f2,%f6 + + fmuld %f26,%f22,%f26 + + fmuld %f10,%f14,%f14 + + faddd %f6,%f4,%f6 + + faddd %f26,%f24,%f26 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f6,%f0,%f6 + + faddd %f26,%f20,%f26 + + faddd %f14,%f12,%f14 + + faddd %f6,%f32,%f6 + + faddd %f26,%f36,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f34,%f14,%f16 + + .align 32 +.CASE3: + fand %f8,%f44,%f4 + add %l3,8,%g1 + sub %l0,%o7,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fsubd %f0,%f4,%f0 + srl %l0,10,%l0 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f14,%f16,%f14 + + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + + faddd %f24,%f26,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + + fmuld %f4,%f32,%f4 + + fmuld %f20,%f24,%f24 + + fmuld %f6,%f2,%f6 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f6,%f0,%f6 + + faddd %f34,%f14,%f16 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f6,%f32,%f6 + + .align 32 +.CASE4: + fands %f29,%f28,%f29 ! if (n & 1) clear sign bit + sethi %hi(0x3fc3c000),%o7 + andcc %l1,2,%g0 + bne,pn %icc,.CASE6 + +! delay slot + andcc %l2,2,%g0 + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + bne,pn %icc,.CASE5 + +! delay slot + add %l3,8,%g1 + ld [%fp+x2_1],%l2 + fpadd32s %f20,%f31,%f28 + + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fand %f28,%f44,%f24 + sub %l2,%o7,%l2 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f0,%f4,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f4,%f6,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f14,%f34,%f14 + std %f2,[%fp+y0_0] + + fmuld %f24,%f36,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f16,%f12,%f16 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + faddd %f16,%f14,%f16 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f16,%f10,%f16 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f16,%f34,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE5: + fand %f18,%f44,%f14 + sub %l1,%o7,%l1 + + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fsubd %f10,%f14,%f10 + srl %l1,10,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + + fmuld %f14,%f34,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f16,%f12,%f16 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f16,%f14,%f16 + + faddd %f4,%f2,%f4 + + faddd %f24,%f22,%f24 + + faddd %f16,%f10,%f16 + + faddd %f32,%f4,%f6 + + faddd %f36,%f24,%f26 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f16,%f34,%f16 + + .align 32 +.CASE6: + ld [%fp+x2_1],%l2 + add %l3,8,%g1 + bne,pn %icc,.CASE7 +! 
delay slot + fpadd32s %f20,%f31,%f28 + + fand %f28,%f44,%f24 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f0,%f0,%f0 + sub %l2,%o7,%l2 + + fsubd %f20,%f24,%f20 + srl %l2,10,%l2 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + faddd %f20,%f22,%f20 + andn %l2,0x1f,%l2 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + fmuld %f20,%f20,%f22 + add %l2,%o5,%l2 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f22,%f58,%f26 + ldd [%l3+%l2],%f36 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f26,%f56,%f26 + fmuld %f22,%f62,%f24 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f22,%f26,%f26 + faddd %f24,%f60,%f24 + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + faddd %f4,%f6,%f4 + + faddd %f26,%f54,%f26 + fmuld %f22,%f24,%f24 + ldd [%g1+%l2],%f22 + + faddd %f14,%f16,%f14 + + fmuld %f0,%f4,%f4 + + fmuld %f20,%f26,%f26 + ldd [%l4+%l2],%f20 + + fmuld %f24,%f36,%f24 + + fmuld %f10,%f14,%f14 + + fmuld %f26,%f22,%f26 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + faddd %f26,%f24,%f26 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f26,%f20,%f26 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! 
delay slot + faddd %f26,%f36,%f26 + + .align 32 +.CASE7: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + + fmuld %f20,%f20,%f20 + ldd [%l5+%o5],%f36 + add %l5,%o5,%l2 + + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + + fmuld %f20,%f36,%f24 + ldd [%l2+0x10],%f26 + add %fp,%o5,%o5 + + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + + faddd %f24,%f26,%f24 + ldd [%l2+0x20],%f36 + + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + + fmuld %f20,%f24,%f24 + ldd [%l2+0x30],%f26 + + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + + faddd %f24,%f36,%f24 + ldd [%o5+x2_1],%f36 + + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + + fmuld %f20,%f24,%f24 + std %f22,[%fp+y2_0] + + faddd %f4,%f6,%f4 + + faddd %f14,%f16,%f14 + + faddd %f24,%f26,%f24 + + fmuld %f0,%f4,%f4 + + fmuld %f10,%f14,%f14 + + fmuld %f20,%f24,%f24 + + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + + fmuld %f36,%f24,%f24 + ldd [%o5+y2_0],%f22 + + faddd %f4,%f2,%f4 + + faddd %f14,%f12,%f14 + + faddd %f24,%f22,%f24 + + faddd %f32,%f4,%f6 + + faddd %f34,%f14,%f16 + ba,pt %icc,.FIXSIGN + +! delay slot + faddd %f36,%f24,%f26 + + + .align 32 +.ENDLOOP2: + fmuld %f10,%f40,%f12 + add %l5,thresh,%g1 + faddd %f12,%f42,%f12 + st %f13,[%fp+n1] + fsubd %f12,%f42,%f12 ! 
n + fmuld %f12,%f46,%f14 + fsubd %f10,%f14,%f14 + fmuld %f12,%f48,%f16 + fsubd %f14,%f16,%f10 + ld [%fp+n1],%o4 + fsubd %f14,%f10,%f34 + and %o4,1,%o4 + fsubd %f34,%f16,%f34 + fmuld %f12,%f50,%f18 + sll %o4,3,%o4 + fsubd %f18,%f34,%f18 + ld [%g1+%o4],%f16 + fsubd %f10,%f18,%f14 + fsubd %f10,%f14,%f34 + add %l5,thresh+4,%o7 + fsubd %f34,%f18,%f34 + fmuld %f12,%f52,%f12 + fsubd %f12,%f34,%f12 + ld [%o7+%o4],%f18 + fsubd %f14,%f12,%f10 ! x + fsubd %f14,%f10,%f14 + fands %f10,%f30,%f19 ! save signbit + fabsd %f10,%f10 + std %f10,[%fp+x1_1] + fsubd %f14,%f12,%f12 ! y + fcmpgt32 %f16,%f10,%l1 + fxors %f12,%f19,%f12 + fands %f19,%f18,%f19 ! if (n & 1) clear sign bit + andcc %l1,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f10,%f31,%f18 + ld [%fp+x1_1],%l1 + fand %f18,%f44,%f14 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f10,%f14,%f10 + sub %l1,%o7,%l1 + srl %l1,10,%l1 + faddd %f10,%f12,%f10 + andn %l1,0x1f,%l1 + fmuld %f10,%f10,%f12 + add %l1,%o4,%l1 + fmuld %f12,%f58,%f16 + ldd [%l3+%l1],%f34 + faddd %f16,%f56,%f16 + fmuld %f12,%f62,%f14 + fmuld %f12,%f16,%f16 + faddd %f14,%f60,%f14 + faddd %f16,%f54,%f16 + fmuld %f12,%f14,%f14 + ldd [%g1+%l1],%f12 + fmuld %f10,%f16,%f16 + ldd [%l4+%l1],%f10 + fmuld %f14,%f34,%f14 + fmuld %f16,%f12,%f16 + faddd %f16,%f14,%f16 + faddd %f16,%f10,%f16 + ba,pt %icc,2f + faddd %f16,%f34,%f16 +1: + fmuld %f10,%f10,%f10 + ldd [%l5+%o4],%f34 + add %l5,%o4,%l1 + fmuld %f10,%f34,%f14 + ldd [%l1+0x10],%f16 + add %fp,%o4,%o4 + faddd %f14,%f16,%f14 + ldd [%l1+0x20],%f34 + fmuld %f10,%f14,%f14 + ldd [%l1+0x30],%f16 + faddd %f14,%f34,%f14 + ldd [%o4+x1_1],%f34 + fmuld %f10,%f14,%f14 + std %f12,[%fp+y1_0] + faddd %f14,%f16,%f14 + fmuld %f10,%f14,%f14 + fmuld %f34,%f14,%f14 + ldd [%o4+y1_0],%f12 + faddd %f14,%f12,%f14 + faddd %f34,%f14,%f16 +2: + add %l5,thresh-4,%g1 + ld [%fp+n1],%o4 + and %o4,2,%o4 + sll %o4,2,%o4 + ld [%g1+%o4],%f18 + fxors %f19,%f18,%f19 + fors %f16,%f19,%f16 ! 
tack on sign + st %f16,[%o1] + st %f17,[%o1+4] + +.ENDLOOP1: + fmuld %f0,%f40,%f2 + add %l5,thresh,%g1 + faddd %f2,%f42,%f2 + st %f3,[%fp+n0] + fsubd %f2,%f42,%f2 ! n + fmuld %f2,%f46,%f4 + fsubd %f0,%f4,%f4 + fmuld %f2,%f48,%f6 + fsubd %f4,%f6,%f0 + ld [%fp+n0],%o3 + fsubd %f4,%f0,%f32 + and %o3,1,%o3 + fsubd %f32,%f6,%f32 + fmuld %f2,%f50,%f8 + sll %o3,3,%o3 + fsubd %f8,%f32,%f8 + ld [%g1+%o3],%f6 + fsubd %f0,%f8,%f4 + fsubd %f0,%f4,%f32 + add %l5,thresh+4,%o7 + fsubd %f32,%f8,%f32 + fmuld %f2,%f52,%f2 + fsubd %f2,%f32,%f2 + ld [%o7+%o3],%f8 + fsubd %f4,%f2,%f0 ! x + fsubd %f4,%f0,%f4 + fands %f0,%f30,%f9 ! save signbit + fabsd %f0,%f0 + std %f0,[%fp+x0_1] + fsubd %f4,%f2,%f2 ! y + fcmpgt32 %f6,%f0,%l0 + fxors %f2,%f9,%f2 + fands %f9,%f8,%f9 ! if (n & 1) clear sign bit + andcc %l0,2,%g0 + bne,pn %icc,1f +! delay slot + nop + fpadd32s %f0,%f31,%f8 + ld [%fp+x0_1],%l0 + fand %f8,%f44,%f4 + sethi %hi(0x3fc3c000),%o7 + add %l3,8,%g1 + fsubd %f0,%f4,%f0 + sub %l0,%o7,%l0 + srl %l0,10,%l0 + faddd %f0,%f2,%f0 + andn %l0,0x1f,%l0 + fmuld %f0,%f0,%f2 + add %l0,%o3,%l0 + fmuld %f2,%f58,%f6 + ldd [%l3+%l0],%f32 + faddd %f6,%f56,%f6 + fmuld %f2,%f62,%f4 + fmuld %f2,%f6,%f6 + faddd %f4,%f60,%f4 + faddd %f6,%f54,%f6 + fmuld %f2,%f4,%f4 + ldd [%g1+%l0],%f2 + fmuld %f0,%f6,%f6 + ldd [%l4+%l0],%f0 + fmuld %f4,%f32,%f4 + fmuld %f6,%f2,%f6 + faddd %f6,%f4,%f6 + faddd %f6,%f0,%f6 + ba,pt %icc,2f + faddd %f6,%f32,%f6 +1: + fmuld %f0,%f0,%f0 + ldd [%l5+%o3],%f32 + add %l5,%o3,%l0 + fmuld %f0,%f32,%f4 + ldd [%l0+0x10],%f6 + add %fp,%o3,%o3 + faddd %f4,%f6,%f4 + ldd [%l0+0x20],%f32 + fmuld %f0,%f4,%f4 + ldd [%l0+0x30],%f6 + faddd %f4,%f32,%f4 + ldd [%o3+x0_1],%f32 + fmuld %f0,%f4,%f4 + std %f2,[%fp+y0_0] + faddd %f4,%f6,%f4 + fmuld %f0,%f4,%f4 + fmuld %f32,%f4,%f4 + ldd [%o3+y0_0],%f2 + faddd %f4,%f2,%f4 + faddd %f32,%f4,%f6 +2: + add %l5,thresh-4,%g1 + ld [%fp+n0],%o3 + and %o3,2,%o3 + sll %o3,2,%o3 + ld [%g1+%o3],%f8 + fxors %f9,%f8,%f9 + fors %f6,%f9,%f6 ! 
tack on sign + st %f6,[%o0] + st %f7,[%o0+4] + +.ENDLOOP0: + +! check for huge arguments remaining + + tst LIM_l6 + be,pt %icc,.exit +! delay slot + nop + +! ========== huge range (use C code) ========== + +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vsin_big + mov %l7,%o5 ! delay slot + +.exit: + ret + restore + + + .align 32 +.SKIP0: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f10,%f0 + ld [%i1+4],%f1 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f20,%f10 + ld [%i1+4],%f11 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.SKIP2: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG0: + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f0,%f0,%f0 ! y = x - x + st %f0,[%o0] + st %f1,[%o0+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP0 +! delay slot, harmless if branch taken + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovd %f10,%f0 + ba,pt %icc,.LOOP0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG1: + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! 
delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f10,%f10,%f10 ! y = x - x + st %f10,[%o1] + st %f11,[%o1+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP1 +! delay slot, harmless if branch taken + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovd %f20,%f10 + ba,pt %icc,.LOOP1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 32 +.BIG2: + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,1f ! if hx < 0x7ff00000 +! delay slot, annulled if branch not taken + mov %l7,LIM_l6 ! set biguns flag or + fsubd %f20,%f20,%f20 ! y = x - x + st %f20,[%o2] + st %f21,[%o2+4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,.ENDLOOP2 +! delay slot + nop + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f21 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.LOOP2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vsin) + diff --git a/usr/src/libm/src/mvec/vis/__vsin_ultra3.S b/usr/src/libm/src/mvec/vis/__vsin_ultra3.S new file mode 100644 index 0000000..172b2ad --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsin_ultra3.S @@ -0,0 +1,3431 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + + .ident "@(#)__vsin_ultra3.S 1.8 06/01/23 SMI" + + .file "__vsin_ultra3.S" + +#include "libm.h" +#if defined(LIBMVEC_SO_BUILD) + .weak __vsin + .type __vsin,#function + __vsin = __vsin_ultra3 +#endif + + RO_DATA + .align 64 +constants: + .word 0x42c80000,0x00000000 ! 3 * 2^44 + .word 0x43380000,0x00000000 ! 3 * 2^51 + .word 0x3fe45f30,0x6dc9c883 ! invpio2 + .word 0x3ff921fb,0x54442c00 ! pio2_1 + .word 0x3d318469,0x898cc400 ! pio2_2 + .word 0x3a71701b,0x839a2520 ! pio2_3 + .word 0xbfc55555,0x55555533 ! pp1 + .word 0x3f811111,0x10e7d53b ! pp2 + .word 0xbf2a0167,0xe6b3cf9b ! pp3 + .word 0xbfdfffff,0xffffff65 ! qq1 + .word 0x3fa55555,0x54f88ed0 ! qq2 + .word 0xbf56c12c,0xdd185f60 ! qq3 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define biguns STACK_BIAS-0x20 +#define nk3 STACK_BIAS-0x24 +#define nk2 STACK_BIAS-0x28 +#define nk1 STACK_BIAS-0x2c +#define nk0 STACK_BIAS-0x30 +#define junk STACK_BIAS-0x38 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 0x80000000 + +! l0 hx0 +! l1 hx1 +! l2 hx2 +! l3 hx3 +! l4 k0 +! l5 k1 +! l6 k2 +! l7 k3 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_sincos2 +! g5 scratch + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 0x3e400000 +! o5 0x3fe921fb,0x4099251e +! o7 scratch + +! f0 hx0 +! f2 +! f4 +! f6 +! f8 hx1 +! f10 +! f12 +! f14 +! f16 hx2 +! f18 +! f20 +! f22 +! f24 hx3 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 +! 
f38 + +#define c3two44 %f40 +#define c3two51 %f42 +#define invpio2 %f44 +#define pio2_1 %f46 +#define pio2_2 %f48 +#define pio2_3 %f50 +#define pp1 %f52 +#define pp2 %f54 +#define pp3 %f56 +#define qq1 %f58 +#define qq2 %f60 +#define qq3 %f62 + + ENTRY(__vsin_ultra3) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_sincos2,o1) + mov %o1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + st %g0,[%fp+biguns] ! biguns = 0 + ldd [%o0+0x00],c3two44 ! load/set up constants + ldd [%o0+0x08],c3two51 + ldd [%o0+0x10],invpio2 + ldd [%o0+0x18],pio2_1 + ldd [%o0+0x20],pio2_2 + ldd [%o0+0x28],pio2_3 + ldd [%o0+0x30],pp1 + ldd [%o0+0x38],pp2 + ldd [%o0+0x40],pp3 + ldd [%o0+0x48],qq1 + ldd [%o0+0x50],qq2 + ldd [%o0+0x58],qq3 + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e400000),%o4 + sethi %hi(0x3fe921fb),%o5 + or %o5,%lo(0x3fe921fb),%o5 + sllx %o5,32,%o5 + sethi %hi(0x4099251e),%o7 + or %o7,%lo(0x4099251e),%o7 + or %o5,%o7,%o5 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + add %fp,junk,%o1 ! loop prologue + add %fp,junk,%o2 + add %fp,junk,%o3 + ld [%i1],%l0 ! *x + ld [%i1],%f0 + ld [%i1+4],%f3 + andn %l0,%i5,%l0 ! mask off sign + ba .loop0 + add %i1,%i2,%i1 ! x += stridex + +! 16-byte aligned + .align 16 +.loop0: + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%o4,%g5 + sub %o5,%l0,%o7 + fabss %f0,%f2 + + lda [%i1]%asi,%f8 + orcc %o7,%g5,%g0 + mov %i3,%o0 ! py0 = y + bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f11 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] + +.loop1: + lda [%i1]%asi,%l2 ! 
preload next argument + sub %l1,%o4,%g5 + sub %o5,%l1,%o7 + fabss %f8,%f10 + + lda [%i1]%asi,%f16 + orcc %o7,%g5,%g0 + mov %i3,%o1 ! py1 = y + bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f19 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! x += stridex + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] + +.loop2: + lda [%i1]%asi,%l3 ! preload next argument + sub %l2,%o4,%g5 + sub %o5,%l2,%o7 + fabss %f16,%f18 + + lda [%i1]%asi,%f24 + orcc %o7,%g5,%g0 + mov %i3,%o2 ! py2 = y + bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f27 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! y += stridey + ble,pn %icc,.last3 + +! delay slot + andn %l3,%i5,%l3 + add %i1,%i2,%i1 ! x += stridex + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + +.loop3: + sub %l3,%o4,%g5 + sub %o5,%l3,%o7 + fabss %f24,%f26 + st %f5,[%fp+nk0] + + orcc %o7,%g5,%g0 + mov %i3,%o3 ! py3 = y + bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e +! delay slot + st %f13,[%fp+nk1] + +!!! DONE? +.cont: + srlx %o5,32,%o7 + add %i3,%i4,%i3 ! y += stridey + fmovs %f3,%f1 + st %f21,[%fp+nk2] + + sub %o7,%l0,%l0 + sub %o7,%l1,%l1 + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + + sub %o7,%l2,%l2 + sub %o7,%l3,%l3 + fmovs %f11,%f9 + + or %l0,%l1,%l0 + or %l2,%l3,%l2 + fmovs %f19,%f17 + + fmovs %f27,%f25 + fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range + + fmuld %f8,invpio2,%f14 + ld [%fp+nk0],%l4 + + fmuld %f16,invpio2,%f22 + ld [%fp+nk1],%l5 + + orcc %l0,%l2,%g0 + bl,pn %icc,.medium +! delay slot + fmuld %f24,invpio2,%f30 + ld [%fp+nk2],%l6 + + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 + + sll %l5,5,%l5 + ldd [%l4+%g1],%f4 + fcmpd %fcc1,%f8,pio2_3 + + sll %l6,5,%l6 + ldd [%l5+%g1],%f12 + fcmpd %fcc2,%f16,pio2_3 + + sll %l7,5,%l7 + ldd [%l6+%g1],%f20 + fcmpd %fcc3,%f24,pio2_3 + + ldd [%l7+%g1],%f28 + fsubd %f2,%f4,%f2 ! 
x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + + fsubd %f26,%f28,%f26 + + fmuld %f2,%f2,%f0 ! z = x * x + + fmuld %f10,%f10,%f8 + + fmuld %f18,%f18,%f16 + + fmuld %f26,%f26,%f24 + + fmuld %f0,pp3,%f6 + + fmuld %f8,pp3,%f14 + + fmuld %f16,pp3,%f22 + + fmuld %f24,pp3,%f30 + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f8,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f16,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f24,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f2,%f6,%f6 + + fmuld %f10,%f14,%f14 + + fmuld %f18,%f22,%f22 + + fmuld %f26,%f30,%f30 + + faddd %f6,%f2,%f6 + fmuld %f0,%f4,%f4 + ldd [%l4+16],%f2 + + faddd %f14,%f10,%f14 + fmuld %f8,%f12,%f12 + ldd [%l5+16],%f10 + + faddd %f22,%f18,%f22 + fmuld %f16,%f20,%f20 + ldd [%l6+16],%f18 + + faddd %f30,%f26,%f30 + fmuld %f24,%f28,%f28 + ldd [%l7+16],%f26 + + fmuld %f2,%f6,%f6 + + fmuld %f10,%f14,%f14 + + fmuld %f18,%f22,%f22 + + fmuld %f26,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + fmovdl %fcc0,%f4,%f6 ! (hx < -0)? 
-s : s + st %f6,[%o0] + + fmovdl %fcc1,%f12,%f14 + st %f14,[%o1] + + fmovdl %fcc2,%f20,%f22 + st %f22,[%o2] + + fmovdl %fcc3,%f28,%f30 + st %f30,[%o3] + addcc %i0,-1,%i0 + + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.medium: + faddd %f6,c3two51,%f4 + st %f5,[%fp+nk0] + + faddd %f14,c3two51,%f12 + st %f13,[%fp+nk1] + + faddd %f22,c3two51,%f20 + st %f21,[%fp+nk2] + + faddd %f30,c3two51,%f28 + st %f29,[%fp+nk3] + + fsubd %f4,c3two51,%f6 + + fsubd %f12,c3two51,%f14 + + fsubd %f20,c3two51,%f22 + + fsubd %f28,c3two51,%f30 + + fmuld %f6,pio2_1,%f2 + ld [%fp+nk0],%l0 ! n + + fmuld %f14,pio2_1,%f10 + ld [%fp+nk1],%l1 + + fmuld %f22,pio2_1,%f18 + ld [%fp+nk2],%l2 + + fmuld %f30,pio2_1,%f26 + ld [%fp+nk3],%l3 + + fsubd %f0,%f2,%f0 + fmuld %f6,pio2_2,%f4 + + fsubd %f8,%f10,%f8 + fmuld %f14,pio2_2,%f12 + + fsubd %f16,%f18,%f16 + fmuld %f22,pio2_2,%f20 + + fsubd %f24,%f26,%f24 + fmuld %f30,pio2_2,%f28 + + fsubd %f0,%f4,%f32 + + fsubd %f8,%f12,%f34 + + fsubd %f16,%f20,%f36 + + fsubd %f24,%f28,%f38 + + fsubd %f0,%f32,%f0 + fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 + + fsubd %f8,%f34,%f8 + fcmple32 %f34,pio2_3,%l5 + + fsubd %f16,%f36,%f16 + fcmple32 %f36,pio2_3,%l6 + + fsubd %f24,%f38,%f24 + fcmple32 %f38,pio2_3,%l7 + + fsubd %f0,%f4,%f0 + fmuld %f6,pio2_3,%f6 + sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2 + + fsubd %f8,%f12,%f8 + fmuld %f14,pio2_3,%f14 + sll %l5,30,%l5 + + fsubd %f16,%f20,%f16 + fmuld %f22,pio2_3,%f22 + sll %l6,30,%l6 + + fsubd %f24,%f28,%f24 + fmuld %f30,pio2_3,%f30 + sll %l7,30,%l7 + + fsubd %f6,%f0,%f6 + sra %l4,31,%l4 + + fsubd %f14,%f8,%f14 + sra %l5,31,%l5 + + fsubd %f22,%f16,%f22 + sra %l6,31,%l6 + + fsubd %f30,%f24,%f30 + sra %l7,31,%l7 + + fsubd %f32,%f6,%f0 ! 
reduced x + xor %l0,%l4,%l0 + + fsubd %f34,%f14,%f8 + xor %l1,%l5,%l1 + + fsubd %f36,%f22,%f16 + xor %l2,%l6,%l2 + + fsubd %f38,%f30,%f24 + xor %l3,%l7,%l3 + + fabsd %f0,%f2 + sub %l0,%l4,%l0 + + fabsd %f8,%f10 + sub %l1,%l5,%l1 + + fabsd %f16,%f18 + sub %l2,%l6,%l2 + + fabsd %f24,%f26 + sub %l3,%l7,%l3 + + faddd %f2,c3two44,%f4 + st %f5,[%fp+nk0] + and %l4,2,%l4 + + faddd %f10,c3two44,%f12 + st %f13,[%fp+nk1] + and %l5,2,%l5 + + faddd %f18,c3two44,%f20 + st %f21,[%fp+nk2] + and %l6,2,%l6 + + faddd %f26,c3two44,%f28 + st %f29,[%fp+nk3] + and %l7,2,%l7 + + fsubd %f32,%f0,%f4 + xor %l0,%l4,%l0 + + fsubd %f34,%f8,%f12 + xor %l1,%l5,%l1 + + fsubd %f36,%f16,%f20 + xor %l2,%l6,%l2 + + fsubd %f38,%f24,%f28 + xor %l3,%l7,%l3 + + fzero %f38 + ld [%fp+nk0],%l4 + + fsubd %f4,%f6,%f6 ! w + ld [%fp+nk1],%l5 + + fsubd %f12,%f14,%f14 + ld [%fp+nk2],%l6 + + fnegd %f38,%f38 + ld [%fp+nk3],%l7 + sll %l4,5,%l4 ! k + + fsubd %f20,%f22,%f22 + sll %l5,5,%l5 + + fsubd %f28,%f30,%f30 + sll %l6,5,%l6 + + fand %f0,%f38,%f32 ! sign bit of x + ldd [%l4+%g1],%f4 + sll %l7,5,%l7 + + fand %f8,%f38,%f34 + ldd [%l5+%g1],%f12 + + fand %f16,%f38,%f36 + ldd [%l6+%g1],%f20 + + fand %f24,%f38,%f38 + ldd [%l7+%g1],%f28 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f10,%f12,%f10 + + fsubd %f18,%f20,%f18 + nop + + fsubd %f26,%f28,%f26 + nop + +! 16-byte aligned + fmuld %f2,%f2,%f0 ! z = x * x + andcc %l0,1,%g0 + bz,pn %icc,.case8 +! delay slot + fxor %f6,%f32,%f32 + + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case4 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case2 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case1 +! delay slot + fxor %f30,%f38,%f38 + +!.case0: + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case2: squares %f26 into %f24, then branches to .case3 when bit 0 of %l3 is clear. Otherwise, per the tags below, x2 takes the sin-tagged chain and x0, x1, x3 the cos-tagged chain. */ .align 16 +.case2: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case3 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case3: per the tags below, x2 and x3 take the sin-tagged chain and x0 and x1 the cos-tagged chain. */ .align 16 +.case3: + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case4: branches to .case6 when bit 0 of %l2 is clear, then to .case5 when bit 0 of %l3 is clear. Otherwise, per the tags below, x1 takes the sin-tagged chain and x0, x2, x3 the cos-tagged chain. */ .align 16 +.case4: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case6 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case5 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + 
faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case5: per the tags below, x1 and x3 take the sin-tagged chain and x0 and x2 the cos-tagged chain. */ .align 16 +.case5: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! 
cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case6: squares %f26 into %f24, then branches to .case7 when bit 0 of %l3 is clear. Otherwise, per the tags below, x1 and x2 take the sin-tagged chain and x0 and x3 the cos-tagged chain. */ .align 16 +.case6: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case7 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case7: per the tags below, x1, x2 and x3 take the sin-tagged chain and x0 the cos-tagged chain. */ .align 16 +.case7: + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + fmuld %f0,qq3,%f6 ! cos(x0) + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + faddd %f6,qq2,%f6 + fmuld %f0,pp2,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,pp1,%f4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + faddd %f6,qq1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f4,%f4 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,%f32,%f4 + ldd [%l4+16],%f0 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + fmuld %f0,%f6,%f6 + faddd %f4,%f2,%f4 + ldd [%l4+8],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f4,%f4 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld 
%f38,%f30,%f30 + + fsubd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case8: branches to .case12 when bit 0 of %l1 is clear, then to .case10 when bit 0 of %l2 is clear, then to .case9 when bit 0 of %l3 is clear. Otherwise, per the tags below, x0 takes the sin-tagged chain and x1, x2, x3 the cos-tagged chain. */ .align 16 +.case8: + fmuld %f10,%f10,%f8 + andcc %l1,1,%g0 + bz,pn %icc,.case12 +! delay slot + fxor %f14,%f34,%f34 + + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case10 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case9 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case9: per the tags below, x0 and x3 take the sin-tagged chain and x1 and x2 the cos-tagged chain. */ .align 16 +.case9: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case10: squares %f26 into %f24, then branches to .case11 when bit 0 of %l3 is clear. Otherwise, per the tags below, x0 and x2 take the sin-tagged chain and x1 and x3 the cos-tagged chain. */ .align 16 +.case10: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case11 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd 
[%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case11: per the tags below, x0, x2 and x3 take the sin-tagged chain and x1 the cos-tagged chain. */ .align 16 +.case11: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + fmuld %f8,qq3,%f14 ! 
cos(x1) + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + faddd %f14,qq2,%f14 + fmuld %f8,pp2,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + fmuld %f8,%f14,%f14 + faddd %f12,pp1,%f12 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + faddd %f14,qq1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f12,%f12 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f14,%f14 + faddd %f12,%f34,%f12 + ldd [%l5+16],%f8 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + fmuld %f8,%f14,%f14 + faddd %f12,%f10,%f12 + ldd [%l5+8],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f12,%f12 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + fsubd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! 
delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case12: branches to .case14 when bit 0 of %l2 is clear, then to .case13 when bit 0 of %l3 is clear. Otherwise, per the tags below, x0 and x1 take the sin-tagged chain and x2 and x3 the cos-tagged chain. */ .align 16 +.case12: + fmuld %f18,%f18,%f16 + andcc %l2,1,%g0 + bz,pn %icc,.case14 +! delay slot + fxor %f22,%f36,%f36 + + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case13 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + fmuld %f24,qq3,%f30 ! cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f20,%f20 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda 
[%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case13: per the tags below, x0, x1 and x3 take the sin-tagged chain and x2 the cos-tagged chain. */ .align 16 +.case13: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + fmuld %f16,qq3,%f22 ! cos(x2) + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + faddd %f22,qq2,%f22 + fmuld %f16,pp2,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + fmuld %f16,%f22,%f22 + faddd %f20,pp1,%f20 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + faddd %f22,qq1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f20,%f20 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f22,%f22 + faddd %f20,%f36,%f20 + ldd [%l6+16],%f16 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + fmuld %f16,%f22,%f22 + faddd %f20,%f18,%f20 + ldd [%l6+8],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld 
%f36,%f20,%f20 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + fsubd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + +/* .case14: squares %f26 into %f24, then branches to .case15 when bit 0 of %l3 is clear. Otherwise, per the tags below, x0, x1 and x2 take the sin-tagged chain and x3 the cos-tagged chain. */ .align 16 +.case14: + fmuld %f26,%f26,%f24 + andcc %l3,1,%g0 + bz,pn %icc,.case15 +! delay slot + fxor %f30,%f38,%f38 + + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + fmuld %f24,qq3,%f30 ! 
cos(x3) + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + faddd %f30,qq2,%f30 + fmuld %f24,pp2,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + fmuld %f24,%f30,%f30 + faddd %f28,pp1,%f28 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + faddd %f30,qq1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f28,%f28 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f30,%f30 + faddd %f28,%f38,%f28 + ldd [%l7+16],%f24 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + fmuld %f24,%f30,%f30 + faddd %f28,%f26,%f28 + ldd [%l7+8],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f28,%f28 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + fsubd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! 
delay slot + nop + +/* .case15: per the tags below, all four of x0, x1, x2 and x3 take the sin-tagged chain. */ .align 16 +.case15: + fmuld %f0,pp3,%f6 ! sin(x0) + + fmuld %f8,pp3,%f14 ! sin(x1) + + fmuld %f16,pp3,%f22 ! sin(x2) + + fmuld %f24,pp3,%f30 ! sin(x3) + + faddd %f6,pp2,%f6 + fmuld %f0,qq2,%f4 + + faddd %f14,pp2,%f14 + fmuld %f8,qq2,%f12 + + faddd %f22,pp2,%f22 + fmuld %f16,qq2,%f20 + + faddd %f30,pp2,%f30 + fmuld %f24,qq2,%f28 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f8,%f14,%f14 + faddd %f12,qq1,%f12 + + fmuld %f16,%f22,%f22 + faddd %f20,qq1,%f20 + + fmuld %f24,%f30,%f30 + faddd %f28,qq1,%f28 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l4,%g1,%l4 + + faddd %f14,pp1,%f14 + fmuld %f8,%f12,%f12 + add %l5,%g1,%l5 + + faddd %f22,pp1,%f22 + fmuld %f16,%f20,%f20 + add %l6,%g1,%l6 + + faddd %f30,pp1,%f30 + fmuld %f24,%f28,%f28 + add %l7,%g1,%l7 + + fmuld %f0,%f6,%f6 + + fmuld %f8,%f14,%f14 + + fmuld %f16,%f22,%f22 + + fmuld %f24,%f30,%f30 + + fmuld %f2,%f6,%f6 + ldd [%l4+8],%f0 + + fmuld %f10,%f14,%f14 + ldd [%l5+8],%f8 + + fmuld %f18,%f22,%f22 + ldd [%l6+8],%f16 + + fmuld %f26,%f30,%f30 + ldd [%l7+8],%f24 + + fmuld %f0,%f4,%f4 + faddd %f32,%f6,%f6 + + fmuld %f8,%f12,%f12 + faddd %f34,%f14,%f14 + + fmuld %f16,%f20,%f20 + faddd %f36,%f22,%f22 + + fmuld %f24,%f28,%f28 + faddd %f38,%f30,%f30 + + faddd %f2,%f6,%f6 + ldd [%l4+16],%f32 + + faddd %f10,%f14,%f14 + ldd [%l5+16],%f34 + + faddd %f18,%f22,%f22 + ldd [%l6+16],%f36 + + faddd %f26,%f30,%f30 + ldd [%l7+16],%f38 + + fmuld %f32,%f6,%f6 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f22,%f22 + + fmuld %f38,%f30,%f30 + + faddd %f6,%f4,%f6 + + faddd %f14,%f12,%f14 + + faddd %f22,%f20,%f22 + + faddd %f30,%f28,%f30 + + faddd %f6,%f0,%f6 + + faddd %f14,%f8,%f14 + + faddd %f22,%f16,%f22 + + faddd %f30,%f24,%f30 + mov %l0,%l4 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! 
preload next argument + + fnegd %f14,%f12 + lda [%i1]%asi,%f0 + + fnegd %f22,%f20 + lda [%i1+4]%asi,%f3 + + fnegd %f30,%f28 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + andcc %l4,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %l1,2,%g0 + fmovdnz %icc,%f12,%f14 + st %f14,[%o1] + + andcc %l2,2,%g0 + fmovdnz %icc,%f20,%f22 + st %f22,[%o2] + + andcc %l3,2,%g0 + fmovdnz %icc,%f28,%f30 + st %f30,[%o3] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.end: + st %f15,[%o1+4] + st %f23,[%o2+4] + st %f31,[%o3+4] + ld [%fp+biguns],%i5 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + nop +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + sra %o4,0,%o4 + call __vlibm_vsin_big_ultra3 + sra %o5,0,%o5 ! delay slot + +.exit: + ret + restore + + + .align 16 +.last1: + faddd %f2,c3two44,%f4 + st %f15,[%o1+4] +.last1_from_range1: + mov 0,%l1 + fzeros %f8 + fzero %f10 + add %fp,junk,%o1 +.last2: + faddd %f10,c3two44,%f12 + st %f23,[%o2+4] +.last2_from_range2: + mov 0,%l2 + fzeros %f16 + fzero %f18 + add %fp,junk,%o2 +.last3: + faddd %f18,c3two44,%f20 + st %f31,[%o3+4] + st %f5,[%fp+nk0] + st %f13,[%fp+nk1] +.last3_from_range3: + mov 0,%l3 + fzeros %f24 + fzero %f26 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + cmp %l0,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f0 + fmuld %f2,%f0,%f2 + st %f2,[%o0] + ba,pt %icc,2f +! delay slot + st %f3,[%o0+4] +1: + fdtoi %f2,%f4 ! raise inexact if not zero + st %f0,[%o0] + st %f3,[%o0+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.end +! 
delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f8,%f0 + fmovs %f11,%f3 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range1: + cmp %l1,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f8 + fmuld %f10,%f8,%f10 + st %f10,[%o1] + ba,pt %icc,2f +! delay slot + st %f11,[%o1+4] +1: + fdtoi %f10,%f12 ! raise inexact if not zero + st %f8,[%o1] + st %f11,[%o1+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last1_from_range1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f16,%f8 + fmovs %f19,%f11 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range2: + cmp %l2,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f16 + fmuld %f18,%f16,%f18 + st %f18,[%o2] + ba,pt %icc,2f +! delay slot + st %f19,[%o2+4] +1: + fdtoi %f18,%f20 ! raise inexact if not zero + st %f16,[%o2] + st %f19,[%o2+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last2_from_range2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + andn %l3,%i5,%l2 ! hx &= ~0x80000000 + fmovs %f24,%f16 + fmovs %f27,%f19 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range3: + cmp %l3,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l3,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! 
set biguns + fzero %f24 + fmuld %f26,%f24,%f26 + st %f26,[%o3] + ba,pt %icc,2f +! delay slot + st %f27,[%o3+4] +1: + fdtoi %f26,%f28 ! raise inexact if not zero + st %f24,[%o3] + st %f27,[%o3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last3_from_range3 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! y += stridey + ld [%i1],%l3 + ld [%i1],%f24 + ld [%i1+4],%f27 + andn %l3,%i5,%l3 ! hx &= ~0x80000000 + ba,pt %icc,.loop3 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vsin_ultra3) + diff --git a/usr/src/libm/src/mvec/vis/__vsincos.S b/usr/src/libm/src/mvec/vis/__vsincos.S new file mode 100644 index 0000000..c01b394 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsincos.S @@ -0,0 +1,958 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vsincos.S 1.6 06/01/23 SMI" + + .file "__vsincos.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0x42c80000,0x00000000 ! 3 * 2^44 + .word 0x43380000,0x00000000 ! 3 * 2^51 + .word 0x3fe45f30,0x6dc9c883 ! invpio2 + .word 0x3ff921fb,0x54442c00 ! pio2_1 + .word 0x3d318469,0x898cc400 ! 
pio2_2 + .word 0x3a71701b,0x839a2520 ! pio2_3 + .word 0xbfc55555,0x55555533 ! pp1 + .word 0x3f811111,0x10e7d53b ! pp2 + .word 0xbf2a0167,0xe6b3cf9b ! pp3 + .word 0xbfdfffff,0xffffff65 ! qq1 + .word 0x3fa55555,0x54f88ed0 ! qq2 + .word 0xbf56c12c,0xdd185f60 ! qq3 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ssave STACK_BIAS-0x10 +#define csave STACK_BIAS-0x18 +#define nsave STACK_BIAS-0x1c +#define sxsave STACK_BIAS-0x20 +#define sssave STACK_BIAS-0x24 +#define biguns STACK_BIAS-0x28 +#define junk STACK_BIAS-0x30 +#define nk2 STACK_BIAS-0x38 +#define nk1 STACK_BIAS-0x3c +#define nk0 STACK_BIAS-0x40 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x40 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 s +! i4 strides +! i5 0x80000000,n0 + +! l0 hx0,k0 +! l1 hx1,k1 +! l2 hx2,k2 +! l3 c +! l4 pc0 +! l5 pc1 +! l6 pc2 +! l7 stridec + +! the following are 64-bit registers in both V8+ and V9 + +! g1 __vlibm_TBL_sincos2 +! g5 scratch,n1 + +! o0 ps0 +! o1 ps1 +! o2 ps2 +! o3 0x3fe921fb +! o4 0x3e400000 +! o5 0x4099251e +! o7 scratch,n2 + +! f0 x0,z0 +! f2 abs(x0) +! f4 +! f6 +! f8 +! f10 x1,z1 +! f12 abs(x1) +! f14 +! f16 +! f18 +! f20 x2,z2 +! f22 abs(x2) +! f24 +! f26 +! f28 +! f30 +! f32 +! f34 +! f36 +! f38 + +#define c3two44 %f40 +#define c3two51 %f42 +#define invpio2 %f44 +#define pio2_1 %f46 +#define pio2_2 %f48 +#define pio2_3 %f50 +#define pp1 %f52 +#define pp2 %f54 +#define pp3 %f56 +#define qq1 %f58 +#define qq2 %f60 +#define qq3 %f62 + + ENTRY(__vsincos) /* Entry: spill the arguments for a possible later __vlibm_vsincos_big call, load the twelve constants, scale the strides by 8, and preload x[0] with its sign masked off. */ + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + PIC_SET(l7,__vlibm_TBL_sincos2,o1) + mov %o1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! 
save arguments + stx %i3,[%fp+ssave] + stx %i5,[%fp+csave] + ldx [%fp+STACK_BIAS+0xb0],%l7 +#else + st %i1,[%fp+xsave] ! 
save arguments + st %i3,[%fp+ssave] + st %i5,[%fp+csave] + ld [%fp+0x5c],%l7 +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sssave] + mov %i5,%l3 + st %g0,[%fp+biguns] ! biguns = 0 + ldd [%o0+0x00],c3two44 ! load/set up constants + ldd [%o0+0x08],c3two51 + ldd [%o0+0x10],invpio2 + ldd [%o0+0x18],pio2_1 + ldd [%o0+0x20],pio2_2 + ldd [%o0+0x28],pio2_3 + ldd [%o0+0x30],pp1 + ldd [%o0+0x38],pp2 + ldd [%o0+0x40],pp3 + ldd [%o0+0x48],qq1 + ldd [%o0+0x50],qq2 + ldd [%o0+0x58],qq3 + sethi %hi(0x80000000),%i5 + sethi %hi(0x3e400000),%o4 + sethi %hi(0x3fe921fb),%o3 + or %o3,%lo(0x3fe921fb),%o3 + sethi %hi(0x4099251e),%o5 + or %o5,%lo(0x4099251e),%o5 + sll %i2,3,%i2 ! scale strides + sll %i4,3,%i4 + sll %l7,3,%l7 + add %fp,junk,%o0 ! loop prologue + add %fp,junk,%o1 + add %fp,junk,%o2 + ld [%i1],%l0 ! *x + ld [%i1],%f0 + ld [%i1+4],%f3 + andn %l0,%i5,%l0 ! mask off sign + ba .loop0 + add %i1,%i2,%i1 ! x += stridex + +! 16-byte aligned + .align 16 +.loop0: /* Element 0: preload the next argument, then divert to .range0 when hx < 0x3e400000 or hx > 0x4099251e, or to .last1 when the count runs out. */ + lda [%i1]%asi,%l1 ! preload next argument + sub %l0,%o4,%g5 + sub %o5,%l0,%o7 + fabss %f0,%f2 + + lda [%i1]%asi,%f10 + orcc %o7,%g5,%g0 + mov %i3,%o0 ! ps0 = s + bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f13 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! s += strides + + mov %l3,%l4 ! pc0 = c + add %l3,%l7,%l3 ! c += stridec + ble,pn %icc,.last1 + +! delay slot + andn %l1,%i5,%l1 + add %i1,%i2,%i1 ! x += stridex + faddd %f2,c3two44,%f4 + st %f17,[%o1+4] + +.loop1: + lda [%i1]%asi,%l2 ! preload next argument + sub %l1,%o4,%g5 + sub %o5,%l1,%o7 + fabss %f10,%f12 + + lda [%i1]%asi,%f20 + orcc %o7,%g5,%g0 + mov %i3,%o1 ! ps1 = s + bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e + +! delay slot + lda [%i1+4]%asi,%f23 + addcc %i0,-1,%i0 + add %i3,%i4,%i3 ! s += strides + + mov %l3,%l5 ! pc1 = c + add %l3,%l7,%l3 ! 
c += stridec + ble,pn %icc,.last2 + +! delay slot + andn %l2,%i5,%l2 + add %i1,%i2,%i1 ! 
x += stridex + faddd %f12,c3two44,%f14 + st %f27,[%o2+4] + +.loop2: + sub %l2,%o4,%g5 + sub %o5,%l2,%o7 + fabss %f20,%f22 + st %f5,[%fp+nk0] + + orcc %o7,%g5,%g0 + mov %i3,%o2 ! ps2 = s + bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e +! delay slot + st %f15,[%fp+nk1] + + mov %l3,%l6 ! pc2 = c + +.cont: + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + faddd %f22,c3two44,%f24 + st %f25,[%fp+nk2] + + sub %o3,%l0,%l0 + sub %o3,%l1,%l1 + fmovs %f3,%f1 + + sub %o3,%l2,%l2 + fmovs %f13,%f11 + + or %l0,%l1,%l0 + orcc %l0,%l2,%g0 + fmovs %f23,%f21 + + fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range + + fmuld %f10,invpio2,%f16 + ld [%fp+nk0],%l0 + + fmuld %f20,invpio2,%f26 + ld [%fp+nk1],%l1 + + bl,pn %icc,.medium +! delay slot + ld [%fp+nk2],%l2 + + sll %l0,5,%l0 ! k + fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 + + sll %l1,5,%l1 + ldd [%l0+%g1],%f4 + fcmpd %fcc1,%f10,pio2_3 + + sll %l2,5,%l2 + ldd [%l1+%g1],%f14 + fcmpd %fcc2,%f20,pio2_3 + + ldd [%l2+%g1],%f24 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f12,%f14,%f12 + + fsubd %f22,%f24,%f22 + + fmuld %f2,%f2,%f0 ! 
z = x * x + + fmuld %f12,%f12,%f10 + + fmuld %f22,%f22,%f20 + + fmuld %f0,pp3,%f6 + + fmuld %f10,pp3,%f16 + + fmuld %f20,pp3,%f26 + + faddd %f6,pp2,%f6 + fmuld %f0,qq3,%f4 + + faddd %f16,pp2,%f16 + fmuld %f10,qq3,%f14 + + faddd %f26,pp2,%f26 + fmuld %f20,qq3,%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,qq2,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq2,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq2,%f24 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l0,%g1,%l0 + + faddd %f16,pp1,%f16 + fmuld %f10,%f14,%f14 + add %l1,%g1,%l1 + + faddd %f26,pp1,%f26 + fmuld %f20,%f24,%f24 + add %l2,%g1,%l2 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq1,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq1,%f24 + + fmuld %f2,%f6,%f6 + ldd [%l0+8],%f8 + + fmuld %f12,%f16,%f16 + ldd [%l1+8],%f18 + + fmuld %f22,%f26,%f26 + ldd [%l2+8],%f28 + + faddd %f6,%f2,%f6 + fmuld %f0,%f4,%f4 + ldd [%l0+16],%f30 + + faddd %f16,%f12,%f16 + fmuld %f10,%f14,%f14 + ldd [%l1+16],%f32 + + faddd %f26,%f22,%f26 + fmuld %f20,%f24,%f24 + ldd [%l2+16],%f34 + + fmuld %f8,%f6,%f0 ! s * spoly + + fmuld %f18,%f16,%f10 + + fmuld %f28,%f26,%f20 + + fmuld %f30,%f4,%f2 ! c * cpoly + + fmuld %f32,%f14,%f12 + + fmuld %f34,%f24,%f22 + + fmuld %f30,%f6,%f6 ! c * spoly + fsubd %f2,%f0,%f2 + + fmuld %f32,%f16,%f16 + fsubd %f12,%f10,%f12 + + fmuld %f34,%f26,%f26 + fsubd %f22,%f20,%f22 + + fmuld %f8,%f4,%f4 ! s * cpoly + faddd %f2,%f30,%f2 + st %f2,[%l4] + + fmuld %f18,%f14,%f14 + faddd %f12,%f32,%f12 + st %f3,[%l4+4] + + fmuld %f28,%f24,%f24 + faddd %f22,%f34,%f22 + st %f12,[%l5] + + faddd %f6,%f4,%f6 + st %f13,[%l5+4] + + faddd %f16,%f14,%f16 + st %f22,[%l6] + + faddd %f26,%f24,%f26 + st %f23,[%l6+4] + + faddd %f6,%f8,%f6 + + faddd %f16,%f18,%f16 + + faddd %f26,%f28,%f26 + + fnegd %f6,%f4 + lda [%i1]%asi,%l0 ! preload next argument + + fnegd %f16,%f14 + lda [%i1]%asi,%f0 + + fnegd %f26,%f24 + lda [%i1+4]%asi,%f3 + andn %l0,%i5,%l0 + add %i1,%i2,%i1 + + fmovdl %fcc0,%f4,%f6 ! (hx < -0)? 
-s : s + st %f6,[%o0] + + fmovdl %fcc1,%f14,%f16 + st %f16,[%o1] + + fmovdl %fcc2,%f24,%f26 + st %f26,[%o2] + addcc %i0,-1,%i0 + + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.medium: /* Medium range: n is taken from x * invpio2 via the add/subtract of c3two51, x is reduced with pio2_1, pio2_2 and pio2_3, and ps/pc are swapped when n is odd. */ + faddd %f6,c3two51,%f4 + st %f5,[%fp+nk0] + + faddd %f16,c3two51,%f14 + st %f15,[%fp+nk1] + + faddd %f26,c3two51,%f24 + st %f25,[%fp+nk2] + + fsubd %f4,c3two51,%f6 + + fsubd %f14,c3two51,%f16 + + fsubd %f24,c3two51,%f26 + + fmuld %f6,pio2_1,%f2 + ld [%fp+nk0],%i5 ! n + + fmuld %f16,pio2_1,%f12 + ld [%fp+nk1],%g5 + + fmuld %f26,pio2_1,%f22 + ld [%fp+nk2],%o7 + + fsubd %f0,%f2,%f0 + fmuld %f6,pio2_2,%f4 + mov %o0,%o4 ! if (n & 1) swap ps, pc + andcc %i5,1,%g0 + + fsubd %f10,%f12,%f10 + fmuld %f16,pio2_2,%f14 + movnz %icc,%l4,%o0 + and %i5,3,%i5 + + fsubd %f20,%f22,%f20 + fmuld %f26,pio2_2,%f24 + movnz %icc,%o4,%l4 + + fsubd %f0,%f4,%f30 + mov %o1,%o4 + andcc %g5,1,%g0 + + fsubd %f10,%f14,%f32 + movnz %icc,%l5,%o1 + and %g5,3,%g5 + + fsubd %f20,%f24,%f34 + movnz %icc,%o4,%l5 + + fsubd %f0,%f30,%f0 + fcmple32 %f30,pio2_3,%l0 ! x <= pio2_3 iff x < 0 + mov %o2,%o4 + andcc %o7,1,%g0 + + fsubd %f10,%f32,%f10 + fcmple32 %f32,pio2_3,%l1 + movnz %icc,%l6,%o2 + and %o7,3,%o7 + + fsubd %f20,%f34,%f20 + fcmple32 %f34,pio2_3,%l2 + movnz %icc,%o4,%l6 + + fsubd %f0,%f4,%f0 + fmuld %f6,pio2_3,%f6 + add %i5,1,%o4 ! n = (n >> 1) | (((n + 1) ^ l) & 2) + srl %i5,1,%i5 + + fsubd %f10,%f14,%f10 + fmuld %f16,pio2_3,%f16 + xor %o4,%l0,%o4 + + fsubd %f20,%f24,%f20 + fmuld %f26,pio2_3,%f26 + and %o4,2,%o4 + + fsubd %f6,%f0,%f6 + or %i5,%o4,%i5 + + fsubd %f16,%f10,%f16 + add %g5,1,%o4 + srl %g5,1,%g5 + + fsubd %f26,%f20,%f26 + xor %o4,%l1,%o4 + + fsubd %f30,%f6,%f0 ! 
reduced x + and %o4,2,%o4 + + fsubd %f32,%f16,%f10 + or %g5,%o4,%g5 + + fsubd %f34,%f26,%f20 + add %o7,1,%o4 + srl %o7,1,%o7 + + fzero %f38 + xor %o4,%l2,%o4 + + fabsd %f0,%f2 + and %o4,2,%o4 + + fabsd %f10,%f12 + or %o7,%o4,%o7 + + fabsd %f20,%f22 + sethi %hi(0x3e400000),%o4 + + fnegd %f38,%f38 + + faddd %f2,c3two44,%f4 + st %f5,[%fp+nk0] + + faddd %f12,c3two44,%f14 + st %f15,[%fp+nk1] + + faddd %f22,c3two44,%f24 + st %f25,[%fp+nk2] + + fsubd %f30,%f0,%f4 + + fsubd %f32,%f10,%f14 + + fsubd %f34,%f20,%f24 + + fsubd %f4,%f6,%f6 ! w + ld [%fp+nk0],%l0 + + fsubd %f14,%f16,%f16 + ld [%fp+nk1],%l1 + + fsubd %f24,%f26,%f26 + ld [%fp+nk2],%l2 + sll %l0,5,%l0 ! k + + fand %f0,%f38,%f30 ! sign bit of x + ldd [%l0+%g1],%f4 + sll %l1,5,%l1 + + fand %f10,%f38,%f32 + ldd [%l1+%g1],%f14 + sll %l2,5,%l2 + + fand %f20,%f38,%f34 + ldd [%l2+%g1],%f24 + + fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] + + fsubd %f12,%f14,%f12 + + fsubd %f22,%f24,%f22 + + fmuld %f2,%f2,%f0 ! z = x * x + fxor %f6,%f30,%f30 + + fmuld %f12,%f12,%f10 + fxor %f16,%f32,%f32 + + fmuld %f22,%f22,%f20 + fxor %f26,%f34,%f34 + + fmuld %f0,pp3,%f6 + + fmuld %f10,pp3,%f16 + + fmuld %f20,pp3,%f26 + + faddd %f6,pp2,%f6 + fmuld %f0,qq3,%f4 + + faddd %f16,pp2,%f16 + fmuld %f10,qq3,%f14 + + faddd %f26,pp2,%f26 + fmuld %f20,qq3,%f24 + + fmuld %f0,%f6,%f6 + faddd %f4,qq2,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq2,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq2,%f24 + + faddd %f6,pp1,%f6 + fmuld %f0,%f4,%f4 + add %l0,%g1,%l0 + + faddd %f16,pp1,%f16 + fmuld %f10,%f14,%f14 + add %l1,%g1,%l1 + + faddd %f26,pp1,%f26 + fmuld %f20,%f24,%f24 + add %l2,%g1,%l2 + + fmuld %f0,%f6,%f6 + faddd %f4,qq1,%f4 + + fmuld %f10,%f16,%f16 + faddd %f14,qq1,%f14 + + fmuld %f20,%f26,%f26 + faddd %f24,qq1,%f24 + + fmuld %f2,%f6,%f6 + ldd [%l0+16],%f8 + + fmuld %f12,%f16,%f16 + ldd [%l1+16],%f18 + + fmuld %f22,%f26,%f26 + ldd [%l2+16],%f28 + + faddd %f6,%f30,%f6 + fmuld %f0,%f4,%f4 + ldd [%l0+8],%f30 + + faddd %f16,%f32,%f16 + fmuld 
%f10,%f14,%f14 + ldd [%l1+8],%f32 + + faddd %f26,%f34,%f26 + fmuld %f20,%f24,%f24 + ldd [%l2+8],%f34 + + fmuld %f8,%f4,%f0 ! c * cpoly + faddd %f6,%f2,%f6 + + fmuld %f18,%f14,%f10 + faddd %f16,%f12,%f16 + + fmuld %f28,%f24,%f20 + faddd %f26,%f22,%f26 + + fmuld %f30,%f6,%f2 ! s * spoly + + fmuld %f32,%f16,%f12 + + fmuld %f34,%f26,%f22 + + fmuld %f8,%f6,%f6 ! c * spoly + fsubd %f0,%f2,%f2 + + fmuld %f18,%f16,%f16 + fsubd %f10,%f12,%f12 + + fmuld %f28,%f26,%f26 + fsubd %f20,%f22,%f22 + + fmuld %f30,%f4,%f4 ! s * cpoly + faddd %f8,%f2,%f8 + + fmuld %f32,%f14,%f14 + faddd %f18,%f12,%f18 + + fmuld %f34,%f24,%f24 + faddd %f28,%f22,%f28 + + faddd %f4,%f6,%f6 + + faddd %f14,%f16,%f16 + + faddd %f24,%f26,%f26 + + faddd %f30,%f6,%f6 ! now %f6 = sin |x|, %f8 = cos |x| + + faddd %f32,%f16,%f16 + + faddd %f34,%f26,%f26 + + fnegd %f8,%f4 ! if (n & 1) c = -c + lda [%i1]%asi,%l0 ! preload next argument + mov %i5,%l1 + + fnegd %f18,%f14 + lda [%i1]%asi,%f0 + sethi %hi(0x80000000),%i5 + + fnegd %f28,%f24 + lda [%i1+4]%asi,%f3 + + andcc %l1,1,%g0 + fmovdnz %icc,%f4,%f8 + st %f8,[%l4] + + andcc %g5,1,%g0 + fmovdnz %icc,%f14,%f18 + st %f9,[%l4+4] + + andcc %o7,1,%g0 + fmovdnz %icc,%f24,%f28 + st %f18,[%l5] + + fnegd %f6,%f4 ! if (n & 2) s = -s + st %f19,[%l5+4] + andn %l0,%i5,%l0 + + fnegd %f16,%f14 + st %f28,[%l6] + add %i1,%i2,%i1 + + fnegd %f26,%f24 + st %f29,[%l6+4] + + andcc %l1,2,%g0 + fmovdnz %icc,%f4,%f6 + st %f6,[%o0] + + andcc %g5,2,%g0 + fmovdnz %icc,%f14,%f16 + st %f16,[%o1] + + andcc %o7,2,%g0 + fmovdnz %icc,%f24,%f26 + st %f26,[%o2] + + addcc %i0,-1,%i0 + bg,pt %icc,.loop0 +! delay slot + st %f7,[%o0+4] + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.end: /* Finish the outstanding ps1/ps2 second-word stores; when biguns is nonzero, reload the saved arguments and call __vlibm_vsincos_big. */ + st %f17,[%o1+4] + st %f27,[%o2+4] + ld [%fp+biguns],%i5 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! 
delay slot + nop +#ifdef __sparcv9 + stx %o5,[%sp+STACK_BIAS+0xb8] + ldx [%fp+xsave],%o1 + ldx [%fp+ssave],%o3 + ldx [%fp+csave],%o5 + ldx [%fp+STACK_BIAS+0xb0],%i5 + stx %i5,[%sp+STACK_BIAS+0xb0] +#else + st %o5,[%sp+0x60] + ld [%fp+xsave],%o1 + ld [%fp+ssave],%o3 + ld [%fp+csave],%o5 + ld [%fp+0x5c],%i5 + st %i5,[%sp+0x5c] +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sssave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vsincos_big + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 16 +.last1: + faddd %f2,c3two44,%f4 + st %f17,[%o1+4] +.last1_from_range1: + mov 0,%l1 + fzeros %f10 + fzero %f12 + add %fp,junk,%o1 + add %fp,junk,%l5 +.last2: + faddd %f12,c3two44,%f14 + st %f27,[%o2+4] + st %f5,[%fp+nk0] + st %f15,[%fp+nk1] +.last2_from_range2: + mov 0,%l2 + fzeros %f20 + fzero %f22 + add %fp,junk,%o2 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%l6 + + + .align 16 +.range0: /* Element 0 outside the primary range: hx < 0x3e400000 stores s = x and c = 1.0 (0x3ff00000, 0); non-finite x stores |x| * 0 to both outputs; any other finite x only sets biguns. */ + cmp %l0,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l0,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f0 + fmuld %f2,%f0,%f2 + st %f2,[%o0] + st %f3,[%o0+4] + st %f2,[%l3] + ba,pt %icc,2f +! delay slot + st %f3,[%l3+4] +1: + fdtoi %f2,%f4 ! raise inexact if not zero + st %f0,[%o0] + st %f3,[%o0+4] + sethi %hi(0x3ff00000),%g5 + st %g5,[%l3] + st %g0,[%l3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.end +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + andn %l1,%i5,%l0 ! hx &= ~0x80000000 + fmovs %f10,%f0 + fmovs %f13,%f3 + ba,pt %icc,.loop0 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range1: + cmp %l1,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! 
delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l1,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! 
set biguns + fzero %f10 + fmuld %f12,%f10,%f12 + st %f12,[%o1] + st %f13,[%o1+4] + st %f12,[%l3] + ba,pt %icc,2f +! delay slot + st %f13,[%l3+4] +1: + fdtoi %f12,%f14 ! raise inexact if not zero + st %f10,[%o1] + st %f13,[%o1+4] + sethi %hi(0x3ff00000),%g5 + st %g5,[%l3] + st %g0,[%l3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last1_from_range1 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + andn %l2,%i5,%l1 ! hx &= ~0x80000000 + fmovs %f20,%f10 + fmovs %f23,%f13 + ba,pt %icc,.loop1 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + + .align 16 +.range2: + cmp %l2,%o4 + bl,pt %icc,1f ! hx < 0x3e400000 +! delay slot, harmless if branch taken + sethi %hi(0x7ff00000),%o7 + cmp %l2,%o7 + bl,a,pt %icc,2f ! branch if finite +! delay slot, squashed if branch not taken + st %o4,[%fp+biguns] ! set biguns + fzero %f20 + fmuld %f22,%f20,%f22 + st %f22,[%o2] + st %f23,[%o2+4] + st %f22,[%l3] + ba,pt %icc,2f +! delay slot + st %f23,[%l3+4] +1: + fdtoi %f22,%f24 ! raise inexact if not zero + st %f20,[%o2] + st %f23,[%o2+4] + sethi %hi(0x3ff00000),%g5 + st %g5,[%l3] + st %g0,[%l3+4] +2: + addcc %i0,-1,%i0 + ble,pn %icc,.last2_from_range2 +! delay slot, harmless if branch taken + add %i3,%i4,%i3 ! s += strides + add %l3,%l7,%l3 ! c += stridec + ld [%i1],%l2 + ld [%i1],%f20 + ld [%i1+4],%f23 + andn %l2,%i5,%l2 ! hx &= ~0x80000000 + ba,pt %icc,.loop2 +! delay slot + add %i1,%i2,%i1 ! x += stridex + + SET_SIZE(__vsincos) + diff --git a/usr/src/libm/src/mvec/vis/__vsincosf.S b/usr/src/libm/src/mvec/vis/__vsincosf.S new file mode 100644 index 0000000..c071d91 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsincosf.S @@ -0,0 +1,905 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vsincosf.S 1.8 06/01/23 SMI" + + .file "__vsincosf.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: + .word 0xbfc55554,0x60000000 + .word 0x3f811077,0xe0000000 + .word 0xbf29956b,0x60000000 + .word 0x3ff00000,0x00000000 + .word 0xbfe00000,0x00000000 + .word 0x3fa55554,0xa0000000 + .word 0xbf56c0c1,0xe0000000 + .word 0x3ef99e24,0xe0000000 + .word 0x3fe45f30,0x6dc9c883 + .word 0x43380000,0x00000000 + .word 0x3ff921fb,0x54400000 + .word 0x3dd0b461,0x1a626331 + .word 0x3f490fdb,0 + .word 0x49c90fdb,0 + .word 0x7f800000,0 + .word 0x80000000,0 + +#define S0 0x0 +#define S1 0x08 +#define S2 0x10 +#define one 0x18 +#define mhalf 0x20 +#define C0 0x28 +#define C1 0x30 +#define C2 0x38 +#define invpio2 0x40 +#define round 0x48 +#define pio2_1 0x50 +#define pio2_t 0x58 +#define thresh1 0x60 +#define thresh2 0x68 +#define inf 0x70 +#define signbit 0x78 + +! local storage indices + +#define xsave STACK_BIAS-0x8 +#define ssave STACK_BIAS-0x10 +#define csave STACK_BIAS-0x18 +#define nsave STACK_BIAS-0x1c +#define sxsave STACK_BIAS-0x20 +#define sssave STACK_BIAS-0x24 +#define junk STACK_BIAS-0x28 +#define n3 STACK_BIAS-0x38 +#define n2 STACK_BIAS-0x40 +#define n1 STACK_BIAS-0x48 +#define n0 STACK_BIAS-0x50 +! 
sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x50 + +! register use + +! i0 n +! i1 x +! i2 stridex +! i3 s +! i4 strides +! i5 biguns + +! l0 ps0 +! l1 ps1 +! l2 ps2 +! l3 ps3 +! l4 pc0 +! l5 pc1 +! l6 pc2 +! l7 pc3 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 +! g5 + +! o0 n0 +! o1 n1 +! o2 n2 +! o3 n3 +! o4 c +! o5 stridec +! o7 + +! f0 x0 +! f2 x1 +! f4 x2 +! f6 x3 +! f8 thresh1 (pi/4) +! f10 s0 +! f12 s1 +! f14 s2 +! f16 s3 +! f18 thresh2 (2^19 pi) +! f20 c0 +! f22 c1 +! f24 c2 +! f26 c3 +! f28 signbit +! f30 +! f32 +! f34 +! f36 +! f38 inf +! f40 S0 +! f42 S1 +! f44 S2 +! f46 one +! f48 mhalf +! f50 C0 +! f52 C1 +! f54 C2 +! f56 invpio2 +! f58 round +! f60 pio2_1 +! f62 pio2_t + + ENTRY(__vsincosf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,o0) + mov %o0,%g1 + +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ssave] + stx %i5,[%fp+csave] + ldx [%fp+STACK_BIAS+0xb0],%o5 +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ssave] + st %i5,[%fp+csave] + ld [%fp+0x5c],%o5 +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sssave] + mov %i5,%o4 + mov 0,%i5 ! biguns = 0 + ldd [%g1+S0],%f40 ! load constants + ldd [%g1+S1],%f42 + ldd [%g1+S2],%f44 + ldd [%g1+one],%f46 + ldd [%g1+mhalf],%f48 + ldd [%g1+C0],%f50 + ldd [%g1+C1],%f52 + ldd [%g1+C2],%f54 + ldd [%g1+invpio2],%f56 + ldd [%g1+round],%f58 + ldd [%g1+pio2_1],%f60 + ldd [%g1+pio2_t],%f62 + ldd [%g1+thresh1],%f8 + ldd [%g1+thresh2],%f18 + ldd [%g1+inf],%f38 + ldd [%g1+signbit],%f28 + sll %i2,2,%i2 ! scale strides + sll %i4,2,%i4 + sll %o5,2,%o5 + nop + fzero %f10 ! loop prologue + add %fp,junk,%l0 + fzero %f20 + add %fp,junk,%l4 + fzero %f12 + add %fp,junk,%l1 + fzero %f22 + add %fp,junk,%l5 + fzero %f14 + add %fp,junk,%l2 + fzero %f24 + add %fp,junk,%l6 + fzero %f16 + add %fp,junk,%l3 + fzero %f26 + ba .start + add %fp,junk,%l7 + +! 16-byte aligned + .align 16 +.start: + ld [%i1],%f0 ! 
*x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f10,%f10 + + st %f10,[%l0] + mov %i3,%l0 ! ps0 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f20,%f20 + + st %f20,[%l4] + mov %o4,%l4 ! pc0 = c + ble,pn %icc,.last1 +! delay slot + add %o4,%o5,%o4 ! c += stridec + + ld [%i1],%f2 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f12,%f12 + + st %f12,[%l1] + mov %i3,%l1 ! ps1 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f22,%f22 + + st %f22,[%l5] + mov %o4,%l5 ! pc1 = c + ble,pn %icc,.last2 +! delay slot + add %o4,%o5,%o4 ! c += stridec + + ld [%i1],%f4 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f14,%f14 + + st %f14,[%l2] + mov %i3,%l2 ! ps2 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f24,%f24 + + st %f24,[%l6] + mov %o4,%l6 ! pc2 = c + ble,pn %icc,.last3 +! delay slot + add %o4,%o5,%o4 ! c += stridec + + ld [%i1],%f6 ! *x + add %i1,%i2,%i1 ! x += stridex + nop + fdtos %f16,%f16 + + st %f16,[%l3] + mov %i3,%l3 ! ps3 = s + add %i3,%i4,%i3 ! s += strides + fdtos %f26,%f26 + + st %f26,[%l7] + mov %o4,%l7 ! pc3 = c + add %o4,%o5,%o4 ! c += stridec +.cont: + fabsd %f0,%f30 + + fabsd %f2,%f32 + + fabsd %f4,%f34 + + fabsd %f6,%f36 + fcmple32 %f30,%f18,%o0 + + fcmple32 %f32,%f18,%o1 + + fcmple32 %f34,%f18,%o2 + + fcmple32 %f36,%f18,%o3 + nop + +! 16-byte aligned + andcc %o0,2,%g0 + bz,pn %icc,.range0 ! branch if > 2^19 pi +! delay slot + fcmple32 %f30,%f8,%o0 + +.check1: + andcc %o1,2,%g0 + bz,pn %icc,.range1 ! branch if > 2^19 pi +! delay slot + fcmple32 %f32,%f8,%o1 + +.check2: + andcc %o2,2,%g0 + bz,pn %icc,.range2 ! branch if > 2^19 pi +! delay slot + fcmple32 %f34,%f8,%o2 + +.check3: + andcc %o3,2,%g0 + bz,pn %icc,.range3 ! branch if > 2^19 pi +! 
delay slot + fcmple32 %f36,%f8,%o3 + +.checkprimary: + fsmuld %f0,%f0,%f30 + fstod %f0,%f0 + + fsmuld %f2,%f2,%f32 + fstod %f2,%f2 + and %o0,%o1,%o7 + + fsmuld %f4,%f4,%f34 + fstod %f4,%f4 + and %o2,%o7,%o7 + + fsmuld %f6,%f6,%f36 + fstod %f6,%f6 + and %o3,%o7,%o7 + + fmuld %f30,%f54,%f20 + andcc %o7,2,%g0 + bz,pn %icc,.medium ! branch if any argument is > pi/4 +! delay slot + nop + + fmuld %f32,%f54,%f22 + + fmuld %f34,%f54,%f24 + + fmuld %f36,%f54,%f26 + + faddd %f20,%f52,%f20 + fmuld %f30,%f44,%f10 + + faddd %f22,%f52,%f22 + fmuld %f32,%f44,%f12 + + faddd %f24,%f52,%f24 + fmuld %f34,%f44,%f14 + + faddd %f26,%f52,%f26 + fmuld %f36,%f44,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f42,%f16 + + faddd %f20,%f50,%f20 + fmuld %f30,%f10,%f10 + + faddd %f22,%f50,%f22 + fmuld %f32,%f12,%f12 + + faddd %f24,%f50,%f24 + fmuld %f34,%f14,%f14 + + faddd %f26,%f50,%f26 + fmuld %f36,%f16,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f40,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f40,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f40,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f40,%f16 + + faddd %f20,%f48,%f20 + fmuld %f30,%f10,%f10 + + faddd %f22,%f48,%f22 + fmuld %f32,%f12,%f12 + + faddd %f24,%f48,%f24 + fmuld %f34,%f14,%f14 + + faddd %f26,%f48,%f26 + fmuld %f36,%f16,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f46,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f46,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f46,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f46,%f16 + + faddd %f20,%f46,%f20 + fmuld %f0,%f10,%f10 + + faddd %f22,%f46,%f22 + fmuld %f2,%f12,%f12 + + faddd %f24,%f46,%f24 + fmuld %f4,%f14,%f14 + addcc %i0,-1,%i0 + + faddd %f26,%f46,%f26 + bg,pt %icc,.start +! delay slot + fmuld %f6,%f16,%f16 + + ba,pt %icc,.end +! 
delay slot + nop + + + .align 16 +.medium: + fmuld %f0,%f56,%f10 + + fmuld %f2,%f56,%f12 + + fmuld %f4,%f56,%f14 + + fmuld %f6,%f56,%f16 + + faddd %f10,%f58,%f10 + st %f11,[%fp+n0] + + faddd %f12,%f58,%f12 + st %f13,[%fp+n1] + + faddd %f14,%f58,%f14 + st %f15,[%fp+n2] + + faddd %f16,%f58,%f16 + st %f17,[%fp+n3] + + fsubd %f10,%f58,%f10 + + fsubd %f12,%f58,%f12 + + fsubd %f14,%f58,%f14 + + fsubd %f16,%f58,%f16 + + fmuld %f10,%f60,%f20 + ld [%fp+n0],%o0 + + fmuld %f12,%f60,%f22 + ld [%fp+n1],%o1 + + fmuld %f14,%f60,%f24 + ld [%fp+n2],%o2 + + fmuld %f16,%f60,%f26 + ld [%fp+n3],%o3 + + fsubd %f0,%f20,%f0 + fmuld %f10,%f62,%f30 + and %o0,1,%o0 + mov %l0,%g1 + + fsubd %f2,%f22,%f2 + fmuld %f12,%f62,%f32 + and %o1,1,%o1 + movrnz %o0,%l4,%l0 ! if (n & 1) exchange ps and pc + + fsubd %f4,%f24,%f4 + fmuld %f14,%f62,%f34 + and %o2,1,%o2 + movrnz %o0,%g1,%l4 + + fsubd %f6,%f26,%f6 + fmuld %f16,%f62,%f36 + and %o3,1,%o3 + mov %l1,%g1 + + fsubd %f0,%f30,%f0 + movrnz %o1,%l5,%l1 + + fsubd %f2,%f32,%f2 + movrnz %o1,%g1,%l5 + + fsubd %f4,%f34,%f4 + mov %l2,%g1 + + fsubd %f6,%f36,%f6 + movrnz %o2,%l6,%l2 + + fmuld %f0,%f0,%f30 + fnegd %f0,%f10 + movrnz %o2,%g1,%l6 + + fmuld %f2,%f2,%f32 + fnegd %f2,%f12 + mov %l3,%g1 + + fmuld %f4,%f4,%f34 + fnegd %f4,%f14 + movrnz %o3,%l7,%l3 + + fmuld %f6,%f6,%f36 + fnegd %f6,%f16 + movrnz %o3,%g1,%l7 + + fmuld %f30,%f54,%f20 + fmovrdnz %o0,%f10,%f0 ! 
if (n & 1) x = -x + + fmuld %f32,%f54,%f22 + fmovrdnz %o1,%f12,%f2 + + fmuld %f34,%f54,%f24 + fmovrdnz %o2,%f14,%f4 + + fmuld %f36,%f54,%f26 + fmovrdnz %o3,%f16,%f6 + + faddd %f20,%f52,%f20 + fmuld %f30,%f44,%f10 + ld [%fp+n0],%o0 + + faddd %f22,%f52,%f22 + fmuld %f32,%f44,%f12 + and %o0,2,%o0 + + faddd %f24,%f52,%f24 + fmuld %f34,%f44,%f14 + sllx %o0,62,%g1 + stx %g1,[%fp+n0] + + faddd %f26,%f52,%f26 + fmuld %f36,%f44,%f16 + ld [%fp+n1],%o1 + + fmuld %f30,%f20,%f20 + faddd %f10,%f42,%f10 + and %o1,2,%o1 + + fmuld %f32,%f22,%f22 + faddd %f12,%f42,%f12 + sllx %o1,62,%g1 + stx %g1,[%fp+n1] + + fmuld %f34,%f24,%f24 + faddd %f14,%f42,%f14 + ld [%fp+n2],%o2 + + fmuld %f36,%f26,%f26 + faddd %f16,%f42,%f16 + and %o2,2,%o2 + + faddd %f20,%f50,%f20 + fmuld %f30,%f10,%f10 + sllx %o2,62,%g1 + stx %g1,[%fp+n2] + + faddd %f22,%f50,%f22 + fmuld %f32,%f12,%f12 + ld [%fp+n3],%o3 + + faddd %f24,%f50,%f24 + fmuld %f34,%f14,%f14 + and %o3,2,%o3 + + faddd %f26,%f50,%f26 + fmuld %f36,%f16,%f16 + sllx %o3,62,%g1 + stx %g1,[%fp+n3] + + fmuld %f30,%f20,%f20 + faddd %f10,%f40,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f40,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f40,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f40,%f16 + + faddd %f20,%f48,%f20 + fmuld %f30,%f10,%f10 + + faddd %f22,%f48,%f22 + fmuld %f32,%f12,%f12 + + faddd %f24,%f48,%f24 + fmuld %f34,%f14,%f14 + + faddd %f26,%f48,%f26 + fmuld %f36,%f16,%f16 + + fmuld %f30,%f20,%f20 + faddd %f10,%f46,%f10 + + fmuld %f32,%f22,%f22 + faddd %f12,%f46,%f12 + + fmuld %f34,%f24,%f24 + faddd %f14,%f46,%f14 + + fmuld %f36,%f26,%f26 + faddd %f16,%f46,%f16 + + faddd %f20,%f46,%f20 + fmuld %f0,%f10,%f10 + ldd [%fp+n0],%f30 + + faddd %f22,%f46,%f22 + fmuld %f2,%f12,%f12 + ldd [%fp+n1],%f32 + + faddd %f24,%f46,%f24 + fmuld %f4,%f14,%f14 + ldd [%fp+n2],%f34 + + faddd %f26,%f46,%f26 + fmuld %f6,%f16,%f16 + ldd [%fp+n3],%f36 + + fxor %f10,%f30,%f10 ! 
if (n & 2) negate s, c + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + fxor %f16,%f36,%f16 + + fxor %f20,%f30,%f20 + + fxor %f22,%f32,%f22 + + fxor %f24,%f34,%f24 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f26,%f36,%f26 + + ba,pt %icc,.end +! delay slot + nop + + + .align 32 +.end: + fdtos %f10,%f10 + st %f10,[%l0] + fdtos %f20,%f20 + st %f20,[%l4] + fdtos %f12,%f12 + st %f12,[%l1] + fdtos %f22,%f22 + st %f22,[%l5] + fdtos %f14,%f14 + st %f14,[%l2] + fdtos %f24,%f24 + st %f24,[%l6] + fdtos %f16,%f16 + st %f16,[%l3] + fdtos %f26,%f26 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + st %f26,[%l7] +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ssave],%o3 + ldx [%fp+csave],%o5 + ldx [%fp+STACK_BIAS+0xb0],%i5 + stx %i5,[%sp+STACK_BIAS+0xb0] +#else + ld [%fp+xsave],%o1 + ld [%fp+ssave],%o3 + ld [%fp+csave],%o5 + ld [%fp+0x5c],%i5 + st %i5,[%sp+0x5c] +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sssave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vsincos_bigf + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 32 +.last1: + fdtos %f12,%f12 + st %f12,[%l1] + nop + fdtos %f22,%f22 + st %f22,[%l5] + fzeros %f2 + add %fp,junk,%l5 + add %fp,junk,%l1 +.last2: + fdtos %f14,%f14 + st %f14,[%l2] + nop + fdtos %f24,%f24 + st %f24,[%l6] + fzeros %f4 + add %fp,junk,%l2 + add %fp,junk,%l6 +.last3: + fdtos %f16,%f16 + st %f16,[%l3] + fdtos %f26,%f26 + st %f26,[%l7] + fzeros %f6 + add %fp,junk,%l3 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%l7 + + + .align 16 +.range0: + fcmpgt32 %f38,%f30,%o0 + andcc %o0,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f1 + fmuls %f0,%f1,%f0 + st %f0,[%l0] + st %f0,[%l4] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! 
delay slot + nop + ld [%i1],%f0 + add %i1,%i2,%i1 + mov %i3,%l0 + add %i3,%i4,%i3 + fabsd %f0,%f30 + mov %o4,%l4 + add %o4,%o5,%o4 + fcmple32 %f30,%f18,%o0 + andcc %o0,2,%g0 + bz,pn %icc,.range0 +! delay slot + nop + ba,pt %icc,.check1 +! delay slot + fcmple32 %f30,%f8,%o0 +1: + fzero %f0 ! set up dummy argument + add %fp,junk,%l0 + add %fp,junk,%l4 + mov 2,%o0 + ba,pt %icc,.check1 +! delay slot + fzero %f30 + + + .align 16 +.range1: + fcmpgt32 %f38,%f32,%o1 + andcc %o1,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f3 + fmuls %f2,%f3,%f2 + st %f2,[%l1] + st %f2,[%l5] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f2 + add %i1,%i2,%i1 + mov %i3,%l1 + add %i3,%i4,%i3 + fabsd %f2,%f32 + mov %o4,%l5 + add %o4,%o5,%o4 + fcmple32 %f32,%f18,%o1 + andcc %o1,2,%g0 + bz,pn %icc,.range1 +! delay slot + nop + ba,pt %icc,.check2 +! delay slot + fcmple32 %f32,%f8,%o1 +1: + fzero %f2 ! set up dummy argument + add %fp,junk,%l1 + add %fp,junk,%l5 + mov 2,%o1 + ba,pt %icc,.check2 +! delay slot + fzero %f32 + + + .align 16 +.range2: + fcmpgt32 %f38,%f34,%o2 + andcc %o2,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f5 + fmuls %f4,%f5,%f4 + st %f4,[%l2] + st %f4,[%l6] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f4 + add %i1,%i2,%i1 + mov %i3,%l2 + add %i3,%i4,%i3 + fabsd %f4,%f34 + mov %o4,%l6 + add %o4,%o5,%o4 + fcmple32 %f34,%f18,%o2 + andcc %o2,2,%g0 + bz,pn %icc,.range2 +! delay slot + nop + ba,pt %icc,.check3 +! delay slot + fcmple32 %f34,%f8,%o2 +1: + fzero %f4 ! set up dummy argument + add %fp,junk,%l2 + add %fp,junk,%l6 + mov 2,%o2 + ba,pt %icc,.check3 +! delay slot + fzero %f34 + + + .align 16 +.range3: + fcmpgt32 %f38,%f36,%o3 + andcc %o3,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! 
set biguns + fzeros %f7 + fmuls %f6,%f7,%f6 + st %f6,[%l3] + st %f6,[%l7] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f6 + add %i1,%i2,%i1 + mov %i3,%l3 + add %i3,%i4,%i3 + fabsd %f6,%f36 + mov %o4,%l7 + add %o4,%o5,%o4 + fcmple32 %f36,%f18,%o3 + andcc %o3,2,%g0 + bz,pn %icc,.range3 +! delay slot + nop + ba,pt %icc,.checkprimary +! delay slot + fcmple32 %f36,%f8,%o3 +1: + fzero %f6 ! set up dummy argument + add %fp,junk,%l3 + add %fp,junk,%l7 + mov 2,%o3 + ba,pt %icc,.checkprimary +! delay slot + fzero %f36 + + SET_SIZE(__vsincosf) + diff --git a/usr/src/libm/src/mvec/vis/__vsinf.S b/usr/src/libm/src/mvec/vis/__vsinf.S new file mode 100644 index 0000000..2e570b7 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsinf.S @@ -0,0 +1,2093 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vsinf.S 1.9 06/01/23 SMI" + + .file "__vsinf.S" + +#include "libm.h" + + RO_DATA + .align 64 +constants: ! 16 entries of 8 bytes each; the #define names that follow are byte offsets into this table + .word 0xbfc55554,0x60000000 ! S0: sin polynomial coefficient, about -1/6 + .word 0x3f811077,0xe0000000 ! S1: sin polynomial coefficient, about 8.332e-3 + .word 0xbf29956b,0x60000000 ! S2: sin polynomial coefficient, about -1.95e-4 + .word 0x3ff00000,0x00000000 ! one: 1.0 + .word 0xbfe00000,0x00000000 ! mhalf: -0.5 + .word 0x3fa55554,0xa0000000 ! C0: cos polynomial coefficient, about 1/24 + .word 0xbf56c0c1,0xe0000000 ! C1: cos polynomial coefficient, about -1/720 + .word 0x3ef99e24,0xe0000000 ! C2: cos polynomial coefficient, about 2.44e-5 + .word 0x3fe45f30,0x6dc9c883 ! invpio2: 2/pi + .word 0x43380000,0x00000000 ! round: 1.5*2^52, added then subtracted to round to an integer n + .word 0x3ff921fb,0x54400000 ! pio2_1: leading part of pi/2 + .word 0x3dd0b461,0x1a626331 ! pio2_t: tail of pi/2, subtracted after n*pio2_1 + .word 0x3f490fdb,0 ! thresh1: single precision bit pattern of pi/4 in the high word + .word 0x49c90fdb,0 ! thresh2: single precision bit pattern of 2^19 pi in the high word + .word 0x7f800000,0 ! inf: single precision infinity bit pattern in the high word + .word 0x80000000,0 ! signbit: only the top bit set, xor-ed into a result to negate it + +#define S0 0x0 +#define S1 0x08 +#define S2 0x10 +#define one 0x18 +#define mhalf 0x20 +#define C0 0x28 +#define C1 0x30 +#define C2 0x38 +#define invpio2 0x40 +#define round 0x48 +#define pio2_1 0x50 +#define pio2_t 0x58 +#define thresh1 0x60 +#define thresh2 0x68 +#define inf 0x70 +#define signbit 0x78 + +! local storage indices (used as offsets from %fp) + +#define xsave STACK_BIAS-0x8 +#define ysave STACK_BIAS-0x10 +#define nsave STACK_BIAS-0x14 +#define sxsave STACK_BIAS-0x18 +#define sysave STACK_BIAS-0x1c +#define junk STACK_BIAS-0x20 +#define n3 STACK_BIAS-0x24 +#define n2 STACK_BIAS-0x28 +#define n1 STACK_BIAS-0x2c +#define n0 STACK_BIAS-0x30 +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x30 + +! register use (x0..x3, y0..y3, py0..py3 and n0..n3 are the four elements handled per loop pass) + +! i0 n +! i1 x +! i2 stridex +! i3 y +! i4 stridey +! i5 biguns + +! l0 n0 +! l1 n1 +! l2 n2 +! l3 n3 +! l4 +! l5 +! l6 +! l7 + +! the following are 64-bit registers in both V8+ and V9 + +! g1 +! g5 + +! o0 py0 +! o1 py1 +! o2 py2 +! o3 py3 +! o4 +! o5 +! o7 + +! f0 x0 +! f2 x1 +! f4 x2 +! f6 x3 +! f8 thresh1 (pi/4) +! f10 y0 +! f12 y1 +! f14 y2 +! f16 y3 +! f18 thresh2 (2^19 pi) +! f20 +! f22 +! f24 +! f26 +! f28 signbit +! f30 +! f32 +! f34 +! f36 +! f38 inf +! f40 S0 +! f42 S1 +! f44 S2 +! f46 one +! f48 mhalf +! f50 C0 +! f52 C1 +! f54 C2 +! f56 invpio2 +! f58 round +! f60 pio2_1 +! 
f62 pio2_t + + ENTRY(__vsinf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,constants,l1) + mov %l1,%g1 + wr %g0,0x82,%asi ! set %asi for non-faulting loads +#ifdef __sparcv9 + stx %i1,[%fp+xsave] ! save arguments + stx %i3,[%fp+ysave] +#else + st %i1,[%fp+xsave] ! save arguments + st %i3,[%fp+ysave] +#endif + st %i0,[%fp+nsave] + st %i2,[%fp+sxsave] + st %i4,[%fp+sysave] + mov 0,%i5 ! biguns = 0 + ldd [%g1+S0],%f40 ! load constants + ldd [%g1+S1],%f42 + ldd [%g1+S2],%f44 + ldd [%g1+one],%f46 + ldd [%g1+mhalf],%f48 + ldd [%g1+C0],%f50 + ldd [%g1+C1],%f52 + ldd [%g1+C2],%f54 + ldd [%g1+invpio2],%f56 + ldd [%g1+round],%f58 + ldd [%g1+pio2_1],%f60 + ldd [%g1+pio2_t],%f62 + ldd [%g1+thresh1],%f8 + ldd [%g1+thresh2],%f18 + ldd [%g1+inf],%f38 + ldd [%g1+signbit],%f28 + sll %i2,2,%i2 ! scale strides + sll %i4,2,%i4 + fzero %f10 ! loop prologue + add %fp,junk,%o0 + fzero %f12 + add %fp,junk,%o1 + fzero %f14 + add %fp,junk,%o2 + fzero %f16 + ba .start + add %fp,junk,%o3 + +! 16-byte aligned + .align 16 +.start: + ld [%i1],%f0 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f10,%f10 + + st %f10,[%o0] + mov %i3,%o0 ! py0 = y + ble,pn %icc,.last1 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f2 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f12,%f12 + + st %f12,[%o1] + mov %i3,%o1 ! py1 = y + ble,pn %icc,.last2 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f4 ! *x + add %i1,%i2,%i1 ! x += stridex + addcc %i0,-1,%i0 + fdtos %f14,%f14 + + st %f14,[%o2] + mov %i3,%o2 ! py2 = y + ble,pn %icc,.last3 +! delay slot + add %i3,%i4,%i3 ! y += stridey + + ld [%i1],%f6 ! *x + add %i1,%i2,%i1 ! x += stridex + nop + fdtos %f16,%f16 + + st %f16,[%o3] + mov %i3,%o3 ! py3 = y + add %i3,%i4,%i3 ! y += stridey +.cont: + fabsd %f0,%f30 + + fabsd %f2,%f32 + + fabsd %f4,%f34 + + fabsd %f6,%f36 + fcmple32 %f30,%f18,%l0 + + fcmple32 %f32,%f18,%l1 + + fcmple32 %f34,%f18,%l2 + + fcmple32 %f36,%f18,%l3 + nop + +! 
16-byte aligned + andcc %l0,2,%g0 + bz,pn %icc,.range0 ! branch if > 2^19 pi +! delay slot + fcmple32 %f30,%f8,%l0 + +.check1: + andcc %l1,2,%g0 + bz,pn %icc,.range1 ! branch if > 2^19 pi +! delay slot + fcmple32 %f32,%f8,%l1 + +.check2: + andcc %l2,2,%g0 + bz,pn %icc,.range2 ! branch if > 2^19 pi +! delay slot + fcmple32 %f34,%f8,%l2 + +.check3: + andcc %l3,2,%g0 + bz,pn %icc,.range3 ! branch if > 2^19 pi +! delay slot + fcmple32 %f36,%f8,%l3 + +.checkprimary: + fsmuld %f0,%f0,%f30 + fstod %f0,%f0 + + fsmuld %f2,%f2,%f32 + fstod %f2,%f2 + and %l0,%l1,%o4 + + fsmuld %f4,%f4,%f34 + fstod %f4,%f4 + + fsmuld %f6,%f6,%f36 + fstod %f6,%f6 + and %l2,%l3,%o5 + + fmuld %f30,%f44,%f10 + and %o4,%o5,%o5 + + fmuld %f32,%f44,%f12 + andcc %o5,2,%g0 + bz,pn %icc,.medium ! branch if any argument is > pi/4 +! delay slot + nop + + fmuld %f34,%f44,%f14 + + fmuld %f36,%f44,%f16 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + + fmuld %f32,%f12,%f12 + + fmuld %f34,%f14,%f14 + + fmuld %f36,%f16,%f16 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fmuld %f0,%f10,%f10 + + fmuld %f2,%f12,%f12 + + fmuld %f4,%f14,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fmuld %f6,%f16,%f16 + + ba,pt %icc,.end +! 
delay slot + nop + + + .align 16 +.medium: + fmuld %f0,%f56,%f10 + + fmuld %f2,%f56,%f12 + + fmuld %f4,%f56,%f14 + + fmuld %f6,%f56,%f16 + + faddd %f10,%f58,%f10 + st %f11,[%fp+n0] + + faddd %f12,%f58,%f12 + st %f13,[%fp+n1] + + faddd %f14,%f58,%f14 + st %f15,[%fp+n2] + + faddd %f16,%f58,%f16 + st %f17,[%fp+n3] + + fsubd %f10,%f58,%f10 + + fsubd %f12,%f58,%f12 + + fsubd %f14,%f58,%f14 + + fsubd %f16,%f58,%f16 + + fmuld %f10,%f60,%f20 + ld [%fp+n0],%l0 + + fmuld %f12,%f60,%f22 + ld [%fp+n1],%l1 + + fmuld %f14,%f60,%f24 + ld [%fp+n2],%l2 + + fmuld %f16,%f60,%f26 + ld [%fp+n3],%l3 + + fsubd %f0,%f20,%f0 + fmuld %f10,%f62,%f30 + + fsubd %f2,%f22,%f2 + fmuld %f12,%f62,%f32 + + fsubd %f4,%f24,%f4 + fmuld %f14,%f62,%f34 + + fsubd %f6,%f26,%f6 + fmuld %f16,%f62,%f36 + + fsubd %f0,%f30,%f0 + + fsubd %f2,%f32,%f2 + + fsubd %f4,%f34,%f4 + + fsubd %f6,%f36,%f6 + andcc %l0,1,%g0 + + fmuld %f0,%f0,%f30 + bz,pn %icc,.case8 +! delay slot + andcc %l1,1,%g0 + + fmuld %f2,%f2,%f32 + bz,pn %icc,.case4 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case2 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case1 +! delay slot + nop + +!.case0: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case1: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case2: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case3 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case3: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case4: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case6 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case5 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case5: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case6: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case7 +! delay slot + nop + + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case7: + fmuld %f30,%f54,%f10 ! cos(x0) + fzero %f0 + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f48,%f20 + faddd %f10,%f52,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f10,%f10 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f30,%f30 + faddd %f10,%f50,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + fmuld %f30,%f10,%f10 + fmovrdnz %g1,%f28,%f0 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f0,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 16 +.case8: + fmuld %f2,%f2,%f32 + bz,pn %icc,.case12 +! delay slot + andcc %l2,1,%g0 + + fmuld %f4,%f4,%f34 + bz,pn %icc,.case10 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case9 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case9: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case10: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case11 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case11: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f54,%f12 ! cos(x1) + fzero %f2 + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f48,%f22 + faddd %f12,%f52,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f12,%f12 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f32,%f32 + faddd %f12,%f50,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + fmuld %f32,%f12,%f12 + fmovrdnz %g5,%f28,%f2 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + faddd %f12,%f22,%f12 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f2,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case12: + fmuld %f4,%f4,%f34 + bz,pn %icc,.case14 +! delay slot + andcc %l3,1,%g0 + + fmuld %f6,%f6,%f36 + bz,pn %icc,.case13 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case13: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f54,%f14 ! cos(x2) + fzero %f4 + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f48,%f24 + faddd %f14,%f52,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f14,%f14 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f34,%f34 + faddd %f14,%f50,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + fmuld %f34,%f14,%f14 + fmovrdnz %o4,%f28,%f4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + faddd %f14,%f24,%f14 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f4,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case14: + fmuld %f6,%f6,%f36 + bz,pn %icc,.case15 +! delay slot + nop + + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f54,%f16 ! 
cos(x3) + fzero %f6 + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f48,%f26 + faddd %f16,%f52,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f16,%f16 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f36,%f36 + faddd %f16,%f50,%f16 + and %l3,2,%o5 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + fmuld %f36,%f16,%f16 + fmovrdnz %o5,%f28,%f6 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + faddd %f16,%f26,%f16 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f6,%f16 + + ba,pt %icc,.end +! delay slot + nop + + .align 16 +.case15: + fmuld %f30,%f44,%f10 ! sin(x0) + + fmuld %f32,%f44,%f12 ! sin(x1) + + fmuld %f34,%f44,%f14 ! sin(x2) + + fmuld %f36,%f44,%f16 ! 
sin(x3) + + fmuld %f30,%f40,%f20 + faddd %f10,%f42,%f10 + + fmuld %f32,%f40,%f22 + faddd %f12,%f42,%f12 + + fmuld %f34,%f40,%f24 + faddd %f14,%f42,%f14 + + fmuld %f36,%f40,%f26 + faddd %f16,%f42,%f16 + + fmuld %f30,%f30,%f30 + faddd %f20,%f46,%f20 + + fmuld %f32,%f32,%f32 + faddd %f22,%f46,%f22 + + fmuld %f34,%f34,%f34 + faddd %f24,%f46,%f24 + + fmuld %f36,%f36,%f36 + faddd %f26,%f46,%f26 + + fmuld %f30,%f10,%f10 + fzero %f30 + + fmuld %f32,%f12,%f12 + fzero %f32 + + fmuld %f34,%f14,%f14 + fzero %f34 + + fmuld %f36,%f16,%f16 + fzero %f36 + + faddd %f10,%f20,%f10 + and %l0,2,%g1 + + faddd %f12,%f22,%f12 + and %l1,2,%g5 + + faddd %f14,%f24,%f14 + and %l2,2,%o4 + + faddd %f16,%f26,%f16 + and %l3,2,%o5 + + fmuld %f0,%f10,%f10 + fmovrdnz %g1,%f28,%f30 + + fmuld %f2,%f12,%f12 + fmovrdnz %g5,%f28,%f32 + + fmuld %f4,%f14,%f14 + fmovrdnz %o4,%f28,%f34 + + fmuld %f6,%f16,%f16 + fmovrdnz %o5,%f28,%f36 + + fxor %f10,%f30,%f10 + + fxor %f12,%f32,%f12 + + fxor %f14,%f34,%f14 + + addcc %i0,-1,%i0 + bg,pt %icc,.start +! delay slot + fxor %f16,%f36,%f16 + + ba,pt %icc,.end +! delay slot + nop + + + .align 32 +.end: + fdtos %f10,%f10 + st %f10,[%o0] + fdtos %f12,%f12 + st %f12,[%o1] + fdtos %f14,%f14 + st %f14,[%o2] + fdtos %f16,%f16 + tst %i5 ! check for huge arguments remaining + be,pt %icc,.exit +! delay slot + st %f16,[%o3] +#ifdef __sparcv9 + ldx [%fp+xsave],%o1 + ldx [%fp+ysave],%o3 +#else + ld [%fp+xsave],%o1 + ld [%fp+ysave],%o3 +#endif + ld [%fp+nsave],%o0 + ld [%fp+sxsave],%o2 + ld [%fp+sysave],%o4 + sra %o2,0,%o2 ! sign-extend for V9 + call __vlibm_vsin_bigf + sra %o4,0,%o4 ! delay slot + +.exit: + ret + restore + + + .align 32 +.last1: + fdtos %f12,%f12 + st %f12,[%o1] + fzeros %f2 + add %fp,junk,%o1 +.last2: + fdtos %f14,%f14 + st %f14,[%o2] + fzeros %f4 + add %fp,junk,%o2 +.last3: + fdtos %f16,%f16 + st %f16,[%o3] + fzeros %f6 + ba,pt %icc,.cont +! delay slot + add %fp,junk,%o3 + + + .align 16 +.range0: + fcmpgt32 %f38,%f30,%l0 + andcc %l0,2,%g0 + bnz,a,pt %icc,1f ! 
branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f1 + fmuls %f0,%f1,%f0 + st %f0,[%o0] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f0 + add %i1,%i2,%i1 + mov %i3,%o0 + add %i3,%i4,%i3 + fabsd %f0,%f30 + fcmple32 %f30,%f18,%l0 + andcc %l0,2,%g0 + bz,pn %icc,.range0 +! delay slot + nop + ba,pt %icc,.check1 +! delay slot + fcmple32 %f30,%f8,%l0 +1: + fzero %f0 ! set up dummy argument + add %fp,junk,%o0 + mov 2,%l0 + ba,pt %icc,.check1 +! delay slot + fzero %f30 + + + .align 16 +.range1: + fcmpgt32 %f38,%f32,%l1 + andcc %l1,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f3 + fmuls %f2,%f3,%f2 + st %f2,[%o1] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f2 + add %i1,%i2,%i1 + mov %i3,%o1 + add %i3,%i4,%i3 + fabsd %f2,%f32 + fcmple32 %f32,%f18,%l1 + andcc %l1,2,%g0 + bz,pn %icc,.range1 +! delay slot + nop + ba,pt %icc,.check2 +! delay slot + fcmple32 %f32,%f8,%l1 +1: + fzero %f2 ! set up dummy argument + add %fp,junk,%o1 + mov 2,%l1 + ba,pt %icc,.check2 +! delay slot + fzero %f32 + + + .align 16 +.range2: + fcmpgt32 %f38,%f34,%l2 + andcc %l2,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! set biguns + fzeros %f5 + fmuls %f4,%f5,%f4 + st %f4,[%o2] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f4 + add %i1,%i2,%i1 + mov %i3,%o2 + add %i3,%i4,%i3 + fabsd %f4,%f34 + fcmple32 %f34,%f18,%l2 + andcc %l2,2,%g0 + bz,pn %icc,.range2 +! delay slot + nop + ba,pt %icc,.check3 +! delay slot + fcmple32 %f34,%f8,%l2 +1: + fzero %f4 ! set up dummy argument + add %fp,junk,%o2 + mov 2,%l2 + ba,pt %icc,.check3 +! delay slot + fzero %f34 + + + .align 16 +.range3: + fcmpgt32 %f38,%f36,%l3 + andcc %l3,2,%g0 + bnz,a,pt %icc,1f ! branch if finite +! delay slot, squashed if branch not taken + mov 1,%i5 ! 
set biguns + fzeros %f7 + fmuls %f6,%f7,%f6 + st %f6,[%o3] +1: + addcc %i0,-1,%i0 + ble,pn %icc,1f +! delay slot + nop + ld [%i1],%f6 + add %i1,%i2,%i1 + mov %i3,%o3 + add %i3,%i4,%i3 + fabsd %f6,%f36 + fcmple32 %f36,%f18,%l3 + andcc %l3,2,%g0 + bz,pn %icc,.range3 +! delay slot + nop + ba,pt %icc,.checkprimary +! delay slot + fcmple32 %f36,%f8,%l3 +1: + fzero %f6 ! set up dummy argument + add %fp,junk,%o3 + mov 2,%l3 + ba,pt %icc,.checkprimary +! delay slot + fzero %f36 + + SET_SIZE(__vsinf) + diff --git a/usr/src/libm/src/mvec/vis/__vsqrt.S b/usr/src/libm/src/mvec/vis/__vsqrt.S new file mode 100644 index 0000000..2d536f7 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsqrt.S @@ -0,0 +1,1843 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)__vsqrt.S 1.5 06/01/23 SMI" + + .file "__vsqrt.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fe00000, 0x00000000 ! A1 = 5.00000000000000001789e-01 + .word 0xbfbfffff, 0xfffd0bfd ! A2 = -1.24999999997314110667e-01 + .word 0x3fafffff, 0xfffb5bfb ! 
A3 = 6.24999999978896565817e-02 + .word 0xbfa4000f, 0xc00b4fc8 ! A4 = -3.90629693917215481458e-02 + .word 0x3f9c0018, 0xc012da4e ! A5 = 2.73441188080261677282e-02 + .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff + .word 0x00001000, 0x00000000 ! DC2 = 0x0000100000000000 + .word 0x7fffe000, 0x00000000 ! DC3 = 0x7fffe00000000000 + +! i = [0,128] +! TBL[8*i+0] = 1.0 / (*(double*)&(0x3fe0000000000000LL + (i << 45))); +! TBL[8*i+1] = (double)(2.0 * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45)))); +! TBL[8*i+2] = (double)(2.0 * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))) - TBL[8*i+1]); +! TBL[8*i+3] = 0 +! TBL[8*i+4] = 1.0 / (*(double*)&(0x3fe0000000000000LL + (i << 45))); +! TBL[8*i+5] = (double)(2.0 * sqrtl(2.0) * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45)))); +! TBL[8*i+6] = (double)(2.0 * sqrtl(2.0) * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))) - TBL[8*i+5]); +! TBL[8*i+7] = 0 + + .word 0x40000000, 0x00000000, 0x3ff6a09e, 0x667f3bcd + .word 0xbc9bdd34, 0x13b26456, 0x00000000, 0x00000000 + .word 0x40000000, 0x00000000, 0x40000000, 0x00000000 + .word 0xb8f00000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3fffc07f, 0x01fc07f0, 0x3ff6b733, 0xbfd8c648 + .word 0x3c53b629, 0x05629048, 0x00000000, 0x00000000 + .word 0x3fffc07f, 0x01fc07f0, 0x40000ff8, 0x07f60deb + .word 0x3c90655c, 0x648a53f1, 0x00000000, 0x00000000 + .word 0x3fff81f8, 0x1f81f820, 0x3ff6cdb2, 0xbbb212eb + .word 0x3c960332, 0xcdbaba2d, 0x00000000, 0x00000000 + .word 0x3fff81f8, 0x1f81f820, 0x40001fe0, 0x3f61bad0 + .word 0x3ca2c41a, 0x15cbfaf2, 0x00000000, 0x00000000 + .word 0x3fff4465, 0x9e4a4271, 0x3ff6e41b, 0x9bfb3b75 + .word 0xbc925d8c, 0xfd6d5c87, 0x00000000, 0x00000000 + .word 0x3fff4465, 0x9e4a4271, 0x40002fb8, 0xd4e30f48 + .word 0xbca64203, 0xab1ba910, 0x00000000, 0x00000000 + .word 0x3fff07c1, 0xf07c1f08, 0x3ff6fa6e, 0xa162d0f0 + .word 0x3c691a24, 0x3d6297e9, 0x00000000, 0x00000000 + .word 0x3fff07c1, 0xf07c1f08, 0x40003f81, 0xf636b80c + .word 0xbca0efc8, 
0xba812a8c, 0x00000000, 0x00000000 + .word 0x3ffecc07, 0xb301ecc0, 0x3ff710ac, 0x0b5e5e32 + .word 0xbc991218, 0xb8d2850d, 0x00000000, 0x00000000 + .word 0x3ffecc07, 0xb301ecc0, 0x40004f3b, 0xd03c0a64 + .word 0x3c9ee2cf, 0x2d8ae22b, 0x00000000, 0x00000000 + .word 0x3ffe9131, 0xabf0b767, 0x3ff726d4, 0x1832a0be + .word 0xbc2d9b1a, 0xa8ecb058, 0x00000000, 0x00000000 + .word 0x3ffe9131, 0xabf0b767, 0x40005ee6, 0x8efad48b + .word 0xbc9c35f4, 0x8f4b89f7, 0x00000000, 0x00000000 + .word 0x3ffe573a, 0xc901e574, 0x3ff73ce7, 0x04fb7b23 + .word 0x3c91470b, 0x816b17a6, 0x00000000, 0x00000000 + .word 0x3ffe573a, 0xc901e574, 0x40006e82, 0x5da8fc2b + .word 0x3c9a315a, 0x8bd8a03b, 0x00000000, 0x00000000 + .word 0x3ffe1e1e, 0x1e1e1e1e, 0x3ff752e5, 0x0db3a3a2 + .word 0xbc939331, 0x3eea4381, 0x00000000, 0x00000000 + .word 0x3ffe1e1e, 0x1e1e1e1e, 0x40007e0f, 0x66afed07 + .word 0xbc74a6e1, 0xdcd59eaf, 0x00000000, 0x00000000 + .word 0x3ffde5d6, 0xe3f8868a, 0x3ff768ce, 0x6d3c11e0 + .word 0xbc9478b8, 0xab33074d, 0x00000000, 0x00000000 + .word 0x3ffde5d6, 0xe3f8868a, 0x40008d8d, 0xd3b1d9aa + .word 0x3c81d533, 0x85fe2b96, 0x00000000, 0x00000000 + .word 0x3ffdae60, 0x76b981db, 0x3ff77ea3, 0x5d632e43 + .word 0x3c92f714, 0x9a22fa4f, 0x00000000, 0x00000000 + .word 0x3ffdae60, 0x76b981db, 0x40009cfd, 0xcd8ed009 + .word 0xbc4862a9, 0xbcf7f372, 0x00000000, 0x00000000 + .word 0x3ffd77b6, 0x54b82c34, 0x3ff79464, 0x16ebc56c + .word 0x3c9a7cd5, 0x224c7375, 0x00000000, 0x00000000 + .word 0x3ffd77b6, 0x54b82c34, 0x4000ac5f, 0x7c69a3c8 + .word 0x3ca94dff, 0x7bfa2757, 0x00000000, 0x00000000 + .word 0x3ffd41d4, 0x1d41d41d, 0x3ff7aa10, 0xd193c22d + .word 0xbc790ed9, 0x403afe85, 0x00000000, 0x00000000 + .word 0x3ffd41d4, 0x1d41d41d, 0x4000bbb3, 0x07acafdb + .word 0xbc852a97, 0x686f9d2e, 0x00000000, 0x00000000 + .word 0x3ffd0cb5, 0x8f6ec074, 0x3ff7bfa9, 0xc41ab040 + .word 0x3c8d6bc3, 0x02ae758f, 0x00000000, 0x00000000 + .word 0x3ffd0cb5, 0x8f6ec074, 0x4000caf8, 0x960e710d + .word 0x3c9caa6b, 0xe2366171, 
0x00000000, 0x00000000 + .word 0x3ffcd856, 0x89039b0b, 0x3ff7d52f, 0x244809e9 + .word 0x3c9081f6, 0xf3b99d5f, 0x00000000, 0x00000000 + .word 0x3ffcd856, 0x89039b0b, 0x4000da30, 0x4d95fb06 + .word 0xbc9e1269, 0x76855586, 0x00000000, 0x00000000 + .word 0x3ffca4b3, 0x055ee191, 0x3ff7eaa1, 0x26f15284 + .word 0xbc846ce4, 0x68c1882b, 0x00000000, 0x00000000 + .word 0x3ffca4b3, 0x055ee191, 0x4000e95a, 0x539f492c + .word 0xbc80c73f, 0xc38a2184, 0x00000000, 0x00000000 + .word 0x3ffc71c7, 0x1c71c71c, 0x3ff80000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ffc71c7, 0x1c71c71c, 0x4000f876, 0xccdf6cd9 + .word 0x3cab1a18, 0xf13a34c0, 0x00000000, 0x00000000 + .word 0x3ffc3f8f, 0x01c3f8f0, 0x3ff8154b, 0xe2773526 + .word 0xbc857147, 0xe067d0ee, 0x00000000, 0x00000000 + .word 0x3ffc3f8f, 0x01c3f8f0, 0x40010785, 0xdd689a29 + .word 0xbcaaabbe, 0x9e4d810a, 0x00000000, 0x00000000 + .word 0x3ffc0e07, 0x0381c0e0, 0x3ff82a85, 0x00794e6c + .word 0xbc82edaa, 0x75e6ac5f, 0x00000000, 0x00000000 + .word 0x3ffc0e07, 0x0381c0e0, 0x40011687, 0xa8ae14a3 + .word 0x3cac9b43, 0xbcf06106, 0x00000000, 0x00000000 + .word 0x3ffbdd2b, 0x899406f7, 0x3ff83fab, 0x8b4d4315 + .word 0x3c829e06, 0x2d3e134d, 0x00000000, 0x00000000 + .word 0x3ffbdd2b, 0x899406f7, 0x4001257c, 0x5187fd09 + .word 0xbca4a750, 0xa83950a4, 0x00000000, 0x00000000 + .word 0x3ffbacf9, 0x14c1bad0, 0x3ff854bf, 0xb363dc39 + .word 0x3c99399f, 0xca38787e, 0x00000000, 0x00000000 + .word 0x3ffbacf9, 0x14c1bad0, 0x40013463, 0xfa37014e + .word 0x3c7b295b, 0xaa698cd3, 0x00000000, 0x00000000 + .word 0x3ffb7d6c, 0x3dda338b, 0x3ff869c1, 0xa85cc346 + .word 0x3c9fcc99, 0xde11b1d1, 0x00000000, 0x00000000 + .word 0x3ffb7d6c, 0x3dda338b, 0x4001433e, 0xc467effb + .word 0x3c92c031, 0x3b7278c8, 0x00000000, 0x00000000 + .word 0x3ffb4e81, 0xb4e81b4f, 0x3ff87eb1, 0x990b697a + .word 0x3c7c43e9, 0xf593ea0f, 0x00000000, 0x00000000 + .word 0x3ffb4e81, 0xb4e81b4f, 0x4001520c, 0xd1372feb + .word 0xbcadec22, 0x5d8e66d2, 0x00000000, 
0x00000000 + .word 0x3ffb2036, 0x406c80d9, 0x3ff8938f, 0xb37bc9c1 + .word 0xbc7c115f, 0x9f5c8d6f, 0x00000000, 0x00000000 + .word 0x3ffb2036, 0x406c80d9, 0x400160ce, 0x41341d74 + .word 0x3c967036, 0x863a1bb2, 0x00000000, 0x00000000 + .word 0x3ffaf286, 0xbca1af28, 0x3ff8a85c, 0x24f70659 + .word 0x3c9f6e07, 0x6b588a50, 0x00000000, 0x00000000 + .word 0x3ffaf286, 0xbca1af28, 0x40016f83, 0x34644df9 + .word 0xbcae8679, 0x80a1c48e, 0x00000000, 0x00000000 + .word 0x3ffac570, 0x1ac5701b, 0x3ff8bd17, 0x1a07e38a + .word 0x3c9c20b5, 0xa697f23f, 0x00000000, 0x00000000 + .word 0x3ffac570, 0x1ac5701b, 0x40017e2b, 0xca46bab9 + .word 0x3ca1519b, 0x10d04d5f, 0x00000000, 0x00000000 + .word 0x3ffa98ef, 0x606a63be, 0x3ff8d1c0, 0xbe7f20ac + .word 0xbc8bdb8a, 0x6df021f3, 0x00000000, 0x00000000 + .word 0x3ffa98ef, 0x606a63be, 0x40018cc8, 0x21d6d3e3 + .word 0xbca30af1, 0xd725cc5b, 0x00000000, 0x00000000 + .word 0x3ffa6d01, 0xa6d01a6d, 0x3ff8e659, 0x3d77b0b8 + .word 0xbc7d99d7, 0x64769954, 0x00000000, 0x00000000 + .word 0x3ffa6d01, 0xa6d01a6d, 0x40019b58, 0x598f7c9f + .word 0xbc72e0d8, 0x51c0e011, 0x00000000, 0x00000000 + .word 0x3ffa41a4, 0x1a41a41a, 0x3ff8fae0, 0xc15ad38a + .word 0xbc7db7ad, 0xb6817f6d, 0x00000000, 0x00000000 + .word 0x3ffa41a4, 0x1a41a41a, 0x4001a9dc, 0x8f6df104 + .word 0xbcafc519, 0xc18dc1d5, 0x00000000, 0x00000000 + .word 0x3ffa16d3, 0xf97a4b02, 0x3ff90f57, 0x73e410e4 + .word 0x3c6fb605, 0xcee75482, 0x00000000, 0x00000000 + .word 0x3ffa16d3, 0xf97a4b02, 0x4001b854, 0xe0f496a0 + .word 0x3ca27006, 0x899b7c3a, 0x00000000, 0x00000000 + .word 0x3ff9ec8e, 0x951033d9, 0x3ff923bd, 0x7e25164d + .word 0xbc9278d1, 0x901d3b40, 0x00000000, 0x00000000 + .word 0x3ff9ec8e, 0x951033d9, 0x4001c6c1, 0x6b2db870 + .word 0x3c887e1d, 0x8335fb28, 0x00000000, 0x00000000 + .word 0x3ff9c2d1, 0x4ee4a102, 0x3ff93813, 0x088978c5 + .word 0xbc54312c, 0x627e5c52, 0x00000000, 0x00000000 + .word 0x3ff9c2d1, 0x4ee4a102, 0x4001d522, 0x4aae2ee1 + .word 0x3ca91222, 0xf6aebdc9, 0x00000000, 0x00000000 + .word 
0x3ff99999, 0x9999999a, 0x3ff94c58, 0x3ada5b53 + .word 0xbc9b7ed7, 0x50df3cca, 0x00000000, 0x00000000 + .word 0x3ff99999, 0x9999999a, 0x4001e377, 0x9b97f4a8 + .word 0xbc9f5063, 0x19fcfd19, 0x00000000, 0x00000000 + .word 0x3ff970e4, 0xf80cb872, 0x3ff9608d, 0x3c41fb4b + .word 0x3c73df32, 0xeaa86b83, 0x00000000, 0x00000000 + .word 0x3ff970e4, 0xf80cb872, 0x4001f1c1, 0x799ca8ff + .word 0xbca28b52, 0xeb725e0a, 0x00000000, 0x00000000 + .word 0x3ff948b0, 0xfcd6e9e0, 0x3ff974b2, 0x334f2346 + .word 0x3c814e4a, 0xd3ae9e3f, 0x00000000, 0x00000000 + .word 0x3ff948b0, 0xfcd6e9e0, 0x40020000, 0x00000000 + .word 0xb9000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff920fb, 0x49d0e229, 0x3ff988c7, 0x45f88592 + .word 0x3c95af70, 0x1a56047b, 0x00000000, 0x00000000 + .word 0x3ff920fb, 0x49d0e229, 0x40020e33, 0x499a21a9 + .word 0xbc924ba2, 0x74fea9a1, 0x00000000, 0x00000000 + .word 0x3ff8f9c1, 0x8f9c18fa, 0x3ff99ccc, 0x999fff00 + .word 0x3c866234, 0x063b88ee, 0x00000000, 0x00000000 + .word 0x3ff8f9c1, 0x8f9c18fa, 0x40021c5b, 0x70d9f824 + .word 0xbca844f9, 0x9eee6fc3, 0x00000000, 0x00000000 + .word 0x3ff8d301, 0x8d3018d3, 0x3ff9b0c2, 0x5315c2ce + .word 0xbc87f64a, 0x65cc6887, 0x00000000, 0x00000000 + .word 0x3ff8d301, 0x8d3018d3, 0x40022a78, 0x8fc76de5 + .word 0x3c931e32, 0xd4e07a48, 0x00000000, 0x00000000 + .word 0x3ff8acb9, 0x0f6bf3aa, 0x3ff9c4a8, 0x969b7077 + .word 0xbc96ca9e, 0x5cd4517a, 0x00000000, 0x00000000 + .word 0x3ff8acb9, 0x0f6bf3aa, 0x4002388a, 0xc0059c28 + .word 0xbc96072f, 0xbe0e5da3, 0x00000000, 0x00000000 + .word 0x3ff886e5, 0xf0abb04a, 0x3ff9d87f, 0x87e71422 + .word 0xbc85fdd8, 0xb11b7b1d, 0x00000000, 0x00000000 + .word 0x3ff886e5, 0xf0abb04a, 0x40024692, 0x1ad4ea49 + .word 0xbcaa6d9b, 0x268ef62d, 0x00000000, 0x00000000 + .word 0x3ff86186, 0x18618618, 0x3ff9ec47, 0x4a261264 + .word 0xbc8540c4, 0x89ba5074, 0x00000000, 0x00000000 + .word 0x3ff86186, 0x18618618, 0x4002548e, 0xb9151e85 + .word 0x3c999820, 0x0a774879, 0x00000000, 0x00000000 + .word 0x3ff83c97, 
0x7ab2bedd, 0x3ffa0000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff83c97, 0x7ab2bedd, 0x40026280, 0xb3476096 + .word 0x3c9ab88b, 0x5ffe1cf5, 0x00000000, 0x00000000 + .word 0x3ff81818, 0x18181818, 0x3ffa13a9, 0xcb996651 + .word 0xbc9f9ab9, 0x0e4e85c3, 0x00000000, 0x00000000 + .word 0x3ff81818, 0x18181818, 0x40027068, 0x21902e9a + .word 0x3c90ff4c, 0x20f541f6, 0x00000000, 0x00000000 + .word 0x3ff7f405, 0xfd017f40, 0x3ffa2744, 0xce9674f5 + .word 0xbc8b936c, 0x81e54daa, 0x00000000, 0x00000000 + .word 0x3ff7f405, 0xfd017f40, 0x40027e45, 0x1bb944c3 + .word 0x3c8e4a16, 0x42099ef0, 0x00000000, 0x00000000 + .word 0x3ff7d05f, 0x417d05f4, 0x3ffa3ad1, 0x2a1da160 + .word 0x3c951168, 0xf4be5984, 0x00000000, 0x00000000 + .word 0x3ff7d05f, 0x417d05f4, 0x40028c17, 0xb9337834 + .word 0xbc8af150, 0xa0e88972, 0x00000000, 0x00000000 + .word 0x3ff7ad22, 0x08e0ecc3, 0x3ffa4e4e, 0xfeda34de + .word 0x3c6afbb4, 0xdbdadd0d, 0x00000000, 0x00000000 + .word 0x3ff7ad22, 0x08e0ecc3, 0x400299e0, 0x11188575 + .word 0xbc9a6169, 0x3fb250e5, 0x00000000, 0x00000000 + .word 0x3ff78a4c, 0x8178a4c8, 0x3ffa61be, 0x6cfec997 + .word 0xbc8c37ea, 0xb2bb5ca0, 0x00000000, 0x00000000 + .word 0x3ff78a4c, 0x8178a4c8, 0x4002a79e, 0x3a2cd2e6 + .word 0xbca5ddd4, 0x9cc9ad59, 0x00000000, 0x00000000 + .word 0x3ff767dc, 0xe434a9b1, 0x3ffa751f, 0x9447b724 + .word 0x3c82b909, 0x477e9ed1, 0x00000000, 0x00000000 + .word 0x3ff767dc, 0xe434a9b1, 0x4002b552, 0x4ae1278e + .word 0xbca2f2a9, 0x8841b934, 0x00000000, 0x00000000 + .word 0x3ff745d1, 0x745d1746, 0x3ffa8872, 0x93fd6f34 + .word 0x3c768ef2, 0x4f198721, 0x00000000, 0x00000000 + .word 0x3ff745d1, 0x745d1746, 0x4002c2fc, 0x595456a7 + .word 0xbc996f60, 0xb0fc7e96, 0x00000000, 0x00000000 + .word 0x3ff72428, 0x7f46debc, 0x3ffa9bb7, 0x8af6cabc + .word 0x3c8ba60d, 0xc999aba7, 0x00000000, 0x00000000 + .word 0x3ff72428, 0x7f46debc, 0x4002d09c, 0x7b54e03e + .word 0x3c98c747, 0xfdeda6de, 0x00000000, 0x00000000 + .word 0x3ff702e0, 0x5c0b8170, 
0x3ffaaeee, 0x979b4838 + .word 0xbc91f08a, 0xef9ef6c0, 0x00000000, 0x00000000 + .word 0x3ff702e0, 0x5c0b8170, 0x4002de32, 0xc6628741 + .word 0x3ca78746, 0xc499a4f7, 0x00000000, 0x00000000 + .word 0x3ff6e1f7, 0x6b4337c7, 0x3ffac217, 0xd7e53b66 + .word 0xbc64282a, 0xaa967e4f, 0x00000000, 0x00000000 + .word 0x3ff6e1f7, 0x6b4337c7, 0x4002ebbf, 0x4fafdd4b + .word 0xbca78a73, 0xb72d5c41, 0x00000000, 0x00000000 + .word 0x3ff6c16c, 0x16c16c17, 0x3ffad533, 0x6963eefc + .word 0xbc977c4a, 0x537dbdd2, 0x00000000, 0x00000000 + .word 0x3ff6c16c, 0x16c16c17, 0x4002f942, 0x2c23c47e + .word 0xbc827c85, 0xf29db65d, 0x00000000, 0x00000000 + .word 0x3ff6a13c, 0xd1537290, 0x3ffae841, 0x693db8b4 + .word 0x3c90f773, 0xcd7a0713, 0x00000000, 0x00000000 + .word 0x3ff6a13c, 0xd1537290, 0x400306bb, 0x705ae7c3 + .word 0x3caf4933, 0x907af47a, 0x00000000, 0x00000000 + .word 0x3ff68168, 0x16816817, 0x3ffafb41, 0xf432002e + .word 0xbc7ac94a, 0xfdfe8c5b, 0x00000000, 0x00000000 + .word 0x3ff68168, 0x16816817, 0x4003142b, 0x30a929ab + .word 0x3c98dc01, 0x081a6c5c, 0x00000000, 0x00000000 + .word 0x3ff661ec, 0x6a5122f9, 0x3ffb0e35, 0x269b38f5 + .word 0xbc4f69a8, 0x05c3271a, 0x00000000, 0x00000000 + .word 0x3ff661ec, 0x6a5122f9, 0x40032191, 0x811b0a41 + .word 0xbc9ce3f0, 0xb38c0bf7, 0x00000000, 0x00000000 + .word 0x3ff642c8, 0x590b2164, 0x3ffb211b, 0x1c70d023 + .word 0x3c2e4c5e, 0x66eae2f0, 0x00000000, 0x00000000 + .word 0x3ff642c8, 0x590b2164, 0x40032eee, 0x75770416 + .word 0x3caed8e7, 0x730eaff2, 0x00000000, 0x00000000 + .word 0x3ff623fa, 0x77016240, 0x3ffb33f3, 0xf1490def + .word 0xbc95894b, 0xcb02373b, 0x00000000, 0x00000000 + .word 0x3ff623fa, 0x77016240, 0x40033c42, 0x213ee0c9 + .word 0x3ca84c24, 0x4ba98124, 0x00000000, 0x00000000 + .word 0x3ff60581, 0x60581606, 0x3ffb46bf, 0xc05aeb89 + .word 0x3c9b1c7c, 0xc39adc9f, 0x00000000, 0x00000000 + .word 0x3ff60581, 0x60581606, 0x4003498c, 0x97b10540 + .word 0x3c734193, 0xbc8543b4, 0x00000000, 0x00000000 + .word 0x3ff5e75b, 0xb8d015e7, 0x3ffb597e, 
0xa47fdda3 + .word 0xbc923cc8, 0x9d1e4635, 0x00000000, 0x00000000 + .word 0x3ff5e75b, 0xb8d015e7, 0x400356cd, 0xebc9b5e2 + .word 0x3c96dee1, 0x46bb1571, 0x00000000, 0x00000000 + .word 0x3ff5c988, 0x2b931057, 0x3ffb6c30, 0xb83593e6 + .word 0x3c8f4e3f, 0xd28d84bc, 0x00000000, 0x00000000 + .word 0x3ff5c988, 0x2b931057, 0x40036406, 0x30445306 + .word 0xbca78d86, 0x2327430a, 0x00000000, 0x00000000 + .word 0x3ff5ac05, 0x6b015ac0, 0x3ffb7ed6, 0x159fadc8 + .word 0xbc899bcf, 0xf04d134b, 0x00000000, 0x00000000 + .word 0x3ff5ac05, 0x6b015ac0, 0x40037135, 0x779c8dcb + .word 0xbc8fe126, 0xce9778ae, 0x00000000, 0x00000000 + .word 0x3ff58ed2, 0x308158ed, 0x3ffb916e, 0xd68964ec + .word 0x3c826a5d, 0x5dbaae29, 0x00000000, 0x00000000 + .word 0x3ff58ed2, 0x308158ed, 0x40037e5b, 0xd40f95a1 + .word 0x3cac6ff5, 0xeca5d122, 0x00000000, 0x00000000 + .word 0x3ff571ed, 0x3c506b3a, 0x3ffba3fb, 0x14672d7c + .word 0xbc8117d3, 0x97dcefc9, 0x00000000, 0x00000000 + .word 0x3ff571ed, 0x3c506b3a, 0x40038b79, 0x579d3eab + .word 0xbcac254f, 0xc0db598e, 0x00000000, 0x00000000 + .word 0x3ff55555, 0x55555555, 0x3ffbb67a, 0xe8584caa + .word 0x3c9cec95, 0xd0b5c1e3, 0x00000000, 0x00000000 + .word 0x3ff55555, 0x55555555, 0x4003988e, 0x1409212e + .word 0x3caf40c8, 0x6450c869, 0x00000000, 0x00000000 + .word 0x3ff53909, 0x48f40feb, 0x3ffbc8ee, 0x6b2865b9 + .word 0x3c9394eb, 0x90f645c8, 0x00000000, 0x00000000 + .word 0x3ff53909, 0x48f40feb, 0x4003a59a, 0x1adbb257 + .word 0x3ca6adce, 0x020a308d, 0x00000000, 0x00000000 + .word 0x3ff51d07, 0xeae2f815, 0x3ffbdb55, 0xb550fdbc + .word 0x3c7365e9, 0x6aa5fae3, 0x00000000, 0x00000000 + .word 0x3ff51d07, 0xeae2f815, 0x4003b29d, 0x7d635662 + .word 0x3cac99b0, 0x5e282129, 0x00000000, 0x00000000 + .word 0x3ff50150, 0x15015015, 0x3ffbedb0, 0xdefaf661 + .word 0x3c91a627, 0xb279170d, 0x00000000, 0x00000000 + .word 0x3ff50150, 0x15015015, 0x4003bf98, 0x4cb56c77 + .word 0x3ca8f653, 0xbcc0c4a1, 0x00000000, 0x00000000 + .word 0x3ff4e5e0, 0xa72f0539, 0x3ffc0000, 0x00000000 + .word 
0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff4e5e0, 0xa72f0539, 0x4003cc8a, 0x99af5453 + .word 0xbc486364, 0x4f05f2be, 0x00000000, 0x00000000 + .word 0x3ff4cab8, 0x8725af6e, 0x3ffc1243, 0x2fec0329 + .word 0x3c96e0d7, 0x8dd23a7d, 0x00000000, 0x00000000 + .word 0x3ff4cab8, 0x8725af6e, 0x4003d974, 0x74f76df2 + .word 0x3c82e3c9, 0xfdbbbdc2, 0x00000000, 0x00000000 + .word 0x3ff4afd6, 0xa052bf5b, 0x3ffc247a, 0x85fe81fa + .word 0x3c89d8ee, 0xf6854220, 0x00000000, 0x00000000 + .word 0x3ff4afd6, 0xa052bf5b, 0x4003e655, 0xeefe1367 + .word 0x3c80eb35, 0xbb532559, 0x00000000, 0x00000000 + .word 0x3ff49539, 0xe3b2d067, 0x3ffc36a6, 0x192bf168 + .word 0xbc9083d8, 0x1a423b11, 0x00000000, 0x00000000 + .word 0x3ff49539, 0xe3b2d067, 0x4003f32f, 0x17fe8d04 + .word 0xbc905d6c, 0x1c437de0, 0x00000000, 0x00000000 + .word 0x3ff47ae1, 0x47ae147b, 0x3ffc48c6, 0x001f0ac0 + .word 0xbc92d481, 0x189efd6b, 0x00000000, 0x00000000 + .word 0x3ff47ae1, 0x47ae147b, 0x40040000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff460cb, 0xc7f5cf9a, 0x3ffc5ada, 0x513a1593 + .word 0xbc7aaedd, 0x014f5f03, 0x00000000, 0x00000000 + .word 0x3ff460cb, 0xc7f5cf9a, 0x40040cc8, 0xb6d657c2 + .word 0xbc9c05ab, 0xf480ce19, 0x00000000, 0x00000000 + .word 0x3ff446f8, 0x6562d9fb, 0x3ffc6ce3, 0x22982a3f + .word 0x3c891b2d, 0xf3e15f29, 0x00000000, 0x00000000 + .word 0x3ff446f8, 0x6562d9fb, 0x40041989, 0x4c2329f0 + .word 0x3c976037, 0x46da0ea6, 0x00000000, 0x00000000 + .word 0x3ff42d66, 0x25d51f87, 0x3ffc7ee0, 0x8a0e6d4c + .word 0x3c991c54, 0xc53e75c8, 0x00000000, 0x00000000 + .word 0x3ff42d66, 0x25d51f87, 0x40042641, 0xcf569572 + .word 0xbcadf80b, 0x1442c029, 0x00000000, 0x00000000 + .word 0x3ff41414, 0x14141414, 0x3ffc90d2, 0x9d2d43ce + .word 0xbc9edadb, 0x07f1137a, 0x00000000, 0x00000000 + .word 0x3ff41414, 0x14141414, 0x400432f2, 0x4fb01c7a + .word 0x3ca38bfe, 0x0e012c1c, 0x00000000, 0x00000000 + .word 0x3ff3fb01, 0x3fb013fb, 0x3ffca2b9, 0x714180f7 + .word 0xbc81a63d, 
0x6750c57c, 0x00000000, 0x00000000 + .word 0x3ff3fb01, 0x3fb013fb, 0x40043f9a, 0xdc3f79ce + .word 0x3c66d2b1, 0x767ae30a, 0x00000000, 0x00000000 + .word 0x3ff3e22c, 0xbce4a902, 0x3ffcb495, 0x1b558d17 + .word 0x3c8fcbcb, 0x357f2308, 0x00000000, 0x00000000 + .word 0x3ff3e22c, 0xbce4a902, 0x40044c3b, 0x83e57153 + .word 0x3c98c853, 0xc6be5ee1, 0x00000000, 0x00000000 + .word 0x3ff3c995, 0xa47babe7, 0x3ffcc665, 0xb0328622 + .word 0xbc91baa4, 0xd369f814, 0x00000000, 0x00000000 + .word 0x3ff3c995, 0xa47babe7, 0x400458d4, 0x55549c1a + .word 0x3ca02d72, 0x8d9a6054, 0x00000000, 0x00000000 + .word 0x3ff3b13b, 0x13b13b14, 0x3ffcd82b, 0x446159f3 + .word 0x3c983fb7, 0xb33cdfe8, 0x00000000, 0x00000000 + .word 0x3ff3b13b, 0x13b13b14, 0x40046565, 0x5f122ff6 + .word 0x3ca862c5, 0xd2f0ca4c, 0x00000000, 0x00000000 + .word 0x3ff3991c, 0x2c187f63, 0x3ffce9e5, 0xec2bda80 + .word 0xbc94ccf3, 0xd8e249ab, 0x00000000, 0x00000000 + .word 0x3ff3991c, 0x2c187f63, 0x400471ee, 0xaf76c2c6 + .word 0x3c975c62, 0xeff26e8e, 0x00000000, 0x00000000 + .word 0x3ff38138, 0x13813814, 0x3ffcfb95, 0xbb9dcc0c + .word 0x3c92cea2, 0x0857ae03, 0x00000000, 0x00000000 + .word 0x3ff38138, 0x13813814, 0x40047e70, 0x54af0989 + .word 0x3c9d8c33, 0xc0054830, 0x00000000, 0x00000000 + .word 0x3ff3698d, 0xf3de0748, 0x3ffd0d3a, 0xc685eda4 + .word 0x3c94115a, 0x0ff4cf9e, 0x00000000, 0x00000000 + .word 0x3ff3698d, 0xf3de0748, 0x40048aea, 0x5cbc935f + .word 0xbca8cb00, 0x12d14ff5, 0x00000000, 0x00000000 + .word 0x3ff3521c, 0xfb2b78c1, 0x3ffd1ed5, 0x2076fbe9 + .word 0x3c8f48a8, 0x6b72875f, 0x00000000, 0x00000000 + .word 0x3ff3521c, 0xfb2b78c1, 0x4004975c, 0xd5768088 + .word 0xbca1731e, 0xbc02f748, 0x00000000, 0x00000000 + .word 0x3ff33ae4, 0x5b57bcb2, 0x3ffd3064, 0xdcc8ae67 + .word 0x3c93480e, 0x805158ba, 0x00000000, 0x00000000 + .word 0x3ff33ae4, 0x5b57bcb2, 0x4004a3c7, 0xcc8a358a + .word 0xbc9d8f7f, 0xd2726ffa, 0x00000000, 0x00000000 + .word 0x3ff323e3, 0x4a2b10bf, 0x3ffd41ea, 0x0e98af91 + .word 0x3c824640, 0x0309962f, 
0x00000000, 0x00000000 + .word 0x3ff323e3, 0x4a2b10bf, 0x4004b02b, 0x4f7c0a88 + .word 0xbcaf71e1, 0xf6cafde2, 0x00000000, 0x00000000 + .word 0x3ff30d19, 0x0130d190, 0x3ffd5364, 0xc8cb8f86 + .word 0x3c8ad003, 0xc00630e1, 0x00000000, 0x00000000 + .word 0x3ff30d19, 0x0130d190, 0x4004bc87, 0x6ba7f6ec + .word 0x3c9c1edb, 0x2be943b8, 0x00000000, 0x00000000 + .word 0x3ff2f684, 0xbda12f68, 0x3ffd64d5, 0x1e0db1c6 + .word 0xbc911ed3, 0x6986d362, 0x00000000, 0x00000000 + .word 0x3ff2f684, 0xbda12f68, 0x4004c8dc, 0x2e423980 + .word 0xbc949d1f, 0x46ef5d2c, 0x00000000, 0x00000000 + .word 0x3ff2e025, 0xc04b8097, 0x3ffd763b, 0x20d435ef + .word 0x3c9d6780, 0xf76cb258, 0x00000000, 0x00000000 + .word 0x3ff2e025, 0xc04b8097, 0x4004d529, 0xa457fcfc + .word 0xbca1404a, 0x46484e3d, 0x00000000, 0x00000000 + .word 0x3ff2c9fb, 0x4d812ca0, 0x3ffd8796, 0xe35ddbb2 + .word 0x3c83fdd9, 0x1aeb637a, 0x00000000, 0x00000000 + .word 0x3ff2c9fb, 0x4d812ca0, 0x4004e16f, 0xdacff937 + .word 0xbca1deb9, 0xd3815ad2, 0x00000000, 0x00000000 + .word 0x3ff2b404, 0xad012b40, 0x3ffd98e8, 0x77b3e207 + .word 0xbc48c301, 0xee02dee8, 0x00000000, 0x00000000 + .word 0x3ff2b404, 0xad012b40, 0x4004edae, 0xde6b10fe + .word 0x3ca99709, 0x4a91a780, 0x00000000, 0x00000000 + .word 0x3ff29e41, 0x29e4129e, 0x3ffdaa2f, 0xefaae1d8 + .word 0xbc63fe0e, 0x03f44594, 0x00000000, 0x00000000 + .word 0x3ff29e41, 0x29e4129e, 0x4004f9e6, 0xbbc4ecb3 + .word 0x3c6ce5a6, 0x018493f1, 0x00000000, 0x00000000 + .word 0x3ff288b0, 0x1288b013, 0x3ffdbb6d, 0x5ce3a42f + .word 0xbc922c27, 0xf71c8337, 0x00000000, 0x00000000 + .word 0x3ff288b0, 0x1288b013, 0x40050617, 0x7f5491bb + .word 0xbc9e591e, 0x7b2a6d1a, 0x00000000, 0x00000000 + .word 0x3ff27350, 0xb8812735, 0x3ffdcca0, 0xd0cbf408 + .word 0x3c7a6d16, 0x2310db57, 0x00000000, 0x00000000 + .word 0x3ff27350, 0xb8812735, 0x40051241, 0x356cf6e0 + .word 0x3ca37dc2, 0x60e8bc2d, 0x00000000, 0x00000000 + .word 0x3ff25e22, 0x708092f1, 0x3ffdddca, 0x5c9f6be8 + .word 0x3c818520, 0xf0a3f809, 0x00000000, 
0x00000000 + .word 0x3ff25e22, 0x708092f1, 0x40051e63, 0xea3d95b0 + .word 0x3caecf78, 0x2e88d5ce, 0x00000000, 0x00000000 + .word 0x3ff24924, 0x92492492, 0x3ffdeeea, 0x11683f49 + .word 0x3c802aae, 0x4bfa7c27, 0x00000000, 0x00000000 + .word 0x3ff24924, 0x92492492, 0x40052a7f, 0xa9d2f8ea + .word 0xbca21c62, 0xb033c079, 0x00000000, 0x00000000 + .word 0x3ff23456, 0x789abcdf, 0x3ffe0000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff23456, 0x789abcdf, 0x40053694, 0x80174810 + .word 0xbc9c3ec1, 0xa4ee7c21, 0x00000000, 0x00000000 + .word 0x3ff21fb7, 0x8121fb78, 0x3ffe110c, 0x39105faf + .word 0x3c776161, 0x4c513964, 0x00000000, 0x00000000 + .word 0x3ff21fb7, 0x8121fb78, 0x400542a2, 0x78d2d036 + .word 0xbca495c2, 0x45254df4, 0x00000000, 0x00000000 + .word 0x3ff20b47, 0x0c67c0d9, 0x3ffe220e, 0xcd13ed60 + .word 0xbc729f01, 0xf18c9dc9, 0x00000000, 0x00000000 + .word 0x3ff20b47, 0x0c67c0d9, 0x40054ea9, 0x9fac8a0f + .word 0x3c80cfbb, 0x19353b3d, 0x00000000, 0x00000000 + .word 0x3ff1f704, 0x7dc11f70, 0x3ffe3307, 0xcc56cf5c + .word 0xbc81f04e, 0xc3189131, 0x00000000, 0x00000000 + .word 0x3ff1f704, 0x7dc11f70, 0x40055aaa, 0x002a9d5a + .word 0xbc4bf504, 0x76241f94, 0x00000000, 0x00000000 + .word 0x3ff1e2ef, 0x3b3fb874, 0x3ffe43f7, 0x46f7795b + .word 0xbc931e7f, 0x8af68f8c, 0x00000000, 0x00000000 + .word 0x3ff1e2ef, 0x3b3fb874, 0x400566a3, 0xa5b2e1b1 + .word 0x3caa1fd2, 0x8cc92e33, 0x00000000, 0x00000000 + .word 0x3ff1cf06, 0xada2811d, 0x3ffe54dd, 0x4ce75f1e + .word 0xbc811b19, 0x5dfc62e5, 0x00000000, 0x00000000 + .word 0x3ff1cf06, 0xada2811d, 0x40057296, 0x9b8b5cd8 + .word 0x3ca30cbf, 0x1c53312e, 0x00000000, 0x00000000 + .word 0x3ff1bb4a, 0x4046ed29, 0x3ffe65b9, 0xedeba38e + .word 0xbc7bb732, 0x51e8c364, 0x00000000, 0x00000000 + .word 0x3ff1bb4a, 0x4046ed29, 0x40057e82, 0xecdabe8d + .word 0xbc7c2aed, 0xf3c4c4bd, 0x00000000, 0x00000000 + .word 0x3ff1a7b9, 0x611a7b96, 0x3ffe768d, 0x399dc470 + .word 0xbc9a8c81, 0x3405c01c, 0x00000000, 0x00000000 + .word 
0x3ff1a7b9, 0x611a7b96, 0x40058a68, 0xa4a8d9f3 + .word 0x3ca50798, 0xe67012d9, 0x00000000, 0x00000000 + .word 0x3ff19453, 0x808ca29c, 0x3ffe8757, 0x3f6c42c5 + .word 0x3c9dbf9c, 0xf7bbcda3, 0x00000000, 0x00000000 + .word 0x3ff19453, 0x808ca29c, 0x40059647, 0xcddf1ca5 + .word 0x3ca14a95, 0xf35dea0b, 0x00000000, 0x00000000 + .word 0x3ff18118, 0x11811812, 0x3ffe9818, 0x0e9b47f2 + .word 0xbc9b6bd7, 0x4396d08e, 0x00000000, 0x00000000 + .word 0x3ff18118, 0x11811812, 0x4005a220, 0x73490377 + .word 0xbcadd036, 0x39925812, 0x00000000, 0x00000000 + .word 0x3ff16e06, 0x89427379, 0x3ffea8cf, 0xb64547ab + .word 0x3c8721b2, 0x6374e19f, 0x00000000, 0x00000000 + .word 0x3ff16e06, 0x89427379, 0x4005adf2, 0x9f948cfb + .word 0xbca42520, 0xf7716fa6, 0x00000000, 0x00000000 + .word 0x3ff15b1e, 0x5f75270d, 0x3ffeb97e, 0x455b9edb + .word 0x3c999b45, 0x40857883, 0x00000000, 0x00000000 + .word 0x3ff15b1e, 0x5f75270d, 0x4005b9be, 0x5d52a9da + .word 0x3c9098cd, 0x1b3af777, 0x00000000, 0x00000000 + .word 0x3ff1485f, 0x0e0acd3b, 0x3ffeca23, 0xcaa72f73 + .word 0x3c7e3ed5, 0x29679959, 0x00000000, 0x00000000 + .word 0x3ff1485f, 0x0e0acd3b, 0x4005c583, 0xb6f7ab03 + .word 0x3ca963bc, 0x9d795b51, 0x00000000, 0x00000000 + .word 0x3ff135c8, 0x1135c811, 0x3ffedac0, 0x54c8f94c + .word 0x3c90b5c1, 0x15a56207, 0x00000000, 0x00000000 + .word 0x3ff135c8, 0x1135c811, 0x4005d142, 0xb6dbadc5 + .word 0x3ca6f1f5, 0x5323d116, 0x00000000, 0x00000000 + .word 0x3ff12358, 0xe75d3033, 0x3ffeeb53, 0xf23ab028 + .word 0xbc8617e4, 0xb5384f5d, 0x00000000, 0x00000000 + .word 0x3ff12358, 0xe75d3033, 0x4005dcfb, 0x673b05df + .word 0xbca099df, 0xc321634f, 0x00000000, 0x00000000 + .word 0x3ff11111, 0x11111111, 0x3ffefbde, 0xb14f4eda + .word 0xbc93a145, 0xfe1be078, 0x00000000, 0x00000000 + .word 0x3ff11111, 0x11111111, 0x4005e8ad, 0xd236a58f + .word 0xbc7ef8c7, 0xc0d1fec6, 0x00000000, 0x00000000 + .word 0x3ff0fef0, 0x10fef011, 0x3fff0c60, 0xa033a7b3 + .word 0xbc91b0fc, 0x15cd89c6, 0x00000000, 0x00000000 + .word 0x3ff0fef0, 
0x10fef011, 0x4005f45a, 0x01d483b4 + .word 0xbc94a237, 0xdc0fa105, 0x00000000, 0x00000000 + .word 0x3ff0ecf5, 0x6be69c90, 0x3fff1cd9, 0xcceef239 + .word 0x3c91afd8, 0x64eab60a, 0x00000000, 0x00000000 + .word 0x3ff0ecf5, 0x6be69c90, 0x40060000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff0db20, 0xa88f4696, 0x3fff2d4a, 0x45635640 + .word 0xbc8eebae, 0xea670bc2, 0x00000000, 0x00000000 + .word 0x3ff0db20, 0xa88f4696, 0x40060b9f, 0xd68a4554 + .word 0x3ca328e1, 0x70dae176, 0x00000000, 0x00000000 + .word 0x3ff0c971, 0x4fbcda3b, 0x3fff3db2, 0x174e7468 + .word 0x3c9e1513, 0x2d6ac52a, 0x00000000, 0x00000000 + .word 0x3ff0c971, 0x4fbcda3b, 0x40061739, 0x8f2aaa48 + .word 0xbc9b672b, 0xba260735, 0x00000000, 0x00000000 + .word 0x3ff0b7e6, 0xec259dc8, 0x3fff4e11, 0x5049ec26 + .word 0xbc9b6656, 0xb6bd5d76, 0x00000000, 0x00000000 + .word 0x3ff0b7e6, 0xec259dc8, 0x400622cd, 0x337f0fe8 + .word 0x3c9fe207, 0x3279559f, 0x00000000, 0x00000000 + .word 0x3ff0a681, 0x0a6810a7, 0x3fff5e67, 0xfdcbdf44 + .word 0xbc98af06, 0x1849d6fc, 0x00000000, 0x00000000 + .word 0x3ff0a681, 0x0a6810a7, 0x40062e5a, 0xcd0c3ebe + .word 0xbca2c50e, 0x2092203a, 0x00000000, 0x00000000 + .word 0x3ff0953f, 0x39010954, 0x3fff6eb6, 0x2d27730d + .word 0xbc9401d9, 0x5ca1ce34, 0x00000000, 0x00000000 + .word 0x3ff0953f, 0x39010954, 0x400639e2, 0x653e421b + .word 0xbc9f75e0, 0x5835e4b9, 0x00000000, 0x00000000 + .word 0x3ff08421, 0x08421084, 0x3fff7efb, 0xeb8d4f12 + .word 0xbc7e84e8, 0xa6ff3256, 0x00000000, 0x00000000 + .word 0x3ff08421, 0x08421084, 0x40064564, 0x0568c1c3 + .word 0x3cad1778, 0x7e4c8970, 0x00000000, 0x00000000 + .word 0x3ff07326, 0x0a47f7c6, 0x3fff8f39, 0x460c19a8 + .word 0x3c989b4e, 0x16ee9aaf, 0x00000000, 0x00000000 + .word 0x3ff07326, 0x0a47f7c6, 0x400650df, 0xb6c759f4 + .word 0x3c99063c, 0x91db4c77, 0x00000000, 0x00000000 + .word 0x3ff0624d, 0xd2f1a9fc, 0x3fff9f6e, 0x4990f227 + .word 0x3c8b42e5, 0xb5d1e808, 0x00000000, 0x00000000 + .word 0x3ff0624d, 0xd2f1a9fc, 
0x40065c55, 0x827df1d2 + .word 0xbca3923d, 0xf03e1e2f, 0x00000000, 0x00000000 + .word 0x3ff05197, 0xf7d73404, 0x3fffaf9b, 0x02e7e8f2 + .word 0x3c897a76, 0x8f34e1c2, 0x00000000, 0x00000000 + .word 0x3ff05197, 0xf7d73404, 0x400667c5, 0x7199104b + .word 0x3c875b89, 0x6f332e70, 0x00000000, 0x00000000 + .word 0x3ff04104, 0x10410410, 0x3fffbfbf, 0x7ebc755f + .word 0xbc9b2a94, 0x084da0b6, 0x00000000, 0x00000000 + .word 0x3ff04104, 0x10410410, 0x4006732f, 0x8d0e2f77 + .word 0xbc93dffd, 0x470422e3, 0x00000000, 0x00000000 + .word 0x3ff03091, 0xb51f5e1a, 0x3fffcfdb, 0xc999e97d + .word 0x3c82be17, 0xecdd3bbc, 0x00000000, 0x00000000 + .word 0x3ff03091, 0xb51f5e1a, 0x40067e93, 0xddbc0e73 + .word 0xbc86eb9f, 0x32ac1a5c, 0x00000000, 0x00000000 + .word 0x3ff02040, 0x81020408, 0x3fffdfef, 0xefebe3d6 + .word 0xbc909afc, 0xfc7c1f3b, 0x00000000, 0x00000000 + .word 0x3ff02040, 0x81020408, 0x400689f2, 0x6c6b01d0 + .word 0x3cae816f, 0x9d2a1032, 0x00000000, 0x00000000 + .word 0x3ff01010, 0x10101010, 0x3fffeffb, 0xfdfebf1f + .word 0x3c95dee5, 0x1994f18b, 0x00000000, 0x00000000 + .word 0x3ff01010, 0x10101010, 0x4006954b, 0x41cd4293 + .word 0x3ca3d5bc, 0xcc443076, 0x00000000, 0x00000000 + .word 0x3ff00000, 0x00000000, 0x40000000, 0x00000000 + .word 0x00000000, 0x00000000, 0x00000000, 0x00000000 + .word 0x3ff00000, 0x00000000, 0x4006a09e, 0x667f3bcd + .word 0xbcabdd34, 0x13b26456, 0x00000000, 0x00000000 + +#define A5 %f32 +#define A4 %f30 +#define A3 %f28 +#define A2 %f26 +#define A1 %f56 + +#define DC0 %f8 +#define DC2 %f6 +#define DC3 %f4 + +#define counter %l3 +#define TBL %l5 +#define stridex %l6 +#define stridey %l7 + +#define _0x00001ff8 %i0 +#define _0x7ff00000 %o0 +#define _0x00100000 %o2 + +#define tmp_counter STACK_BIAS-0x40 +#define tmp_px STACK_BIAS-0x38 +#define tmp0 STACK_BIAS-0x30 +#define tmp1 STACK_BIAS-0x28 +#define tmp2 STACK_BIAS-0x20 +#define tmp3 STACK_BIAS-0x18 +#define tmp4 STACK_BIAS-0x10 +#define tmp5 STACK_BIAS-0x08 + +! 
sizeof temp storage - must be a multiple of 16 for V9; 0x40 covers the eight 8-byte slots tmp_counter..tmp5 (STACK_BIAS-0x40 .. STACK_BIAS-0x08) +#define tmps 0x40 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm (C-like pseudo-code for one element; px/py step by stridex/stridey) !!!!! +! ((float*)&res)[0] = ((float*)px)[0]; +! ((float*)&res)[1] = ((float*)px)[1]; +! hx = *(int*)px; +! px += stridex; +! special operands: hx is a signed int, so the two tests below catch exponent-all-ones inputs and negative/zero/tiny-exponent inputs; both are handed straight to sqrt() +! if ( hx >= 0x7ff00000 ) +! { +! res = sqrt(res); +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! py += stridey; +! goto next; +! } +! if ( hx < 0x00100000 ) +! { +! res = sqrt(res); +! ((float*)py)[0] = ((float*)&res)[0]; +! ((float*)py)[1] = ((float*)&res)[1]; +! py += stridey; +! goto next; +! } +! exponent adjustment and table offset: ind0 is a multiple of 64 (a pair of 32-byte rows), bit (0 or 32) picks the row inside the pair +! sqrt_exp = hx >> 21; +! sqrt_exp -= 512; +! sqrt_exp <<= 52; +! dsqrt_exp = *(double*)&sqrt_exp; +! bit = hx >> 15; +! bit &= 32; +! ind0 = hx >> 7; +! ind0 &= 0x1ff8; +! ind0 += 32; +! ind0 &= -64; +! ind1 = ind0; +! ind1 += bit; +! res = fraction bits of x with the exponent field forced to 0x3fe; res_c = res with its high word rounded to a multiple of 0x2000 and its low word cleared +! res = vis_fand(res,DC0); /* DC0 = vis_to_double(0x000fffff, 0xffffffff); */ +! res = vis_for(res,A1); /* A1 = vis_to_double(0x3fe00000, 0x00000000); */ +! res_c = vis_fpadd32(res,DC2); /* DC2 = vis_to_double(0x00001000, 0x00000000); */ +! res_c = vis_fand(res_c,DC3); /* DC3 = vis_to_double(0x7fffe000, 0x00000000); */ +! table row: [0] scales xx, [1]/[2] are the hi/lo values combined with the polynomial below (visible rows look like 1/res_c and 2*sqrt(res_c) or 2*sqrt(2*res_c) - TODO confirm) +! pind = (char*)TBL + ind1; +! dexp_hi = ((double*)pind)[1]; +! dexp_lo = ((double*)pind)[2]; +! +! dtmp0 = ((double*)pind)[0]; +! xx = (res - res_c); +! xx *= dtmp0; +! Horner form of xx*(A1 + xx*(A2 + xx*(A3 + xx*(A4 + xx*A5)))) +! res = A5 * xx; +! res += A4; +! res *= xx; +! res += A3; +! res *= xx; +! res += A2; +! res *= xx; +! res += A1; +! res *= xx; +! i.e. dexp_hi*(1 + poly) + dexp_lo, with the two small terms added together first +! res = dexp_hi * res; +! res += dexp_lo; +! res += dexp_hi; +! 32-bit partitioned add: the low word of dsqrt_exp is zero, so only the high word (exponent) of res is adjusted +! dtmp0 = vis_fpadd32(dsqrt_exp,res); +! ((float*)py)[0] = ((float*)&dtmp0)[0]; +! ((float*)py)[1] = ((float*)&dtmp0)[1]; +! py += stridey; +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ + ENTRY(__vsqrt) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l5) + wr %g0,0x82,%asi + + ldd [TBL],A1 + sll %i2,3,stridex + or %g0,%i3,%o4 + + ldd [TBL+8],A2 + sll %i4,3,stridey + or %g0,0x7ff,%o0 + + ldd [TBL+16],A3 + sll %o0,20,_0x7ff00000 + or %g0,0x001,%o2 + + ldd [TBL+24],A4 + sll %o2,20,_0x00100000 + + ldd [TBL+32],A5 + ldd [TBL+40],DC0 + ldd [TBL+48],DC2 + ldd [TBL+56],DC3 + + add TBL,64,TBL + add %g0,1023,%o5 + st %i0,[%fp+tmp_counter] + + sll %o5,3,_0x00001ff8 + stx %i1,[%fp+tmp_px] + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%l2 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + lda [%l2]%asi,%o5 ! (5_1) hx = *(int*)px; + + lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0]; + + lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1]; + + cmp %o5,_0x7ff00000 ! (5_1) hx ? 0x7ff00000 + bge,pn %icc,.spec ! (5_1) if ( hx >= 0x7ff00000 ) + nop + + cmp %o5,_0x00100000 ! (5_1) hx ? 0x00100000 + bl,pn %icc,.spec ! (5_1) if ( hx < 0x00100000 ) + nop + + add %l2,stridex,%l2 ! px += stridex + fand %f10,DC0,%f50 ! (5_1) res = vis_fand(res,DC0); + + for %f50,A1,%f40 ! (5_1) res = vis_for(res,A1); + sra %o5,21,%l1 ! (5_1) sqrt_exp = hx >> 21; + sra %o5,15,%i1 ! (5_1) bit = hx >> 15; + + sra %o5,7,%o1 ! (5_1) ind0 = hx >> 7; + sub %l1,512,%o3 ! (5_1) sqrt_exp -= 512; + + and %o1,_0x00001ff8,%o1 ! (5_1) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (0_0) ((float*)&res)[0] = ((float*)px)[0]; + + add %o1,32,%o1 ! (5_1) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (0_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %i1,32,%i4 ! (5_1) bit &= 32; + and %o1,-64,%o1 ! (5_1) ind0 &= -8; + + sll %o1,0,%o7 ! (5_1) ind1 = ind0; + + sllx %o3,52,%o3 ! (5_1) sqrt_exp <<= 52; + add %o7,%i4,%l0 ! (5_1) ind1 += bit; + lda [%l2]%asi,%o5 ! (0_0) hx = *(int*)px; + + stx %o3,[%fp+tmp0] ! (5_1) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (0_0) res = vis_fand(res,DC0); + + add %l2,stridex,%l2 ! 
px += stridex + fpadd32 %f40,DC2,%f54 ! (5_1) res_c = vis_fpadd32(res,DC2); + + add %l0,TBL,%o1 ! (5_1) pind = (char*)TBL + ind1 + + cmp %o5,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f42 ! (0_0) res = vis_for(res,A1); +.cont0: + sra %o5,21,%l1 ! (0_0) sqrt_exp = hx >> 21; + sra %o5,15,%i2 ! (0_0) bit = hx >> 15; + ldd [%o1],%f50 ! (5_1) dtmp0 = ((double*)pind)[0]; + + sra %o5,7,%o1 ! (0_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (0_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (5_1) res_c = vis_fand(res_c,DC3); + + and %o1,_0x00001ff8,%o1 ! (0_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (1_0) ((float*)&res)[0] = ((float*)px)[0]; + + add %o1,32,%o1 ! (0_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (1_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %i2,32,%i4 ! (0_0) bit &= 32; + and %o1,-64,%o1 ! (0_0) ind0 &= -8; + fsubd %f40,%f54,%f40 ! (5_1) xx = (res - res_c); + + sll %o1,0,%o7 ! (0_0) ind1 = ind0; + + cmp %o5,_0x00100000 ! (0_0) hx ? 0x00100000 + bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 ) + lda [%l2]%asi,%o5 ! (1_0) hx = *(int*)px; +.cont1: + sllx %o3,52,%o3 ! (0_0) sqrt_exp <<= 52; + add %o7,%i4,%i1 ! (0_0) ind1 += bit; + + fmuld %f40,%f50,%f40 ! (5_1) xx *= dtmp0; + stx %o3,[%fp+tmp1] ! (0_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (1_0) res = vis_fand(res,DC0); + + add %l2,stridex,%l2 ! px += stridex + fpadd32 %f42,DC2,%f54 ! (0_0) res_c = vis_fpadd32(res,DC2); + + add %i1,TBL,%o1 ! (0_0) pind = (char*)TBL + ind1 + + cmp %o5,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f14 ! (1_0) res = vis_for(res,A1); +.cont2: + sra %o5,21,%l1 ! (1_0) sqrt_exp = hx >> 21; + sra %o5,15,%g5 ! (1_0) bit = hx >> 15; + ldd [%o1],%f50 ! (0_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f40,%f52 ! (5_1) res = A5 * xx; + sra %o5,7,%o1 ! (1_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (1_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! 
(0_0) res_c = vis_fand(res_c,DC3); + + and %o1,_0x00001ff8,%o1 ! (1_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (2_0) ((float*)&res)[0] = ((float*)px)[0]; + + add %o1,32,%o1 ! (1_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (2_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %g5,32,%i4 ! (1_0) bit &= 32; + and %o1,-64,%o1 ! (1_0) ind0 &= -8; + fsubd %f42,%f54,%f42 ! (0_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (1_0) ind1 = ind0; + faddd %f52,A4,%f54 ! (5_1) res += A4; + + cmp %o5,_0x00100000 ! (1_0) hx ? 0x00100000 + bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 ) + lda [%l2]%asi,%o5 ! (2_0) hx = *(int*)px; +.cont3: + sllx %o3,52,%o3 ! (1_0) sqrt_exp <<= 52; + add %o7,%i4,%i2 ! (1_0) ind1 += bit; + + fmuld %f42,%f50,%f42 ! (0_0) xx *= dtmp0; + stx %o3,[%fp+tmp2] ! (1_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (2_0) res = vis_fand(res,DC0); + + fmuld %f54,%f40,%f34 ! (5_1) res *= xx; + fpadd32 %f14,DC2,%f54 ! (1_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + + add %i2,TBL,%o1 ! (1_0) pind = (char*)TBL + ind1 + + cmp %o5,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f18 ! (2_0) res = vis_for(res,A1); +.cont4: + sra %o5,21,%l1 ! (2_0) sqrt_exp = hx >> 21; + sra %o5,15,%g1 ! (2_0) bit = hx >> 15; + ldd [%o1],%f50 ! (1_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f42,%f52 ! (0_0) res = A5 * xx; + sra %o5,7,%o1 ! (2_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (2_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (1_0) res_c = vis_fand(res_c,DC3); + + and %o1,_0x00001ff8,%o1 ! (2_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (3_0) ((float*)&res)[0] = ((float*)px)[0]; + faddd %f34,A3,%f62 ! (5_1) res += A3; + + add %o1,32,%o1 ! (2_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (3_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %g1,32,%i4 ! (2_0) bit &= 32; + and %o1,-64,%o1 ! (2_0) ind0 &= -8; + fsubd %f14,%f54,%f14 ! (1_0) xx = (res - res_c); + + sll %o1,0,%o7 ! 
(2_0) ind1 = ind0; + faddd %f52,A4,%f54 ! (0_0) res += A4; + + fmuld %f62,%f40,%f52 ! (5_1) res *= xx; + cmp %o5,_0x00100000 ! (2_0) hx ? 0x00100000 + bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 ) + lda [%l2]%asi,%o5 ! (3_0) hx = *(int*)px; +.cont5: + sllx %o3,52,%o3 ! (2_0) sqrt_exp <<= 52; + add %o7,%i4,%g5 ! (2_0) ind1 += bit; + + fmuld %f14,%f50,%f14 ! (1_0) xx *= dtmp0; + stx %o3,[%fp+tmp3] ! (2_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (3_0) res = vis_fand(res,DC0); + + fmuld %f54,%f42,%f34 ! (0_0) res *= xx; + fpadd32 %f18,DC2,%f54 ! (2_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + + add %g5,TBL,%o1 ! (2_0) pind = (char*)TBL + ind1 + faddd %f52,A2,%f20 ! (5_1) res += A2; + + cmp %o5,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f44 ! (3_0) res = vis_for(res,A1); +.cont6: + sra %o5,21,%l1 ! (3_0) sqrt_exp = hx >> 21; + sra %o5,15,%i3 ! (3_0) bit = hx >> 15; + ldd [%o1],%f50 ! (2_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f14,%f52 ! (1_0) res = A5 * xx; + sra %o5,7,%o1 ! (3_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (3_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (2_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f40,%f20 ! (5_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (3_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (4_0) ((float*)&res)[0] = ((float*)px)[0]; + faddd %f34,A3,%f62 ! (0_0) res += A3; + + add %o1,32,%o1 ! (3_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (4_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %i3,32,%i4 ! (3_0) bit &= 32; + and %o1,-64,%o1 ! (3_0) ind0 &= -8; + fsubd %f18,%f54,%f18 ! (2_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (3_0) ind1 = ind0; + faddd %f52,A4,%f54 ! (1_0) res += A4; + + fmuld %f62,%f42,%f52 ! (0_0) res *= xx; + cmp %o5,_0x00100000 ! (3_0) hx ? 0x00100000 + bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (5_1) res += A1; +.cont7: + lda [%l2]%asi,%o5 ! (4_0) hx = *(int*)px; + sllx %o3,52,%o3 ! 
(3_0) sqrt_exp <<= 52; + add %o7,%i4,%g1 ! (3_0) ind1 += bit; + + fmuld %f18,%f50,%f18 ! (2_0) xx *= dtmp0; + add %l0,TBL,%l0 ! (5_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp4] ! (3_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (4_0) res = vis_fand(res,DC0); + + fmuld %f54,%f14,%f34 ! (1_0) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%l0+16],%f36 ! (5_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f44,DC2,%f54 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f40,%f12 ! (5_1) res *= xx; + add %g1,TBL,%o1 ! (3_0) (char*)div_arr+ind0 + ldd [%l0+8],%f40 ! (5_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (0_0) res += A2; + + cmp %o5,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 ) + for %f50,A1,%f24 ! (4_0) res = vis_for(res,A1); +.cont8: + sra %o5,21,%l1 ! (4_0) sqrt_exp = hx >> 21; + sra %o5,15,%l0 ! (4_0) bit = hx >> 15; + ldd [%o1],%f22 ! (3_0) dtmp0 = ((double*)pind)[0]; + + fmuld A5,%f18,%f52 ! (2_0) res = A5 * xx; + sra %o5,7,%o1 ! (4_0) ind0 = hx >> 7; + sub %l1,512,%o3 ! (4_0) sqrt_exp -= 512; + fand %f54,DC3,%f54 ! (3_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f42,%f20 ! (0_0) res *= xx; + and %o1,_0x00001ff8,%o1 ! (4_0) ind0 &= 0x1ff8; + lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0]; + faddd %f34,A3,%f62 ! (1_0) res += A3; + + fmuld %f40,%f12,%f34 ! (5_1) res = dexp_hi * res; + add %o1,32,%o1 ! (4_0) ind0 += 32; + lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1]; + + and %l0,32,%i4 ! (4_0) bit &= 32; + cmp %o5,_0x00100000 ! (4_0) hx ? 0x00100000 + bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 ) + fsubd %f44,%f54,%f44 ! (3_0) xx = (res - res_c); +.cont9: + and %o1,-64,%o1 ! (4_0) ind0 &= -8; + faddd %f52,A4,%f54 ! (2_0) res += A4; + + cmp counter,6 + bl,pn %icc,.tail + or %g0,%o4,%l0 + + ba .main_loop + nop + + .align 16 +.main_loop: + fmuld %f62,%f14,%f52 ! (1_1) res *= xx; + sll %o1,0,%i3 ! (4_1) ind1 = ind0; + add %i1,TBL,%i1 ! 
(0_1) pind = (char*)TBL + ind1; + faddd %f20,A1,%f12 ! (0_1) res += A1; + + lda [%l2]%asi,%o5 ! (5_1) hx = *(int*)px; + sllx %o3,52,%o3 ! (4_1) sqrt_exp <<= 52; + add %i3,%i4,%i3 ! (4_1) ind1 += bit; + faddd %f34,%f36,%f60 ! (5_2) res += dexp_lo; + + fmuld %f44,%f22,%f44 ! (3_1) xx *= dtmp0; + add %l2,stridex,%l2 ! px += stridex + stx %o3,[%fp+tmp5] ! (4_1) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (5_1) res = vis_fand(res,DC0); + + fmuld %f54,%f18,%f34 ! (2_1) res *= xx; + nop + ldd [%i1+16],%f36 ! (0_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f24,DC2,%f54 ! (4_1) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f42,%f16 ! (0_1) res *= xx; + sra %o5,21,%l1 ! (5_1) sqrt_exp = hx >> 21; + ldd [%i1+8],%f42 ! (0_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (1_1) res += A2; + + ldd [%fp+tmp0],%f48 ! (5_2) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (5_1) hx ? 0x7ff00000 + bge,pn %icc,.update10 ! (5_1) if ( hx >= 0x7ff00000 ) + faddd %f60,%f40,%f60 ! (5_2) res += dexp_hi; +.cont10: + lda [%l2]%asi,%f10 ! (0_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%i1 ! (5_1) bit = hx >> 15; + add %i3,TBL,%o7 ! (4_1) pind = (char*)TBL + ind1 + for %f50,A1,%f40 ! (5_1) res = vis_for(res,A1); + + fmuld A5,%f44,%f52 ! (3_1) res = A5 * xx; + sra %o5,7,%o1 ! (5_1) ind0 = hx >> 7; + ldd [%o7],%f22 ! (4_1) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (4_1) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f14,%f20 ! (1_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (5_1) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (5_1) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (2_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (5_2) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (5_1) ind0 += 32; + st %f12,[%l0] ! (5_2) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f42,%f16,%f34 ! (0_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (0_0) ((float*)&res)[1] = ((float*)px)[1]; + and %i1,32,%i4 ! (5_1) bit &= 32; + and %o1,-64,%o1 ! 
(5_1) ind0 &= -8; + fsubd %f24,%f54,%f24 ! (4_1) xx = (res - res_c); + + sll %o1,0,%o7 ! (5_1) ind1 = ind0; + add %l0,stridey,%i1 ! py += stridey + st %f13,[%l0+4] ! (5_2) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (3_1) res += A4; + + fmuld %f62,%f18,%f52 ! (2_1) res *= xx; + cmp %o5,_0x00100000 ! (5_1) hx ? 0x00100000 + bl,pn %icc,.update11 ! (5_1) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (1_1) res += A1; +.cont11: + sllx %o3,52,%o3 ! (5_1) sqrt_exp <<= 52; + add %o7,%i4,%l0 ! (5_1) ind1 += bit; + lda [%l2]%asi,%o5 ! (0_0) hx = *(int*)px; + faddd %f34,%f36,%f60 ! (0_1) res += dexp_lo; + + fmuld %f24,%f22,%f24 ! (4_1) xx *= dtmp0; + add %i2,TBL,%i2 ! (1_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp0] ! (5_1) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (0_0) res = vis_fand(res,DC0); + + fmuld %f54,%f44,%f34 ! (3_1) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%i2+16],%f36 ! (1_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f40,DC2,%f54 ! (5_1) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f14,%f16 ! (1_1) res *= xx; + sra %o5,21,%l1 ! (0_0) sqrt_exp = hx >> 21; + ldd [%i2+8],%f14 ! (1_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (2_1) res += A2; + + ldd [%fp+tmp1],%f48 ! (0_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 + bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f42,%f60 ! (0_1) res += dexp_hi; +.cont12: + lda [%l2]%asi,%f10 ! (1_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%i2 ! (0_0) bit = hx >> 15; + add %l0,TBL,%o7 ! (5_1) pind = (char*)TBL + ind1 + for %f50,A1,%f42 ! (0_0) res = vis_for(res,A1); + + fmuld A5,%f24,%f52 ! (4_1) res = A5 * xx; + sra %o5,7,%o1 ! (0_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (5_1) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (5_1) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f18,%f20 ! (2_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (0_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! 
(0_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (3_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (0_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (0_0) ind0 += 32; + st %f12,[%i1] ! (0_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f14,%f16,%f34 ! (1_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (1_0) ((float*)&res)[1] = ((float*)px)[1]; + and %i2,32,%i4 ! (0_0) bit &= 32; + and %o1,-64,%o1 ! (0_0) ind0 &= -8; + fsubd %f40,%f54,%f40 ! (5_1) xx = (res - res_c); + + sll %o1,0,%o7 ! (0_0) ind1 = ind0; + add %i1,stridey,%i2 ! py += stridey + st %f13,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (4_1) res += A4; + + fmuld %f62,%f44,%f52 ! (3_1) res *= xx; + cmp %o5,_0x00100000 ! (0_0) hx ? 0x00100000 + bl,pn %icc,.update13 ! (0_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (2_1) res += A1; +.cont13: + lda [%l2]%asi,%o5 ! (1_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (0_0) sqrt_exp <<= 52; + add %o7,%i4,%i1 ! (0_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (1_1) res += dexp_lo; + + fmuld %f40,%f22,%f40 ! (5_1) xx *= dtmp0; + add %g5,TBL,%g5 ! (2_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp1] ! (0_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (1_0) res = vis_fand(res,DC0); + + fmuld %f54,%f24,%f34 ! (4_1) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%g5+16],%f36 ! (2_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f42,DC2,%f54 ! (0_0) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f18,%f16 ! (2_1) res *= xx; + sra %o5,21,%l1 ! (1_0) sqrt_exp = hx >> 21; + ldd [%g5+8],%f18 ! (2_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (3_1) res += A2; + + ldd [%fp+tmp2],%f48 ! (1_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 + bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f14,%f60 ! (1_1) res += dexp_hi; +.cont14: + lda [%l2]%asi,%f10 ! (2_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%g5 ! (1_0) bit = hx >> 15; + add %i1,TBL,%o7 ! 
(0_0) pind = (char*)TBL + ind1 + for %f50,A1,%f14 ! (1_0) res = vis_for(res,A1); + + fmuld A5,%f40,%f52 ! (5_1) res = A5 * xx; + sra %o5,7,%o1 ! (1_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (0_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (0_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f44,%f20 ! (3_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (1_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (1_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (4_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (1_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (1_0) ind0 += 32; + st %f12,[%i2] ! (1_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f18,%f16,%f34 ! (2_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (2_0) ((float*)&res)[1] = ((float*)px)[1]; + and %g5,32,%i4 ! (1_0) bit &= 32; + and %o1,-64,%o1 ! (1_0) ind0 &= -8; + fsubd %f42,%f54,%f42 ! (0_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (1_0) ind1 = ind0; + add %i2,stridey,%g5 ! py += stridey + st %f13,[%i2+4] ! (1_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (5_1) res += A4; + + fmuld %f62,%f24,%f52 ! (4_1) res *= xx; + cmp %o5,_0x00100000 ! (1_0) hx ? 0x00100000 + bl,pn %icc,.update15 ! (1_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (3_1) res += A1; +.cont15: + lda [%l2]%asi,%o5 ! (2_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (1_0) sqrt_exp <<= 52; + add %o7,%i4,%i2 ! (1_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (2_1) res += dexp_lo; + + fmuld %f42,%f22,%f42 ! (0_0) xx *= dtmp0; + add %g1,TBL,%g1 ! (3_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp2] ! (1_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (2_0) res = vis_fand(res,DC0); + + fmuld %f54,%f40,%f34 ! (5_1) res *= xx; + fpadd32 %f14,DC2,%f54 ! (1_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + ldd [%g1+16],%f36 ! (3_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f44,%f16 ! (3_1) res *= xx; + sra %o5,21,%l1 ! (2_0) sqrt_exp = hx >> 21; + ldd [%g1+8],%f44 ! 
(3_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (4_1) res += A2; + + ldd [%fp+tmp3],%f48 ! (2_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 + bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f18,%f60 ! (2_1) res += dexp_hi; +.cont16: + lda [%l2]%asi,%f10 ! (3_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%g1 ! (2_0) bit = hx >> 15; + add %i2,TBL,%o7 ! (1_0) pind = (char*)TBL + ind1 + for %f50,A1,%f18 ! (2_0) res = vis_for(res,A1); + + fmuld A5,%f42,%f52 ! (0_0) res = A5 * xx; + sra %o5,7,%o1 ! (2_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (1_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (1_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f24,%f20 ! (4_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (2_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (2_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (5_1) res += A3; + + fpadd32 %f48,%f60,%f12 ! (2_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (2_0) ind0 += 32; + st %f12,[%g5] ! (2_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f44,%f16,%f34 ! (3_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (3_0) ((float*)&res)[1] = ((float*)px)[1]; + and %g1,32,%i4 ! (2_0) bit &= 32; + and %o1,-64,%o1 ! (2_0) ind0 &= -8; + fsubd %f14,%f54,%f14 ! (1_0) xx = (res - res_c); + + sll %o1,0,%o7 ! (2_0) ind1 = ind0; + add %g5,stridey,%g1 ! py += stridey + st %f13,[%g5+4] ! (2_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (0_0) res += A4; + + fmuld %f62,%f40,%f52 ! (5_1) res *= xx; + cmp %o5,_0x00100000 ! (2_0) hx ? 0x00100000 + bl,pn %icc,.update17 ! (2_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (4_1) res += A1; +.cont17: + lda [%l2]%asi,%o5 ! (3_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (2_0) sqrt_exp <<= 52; + add %o7,%i4,%g5 ! (2_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (3_1) res += dexp_lo; + + fmuld %f14,%f22,%f14 ! (1_0) xx *= dtmp0; + add %i3,TBL,%i3 ! (4_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp3] ! 
(2_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (3_0) res = vis_fand(res,DC0); + + fmuld %f54,%f42,%f34 ! (0_0) res *= xx; + fpadd32 %f18,DC2,%f54 ! (2_0) res_c = vis_fpadd32(res,DC2); + add %l2,stridex,%l2 ! px += stridex + ldd [%i3+16],%f36 ! (4_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f24,%f16 ! (4_1) res *= xx; + sra %o5,21,%l1 ! (3_0) sqrt_exp = hx >> 21; + ldd [%i3+8],%f24 ! (4_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (5_1) res += A2; + + ldd [%fp+tmp4],%f48 ! (3_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 + bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f44,%f60 ! (3_1) res += dexp_hi; +.cont18: + lda [%l2]%asi,%f10 ! (4_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%i3 ! (3_0) bit = hx >> 15; + add %g5,TBL,%o7 ! (2_0) pind = (char*)TBL + ind1 + for %f50,A1,%f44 ! (3_0) res = vis_for(res,A1); + + fmuld A5,%f14,%f52 ! (1_0) res = A5 * xx; + sra %o5,7,%o1 ! (3_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (2_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (2_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f40,%f20 ! (5_1) res *= xx; + and %o1,_0x00001ff8,%o1 ! (3_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (3_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (0_0) res += A3; + + fpadd32 %f48,%f60,%f12 ! (3_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + add %o1,32,%o1 ! (3_0) ind0 += 32; + st %f12,[%g1] ! (3_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + fmuld %f24,%f16,%f34 ! (4_1) res = dexp_hi * res; + + lda [%l2+4]%asi,%f11 ! (4_0) ((float*)&res)[1] = ((float*)px)[1]; + and %i3,32,%i4 ! (3_0) bit &= 32; + and %o1,-64,%o1 ! (3_0) ind0 &= -8; + fsubd %f18,%f54,%f18 ! (2_0) xx = (res - res_c); + + or %g0,%o1,%o7 ! (3_0) ind1 = ind0; + add %g1,stridey,%i3 ! py += stridey + st %f13,[%g1+4] ! (3_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (1_0) res += A4; + + fmuld %f62,%f42,%f52 ! (0_0) res *= xx; + cmp %o5,_0x00100000 ! (3_0) hx ? 
0x00100000 + bl,pn %icc,.update19 ! (3_0) if ( hx < 0x00100000 ) + faddd %f20,A1,%f12 ! (5_1) res += A1; +.cont19: + lda [%l2]%asi,%o5 ! (4_0) hx = *(int*)px; + sllx %o3,52,%o3 ! (3_0) sqrt_exp <<= 52; + add %o7,%i4,%g1 ! (3_0) ind1 += bit; + faddd %f34,%f36,%f60 ! (4_1) res += dexp_lo; + + fmuld %f18,%f22,%f18 ! (2_0) xx *= dtmp0; + add %l0,TBL,%l0 ! (5_1) pind = (char*)TBL + ind1; + stx %o3,[%fp+tmp4] ! (3_0) dsqrt_exp = *(double*)&sqrt_exp; + fand %f10,DC0,%f50 ! (4_0) res = vis_fand(res,DC0); + + fmuld %f54,%f14,%f34 ! (1_0) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%l0+16],%f36 ! (5_1) dexp_lo = ((double*)pind)[2]; + fpadd32 %f44,DC2,%f54 ! (3_0) res_c = vis_fpadd32(res,DC2); + + fmuld %f12,%f40,%f16 ! (5_1) res *= xx; + sra %o5,21,%l1 ! (4_0) sqrt_exp = hx >> 21; + ldd [%l0+8],%f40 ! (5_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (0_0) res += A2; + + ldd [%fp+tmp5],%f48 ! (4_1) dsqrt_exp = *(double*)&sqrt_exp; + cmp %o5,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 + bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7ff00000 ) + faddd %f60,%f24,%f60 ! (4_1) res += dexp_hi; +.cont20: + lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0]; + sra %o5,15,%l0 ! (4_0) bit = hx >> 15; + add %g1,TBL,%o7 ! (3_0) (char*)div_arr+ind0 + for %f50,A1,%f24 ! (4_0) res = vis_for(res,A1); + + fmuld A5,%f18,%f52 ! (2_0) res = A5 * xx; + sra %o5,7,%o1 ! (4_0) ind0 = hx >> 7; + ldd [%o7],%f22 ! (3_0) dtmp0 = ((double*)pind)[0]; + fand %f54,DC3,%f54 ! (3_0) res_c = vis_fand(res_c,DC3); + + fmuld %f20,%f42,%f20 ! (0_0) res *= xx; + and %o1,_0x00001ff8,%o1 ! (4_0) ind0 &= 0x1ff8; + sub %l1,512,%o3 ! (4_0) sqrt_exp -= 512; + faddd %f34,A3,%f62 ! (1_0) res += A3; + + lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1]; + add %o1,32,%o1 ! (4_0) ind0 += 32; + fpadd32 %f48,%f60,%f12 ! (4_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + fmuld %f40,%f16,%f34 ! (5_1) res = dexp_hi * res; + + and %l0,32,%i4 ! (4_0) bit &= 32; + cmp %o5,_0x00100000 ! (4_0) hx ? 
0x00100000 + bl,pn %icc,.update21 ! (4_0) if ( hx < 0x00100000 ) + fsubd %f44,%f54,%f44 ! (3_0) xx = (res - res_c); +.cont21: + and %o1,-64,%o1 ! (4_0) ind0 &= -8; + sub counter,6,counter ! counter + st %f12,[%i3] ! (4_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + faddd %f52,A4,%f54 ! (2_0) res += A4; + + st %f13,[%i3+4] ! (4_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + cmp counter,6 + bge,pt %icc,.main_loop + add %i3,stridey,%l0 ! py += stridey + +.tail: + subcc counter,1,counter + bneg .begin + or %g0,%l0,%o4 + + fmuld %f62,%f14,%f52 ! (1_1) res *= xx; + add %i1,TBL,%i1 ! (0_1) pind = (char*)TBL + ind1; + faddd %f20,A1,%f12 ! (0_1) res += A1; + + faddd %f34,%f36,%f60 ! (5_2) res += dexp_lo; + + fmuld %f44,%f22,%f44 ! (3_1) xx *= dtmp0; + add %l2,stridex,%l2 ! px += stridex + + fmuld %f54,%f18,%f34 ! (2_1) res *= xx; + ldd [%i1+16],%f36 ! (0_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f42,%f12 ! (0_1) res *= xx; + ldd [%i1+8],%f42 ! (0_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (1_1) res += A2; + + ldd [%fp+tmp0],%f48 ! (5_2) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f40,%f60 ! (5_2) res += dexp_hi; + + fmuld A5,%f44,%f52 ! (3_1) res = A5 * xx; + + fmuld %f20,%f14,%f20 ! (1_1) res *= xx; + faddd %f34,A3,%f62 ! (2_1) res += A3; + + fmuld %f42,%f12,%f34 ! (0_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (5_2) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%l0] ! (5_2) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %l0,stridey,%i1 ! py += stridey + st %f13,[%l0+4] ! (5_2) ((float*)py)[1] = ((float*)&dtmp0)[1]; + faddd %f52,A4,%f54 ! (3_1) res += A4; + + subcc counter,1,counter + bneg .begin + or %g0,%i1,%o4 + + fmuld %f62,%f18,%f52 ! (2_1) res *= xx; + faddd %f20,A1,%f12 ! (1_1) res += A1; + + faddd %f34,%f36,%f60 ! (0_1) res += dexp_lo; + + add %i2,TBL,%i2 ! (1_1) pind = (char*)TBL + ind1; + + fmuld %f54,%f44,%f34 ! (3_1) res *= xx; + add %l2,stridex,%l2 ! px += stridex + ldd [%i2+16],%f36 ! 
(1_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f14,%f12 ! (1_1) res *= xx; + ldd [%i2+8],%f14 ! (1_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (2_1) res += A2; + + ldd [%fp+tmp1],%f48 ! (0_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f42,%f60 ! (0_1) res += dexp_hi; + + fmuld %f20,%f18,%f20 ! (2_1) res *= xx; + faddd %f34,A3,%f62 ! (3_1) res += A3; + + fmuld %f14,%f12,%f34 ! (1_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (0_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%i1] ! (0_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %i1,stridey,%i2 ! py += stridey + st %f13,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + subcc counter,1,counter + bneg .begin + or %g0,%i2,%o4 + + fmuld %f62,%f44,%f52 ! (3_1) res *= xx; + faddd %f20,A1,%f12 ! (2_1) res += A1; + + faddd %f34,%f36,%f60 ! (1_1) res += dexp_lo; + + add %g5,TBL,%g5 ! (2_1) pind = (char*)TBL + ind1; + + add %l2,stridex,%l2 ! px += stridex + ldd [%g5+16],%f36 ! (2_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f18,%f12 ! (2_1) res *= xx; + ldd [%g5+8],%f18 ! (2_1) dexp_hi = ((double*)pind)[1]; + faddd %f52,A2,%f20 ! (3_1) res += A2; + + ldd [%fp+tmp2],%f48 ! (1_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f14,%f60 ! (1_1) res += dexp_hi; + + fmuld %f20,%f44,%f20 ! (3_1) res *= xx; + + fmuld %f18,%f12,%f34 ! (2_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (1_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%i2] ! (1_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %i2,stridey,%g5 ! py += stridey + st %f13,[%i2+4] ! (1_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + subcc counter,1,counter + bneg .begin + or %g0,%g5,%o4 + + faddd %f20,A1,%f12 ! (3_1) res += A1; + + faddd %f34,%f36,%f60 ! (2_1) res += dexp_lo; + + add %g1,TBL,%g1 ! (3_1) pind = (char*)TBL + ind1; + + add %l2,stridex,%l2 ! px += stridex + ldd [%g1+16],%f36 ! (3_1) dexp_lo = ((double*)pind)[2]; + + fmuld %f12,%f44,%f12 ! (3_1) res *= xx; + ldd [%g1+8],%f44 ! 
(3_1) dexp_hi = ((double*)pind)[1]; + + ldd [%fp+tmp3],%f48 ! (2_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f18,%f60 ! (2_1) res += dexp_hi; + + fmuld %f44,%f12,%f34 ! (3_1) res = dexp_hi * res; + fpadd32 %f48,%f60,%f12 ! (2_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%g5] ! (2_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %g5,stridey,%g1 ! py += stridey + st %f13,[%g5+4] ! (2_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + subcc counter,1,counter + bneg .begin + or %g0,%g1,%o4 + + faddd %f34,%f36,%f60 ! (3_1) res += dexp_lo; + + add %l2,stridex,%l2 ! px += stridex + + ldd [%fp+tmp4],%f48 ! (3_1) dsqrt_exp = *(double*)&sqrt_exp; + faddd %f60,%f44,%f60 ! (3_1) res += dexp_hi; + + fpadd32 %f48,%f60,%f12 ! (3_1) dtmp0 = vis_fpadd32(dsqrt_exp,res); + + st %f12,[%g1] ! (3_1) ((float*)py)[0] = ((float*)&dtmp0)[0]; + + add %g1,stridey,%i3 ! py += stridey + st %f13,[%g1+4] ! (3_1) ((float*)py)[1] = ((float*)&dtmp0)[1]; + + ba .begin + or %g0,%i3,%o4 + + .align 16 +.spec: + fsqrtd %f10,%f10 + add %l2,stridex,%l2 + + st %f10,[%o4] + st %f11,[%o4+4] + + add %o4,stridey,%o4 + ba .begin1 + sub counter,1,counter + + .align 16 +.update0: + cmp counter,1 + ble .cont0 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + ble .cont1 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + ble .cont2 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + ble .cont3 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + ble .cont4 + nop + + sub 
%l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + ble .cont5 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + ble .cont6 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + ble .cont7 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + ble .cont8 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + ble .cont9 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + cmp counter,6 + ble .cont10 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,6 + ble .cont11 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + ba .cont11 + or %g0,6,counter + + .align 16 +.update12: + cmp counter,7 + ble .cont12 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + ba .cont12 + or %g0,7,counter + + .align 16 +.update13: + cmp counter,7 + ble .cont13 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + ba .cont13 + or %g0,7,counter + + .align 16 +.update14: + cmp counter,8 + ble .cont14 + nop + + sub %l2,stridex,%i5 
+ stx %i5,[%fp+tmp_px] + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + ba .cont14 + or %g0,8,counter + + .align 16 +.update15: + cmp counter,8 + ble .cont15 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + ba .cont15 + or %g0,8,counter + + .align 16 +.update16: + cmp counter,9 + ble .cont16 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,9,counter + st counter,[%fp+tmp_counter] + + ba .cont16 + or %g0,9,counter + + .align 16 +.update17: + cmp counter,9 + ble .cont17 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,9,counter + st counter,[%fp+tmp_counter] + + ba .cont17 + or %g0,9,counter + + .align 16 +.update18: + cmp counter,10 + ble .cont18 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,10,counter + st counter,[%fp+tmp_counter] + + ba .cont18 + or %g0,10,counter + + .align 16 +.update19: + cmp counter,10 + ble .cont19 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,10,counter + st counter,[%fp+tmp_counter] + + ba .cont19 + or %g0,10,counter + + .align 16 +.update20: + cmp counter,11 + ble .cont20 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,11,counter + st counter,[%fp+tmp_counter] + + ba .cont20 + or %g0,11,counter + + .align 16 +.update21: + cmp counter,11 + ble .cont21 + nop + + sub %l2,stridex,%i5 + stx %i5,[%fp+tmp_px] + + sub counter,11,counter + st counter,[%fp+tmp_counter] + + ba .cont21 + or %g0,11,counter + +.exit: + ret + restore + + SET_SIZE(__vsqrt) + diff --git a/usr/src/libm/src/mvec/vis/__vsqrtf.S b/usr/src/libm/src/mvec/vis/__vsqrtf.S new file mode 100644 index 0000000..0f321f7 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vsqrtf.S @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsqrtf.S 1.4 06/01/23 SMI"
+
+ .file "__vsqrtf.S"
+
+#include "libm.h"
+
+ .section ".text"
+ .file "__vsqrtf.S"
+
+ ENTRY(__vsqrtf) /* strided single-precision vector square root; register roles as used below: %o0 = element count, %o1 = source pointer, %o2 = source stride, %o3 = destination pointer, %o4 = destination stride (both strides counted in 4-byte elements) */
+
+ lda [%o1]0x82,%f0 ! preload the first source element through ASI 0x82, before the count is checked
+ subcc %o0,1,%o0 ! count - 1, setting the condition codes
+ bneg,pn %icc,.exit ! nothing to do when count - 1 is negative
+ sll %o2,2,%o2 ! (delay slot, always executed) source stride * 4 = byte stride
+ ba .loop
+ sll %o4,2,%o4 ! (delay slot) destination stride * 4 = byte stride
+
+ .align 16
+.loop:
+ fsqrts %f0,%f2 ! %f2 = square root of the element loaded on the previous pass
+ lda [%o1+%o2]0x82,%f0 ! preload the next element; on the final pass this reads one stride past the last element consumed - NOTE(review): presumably the reason the loads use ASI 0x82 (ASI_PRIMARY_NOFAULT in SPARC V9), confirm
+ add %o1,%o2,%o1 ! advance the source pointer by one byte stride
+ subcc %o0,1,%o0 ! one fewer element remaining
+ st %f2,[%o3] ! store the result
+ bpos,pt %icc,.loop ! keep going while the remaining count is non-negative
+ add %o3,%o4,%o3 ! (delay slot) advance the destination pointer
+.exit:
+ retl ! leaf return: this routine never executes a save
+ nop
+
+ SET_SIZE(__vsqrtf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S b/usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S
new file mode 100644
index 0000000..ca41db5
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S
@@ -0,0 +1,993 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsqrtf_ultra3.S 1.6 06/01/23 SMI"
+
+ .file "__vsqrtf_ultra3.S"
+
+#include "libm.h"
+#if defined(LIBMVEC_SO_BUILD)
+ .weak __vsqrtf
+ .type __vsqrtf,#function
+ __vsqrtf = __vsqrtf_ultra3
+#endif
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01
+ .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01
+ .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff
+ .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000
+ .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000
+
+#define DC0 %f6
+#define DC1 %f4
+#define DC2 %f2
+#define K2 %f38
+#define K1 %f36
+#define TBL %l2
+#define stridex %l3
+#define stridey %l4
+#define _0x1ff0 %l5
+#define counter %l6
+#define _0x00800000 %l7
+#define _0x7f800000 %o0
+
+#define tmp_px STACK_BIAS-0x40 /* stack slot: source address at which .begin starts the next batch */
+#define tmp_counter STACK_BIAS-0x38 /* stack slot: number of elements still pending for the next batch */
+#define tmp0 STACK_BIAS-0x30 /* tmp0..tmp4: 8-byte stack slots through which lexp0 is moved from an integer register to a double register */
+#define tmp1 STACK_BIAS-0x28
+#define tmp2 STACK_BIAS-0x20
+#define tmp3 STACK_BIAS-0x18
+#define tmp4 STACK_BIAS-0x10
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! Scalar C equivalent for one element. The code below interleaves several elements; each instruction comment starts with a (k_j) tag naming the element it belongs to (NOTE(review): the exact meaning of k and j is not stated in this file).
+! x0 = *px;
+! ax = *(int*)px;
+! px += stridex;
+!
+! if( ax >= 0x7f800000 )
+! {
+! *py = sqrtf(x0);
+! py += stridey;
+! continue;
+! }
+! if( ax < 0x00800000 )
+! {
+! *py = sqrtf(x0);
+! py += stridey;
+! continue;
+! }
+!
+! db0 = (double)x0;
+! iexp0 = ax >> 24;
+! iexp0 += 0x3c0;
+! lexp0 = (long long)iexp0 << 52;
+!
+! db0 = vis_fand(db0,DC0);
+! db0 = vis_for(db0,DC1);
+! hi0 = vis_fand(db0,DC2);
+!
+! ax >>= 11;
+! si0 = ax & 0x1ff0;
+! dtmp0 = ((double*)((char*)TBL + si0))[0];
+! xx0 = (db0 - hi0);
+! xx0 *= dtmp0;
+! dtmp0 = ((double*)((char*)TBL + si0))[1]
+! res0 = K2 * xx0;
+! res0 += K1;
+! res0 *= xx0;
+! res0 += DC1;
+! res0 = dtmp0 * res0;
+! dtmp1 = *((double*)&lexp0);
+! res0 *= dtmp1;
+! fres0 = (float)res0;
+! *py = fres0;
+! py += stridey;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ ENTRY(__vsqrtf_ultra3) /* incoming registers as used by the prologue below: %i0 = n, %i1 = px, %i2 = stridex, %i3 = py, %i4 = stridey (strides counted in 4-byte elements) */
+ save %sp,-SA(MINFRAME)-tmps,%sp ! new register window; frame is extended by tmps bytes for the tmp_* slots
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,o2) /* presumably leaves the address of .CONST_TBL in %o2: the ldd instructions below read K1..DC2 from it */
+ PIC_SET(l7,__vlibm_TBL_sqrtf,l2) /* presumably leaves the address of __vlibm_TBL_sqrtf in %l2, which is TBL */
+
+ st %i0,[%fp+tmp_counter] ! pending element count = n
+ sll %i2,2,stridex ! stridex in bytes
+ or %g0,0xff8,%l5
+
+ stx %i1,[%fp+tmp_px] ! first batch starts at px
+ sll %l5,1,_0x1ff0 ! 0xff8 << 1 = 0x1ff0, the table index mask
+
+ ldd [%o2],K1
+ sll %i4,2,stridey ! stridey in bytes
+
+ ldd [%o2+8],K2
+ or %g0,%i3,%g5 ! %g5 = py
+
+ ldd [%o2+16],DC0
+ sethi %hi(0x7f800000),%o0 ! constant for the ax >= 0x7f800000 tests
+
+ ldd [%o2+24],DC1
+ sethi %hi(0x00800000),%l7 ! constant for the ax < 0x00800000 tests
+
+ ldd [%o2+32],DC2
+
+.begin:
+ ld [%fp+tmp_counter],counter ! pick up the pending batch: its element count ...
+ ldx [%fp+tmp_px],%i1 ! ... and its start address
+ st %g0,[%fp+tmp_counter] ! nothing pending any more unless an .updateN handler stores a new count
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit ! done when no elements remain; the lda that follows sits in the delay slot
+
+ lda [%i1]0x82,%o2 ! (2_0) ax = *(int*)px;
+
+ or %g0,%i1,%o7
+ lda [%i1]0x82,%f25 ! (2_0) x0 = *px;
+
+ cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.spec ! (2_0) if( ax >= 0x7f800000 )
+ nop
+
+ cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
+ bl,pn %icc,.spec ! (2_0) if( ax < 0x00800000 )
+ nop
+
+ fstod %f25,%f56 ! (2_0) db0 = (double)x0;
+
+ lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
+
+ sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
+
+ add %o7,stridex,%i1 ! px += stridex
+ add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
+ lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
+ fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
+
+ cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update0 ! (3_0) if( ax >= 0x7f800000 )
+ nop
+.cont0:
+ sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
+
+ sra %o2,11,%i2 ! (2_0) ax >>= 11;
+ stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
+ for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
+
+ cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
+ bl,pn %icc,.update1 ! (3_0) if( ax < 0x00800000 )
+ nop
+.cont1:
+ fstod %f0,%f48 ! (3_0) db0 = (double)x0;
+
+ and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
+
+ add %i1,stridex,%i1 ! px += stridex
+ add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
+ fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
+
+ sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
+
+ lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
+ fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
+
+ add %o4,960,%i0 ! (3_0) iexp0 += 0x3c0;
+
+ cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000
+ bge,pn %icc,.update2 ! (4_1) if( ax >= 0x7f800000 )
+ nop
+.cont2:
+ fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0);
+ sllx %i0,52,%g1 ! (3_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ sra %o1,11,%l0 ! (3_1) ax >>= 11;
+ stx %g1,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0);
+ for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000
+ bl,pn %icc,.update3 ! (4_1) if( ax < 0x00800000 )
+ nop
+.cont3:
+ fstod %f13,%f50 ! (4_1) db0 = (double)x0;
+
+ fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0;
+ and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px;
+
+ add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0
+ fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2);
+
+ sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24;
+
+ add %i1,stridex,%o4 ! px += stridex
+ add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px;
+ fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0;
+ cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000
+ bge,pn %icc,.update4 ! (0_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0);
+.cont4:
+ sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ sra %o2,11,%i5 ! (4_1) ax >>= 11;
+ stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0);
+ for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1);
+
+ cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000
+ bl,pn %icc,.update5 ! (0_0) if( ax < 0x00800000 )
+ nop
+.cont5:
+ fstod %f17,%f56 ! (0_0) db0 = (double)x0;
+
+ fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0;
+ lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px;
+ faddd %f52,K1,%f52 ! (2_1) res0 += K1;
+
+ sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24;
+ and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0;
+ fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2);
+
+ add %o4,stridex,%i1 ! px += stridex
+
+ add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0;
+ add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0
+ lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px;
+ fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0;
+ cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000
+ bge,pn %icc,.update6 ! (1_0) if( ax >= 0x7f800000 )
+ fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0);
+.cont6:
+ fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0;
+ sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ sra %l1,11,%i4 ! (0_0) ax >>= 11;
+ stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0);
+ for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1);
+
+ cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000
+ bl,pn %icc,.update7 ! (1_0) if( ax < 0x00800000 )
+ nop
+.cont7:
+ fstod %f21,%f56 ! (1_0) db0 = (double)x0;
+
+ fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0;
+ and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px;
+ faddd %f50,K1,%f62 ! (3_1) res0 += K1;
+
+ add %g1,TBL,%i5 ! (0_0) (double*)((char*)TBL + si0)
+ fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2);
+
+ sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24;
+ ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f58 ! (2_1) res0 += DC1;
+
+ add %i1,stridex,%o7 ! px += stridex
+ add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px;
+ fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0;
+ cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.update8 ! (2_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0);
+.cont8:
+ fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0;
+ sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0;
+ sra %i0,11,%g1 ! (1_0) ax >>= 11;
+ stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0);
+ for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
+ bl,pn %icc,.update9 ! (2_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0);
+ fstod %f25,%f56 ! (2_0) db0 = (double)x0;
+.cont9:
+ fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0;
+ and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0;
+ lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
+ faddd %f50,K1,%f34 ! (4_1) res0 += K1;
+
+ add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0
+ fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1;
+ sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
+ ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f54,DC1,%f58 ! (3_1) res0 += DC1;
+
+ add %o7,stridex,%i1 ! px += stridex
+ add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
+ lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
+ fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0;
+ cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update10 ! (3_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0);
+.cont10:
+ fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0;
+ sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0;
+ sra %o2,11,%i2 ! (2_0) ax >>= 11;
+ stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
+ for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
+
+ cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
+ bl,pn %icc,.update11 ! (3_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0);
+ fstod %f0,%f48 ! (3_0) db0 = (double)x0;
+.cont11:
+ fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0;
+ and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
+ faddd %f50,K1,%f56 ! (0_0) res0 += K1;
+
+ add %i1,stridex,%i1 ! px += stridex
+ add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
+ fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1;
+ sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
+ ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f54 ! (4_1) res0 += DC1;
+
+ lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
+ fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
+
+ or %g0,%g5,%i3 ! %i3 = py for the stores in .main_loop and .tail
+ cmp counter,5
+ bl,pn %icc,.tail ! fewer than 5 elements in this batch: skip the loop
+ add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0;
+
+ ba .main_loop
+ sub counter,5,counter ! counter -= 5 (each loop pass stores 5 results)
+
+ .align 16
+.main_loop:
+ fmuld K2,%f30,%f60 ! (1_1) res0 = K2 * xx0;
+ cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000
+ bge,pn %icc,.update12 ! (4_1) if( ax >= 0x7f800000 )
+ fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0);
+.cont12:
+ fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0;
+ sllx %g5,52,%g5 ! (3_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f32,%f15 ! (2_2) fres0 = (float)res0;
+
+ fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0;
+ sra %o1,11,%l0 ! (3_1) ax >>= 11;
+ stx %g5,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0);
+ for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000
+ bl,pn %icc,.update13 ! (4_1) if( ax < 0x00800000 )
+ ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0);
+ fstod %f13,%f50 ! (4_1) db0 = (double)x0;
+.cont13:
+ fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0;
+ and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px;
+ faddd %f60,K1,%f32 ! (1_1) res0 += K1;
+
+ add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0
+ add %i3,stridey,%o3 ! py += stridey
+ st %f15,[%i3] ! (2_2) *py = fres0;
+ fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1;
+ sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24;
+ ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f34 ! (0_1) res0 += DC1;
+
+ add %i1,stridex,%o4 ! px += stridex
+ add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px;
+ fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0;
+ cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000
+ bge,pn %icc,.update14 ! (0_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0);
+.cont14:
+ fmuld %f32,%f30,%f48 ! (1_1) res0 *= xx0;
+ sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f28,%f19 ! (3_2) fres0 = (float)res0;
+
+ fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0;
+ sra %o2,11,%i5 ! (4_1) ax >>= 11;
+ stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0);
+ for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1);
+
+ cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000
+ bl,pn %icc,.update15 ! (0_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0);
+ fstod %f17,%f56 ! (0_0) db0 = (double)x0;
+.cont15:
+ fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0;
+ add %o3,stridey,%g5 ! py += stridey
+ lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px;
+ faddd %f52,K1,%f52 ! (2_1) res0 += K1;
+
+ sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24;
+ and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0;
+ st %f19,[%o3] ! (3_2) *py = fres0;
+ fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1;
+ add %o4,stridex,%i1 ! px += stridex
+ ldd [%i4+8],%f60 ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f48,DC1,%f58 ! (1_1) res0 += DC1;
+
+ add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0;
+ add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0
+ lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px;
+ fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0;
+ cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000
+ bge,pn %icc,.update16 ! (1_0) if( ax >= 0x7f800000 )
+ fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0);
+.cont16:
+ fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0;
+ sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f44,%f23 ! (4_2) fres0 = (float)res0;
+
+ fmuld %f60,%f58,%f44 ! (1_1) res0 = dtmp0 * res0;
+ sra %l1,11,%i4 ! (0_0) ax >>= 11;
+ stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0);
+ for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1);
+
+ cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000
+ bl,pn %icc,.update17 ! (1_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp4],%f34 ! (1_1) dtmp1 = *((double*)&lexp0);
+ fstod %f21,%f56 ! (1_0) db0 = (double)x0;
+.cont17:
+ fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0;
+ and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px;
+ faddd %f50,K1,%f62 ! (3_1) res0 += K1;
+
+ add %g1,TBL,%i5 ! (0_0) (double*)((char*)TBL + si0)
+ add %g5,stridey,%g5 ! py += stridey
+ st %f23,[stridey+%o3] ! (4_2) *py = fres0;
+ fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f44,%f34,%f44 ! (1_1) res0 *= dtmp1;
+ sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24;
+ ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f58 ! (2_1) res0 += DC1;
+
+ add %i1,stridex,%o7 ! px += stridex
+ add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px;
+ fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0;
+ cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.update18 ! (2_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0);
+.cont18:
+ fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0;
+ sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f40,%f27 ! (0_1) fres0 = (float)res0;
+
+ fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0;
+ sra %i0,11,%g1 ! (1_0) ax >>= 11;
+ stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0);
+ for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
+ bl,pn %icc,.update19 ! (2_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0);
+ fstod %f25,%f56 ! (2_0) db0 = (double)x0;
+.cont19:
+ fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0;
+ and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0;
+ lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
+ faddd %f50,K1,%f34 ! (4_1) res0 += K1;
+
+ add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0
+ add %g5,stridey,%g1 ! py += stridey
+ st %f27,[%g5] ! (0_1) *py = fres0;
+ fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1;
+ sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
+ ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f54,DC1,%f58 ! (3_1) res0 += DC1;
+
+ add %o7,stridex,%i1 ! px += stridex
+ add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
+ lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
+ fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0;
+ cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update20 ! (3_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0);
+.cont20:
+ fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0;
+ sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f44,%f8 ! (1_1) fres0 = (float)res0;
+
+ fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0;
+ sra %o2,11,%i2 ! (2_0) ax >>= 11;
+ stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
+ for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
+
+ cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
+ bl,pn %icc,.update21 ! (3_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0);
+ fstod %f0,%f48 ! (3_0) db0 = (double)x0;
+.cont21:
+ fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0;
+ and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
+ faddd %f50,K1,%f56 ! (0_0) res0 += K1;
+
+ add %i1,stridex,%i1 ! px += stridex
+ add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
+ st %f8,[stridey+%g5] ! (1_1) *py = fres0;
+ fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1;
+ sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
+ ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f54 ! (4_1) res0 += DC1;
+
+ add %g1,stridey,%i3 ! py += stridey
+ subcc counter,5,counter ! counter -= 5, setting the condition codes for the bpos below
+ lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
+ fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
+
+ bpos,pt %icc,.main_loop
+ add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0;
+
+ add counter,5,counter ! undo the last subtraction: counter = results of this batch still to be stored
+.tail:
+ subcc counter,1,counter ! drain: up to 4 results are stored below, one per subcc/bneg step
+ bneg,a .begin
+ or %g0,%i3,%g5 ! (annulled delay slot, runs only when the branch is taken) %g5 = py
+
+ fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0;
+ fdtos %f32,%f15 ! (2_2) fres0 = (float)res0;
+
+ fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0;
+
+ ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0);
+
+ add %i3,stridey,%o3 ! py += stridey
+ st %f15,[%i3] ! (2_2) *py = fres0;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ or %g0,%o3,%g5 ! (annulled delay slot) %g5 = py
+
+ fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1;
+ ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f34 ! (0_1) res0 += DC1;
+
+ fdtos %f28,%f19 ! (3_2) fres0 = (float)res0;
+
+ fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0;
+
+ ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0);
+
+ add %o3,stridey,%g5 ! py += stridey
+
+ st %f19,[%o3] ! (3_2) *py = fres0;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ nop
+
+ fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1;
+
+ fdtos %f44,%f23 ! (4_2) fres0 = (float)res0;
+
+ add %g5,stridey,%g5 ! py += stridey
+ st %f23,[stridey+%o3] ! (4_2) *py = fres0;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ nop
+
+ fdtos %f40,%f27 ! (0_1) fres0 = (float)res0;
+
+ st %f27,[%g5] ! (0_1) *py = fres0;
+
+ ba .begin ! batch finished: go pick up whatever an .updateN handler left pending
+ add %g5,stridey,%g5 ! (delay slot) py += stridey
+
+ .align 16
+.spec: ! reached from .begin1 when the first element of a batch has ax >= 0x7f800000 or ax < 0x00800000 (signed compare): the hardware fsqrts computes it
+ fsqrts %f25,%f25 ! *py = sqrtf(x0), as in the algorithm comment
+ sub counter,1,counter
+ add %i1,stridex,%i1 ! px += stridex
+ st %f25,[%g5]
+ ba .begin1 ! continue with the rest of the same batch
+ add %g5,stridey,%g5 ! (delay slot) py += stridey
+
+ .align 16
+.update0: ! the element one position ahead in the batch failed the ax >= 0x7f800000 test
+ cmp counter,1
+ ble .cont0 ! that element lies beyond the end of this batch: ignore it
+ fzeros %f0 ! (delay slot, always executed) replace the loaded x0 by zero
+
+ stx %i1,[%fp+tmp_px] ! the next batch starts at the offending element, so .begin1 routes it to .spec
+ sethi %hi(0x7f800000),%o1 ! overwrite ax so that the ax < 0x00800000 test after .cont0 does not fire as well
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter] ! elements left over for the next batch
+
+ ba .cont0
+ or %g0,1,counter ! (delay slot) this batch now ends just before the offending element
+
+ .align 16
+.update1: ! as .update0, but for the ax < 0x00800000 test
+ cmp counter,1
+ ble .cont1
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ clr %o1 ! ax = 0 - NOTE(review): presumably keeps the discarded computation for this slot on a harmless value, confirm
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont1
+ or %g0,1,counter
+
+ .align 16
+.update2: ! same pattern as .update0, look-ahead distance 2
+ cmp counter,2
+ ble .cont2
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont2
+ or %g0,2,counter
+
+ .align 16
+.update3: ! same pattern as .update1, look-ahead distance 2
+ cmp counter,2
+ ble .cont3
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont3
+ or %g0,2,counter
+
+ .align 16
+.update4: ! same pattern as .update0, look-ahead distance 3 (element address is in %o4 here)
+ cmp counter,3
+ ble .cont4
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont4
+ or %g0,3,counter
+
+ .align 16
+.update5: ! same pattern as .update1, look-ahead distance 3
+ cmp counter,3
+ ble .cont5
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ clr %l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont5
+ or %g0,3,counter
+
+ .align 16
+.update6: ! same pattern as .update0, look-ahead distance 4
+ cmp counter,4
+ ble .cont6
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont6
+ or %g0,4,counter
+
+ .align 16
+.update7: ! same pattern as .update1, look-ahead distance 4
+ cmp counter,4
+ ble .cont7
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ clr %i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont7
+ or %g0,4,counter
+
+ .align 16
+.update8: ! same pattern as .update0, look-ahead distance 5 (element address is in %o7 here)
+ cmp counter,5
+ ble .cont8
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont8
+ or %g0,5,counter
+
+ .align 16
+.update9: ! same pattern as .update1, look-ahead distance 5
+ cmp counter,5
+ ble .cont9
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont9
+ or %g0,5,counter
+
+ .align 16
+.update10: ! same pattern as .update0, look-ahead distance 6
+ cmp counter,6
+ ble .cont10
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont10
+ or %g0,6,counter
+
+ .align 16
+.update11: ! same pattern as .update1, look-ahead distance 6
+ cmp counter,6
+ ble .cont11
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ clr %o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont11
+ or %g0,6,counter
+
+ .align 16
+.update12: ! .update12 to .update21 serve .main_loop, where counter has already been reduced by 5; same pattern as .update0, distance 2
+ cmp counter,2
+ ble .cont12
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont12
+ or %g0,2,counter
+
+ .align 16
+.update13: ! same pattern as .update1, distance 2
+ cmp counter,2
+ ble .cont13
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont13
+ or %g0,2,counter
+
+ .align 16
+.update14: ! same pattern as .update0, distance 3
+ cmp counter,3
+ ble .cont14
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont14
+ or %g0,3,counter
+
+ .align 16
+.update15: ! same pattern as .update1, distance 3
+ cmp counter,3
+ ble .cont15
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ clr %l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont15
+ or %g0,3,counter
+
+ .align 16
+.update16: ! same pattern as .update0, distance 4
+ cmp counter,4
+ ble .cont16
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont16
+ or %g0,4,counter
+
+ .align 16
+.update17: ! same pattern as .update1, distance 4
+ cmp counter,4
+ ble .cont17
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ clr %i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont17
+ or %g0,4,counter
+
+ .align 16
+.update18: ! same pattern as .update0, distance 5
+ cmp counter,5
+ ble .cont18
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont18
+ or %g0,5,counter
+
+ .align 16
+.update19: ! same pattern as .update1, distance 5
+ cmp counter,5
+ ble .cont19
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont19
+ or %g0,5,counter
+
+ .align 16
+.update20: ! same pattern as .update0, distance 6
+ cmp counter,6
+ ble .cont20
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont20
+ or %g0,6,counter
+
+ .align 16
+.update21: ! same pattern as .update1, distance 6
+ cmp counter,6
+ ble .cont21
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ clr %o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont21
+ or %g0,6,counter
+
+.exit:
+ ret ! return; the restore in the delay slot undoes the save from the prologue
+ restore
+ SET_SIZE(__vsqrtf_ultra3)
+
diff --git a/usr/src/libm/src/mvec/vlog_.c b/usr/src/libm/src/mvec/vlog_.c
new file mode 100644
index 0000000..5370857
--- /dev/null
+++ b/usr/src/libm/src/mvec/vlog_.c
@@ -0,0 +1,157 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vlog_.c 1.7 06/01/31 SMI"
+
+/* generic serial kernel */
+extern void __vlog( int, double *, int, double *, int );
+
+#if !defined(LIBMVEC_SO_BUILD)
+#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b)
+#define CHECK_ULTRA3
+#endif
+#endif /* !defined(LIBMVEC_SO_BUILD) */
+
+#ifdef CHECK_ULTRA3
+#include <string.h>		/* strncmp */
+/* every later use of sysinfo, including the one in the header included
+   next, is spelled _sysinfo */
+#define sysinfo _sysinfo
+#include <sys/systeminfo.h>	/* sysinfo, SI_ISALIST */
+
+#define BUFLEN 257
+
+/*
+ * Cached result of the cpu probe:
+ *	0	not probed yet
+ *	1	probed, use __vlog
+ *	3	probed, use __vlog_ultra3 (callers test bit 1)
+ */
+static int use_ultra3 = 0;
+
+extern void __vlog_ultra3( int, double *, int, double *, int );
+
+/*
+ * Return the cached probe result, running the probe on the first call.
+ * The ultra3 kernel is selected only when sysinfo(SI_ISALIST) succeeds
+ * and the returned list starts with "sparcv9+vis2".
+ */
+static int
+vlog_probe_ultra3( void )
+{
+	int u;
+	char buf[BUFLEN];
+
+	u = use_ultra3;
+	if (!u) {
+		/* use __vlog_ultra3 on Cheetah (and ???) */
+		if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12))
+			u = 3;
+		else
+			u = 1;
+		use_ultra3 = u;
+	}
+	return u;
+}
+#endif
+
+#pragma weak vlog_ = __vlog_
+
+#ifndef LIBMTSK_BASED
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vlog_).
+ * Just invoke the serial function, picking the ultra3 kernel when the
+ * cpu probe selected it.
+ */
+void
+__vlog_( int *n, double *x, int *stridex, double *y, int *stridey )
+{
+#ifdef CHECK_ULTRA3
+	if (vlog_probe_ultra3() & 2)
+		__vlog_ultra3( *n, x, *stridex, y, *stridey );
+	else
+#endif
+	__vlog( *n, x, *stridex, y, *stridey );
+}
+
+#else
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vlog_ stores them before
+ * dispatching and the m-functions read them to locate their slice.
+ */
+static double *xp, *yp;
+static int sx, sy;
+
+/*
+ * m-function for parallel vlog: run __vlog over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vlog_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	__vlog( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx,
+		yp + sy * LowerBound, sy );
+}
+
+#ifdef CHECK_ULTRA3
+/* m-function for ultra3 version of parallel vlog; same slicing as above */
+void
+__vlog_ultra3_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	__vlog_ultra3( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx,
+		yp + sy * LowerBound, sy );
+}
+#endif
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vlog_).
+ * Dispatches to the parallel runtime when that looks worthwhile and to
+ * the serial kernel otherwise; in both cases the ultra3 kernel is used
+ * when the cpu probe selected it.
+ */
+void
+__vlog_( int *n, double *x, int *stridex, double *y, int *stridey )
+{
+	struct MFunctionBlock m;
+	int i;
+#ifdef CHECK_ULTRA3
+	int u;
+
+	u = vlog_probe_ultra3();
+#endif
+
+	/* if ncpus < 2, we are already in a parallel construct, or there
+	   aren't enough vector elements to bother parallelizing, just
+	   invoke the serial function */
+	i = __mt_getncpus_();
+	if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() )
+	{
+#ifdef CHECK_ULTRA3
+		if (u & 2)
+			__vlog_ultra3( *n, x, *stridex, y, *stridey );
+		else
+#endif
+		__vlog( *n, x, *stridex, y, *stridey );
+		return;
+	}
+
+	/* should be safe, we already know we're not in a parallel region */
+	xp = x;
+	sx = *stridex;
+	yp = y;
+	sy = *stridey;
+
+#ifdef CHECK_ULTRA3
+	if (u & 2)
+		m.MFunctionPtr = &__vlog_ultra3_mfunc;
+	else
+#endif
+	m.MFunctionPtr = &__vlog_mfunc;
+	m.LowerBound = 0;
+	m.UpperBound = *n - 1;
+	m.Step = 1;
+	__mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vlogf_.c b/usr/src/libm/src/mvec/vlogf_.c
new file mode 100644
index 0000000..7761780
--- /dev/null
+++ b/usr/src/libm/src/mvec/vlogf_.c
@@ -0,0 +1,87 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vlogf_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vlogf( int, float *, int, float *, int );
+
+#pragma weak vlogf_ = __vlogf_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vlogf_ stores them before
+ * dispatching and __vlogf_mfunc reads them to locate its slice.
+ */
+static float *xp, *yp;
+static int sx, sy;
+
+/*
+ * m-function for parallel vlogf: run __vlogf over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vlogf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vlogf( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vlogf_).
+ * Hands the work to the parallel runtime when that looks worthwhile,
+ * otherwise calls the serial kernel directly.
+ */
+void
+__vlogf_( int *n, float *x, int *stridex, float *y, int *stridey )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+
+		blk.MFunctionPtr = &__vlogf_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vlogf( *n, x, *stridex, y, *stridey );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vlogf_( int *n, float *x, int *stridex, float *y, int *stridey )
+{
+	__vlogf( *n, x, *stridex, y, *stridey );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vpow_.c b/usr/src/libm/src/mvec/vpow_.c
new file mode 100644
index 0000000..40fa4ee
--- /dev/null
+++ b/usr/src/libm/src/mvec/vpow_.c
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are
subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vpow_.c 1.5 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vpow( int, double *, int, double *, int, double *, int );
+
+#pragma weak vpow_ = __vpow_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vpow_ stores them before
+ * dispatching and __vpow_mfunc reads them to locate its slice.
+ */
+static double *xp, *yp, *zp;
+static int sx, sy, sz;
+
+/*
+ * m-function for parallel vpow: run __vpow over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vpow_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vpow( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy,
+		zp + sz * LowerBound, sz );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vpow_).
+ * Hands the work to the parallel runtime when that looks worthwhile,
+ * otherwise calls the serial kernel directly.
+ */
+void
+__vpow_( int *n, double *x, int *stridex, double *y, int *stridey,
+	double *z, int *stridez )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+		zp = z;
+		sz = *stridez;
+
+		blk.MFunctionPtr = &__vpow_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vpow( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vpow_( int *n, double *x, int *stridex, double *y, int *stridey,
+	double *z, int *stridez )
+{
+	__vpow( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vpowf_.c b/usr/src/libm/src/mvec/vpowf_.c
new file mode 100644
index 0000000..f26703b
--- /dev/null
+++ b/usr/src/libm/src/mvec/vpowf_.c
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vpowf_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vpowf( int, float *, int, float *, int, float *, int );
+
+#pragma weak vpowf_ = __vpowf_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vpowf_ stores them before
+ * dispatching and __vpowf_mfunc reads them to locate its slice.
+ */
+static float *xp, *yp, *zp;
+static int sx, sy, sz;
+
+/*
+ * m-function for parallel vpowf: run __vpowf over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vpowf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vpowf( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy,
+		zp + sz * LowerBound, sz );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vpowf_).
+ * Hands the work to the parallel runtime when that looks worthwhile,
+ * otherwise calls the serial kernel directly.
+ */
+void
+__vpowf_( int *n, float *x, int *stridex, float *y, int *stridey,
+	float *z, int *stridez )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+		zp = z;
+		sz = *stridez;
+
+		blk.MFunctionPtr = &__vpowf_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vpowf( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vpowf_( int *n, float *x, int *stridex, float *y, int *stridey,
+	float *z, int *stridez )
+{
+	__vpowf( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vrhypot_.c b/usr/src/libm/src/mvec/vrhypot_.c
new file mode 100644
index 0000000..c01d141
--- /dev/null
+++ b/usr/src/libm/src/mvec/vrhypot_.c
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vrhypot_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vrhypot( int, double *, int, double *, int, double *, int );
+
+#pragma weak vrhypot_ = __vrhypot_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vrhypot_ stores them
+ * before dispatching and __vrhypot_mfunc reads them to locate its slice.
+ */
+static double *xp, *yp, *zp;
+static int sx, sy, sz;
+
+/*
+ * m-function for parallel vrhypot: run __vrhypot over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vrhypot_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vrhypot( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy,
+		zp + sz * LowerBound, sz );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol
+ * vrhypot_).  Hands the work to the parallel runtime when that looks
+ * worthwhile, otherwise calls the serial kernel directly.
+ */
+void
+__vrhypot_( int *n, double *x, int *stridex, double *y, int *stridey,
+	double *z, int *stridez )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+		zp = z;
+		sz = *stridez;
+
+		blk.MFunctionPtr = &__vrhypot_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vrhypot( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vrhypot_( int *n, double *x, int *stridex, double *y, int *stridey,
+	double *z, int *stridez )
+{
+	__vrhypot( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vrhypotf_.c b/usr/src/libm/src/mvec/vrhypotf_.c
new file mode 100644
index 0000000..5af9ad6
--- /dev/null
+++ b/usr/src/libm/src/mvec/vrhypotf_.c
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vrhypotf_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vrhypotf( int, float *, int, float *, int, float *, int );
+
+#pragma weak vrhypotf_ = __vrhypotf_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vrhypotf_ stores them
+ * before dispatching and __vrhypotf_mfunc reads them to locate its slice.
+ */
+static float *xp, *yp, *zp;
+static int sx, sy, sz;
+
+/*
+ * m-function for parallel vrhypotf: run __vrhypotf over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vrhypotf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vrhypotf( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy,
+		zp + sz * LowerBound, sz );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol
+ * vrhypotf_).  Hands the work to the parallel runtime when that looks
+ * worthwhile, otherwise calls the serial kernel directly.
+ */
+void
+__vrhypotf_( int *n, float *x, int *stridex, float *y, int *stridey,
+	float *z, int *stridez )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+		zp = z;
+		sz = *stridez;
+
+		blk.MFunctionPtr = &__vrhypotf_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vrhypotf( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vrhypotf_( int *n, float *x, int *stridex, float *y, int *stridey,
+	float *z, int *stridez )
+{
+	__vrhypotf( *n, x, *stridex, y, *stridey, z, *stridez );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vrsqrt_.c b/usr/src/libm/src/mvec/vrsqrt_.c
new file mode 100644
index 0000000..99d536f
--- /dev/null
+++ b/usr/src/libm/src/mvec/vrsqrt_.c
@@ -0,0 +1,88 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vrsqrt_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vrsqrt( int, double *, int, double *, int );
+
+#pragma weak vrsqrt_ = __vrsqrt_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vrsqrt_ stores them
+ * before dispatching and __vrsqrt_mfunc reads them to locate its slice.
+ */
+static double *xp, *yp;
+static int sx, sy;
+
+/*
+ * m-function for parallel vrsqrt: run __vrsqrt over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vrsqrt_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vrsqrt( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol
+ * vrsqrt_).  Hands the work to the parallel runtime when that looks
+ * worthwhile, otherwise calls the serial kernel directly.
+ */
+void
+__vrsqrt_( int *n, double *x, int *stridex, double *y, int *stridey )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+
+		blk.MFunctionPtr = &__vrsqrt_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vrsqrt( *n, x, *stridex, y, *stridey );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vrsqrt_( int *n, double *x, int *stridex, double *y, int *stridey )
+{
+	__vrsqrt( *n, x, *stridex, y, *stridey );
+}
+
+#endif
+
diff --git a/usr/src/libm/src/mvec/vrsqrtf_.c b/usr/src/libm/src/mvec/vrsqrtf_.c
new file mode 100644
index 0000000..f0d5d84
--- /dev/null
+++ b/usr/src/libm/src/mvec/vrsqrtf_.c
@@ -0,0 +1,88 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vrsqrtf_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vrsqrtf( int, float *, int, float *, int );
+
+#pragma weak vrsqrtf_ = __vrsqrtf_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vrsqrtf_ stores them
+ * before dispatching and __vrsqrtf_mfunc reads them to locate its slice.
+ */
+static float *xp, *yp;
+static int sx, sy;
+
+/*
+ * m-function for parallel vrsqrtf: run __vrsqrtf over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vrsqrtf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vrsqrtf( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol
+ * vrsqrtf_).  Hands the work to the parallel runtime when that looks
+ * worthwhile, otherwise calls the serial kernel directly.
+ */
+void
+__vrsqrtf_( int *n, float *x, int *stridex, float *y, int *stridey )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+
+		blk.MFunctionPtr = &__vrsqrtf_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vrsqrtf( *n, x, *stridex, y, *stridey );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vrsqrtf_( int *n, float *x, int *stridex, float *y, int *stridey )
+{
+	__vrsqrtf( *n, x, *stridex, y, *stridey );
+}
+
+#endif
+
diff --git a/usr/src/libm/src/mvec/vsin_.c b/usr/src/libm/src/mvec/vsin_.c
new file mode 100644
index 0000000..542cfb1
--- /dev/null
+++ b/usr/src/libm/src/mvec/vsin_.c
@@ -0,0 +1,157 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vsin_.c 1.8 06/01/31 SMI"
+
+/* generic serial kernel */
+extern void __vsin( int, double *, int, double *, int );
+
+#if !defined(LIBMVEC_SO_BUILD)
+#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b)
+#define CHECK_ULTRA3
+#endif
+#endif /* !defined(LIBMVEC_SO_BUILD) */
+
+#ifdef CHECK_ULTRA3
+#include <string.h>		/* strncmp */
+/* every later use of sysinfo, including the one in the header included
+   next, is spelled _sysinfo */
+#define sysinfo _sysinfo
+#include <sys/systeminfo.h>	/* sysinfo, SI_ISALIST */
+
+#define BUFLEN 257
+
+/*
+ * Cached result of the cpu probe:
+ *	0	not probed yet
+ *	1	probed, use __vsin
+ *	3	probed, use __vsin_ultra3 (callers test bit 1)
+ */
+static int use_ultra3 = 0;
+
+extern void __vsin_ultra3( int, double *, int, double *, int );
+
+/*
+ * Return the cached probe result, running the probe on the first call.
+ * The ultra3 kernel is selected only when sysinfo(SI_ISALIST) succeeds
+ * and the returned list starts with "sparcv9+vis2".
+ */
+static int
+vsin_probe_ultra3( void )
+{
+	int u;
+	char buf[BUFLEN];
+
+	u = use_ultra3;
+	if (!u) {
+		/* use __vsin_ultra3 on Cheetah (and ???) */
+		if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12))
+			u = 3;
+		else
+			u = 1;
+		use_ultra3 = u;
+	}
+	return u;
+}
+#endif
+
+#pragma weak vsin_ = __vsin_
+
+#ifndef LIBMTSK_BASED
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vsin_).
+ * Just invoke the serial function, picking the ultra3 kernel when the
+ * cpu probe selected it.
+ */
+void
+__vsin_( int *n, double *x, int *stridex, double *y, int *stridey )
+{
+#ifdef CHECK_ULTRA3
+	if (vsin_probe_ultra3() & 2)
+		__vsin_ultra3( *n, x, *stridex, y, *stridey );
+	else
+#endif
+	__vsin( *n, x, *stridex, y, *stridey );
+}
+
+#else
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vsin_ stores them before
+ * dispatching and the m-functions read them to locate their slice.
+ */
+static double *xp, *yp;
+static int sx, sy;
+
+/*
+ * m-function for parallel vsin: run __vsin over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vsin_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	__vsin( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx,
+		yp + sy * LowerBound, sy );
+}
+
+#ifdef CHECK_ULTRA3
+/* m-function for ultra3 version of parallel vsin; same slicing as above */
+void
+__vsin_ultra3_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	__vsin_ultra3( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx,
+		yp + sy * LowerBound, sy );
+}
+#endif
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vsin_).
+ * Dispatches to the parallel runtime when that looks worthwhile and to
+ * the serial kernel otherwise; in both cases the ultra3 kernel is used
+ * when the cpu probe selected it.
+ */
+void
+__vsin_( int *n, double *x, int *stridex, double *y, int *stridey )
+{
+	struct MFunctionBlock m;
+	int i;
+#ifdef CHECK_ULTRA3
+	int u;
+
+	u = vsin_probe_ultra3();
+#endif
+
+	/* if ncpus < 2, we are already in a parallel construct, or there
+	   aren't enough vector elements to bother parallelizing, just
+	   invoke the serial function */
+	i = __mt_getncpus_();
+	if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() )
+	{
+#ifdef CHECK_ULTRA3
+		if (u & 2)
+			__vsin_ultra3( *n, x, *stridex, y, *stridey );
+		else
+#endif
+		__vsin( *n, x, *stridex, y, *stridey );
+		return;
+	}
+
+	/* should be safe, we already know we're not in a parallel region */
+	xp = x;
+	sx = *stridex;
+	yp = y;
+	sy = *stridey;
+
+#ifdef CHECK_ULTRA3
+	if (u & 2)
+		m.MFunctionPtr = &__vsin_ultra3_mfunc;
+	else
+#endif
+	m.MFunctionPtr = &__vsin_mfunc;
+	m.LowerBound = 0;
+	m.UpperBound = *n - 1;
+	m.Step = 1;
+	__mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vsincos_.c b/usr/src/libm/src/mvec/vsincos_.c
new file mode 100644
index 0000000..d356992
--- /dev/null
+++ b/usr/src/libm/src/mvec/vsincos_.c
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vsincos_.c 1.5 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vsincos( int, double *, int, double *, int, double *, int );
+
+#pragma weak vsincos_ = __vsincos_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vsincos_ stores them
+ * before dispatching and __vsincos_mfunc reads them to locate its slice.
+ */
+static double *xp, *sp, *cp;
+static int sx, ss, sc;
+
+/*
+ * m-function for parallel vsincos: run __vsincos over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vsincos_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vsincos( count, xp + sx * LowerBound, sx, sp + ss * LowerBound, ss,
+		cp + sc * LowerBound, sc );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol
+ * vsincos_).  Hands the work to the parallel runtime when that looks
+ * worthwhile, otherwise calls the serial kernel directly.
+ */
+void
+__vsincos_( int *n, double *x, int *stridex, double *s, int *strides,
+	double *c, int *stridec )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		sp = s;
+		ss = *strides;
+		cp = c;
+		sc = *stridec;
+
+		blk.MFunctionPtr = &__vsincos_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vsincos( *n, x, *stridex, s, *strides, c, *stridec );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vsincos_( int *n, double *x, int *stridex, double *s, int *strides,
+	double *c, int *stridec )
+{
+	__vsincos( *n, x, *stridex, s, *strides, c, *stridec );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vsincosf_.c
b/usr/src/libm/src/mvec/vsincosf_.c new file mode 100644 index 0000000..9111404 --- /dev/null +++ b/usr/src/libm/src/mvec/vsincosf_.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident "@(#)vsincosf_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vsincosf( int, float *, int, float *, int, float *, int );
+
+#pragma weak vsincosf_ = __vsincosf_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vsincosf_ stores them
+ * before dispatching and __vsincosf_mfunc reads them to locate its slice.
+ */
+static float *xp, *sp, *cp;
+static int sx, ss, sc;
+
+/*
+ * m-function for parallel vsincosf: run __vsincosf over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vsincosf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vsincosf( count, xp + sx * LowerBound, sx, sp + ss * LowerBound, ss,
+		cp + sc * LowerBound, sc );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol
+ * vsincosf_).  Hands the work to the parallel runtime when that looks
+ * worthwhile, otherwise calls the serial kernel directly.
+ */
+void
+__vsincosf_( int *n, float *x, int *stridex, float *s, int *strides,
+	float *c, int *stridec )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		sp = s;
+		ss = *strides;
+		cp = c;
+		sc = *stridec;
+
+		blk.MFunctionPtr = &__vsincosf_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vsincosf( *n, x, *stridex, s, *strides, c, *stridec );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vsincosf_( int *n, float *x, int *stridex, float *s, int *strides,
+	float *c, int *stridec )
+{
+	__vsincosf( *n, x, *stridex, s, *strides, c, *stridec );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vsinf_.c b/usr/src/libm/src/mvec/vsinf_.c
new file mode 100644
index 0000000..47075c6
--- /dev/null
+++ b/usr/src/libm/src/mvec/vsinf_.c
@@ -0,0 +1,87 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)vsinf_.c 1.4 06/01/31 SMI"
+
+/* serial kernel that every entry point below forwards to */
+extern void __vsinf( int, float *, int, float *, int );
+
+#pragma weak vsinf_ = __vsinf_
+
+#ifdef LIBMTSK_BASED
+
+#include "mtsk.h"
+
+/*
+ * Arguments of the call being parallelized.  __vsinf_ stores them before
+ * dispatching and __vsinf_mfunc reads them to locate its slice.
+ */
+static float *xp, *yp;
+static int sx, sy;
+
+/*
+ * m-function for parallel vsinf: run __vsinf over elements
+ * LowerBound..UpperBound (inclusive) of the saved vectors.
+ * MFunctionBlockPtr and Step are not used.
+ */
+void
+__vsinf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound,
+	int UpperBound, int Step )
+{
+	int count = UpperBound - LowerBound + 1;
+
+	__vsinf( count, xp + sx * LowerBound, sx, yp + sy * LowerBound, sy );
+}
+
+/*
+ * Pointer-argument entry point (also exported as the weak symbol vsinf_).
+ * Hands the work to the parallel runtime when that looks worthwhile,
+ * otherwise calls the serial kernel directly.
+ */
+void
+__vsinf_( int *n, float *x, int *stridex, float *y, int *stridey )
+{
+	struct MFunctionBlock blk;
+	int ncpus;
+
+	/* go parallel only with at least two cpus, at least eight elements
+	   per cpu, and when not already in a parallel construct */
+	ncpus = __mt_getncpus_();
+	if ( ncpus >= 2 && *n >= ( ncpus << 3 ) && !__mt_inepc_() && !__mt_inapc_() )
+	{
+		/* should be safe, we already know we're not in a parallel region */
+		xp = x;
+		sx = *stridex;
+		yp = y;
+		sy = *stridey;
+
+		blk.MFunctionPtr = &__vsinf_mfunc;
+		blk.LowerBound = 0;
+		blk.UpperBound = *n - 1;
+		blk.Step = 1;
+		__mt_dopar_vfun_( blk.MFunctionPtr, blk.LowerBound, blk.UpperBound, blk.Step );
+		return;
+	}
+
+	/* not worth parallelizing: invoke the serial function */
+	__vsinf( *n, x, *stridex, y, *stridey );
+}
+
+#else
+
+/* no parallel runtime: the entry point is a plain call-through */
+void
+__vsinf_( int *n, float *x, int *stridex, float *y, int *stridey )
+{
+	__vsinf( *n, x, *stridex, y, *stridey );
+}
+
+#endif
diff --git a/usr/src/libm/src/mvec/vsqrt_.c b/usr/src/libm/src/mvec/vsqrt_.c
new file mode 100644
index 0000000..c0d6bbf
--- /dev/null
+++ b/usr/src/libm/src/mvec/vsqrt_.c
@@ -0,0 +1,87 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#pragma ident "@(#)vsqrt_.c 1.4 06/01/31 SMI" + +extern void __vsqrt( int, double *, int, double *, int ); + +#pragma weak vsqrt_ = __vsqrt_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function: n and the two strides arrive by reference and are passed on by value */ +void +__vsqrt_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + __vsqrt( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp; +static int sx, sy; /* arguments of the current call, set by __vsqrt_ and read by the m-function */ + +/* m-function for parallel vsqrt: handles elements LowerBound through UpperBound (inclusive) of the saved arrays; MFunctionBlockPtr and Step are not used */ +void +__vsqrt_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vsqrt( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +void +__vsqrt_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function ("not enough" is fewer than 8 elements per cpu) */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vsqrt( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region; these statics carry the arguments to __vsqrt_mfunc */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vsqrt_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; /* inclusive bound, matching the count computed in __vsqrt_mfunc */ + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vsqrtf_.c b/usr/src/libm/src/mvec/vsqrtf_.c new file mode 100644 index 0000000..2b88a10 --- /dev/null +++ b/usr/src/libm/src/mvec/vsqrtf_.c @@ -0,0 +1,157 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vsqrtf_.c 1.5 06/01/31 SMI" + +extern void __vsqrtf( int, float *, int, float *, int ); + +#if !defined(LIBMVEC_SO_BUILD) +#if defined(ARCH_v8plusa) || defined(ARCH_v8plusb) || defined(ARCH_v9a) || defined(ARCH_v9b) +#define CHECK_ULTRA3 +#endif +#endif /* !defined(LIBMVEC_SO_BUILD) */ + +#ifdef CHECK_ULTRA3 +#include <string.h> /* strncmp */ +#define sysinfo _sysinfo +#include <sys/systeminfo.h> /* sysinfo, SI_ISALIST */ + +#define BUFLEN 257 /* size of the buffer handed to sysinfo(SI_ISALIST) */ + +static int use_ultra3 = 0; /* 0 = not probed yet, 1 = use __vsqrtf, 3 = use __vsqrtf_ultra3 (bit 1 is the one tested) */ + +extern void __vsqrtf_ultra3( int, float *, int, float *, int ); +#endif + +#pragma weak vsqrtf_ = __vsqrtf_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function: n and the two strides arrive by reference and are passed on by value; with CHECK_ULTRA3 the isa list is probed once and cached in use_ultra3 */ +void +__vsqrtf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vsqrtf_ultra3 on Cheetah (and ???) 
*/ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } + if (u & 2) + __vsqrtf_ultra3( *n, x, *stridex, y, *stridey ); + else +#endif + __vsqrtf( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static float *xp, *yp; +static int sx, sy; /* arguments of the current call, set by __vsqrtf_ and read by the m-functions */ + +/* m-function for parallel vsqrtf: handles elements LowerBound through UpperBound (inclusive) of the saved arrays; MFunctionBlockPtr and Step are not used */ +void +__vsqrtf_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vsqrtf( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} + +#ifdef CHECK_ULTRA3 +/* m-function for ultra3 version of parallel vsqrtf: same slicing as __vsqrtf_mfunc but calls __vsqrtf_ultra3 */ +void +__vsqrtf_ultra3_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vsqrtf_ultra3( UpperBound - LowerBound + 1, xp + sx * LowerBound, sx, + yp + sy * LowerBound, sy ); +} +#endif + +void +__vsqrtf_( int *n, float *x, int *stridex, float *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; +#ifdef CHECK_ULTRA3 + int u; + char buf[BUFLEN]; + + u = use_ultra3; + if (!u) { + /* use __vsqrtf_ultra3 on Cheetah (and ???) 
*/ + if (sysinfo(SI_ISALIST, buf, BUFLEN) > 0 && !strncmp(buf, "sparcv9+vis2", 12)) + u = 3; + else + u = 1; + use_ultra3 = u; + } +#endif + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function ("not enough" is fewer than 8 elements per cpu) */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { +#ifdef CHECK_ULTRA3 + if (u & 2) + __vsqrtf_ultra3( *n, x, *stridex, y, *stridey ); + else +#endif + __vsqrtf( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region; these statics carry the arguments to the m-functions */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + +#ifdef CHECK_ULTRA3 + if (u & 2) + m.MFunctionPtr = &__vsqrtf_ultra3_mfunc; + else +#endif + m.MFunctionPtr = &__vsqrtf_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; /* inclusive bound, matching the count computed in the m-functions */ + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vz_abs_.c b/usr/src/libm/src/mvec/vz_abs_.c new file mode 100644 index 0000000..d73d883 --- /dev/null +++ b/usr/src/libm/src/mvec/vz_abs_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vz_abs_.c 1.5 06/01/31 SMI" + +extern void __vz_abs( int, double *, int, double *, int ); + +#pragma weak vz_abs_ = __vz_abs_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function: n and the two strides arrive by reference and are passed on by value */ +void +__vz_abs_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + __vz_abs( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp; +static int sx, sy; /* arguments of the current call, set by __vz_abs_ and read by the m-function */ + +/* m-function for parallel vz_abs: handles elements LowerBound through UpperBound (inclusive); both pointers advance by twice the stride per element (NOTE(review): presumably two doubles per complex element -- confirm against __vz_abs); MFunctionBlockPtr and Step are not used */ +void +__vz_abs_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vz_abs( UpperBound - LowerBound + 1, xp + ( sx << 1) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy ); +} + +void +__vz_abs_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function ("not enough" is fewer than 8 elements per cpu) */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vz_abs( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region; these statics carry the arguments to __vz_abs_mfunc */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vz_abs_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; /* inclusive bound, matching the count computed in __vz_abs_mfunc */ + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vz_exp_.c b/usr/src/libm/src/mvec/vz_exp_.c new file mode 100644 index 0000000..f741e38 --- /dev/null +++ b/usr/src/libm/src/mvec/vz_exp_.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER 
START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vz_exp_.c 1.5 06/01/31 SMI" + +extern void __vz_exp( int, double *, int, double *, int, double * ); + +#pragma weak vz_exp_ = __vz_exp_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function: n and the two strides arrive by reference and are passed on by value, tmp is passed through unchanged */ +void +__vz_exp_( int *n, double *x, int *stridex, double *y, int *stridey, + double *tmp ) +{ + __vz_exp( *n, x, *stridex, y, *stridey, tmp ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp, *tp; +static int sx, sy; /* arguments of the current call, set by __vz_exp_ and read by the m-function */ + +/* m-function for parallel vz_exp: handles elements LowerBound through UpperBound (inclusive); x and y advance by twice the stride per element while tmp advances by one double per element (NOTE(review): presumably two doubles per complex element -- confirm against __vz_exp); MFunctionBlockPtr and Step are not used */ +void +__vz_exp_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vz_exp( UpperBound - LowerBound + 1, xp + ( sx << 1 ) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy, tp + LowerBound ); +} + +void +__vz_exp_( int *n, double *x, int *stridex, double *y, int *stridey, + double *tmp ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function ("not enough" is fewer than 8 elements per cpu) */ + i = __mt_getncpus_(); + if ( 
i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vz_exp( *n, x, *stridex, y, *stridey, tmp ); + return; + } + + /* should be safe, we already know we're not in a parallel region; these statics carry the arguments to __vz_exp_mfunc */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + tp = tmp; + + m.MFunctionPtr = &__vz_exp_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; /* inclusive bound, matching the count computed in __vz_exp_mfunc */ + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vz_log_.c b/usr/src/libm/src/mvec/vz_log_.c new file mode 100644 index 0000000..e772bae --- /dev/null +++ b/usr/src/libm/src/mvec/vz_log_.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)vz_log_.c 1.5 06/01/31 SMI" + +extern void __vz_log( int, double *, int, double *, int ); + +#pragma weak vz_log_ = __vz_log_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function: n and the two strides arrive by reference and are passed on by value */ +void +__vz_log_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + __vz_log( *n, x, *stridex, y, *stridey ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp; +static int sx, sy; /* arguments of the current call, set by __vz_log_ and read by the m-function */ + +/* m-function for parallel vz_log: handles elements LowerBound through UpperBound (inclusive); both pointers advance by twice the stride per element (NOTE(review): presumably two doubles per complex element -- confirm against __vz_log); MFunctionBlockPtr and Step are not used */ +void +__vz_log_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vz_log( UpperBound - LowerBound + 1, xp + ( sx << 1) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy ); +} + +void +__vz_log_( int *n, double *x, int *stridex, double *y, int *stridey ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function ("not enough" is fewer than 8 elements per cpu) */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vz_log( *n, x, *stridex, y, *stridey ); + return; + } + + /* should be safe, we already know we're not in a parallel region; these statics carry the arguments to __vz_log_mfunc */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + + m.MFunctionPtr = &__vz_log_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; /* inclusive bound, matching the count computed in __vz_log_mfunc */ + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/mvec/vz_pow_.c b/usr/src/libm/src/mvec/vz_pow_.c new file mode 100644 index 0000000..2dd9935 --- /dev/null +++ b/usr/src/libm/src/mvec/vz_pow_.c @@ -0,0 +1,94 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)vz_pow_.c 1.5 06/01/31 SMI" + +extern void __vz_pow( int, double *, int, double *, int, double *, int, + double * ); + +#pragma weak vz_pow_ = __vz_pow_ + +#ifndef LIBMTSK_BASED + +/* just invoke the serial function: n and the three strides arrive by reference and are passed on by value, tmp is passed through unchanged */ +void +__vz_pow_( int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez, double *tmp ) +{ + __vz_pow( *n, x, *stridex, y, *stridey, z, *stridez, tmp ); +} + +#else + +#include "mtsk.h" + +static double *xp, *yp, *zp, *tp; +static int sx, sy, sz; /* arguments of the current call, set by __vz_pow_ and read by the m-function */ + +/* m-function for parallel vz_pow: handles elements LowerBound through UpperBound (inclusive); x, y and z advance by twice the stride per element while tmp advances by one double per element (NOTE(review): presumably two doubles per complex element -- confirm against __vz_pow); MFunctionBlockPtr and Step are not used */ +void +__vz_pow_mfunc( struct MFunctionBlock *MFunctionBlockPtr, int LowerBound, + int UpperBound, int Step ) +{ + __vz_pow( UpperBound - LowerBound + 1, xp + ( sx << 1 ) * LowerBound, sx, + yp + ( sy << 1 ) * LowerBound, sy, zp + ( sz << 1 ) * LowerBound, sz, + tp + LowerBound ); +} + +void +__vz_pow_( int *n, double *x, int *stridex, double *y, int *stridey, + double *z, int *stridez, double *tmp ) +{ + struct MFunctionBlock m; + int i; + + /* if ncpus < 2, we are already in a parallel construct, or there + aren't enough vector elements to bother parallelizing, just + invoke the serial function ("not enough" is fewer than 8 elements per cpu) */ + i = __mt_getncpus_(); + if ( i < 2 || *n < ( i << 3 ) || __mt_inepc_() || __mt_inapc_() ) + { + __vz_pow( *n, x, *stridex, y, *stridey, z, *stridez, tmp ); + return; + } + + /* should be safe, we already know we're not in a parallel 
region; these statics carry the arguments to __vz_pow_mfunc */ + xp = x; + sx = *stridex; + yp = y; + sy = *stridey; + zp = z; + sz = *stridez; + tp = tmp; + + m.MFunctionPtr = &__vz_pow_mfunc; + m.LowerBound = 0; + m.UpperBound = *n - 1; /* inclusive bound, matching the count computed in __vz_pow_mfunc */ + m.Step = 1; + __mt_dopar_vfun_( m.MFunctionPtr, m.LowerBound, m.UpperBound, m.Step ); +} + +#endif diff --git a/usr/src/libm/src/sparc/common/copysign.S b/usr/src/libm/src/sparc/common/copysign.S new file mode 100644 index 0000000..4b62829 --- /dev/null +++ b/usr/src/libm/src/sparc/common/copysign.S @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)copysign.S 1.7 06/01/23 SMI" + + .file "copysign.S" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(copysign,function) +#include "libm_synonyms.h" + + ENTRY(copysign) /* NOTE(review): presumably x arrives in %o0:%o1 and y in %o2:%o3 -- confirm against the calling convention */ + sethi %hi(0x80000000),%o3 /* o3 = sign-bit mask */ + andn %o0,%o3,%o0 /* clear the sign bit in %o0 */ + and %o2,%o3,%o2 /* keep only the sign bit of %o2 */ + or %o2,%o0,%o0 /* merge that sign bit into %o0 */ + std %o0,[%sp+0x48] /* store the %o0:%o1 pair to the scratch slot */ + retl + ldd [%sp+0x48],%f0 /* reload it as a double in %f0 (sits in the retl delay slot) */ + + SET_SIZE(copysign) diff --git a/usr/src/libm/src/sparc/common/fabs.S b/usr/src/libm/src/sparc/common/fabs.S new file mode 100644 index 0000000..20af1ed --- /dev/null +++ b/usr/src/libm/src/sparc/common/fabs.S @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)fabs.S 1.23 06/01/23 SMI" + + .file "fabs.S" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(fabs,function) +#include "libm_synonyms.h" + + ENTRY(fabs) /* NOTE(review): presumably x arrives in %o0:%o1 -- confirm against the calling convention */ + sethi %hi(0x80000000),%o2 /* o2 = sign-bit mask */ + andn %o0,%o2,%o0 /* clear the sign bit in %o0 */ + std %o0,[%sp+0x48] /* store the %o0:%o1 pair to the scratch slot */ + nop /* six nops separate the store from the reload; NOTE(review): the reason is not stated here, presumably to space out the store/load pair -- confirm */ + nop + nop + nop + nop + nop + retl + ldd [%sp+0x48],%f0 /* reload it as a double in %f0 (sits in the retl delay slot) */ + + SET_SIZE(fabs) diff --git a/usr/src/libm/src/sparc/common/libm.m4 b/usr/src/libm/src/sparc/common/libm.m4 new file mode 100644 index 0000000..be148dd --- /dev/null +++ b/usr/src/libm/src/sparc/common/libm.m4 @@ -0,0 +1,2635 @@ +! +! CDDL HEADER START +! +! The contents of this file are subject to the terms of the +! Common Development and Distribution License (the "License"). +! You may not use this file except in compliance with the License. +! +! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +! or http://www.opensolaris.org/os/licensing. +! See the License for the specific language governing permissions +! and limitations under the License. +! +! When distributing Covered Code, include this CDDL HEADER in each +! file and include the License file at usr/src/OPENSOLARIS.LICENSE. +! If applicable, add the following below this CDDL HEADER, with the +! fields enclosed by brackets "[]" replaced with your own identifying +! information: Portions Copyright [yyyy] [name of copyright owner] +! +! CDDL HEADER END +! +! Copyright 2006 Sun Microsystems, Inc. All rights reserved. +! Use is subject to license terms. +! +! @(#)libm.m4 1.118 06/01/31 SMI +! 
+undefine(`_C')dnl +define(`_C',`')dnl +ifdef(`ELFOBJ', `define(NAME,$1)' , `define(NAME,_$1)')dnl +ifdef(`ARCH_v7', `define(NO_MULDIV)')dnl +ifdef(`ARCH_v7', `define(NO_FSMULD)')dnl +ifdef(`ARCH_v8a', `define(NO_FSMULD)')dnl +ifdef(`ARCH_v8plusa', `define(ARCH_v8plus)')dnl +ifdef(`ARCH_v8plusb', `define(ARCH_v8plus)')dnl +ifdef(`ARCH_v8plusb', `define(ARCH_v8plusa)')dnl +dnl +ifdef(`NO_FSMULD', `dnl + .inline NAME(r_hypot_),2 + ld [%o0],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 ! o5 = 0x7fffffff (everything but the sign bit) + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 ! o3 = 0x7f800000 (bit pattern of single precision infinity) + cmp %o4,%o3 + ld [%o0],%f0 ! load result with first argument + bne 2f + nop + fabss %f0,%f0 + ld [%o1],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +2: + ld [%o1],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + bne 4f + nop + ld [%o1],%f0 ! second argument inf + fabss %f0,%f0 + ld [%o0],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +4: + fstod %f0,%f0 + ld [%o1],%f3 + fmuld %f0,%f0,%f0 + fstod %f3,%f2 + fmuld %f2,%f2,%f2 + faddd %f2,%f0,%f0 + fsqrtd %f0,%f0 + fdtos %f0,%f0 +5: + .end + + .inline NAME(__c_abs),1 + ld [%o0],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + ld [%o0],%f0 + bne 2f + nop + fabss %f0,%f0 + ld [%o0+4],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +2: + ld [%o0+4],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + bne 4f + nop + ld [%o0+4],%f0 + fabss %f0,%f0 + ld [%o0],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +! store to 8-aligned address +4: + fstod %f0,%f0 + ld [%o0+4],%f3 + fmuld %f0,%f0,%f0 + fstod %f3,%f2 + fmuld %f2,%f2,%f2 + faddd %f2,%f0,%f0 + fsqrtd %f0,%f0 + fdtos %f0,%f0 +5: + .end + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! 
NAME(__Fc_mult)(c, a, b) +! complex *c, *a, *b; +! { + .inline NAME(__Fc_mult),3 +! 21 c->real = (a->real * b->real) - (a->imag * b->imag) + + ld [%o1+4],%f0 + ld [%o2+4],%f1 + fstod %f0,%f2 ! f2 = a->imag + fstod %f1,%f4 ! f4 = b->imag + fmuld %f2,%f4,%f6 ! f6 = (a->imag * b->imag) + ld [%o1],%f0 + ld [%o2],%f1 + fstod %f0,%f8 ! f8 = a->real + fstod %f1,%f10 ! f10 = b->real + fmuld %f8,%f10,%f0 + fsubd %f0,%f6,%f6 +! 22 c->imag = (a->real * b->imag) + (a->imag * b->real) + + fmuld %f2,%f10,%f2 ! f2 = a->imag * b->real + fmuld %f8,%f4,%f4 ! f4 = a->real * b->imag + faddd %f2,%f4,%f4 + fdtos %f6,%f0 + fdtos %f4,%f1 + st %f0,[%o0] + st %f1,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_div)(c, a, b) +! complex *c, *a, *b; +! { + .inline NAME(__Fc_div),3 + ld [%o2+4],%o3 + sethi %hi(0x7fffffff),%o4 + or %o4,%lo(0x7fffffff),%o4 ! [internal] + andcc %o3,%o4,%g0 ! Z is set when imb is +0 or -0 + ld [%o2],%f6 ! f6 gets reb + bne 1f + nop + ld [%o1],%f0 ! imb is zero: divide both parts of a by reb + ld [%o2],%f1 + fdivs %f0,%f1,%f0 + st %f0,[%o0] + ld [%o2],%f4 + ld [%o1+4],%f3 + fdivs %f3,%f4,%f3 + st %f3,[%o0+4] + ba 2f + nop +1: ! [internal] + fstod %f6,%f8 ! f8/9 gets reb + ld [%o2+4],%f19 ! f19 gets imb + ld [%o1+4],%f13 ! f13 gets ima + fstod %f13,%f24 ! f24/5 gets ima + fstod %f19,%f10 ! f10/11 gets imb + fmuld %f8,%f8,%f16 ! f16/17 gets reb**2 + ld [%o1],%f19 ! f19 gets rea + fmuld %f24,%f10,%f0 ! f0/f1 gets ima*imb + fstod %f19,%f26 ! f26/7 gets rea + fmuld %f10,%f10,%f12 ! f12/13 gets imb**2 + faddd %f12,%f16,%f12 ! f12/13 gets reb**2+imb**2 + fmuld %f26,%f8,%f2 ! f2/3 gets rea*reb + faddd %f2,%f0,%f2 ! f2/3 gets rea*reb+ima*imb + fdivd %f2,%f12,%f2 ! f2/3 gets rec + fmuld %f24,%f8,%f24 ! f24/5 gets ima*reb + fmuld %f26,%f10,%f10 ! f10/11 gets rea*imb + fsubd %f24,%f10,%f10 ! f10/11 gets ima*reb-rea*imb + fdtos %f2,%f7 ! f7 gets rec + fdivd %f10,%f12,%f12 ! f12 gets imc + fdtos %f12,%f15 ! f15 gets imc + st %f7,[%o0] + st %f15,[%o0+4] +2: + .end +! 
} + +')dnl +ifdef(`NO_FSMULD', `', `dnl +dnl! v8 (and up) implementation specific inline expansion templates +dnl! [efficient implementation of fsmuld assumed] +dnl! + .inline NAME(r_hypot_),2 + ld [%o0],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 ! o5 = 0x7fffffff (everything but the sign bit) + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 ! o3 = 0x7f800000 (bit pattern of single precision infinity) + cmp %o4,%o3 + ld [%o0],%f0 ! load result with first argument + bne 2f + nop + fabss %f0,%f0 + ld [%o1],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +2: + ld [%o1],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + bne 4f + nop + ld [%o1],%f0 ! second argument inf + fabss %f0,%f0 + ld [%o0],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +4: + ld [%o1],%f3 + fsmuld %f0,%f0,%f0 + fsmuld %f3,%f3,%f2 + faddd %f2,%f0,%f0 + fsqrtd %f0,%f0 + fdtos %f0,%f0 +5: + .end + + .inline NAME(__c_abs),1 + ld [%o0],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + ld [%o0],%f0 + bne 2f + nop + fabss %f0,%f0 + ld [%o0+4],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +2: + ld [%o0+4],%o4 + sethi 0x1fffff,%o5 + or %o5,1023,%o5 + and %o4,%o5,%o4 + sethi 0x1fe000,%o3 + cmp %o4,%o3 + bne 4f + nop + ld [%o0+4],%f0 + fabss %f0,%f0 + ld [%o0],%f1 + .volatile + fcmps %f0,%f1 ! generate invalid for Snan + .nonvolatile + nop + fba 5f + nop +! store to 8-aligned address +4: + ld [%o0+4],%f3 + fsmuld %f0,%f0,%f0 + fsmuld %f3,%f3,%f2 + faddd %f2,%f0,%f0 + fsqrtd %f0,%f0 + fdtos %f0,%f0 +5: + .end + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_mult)(c, a, b) +! complex *c, *a, *b; +! { + .inline NAME(__Fc_mult),3 +! 21 c->real = (a->real * b->real) - (a->imag * b->imag) + ld [%o1+4],%f0 ! f0 = a->imag + ld [%o2+4],%f1 ! f1 = b->imag + ld [%o1],%f2 ! f2 = a->real + fsmuld %f0,%f1,%f4 ! f4 = (a->imag * b->imag) + ld [%o2],%f3 ! 
f3 = b->real + fsmuld %f2,%f1,%f6 ! f6 = a->real * b->imag + fsmuld %f2,%f3,%f8 ! f8 = a->real * b->real + fsmuld %f0,%f3,%f10 ! f10 = a->imag * b->real + fsubd %f8,%f4,%f0 ! f0 = ar*br - ai*bi + faddd %f6,%f10,%f2 ! f2 = ai*br + ar*bi + fdtos %f0,%f4 + fdtos %f2,%f6 + st %f4,[%o0] + st %f6,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_div)(c, a, b) +! complex *c, *a, *b; +! { + .inline NAME(__Fc_div),3 + ld [%o2+4],%o3 + sethi %hi(0x7fffffff),%o4 + or %o4,%lo(0x7fffffff),%o4 ! [internal] + andcc %o3,%o4,%g0 ! Z is set when imb is +0 or -0 + ld [%o2],%f6 ! f6 gets reb + bne 1f + nop + ld [%o1],%f0 ! imb is zero: divide both parts of a by reb + ld [%o2],%f1 + fdivs %f0,%f1,%f0 + st %f0,[%o0] + ld [%o1+4],%f3 + fdivs %f3,%f1,%f3 + st %f3,[%o0+4] + ba 2f + nop +1: ! [internal] + sethi %hi(0x3ff00000),%o4 _C(the cg inliner circa Lionel FCS) + or %g0,0,%o5 _C([aka WS6U1 but not before] maps) + std %o4,[%sp+0x48] _C(the idiom to an LDDF of 1.0) + ldd [%sp+0x48],%f8 _C(from a constant pool) + ld [%o2+4],%f10 ! f10 gets imb + fsmuld %f6,%f6,%f16 ! f16/17 gets reb**2 + ld [%o1+4],%f4 ! f4 gets ima + fsmuld %f10,%f10,%f12 ! f12/13 gets imb**2 + ld [%o1],%f19 ! f19 gets rea + fsmuld %f4,%f10,%f0 ! f0/f1 gets ima*imb + fsmuld %f19,%f6,%f2 ! f2/3 gets rea*reb + faddd %f12,%f16,%f12 ! f12/13 gets reb**2+imb**2 + fdivd %f8,%f12,%f12 ! f12/13 gets 1/(reb**2+imb**2) + faddd %f2,%f0,%f2 ! f2/3 gets rea*reb+ima*imb + fsmuld %f4,%f6,%f24 ! f24/5 gets ima*reb + fmuld %f2,%f12,%f2 ! f2/3 gets rec + fsmuld %f19,%f10,%f10 ! f10/11 gets rea*imb + fsubd %f24,%f10,%f10 ! f10/11 gets ima*reb-rea*imb + fmuld %f10,%f12,%f12 ! f12 gets imc + fdtos %f2,%f7 ! f7 gets rec + fdtos %f12,%f15 ! f15 gets imc + st %f7,[%o0] + st %f15,[%o0+4] +2: + .end +! } + +')dnl +ifdef(`NO_MULDIV', `', `dnl +dnl! v8a (and up) implementation specific inline expansion templates +dnl! 
+ .inline .mul,2 + .volatile + smul %o0,%o1,%o0 + rd %y,%o1 + sra %o0,31,%o2 + cmp %o1,%o2 _C(return with Z set if %y == (%o0 >> 31)) + .nonvolatile + .end + + .inline .umul,2 + .volatile + umul %o0,%o1,%o0 + rd %y,%o1 + tst %o1 _C(return with Z set if high order bits are 0) + .nonvolatile + .end + + .inline .div,2 + sra %o0,31,%o4 ! extend sign + .volatile + wr %o4,%g0,%y + cmp %o1,0xffffffff ! is divisor -1? + be,a 1f ! if yes + .volatile + subcc %g0,%o0,%o0 ! simply negate dividend + nop ! RT620 FABs A.0/A.1 + sdiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile +1: + .end + + .inline .udiv,2 + .volatile + wr %g0,%g0,%y ! write zero to y before the unsigned divide + nop + nop + nop + udiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile + .end + + .inline .rem,2 + sra %o0,31,%o4 ! extend sign + .volatile + wr %o4,%g0,%y + cmp %o1,0xffffffff ! is divisor -1? + be,a 1f ! if yes + .volatile + or %g0,%g0,%o0 ! simply return 0 + nop ! RT620 FABs A.0/A.1 + sdiv %o0,%o1,%o2 ! o2 contains quotient a/b + .nonvolatile + smul %o2,%o1,%o4 ! o4 contains q*b + sub %o0,%o4,%o0 ! o0 gets a-q*b +1: + .end + + .inline .urem,2 + .volatile + wr %g0,%g0,%y ! write zero to y before the unsigned divide + nop + nop + nop + udiv %o0,%o1,%o2 ! o2 contains quotient a/b + .nonvolatile + umul %o2,%o1,%o4 ! o4 contains q*b + sub %o0,%o4,%o0 ! o0 gets a-q*b + .end + + .inline .div_o3,2 + sra %o0,31,%o4 ! extend sign + .volatile + wr %o4,%g0,%y + cmp %o1,0xffffffff ! is divisor -1? + be,a 1f ! if yes + .volatile + subcc %g0,%o0,%o0 ! simply negate dividend + mov %o0,%o3 ! o3 gets remainder + sdiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile + smul %o0,%o1,%o4 ! o4 contains q*b + ba 2f + sub %o3,%o4,%o3 ! o3 gets a-q*b +1: + mov %g0,%o3 ! remainder is 0 +2: + .end + + .inline .udiv_o3,2 + .volatile + wr %g0,%g0,%y + mov %o0,%o3 ! o3 gets remainder + nop + nop + udiv %o0,%o1,%o0 ! o0 contains quotient a/b + .nonvolatile + umul %o0,%o1,%o4 ! o4 contains q*b + sub %o3,%o4,%o3 ! o3 gets a-q*b + .end + +')dnl +dnl! 
v7 (and up) implementation specific inline expansion templates +dnl! [efficient implementation of fsqrts/fsqrtd assumed] +dnl! +ifdef(`LOCALLIBM', `dnl + .inline NAME(__ieee754_sqrt),2 + std %o0,[%sp+0x48] ! store to 8-aligned address + ldd [%sp+0x48],%f0 + fsqrtd %f0,%f0 + .end + + .inline NAME(__inline_sqrtf),1 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + fsqrts %f0,%f0 + .end + + .inline NAME(__inline_sqrt),2 + std %o0,[%sp+0x48] ! store to 8-aligned address + ldd [%sp+0x48],%f0 + fsqrtd %f0,%f0 + .end + +')dnl + .inline NAME(sqrtf),1 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + fsqrts %f0,%f0 + .end + + .inline NAME(sqrt),2 + std %o0,[%sp+0x48] ! store to 8-aligned address + ldd [%sp+0x48],%f0 + fsqrtd %f0,%f0 + .end + + .inline NAME(r_sqrt_),1 + ld [%o0],%f0 + fsqrts %f0,%f0 + .end + + .inline NAME(d_sqrt_),1 + ld [%o0],%f0 ! the double is fetched with two word loads; NOTE(review): presumably because the operand may be only 4-byte aligned, confirm + ld [%o0+4],%f1 + fsqrtd %f0,%f0 + .end + +dnl! +dnl! generic SPARC inline templates +dnl! +ifdef(`ARCH_v8plusb', `dnl + .inline NAME(ceil),2 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + sllx %o0,32,%o0 + or %o0,%o1,%o0 + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 _C(x) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x48] + ldd [%sp+0x48],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f2 _C(fiddle := |x| < 2^52 ? 2^52 : 0) + fand %f0,%f8,%f8 _C(copysign(0, x)) + for %f2,%f8,%f2 _C(copysign(fiddle, x)) + siam 6 + faddd %f0,%f2,%f0 _C(x + copysign(fiddle, x) rnd toward +Inf) + siam 4 + fsubd %f0,%f2,%f0 _C(" - copysign(fiddle, x) rnd to nearest) + siam 0 + for %f0,%f8,%f0 _C(in case previous fsubd gave +0) + .end + + .inline NAME(floor),2 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + sllx %o0,32,%o0 + or %o0,%o1,%o0 + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 _C(x) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x48] + ldd [%sp+0x48],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f2 _C(fiddle := |x| < 2^52 ? 
2^52 : 0) + fand %f0,%f8,%f8 _C(copysign(0, x)) + for %f2,%f8,%f2 _C(copysign(fiddle, x)) + siam 7 + faddd %f0,%f2,%f0 _C(x + copysign(fiddle, x) rounded down) + siam 4 + fsubd %f0,%f2,%f0 _C(" - copysign(fiddle, x) rnd to nearest) + siam 0 + for %f0,%f8,%f0 _C(in case previous fsubd gave +0) + .end + +',`dnl +dnl! +dnl! [%sp+0x48] x -> scratch +dnl! [%sp+0x50] two52 -> zero -> one +dnl! + .inline NAME(ceil),2 + std %o0,[%sp+0x48] + sethi %hi(0x80000000),%o5 _C(o5 = sign bit mask) + andn %o0,%o5,%o2 + sethi %hi(0x43300000),%o3 + st %g0,[%sp+0x54] _C(clear memory for two52, zero and one) + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] _C([%sp+0x50] = one) + ldd [%sp+0x48],%f0 + ldd [%sp+0x50],%f2 + fmuld %f0,%f2,%f0 _C(return x * one if |x| >= 2^52) + ba 4f + nop +1: + tst %o0 + st %o3,[%sp+0x50] _C([%sp+0x50] = two52) + ldd [%sp+0x50],%f2 _C(L = copysign(two52, x)) + bge 2f + nop + fnegs %f2,%f2 _C() ! reached only when the sign bit of x is set: L = -two52 +2: + ldd [%sp+0x48],%f4 + faddd %f4,%f2,%f0 _C((x + L) rounded) + fsubd %f0,%f2,%f0 _C(t = (x + L) rounded - L) + fcmpd %f0,%f4 + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] _C([%sp+0x50] = one) + and %o0,%o5,%o4 _C(o4 = sign bit of x) + fbge 3f + nop + ldd [%sp+0x50],%f4 + faddd %f0,%f4,%f0 _C(t = t + 1 if t < x) +3: + st %f0,[%sp+0x48] + ld [%sp+0x48],%o3 + andn %o3,%o5,%o3 + or %o4,%o3,%o3 + st %o3,[%sp+0x48] + ld [%sp+0x48],%f0 _C(return copysign(t, x)) +4: + .end + +dnl! +dnl! [%sp+0x48] x -> scratch +dnl! [%sp+0x50] two52 -> zero -> one +dnl! 
+dnl! floor(x), x passed in %o0:%o1, result left in %f0:%f1.
+dnl! The high word with its sign bit cleared is compared with 0x43300000:
+dnl! anything not below it (|x| >= 2^52, which also covers Inf and NaN)
+dnl! is returned as x * one.  Otherwise x is rounded to an integer by
+dnl! adding and then subtracting L = copysign(two52, x), one is subtracted
+dnl! when that result is greater than x, and the sign bit of x is put
+dnl! back on the high word of the result.
+	.inline	NAME(floor),2
+	std	%o0,[%sp+0x48]
+	sethi	%hi(0x80000000),%o5	_C(o5 = sign bit mask)
+	andn	%o0,%o5,%o2
+	sethi	%hi(0x43300000),%o3
+	st	%g0,[%sp+0x54]		_C(clear memory for two52, zero and one)
+	subcc	%o2,%o3,%g0
+	bl	1f
+	nop
+	sethi	%hi(0x3ff00000),%o2
+	st	%o2,[%sp+0x50]		_C([%sp+0x50] = one)
+	ldd	[%sp+0x48],%f0
+	ldd	[%sp+0x50],%f2
+	fmuld	%f0,%f2,%f0		_C(return x * one if |x| >= 2^52)
+	ba	4f
+	nop
+1:
+dnl! integer condition codes set here select the sign of L below
+	tst	%o0
+	st	%o3,[%sp+0x50]		_C([%sp+0x50] = two52)
+	ldd	[%sp+0x50],%f2		_C(L = copysign(two52, x))
+	bge	2f
+	nop
+	fnegs	%f2,%f2			_C()
+2:
+	ldd	[%sp+0x48],%f4
+	faddd	%f4,%f2,%f0		_C((x + L) rounded)
+	fsubd	%f0,%f2,%f0		_C(t = (x + L) rounded - L)
+	fcmpd	%f0,%f4
+dnl! integer instructions separate the compare from the fble; %f4 is
+dnl! reloaded with one only after the compare has read x from it
+	sethi	%hi(0x3ff00000),%o2
+	st	%o2,[%sp+0x50]		_C([%sp+0x50] = one)
+	ldd	[%sp+0x50],%f4
+	and	%o0,%o5,%o4		_C(o4 = sign bit of x)
+	fble	3f
+	nop
+	fsubd	%f0,%f4,%f0		_C(t = t - 1 if t > x)
+3:
+dnl! only the high word (%f0) goes through memory to have its sign replaced
+	st	%f0,[%sp+0x48]
+	ld	[%sp+0x48],%o3
+	andn	%o3,%o5,%o3
+	or	%o4,%o3,%o3
+	st	%o3,[%sp+0x48]
+	ld	[%sp+0x48],%f0		_C(return copysign(t, x))
+4:
+	.end
+
+')dnl
+dnl!
+dnl! [%sp+0x48] x -> scaled x
+dnl! [%sp+0x50] two54
+dnl!
+dnl! ilogb(x), x passed in %o0:%o1, integer result in %o0.
+dnl! The exponent field of the high word is isolated with 0x7ff00000.
+dnl! When it is zero, x is multiplied by two54 and the exponent field of
+dnl! the product is examined: still zero gives 0x80000001, otherwise
+dnl! the field minus 0x435 is returned.
+	.inline	NAME(ilogb),2
+	sethi	%hi(0x7ff00000),%o4
+	andcc	%o4,%o0,%o2
+	bne	1f
+	nop
+	sethi	%hi(0x43500000),%o3
+	std	%o0,[%sp+0x48]
+	st	%o3,[%sp+0x50]
+	st	%g0,[%sp+0x54]
+	ldd	[%sp+0x48],%f0
+	ldd	[%sp+0x50],%f2
+	fmuld	%f0,%f2,%f0		_C(scale x up by two54)
+	sethi	%hi(0x80000001),%o0	_C(return - (2^31 - 1) if iszero(x))
+	or	%o0,%lo(0x80000001),%o0	_C()
+	st	%f0,[%sp+0x48]
+	ld	[%sp+0x48],%o2
+	andcc	%o2,%o4,%o2
+	srl	%o2,20,%o2
+	be	2f
+	nop
+	sub	%o2,0x435,%o0
+	ba	2f
+	nop
+1:
+dnl! exponent field is nonzero: all ones returns 0x7fffffff, anything
+dnl! else returns the field minus 0x3ff
+	subcc	%o4,%o2,%g0
+	srl	%o2,20,%o3
+	bne	0f
+	nop
+	sethi	%hi(0x7fffffff),%o0	_C(return 2^31 - 1 if !finite(x))
+	or	%o0,%lo(0x7fffffff),%o0	_C()
+	ba	2f
+	nop
+0:
+	sub	%o3,0x3ff,%o0
+2:
+	.end
+
+ifdef(`ARCH_v8plusa', `dnl
+dnl! rint for ARCH_v8plusa: branch-free; fiddle is 2^52 when |x| < 2^52
+dnl! and 0 otherwise, so the add/subtract pair leaves large values alone
+	.inline	NAME(rint),2
+	fzero	%f4			_C(0)
+	fnegd	%f4,%f8			_C(-0)
+	sllx	%o0,32,%o0
+	or	%o0,%o1,%o0
+	stx	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f0		_C(x)
+	fabsd	%f0,%f6			_C(|x|)
+	sethi	%hi(0x43300000),%o2
+	sllx	%o2,32,%o2
+	stx	%o2,[%sp+0x48]
+	ldd	[%sp+0x48],%f2		_C(2^52)
+	fcmpd	%fcc0,%f6,%f2
+	fmovduge %fcc0,%f4,%f2		_C(fiddle := |x| < 2^52 ? 2^52 : 0)
+	fand	%f0,%f8,%f8		_C(copysign(0, x))
+	for	%f2,%f8,%f2		_C(copysign(fiddle, x))
+	faddd	%f0,%f2,%f0		_C(x + copysign(fiddle, x))
+	fsubd	%f0,%f2,%f0		_C(" - copysign(fiddle, x))
+	fabsd	%f0,%f0
+	for	%f0,%f8,%f0		_C(in case previous fsubd gave wrong sign of 0)
+	.end
+
+dnl! rintf for ARCH_v8plusa: single precision form of the same sequence,
+dnl! with 2^23 as the threshold
+	.inline	NAME(rintf),1
+	fzeros	%f4			_C(0)
+	fnegs	%f4,%f8			_C(-0)
+	st	%o0,[%sp+0x48]
+	ld	[%sp+0x48],%f0		_C(x)
+	fabss	%f0,%f6			_C(|x|)
+	sethi	%hi(0x4b000000),%o2
+	st	%o2,[%sp+0x48]
+	ld	[%sp+0x48],%f2		_C(2^23)
+	fcmps	%fcc0,%f6,%f2
+	fmovsuge %fcc0,%f4,%f2		_C(fiddle := |x| < 2^23 ? 2^23 : 0)
+	fands	%f0,%f8,%f8		_C(copysignf(0, x))
+	fors	%f2,%f8,%f2		_C(copysignf(fiddle, x))
+	fadds	%f0,%f2,%f0		_C(x + copysignf(fiddle, x))
+	fsubs	%f0,%f2,%f0		_C(" - copysignf(fiddle, x))
+	fabss	%f0,%f0
+	fors	%f0,%f8,%f0		_C(in case previous fsubs gave wrong sign of 0)
+	.end
+
+',`dnl
+dnl!
+dnl! [%sp+0x48] x -> two52
+dnl! [%sp+0x50] zero
+dnl!
+ .inline NAME(rint),2 + std %o0,[%sp+0x48] + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o2 + ldd [%sp+0x48],%f0 + sethi %hi(0x43300000),%o3 + st %g0,[%sp+0x50] _C([%sp+0x50] = zero) + st %g0,[%sp+0x54] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3ff00000),%o2 + st %o2,[%sp+0x50] _C([%sp+0x50] = one) + ldd [%sp+0x50],%f2 + fmuld %f0,%f2,%f0 _C(return x * one (raise flag if SNaN)) + ba 3f + nop +1: + tst %o0 + st %o3,[%sp+0x48] _C([%sp+0x48] = two52) + st %g0,[%sp+0x4c] + ldd [%sp+0x48],%f2 _C(L = copysign(two52, x)) + bge 2f + nop + fnegs %f2,%f2 +2: + faddd %f0,%f2,%f0 _C((x + L) rounded) + fcmpd %f0,%f2 +ifdef(`ARCH_v7', `dnl + nop +')dnl + fbne 0f + nop + ldd [%sp+0x50],%f0 _C(return copysign(zero, x)) + bge 3f + nop + fnegs %f0,%f0 + ba 3f + nop +0: + fsubd %f0,%f2,%f0 _C(return (x + L) rounded - L) +3: + .end + + .inline NAME(rintf),1 + st %o0,[%sp+0x48] + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o2 + ld [%sp+0x48],%f0 + sethi %hi(0x4b000000),%o3 + st %g0,[%sp+0x50] _C([%sp+0x50] = zero) + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3f800000),%o2 + st %o2,[%sp+0x50] _C([%sp+0x50] = one) + ld [%sp+0x50],%f2 + fmuls %f0,%f2,%f0 _C(return x * one (raise flag if SNaN)) + ba 3f + nop +1: + tst %o0 + st %o3,[%sp+0x48] _C([%sp+0x48] = two23) + ld [%sp+0x48],%f2 _C(L = copysignf(two23, x)) + bge 2f + nop + fnegs %f2,%f2 +2: + fadds %f0,%f2,%f0 _C((x + L) rounded) + fcmps %f0,%f2 +ifdef(`ARCH_v7', `dnl + nop +')dnl + fbne 0f + nop + ld [%sp+0x50],%f0 _C(return copysignf(zero, x)) + bge 3f + nop + fnegs %f0,%f0 + ba 3f + nop +0: + fsubs %f0,%f2,%f0 _C(return (x + L) rounded - L) +3: + .end + +')dnl +dnl +dnl! x0 = 0x44 ! shadow area of %o0 +dnl! x1 = 0x48 ! shadow area of %o1 +dnl! x2 = 0x4c ! shadow area of %o2 +dnl! x3 = 0x50 ! shadow area of %o3 +dnl! x4 = 0x54 ! shadow area of %o4 +dnl! x5 = 0x58 ! 
shadow area of %o5 +dnl + .inline NAME(min_subnormal),0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(d_min_subnormal_),0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(min_subnormalf),0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_min_subnormal_),0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(max_subnormal),0 + set 0x000fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(d_max_subnormal_),0 + set 0x000fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(max_subnormalf),0 + set 0x007fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_max_subnormal_),0 + set 0x007fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(min_normal),0 + set 0x00100000,%o0 + set 0x0,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline NAME(d_min_normal_),0 + set 0x00100000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(min_normalf),0 + set 0x00800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_min_normal_),0 + set 0x00800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(max_normal),0 + set 0x7fefffff,%o0 + set 0xffffffff,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline NAME(d_max_normal_),0 + set 0x7fefffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(max_normalf),0 + set 0x7f7fffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_max_normal_),0 + set 0x7f7fffff,%o0 + st %o0,[%sp+0x44] + ld 
[%sp+0x44],%f0 + .end + + .inline NAME(__infinity),0 + set 0x7ff00000,%o0 + set 0x0,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline NAME(infinity),0 + set 0x7ff00000,%o0 + set 0x0,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline NAME(d_infinity_),0 + set 0x7ff00000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x0,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(infinityf),0 + set 0x7f800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_infinity_),0 + set 0x7f800000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(signaling_nan),0 + set 0x7ff00000,%o0 + set 0x1,%o1 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + + .inline NAME(d_signaling_nan_),0 + set 0x7ff00000,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0x1,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(signaling_nanf),0 + set 0x7f800001,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_signaling_nan_),0 + set 0x7f800001,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(quiet_nan),0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(d_quiet_nan_),0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + set 0xffffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f1 + .end + + .inline NAME(quiet_nanf),0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(r_quiet_nan_),0 + set 0x7fffffff,%o0 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + .end + + .inline NAME(__swapEX),1 + and %o0,0x1f,%o1 + sll %o1,5,%o1 ! shift input to aexc bit location + .volatile + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + andn %o0,0x3e0,%o2 + or %o1,%o2,%o1 ! o1 = new fsr + st %o1,[%sp+0x44] + ld [%sp+0x44],%fsr + srl %o0,5,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline NAME(_QgetRD),0 + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! 
o0 = fsr + srl %o0,30,%o0 ! return round control value + .end + + .inline NAME(_QgetRP),0 + or %g0,%g0,%o0 + .end + + .inline NAME(__swapRD),1 + and %o0,0x3,%o0 + sll %o0,30,%o1 ! shift input to RD bit location + .volatile + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + set 0xc0000000,%o4 ! mask of rounding direction bits + andn %o0,%o4,%o2 + or %o1,%o2,%o1 ! o1 = new fsr + st %o1,[%sp+0x44] + ld [%sp+0x44],%fsr + srl %o0,30,%o0 + and %o0,0x3,%o0 + .nonvolatile + .end + +! +! On the SPARC, __swapRP is a no-op; always return 0 for backward compatibility +! + .inline NAME(__swapRP),1 + or %g0,%g0,%o0 + .end + + .inline NAME(__swapTE),1 + and %o0,0x1f,%o0 + sll %o0,23,%o1 ! shift input to TEM bit location + .volatile + st %fsr,[%sp+0x44] + ld [%sp+0x44],%o0 ! o0 = fsr + set 0x0f800000,%o4 ! mask of TEM (Trap Enable Mode bits) + andn %o0,%o4,%o2 + or %o1,%o2,%o1 ! o1 = new fsr + st %o1,[%sp+0x48] + ld [%sp+0x48],%fsr + srl %o0,23,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline NAME(fp_class),2 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0-o1 gets abs(x) + orcc %o0,%o1,%g0 ! set cc as x is zero/nonzero + bne 1f ! branch if x is nonzero + nop + mov 0,%o0 + ba 2f ! x is 0 + nop +1: + sethi %hi(0x7ff00000),%o2 ! o2 gets 7ff00000 + andcc %o0,%o2,%g0 ! cc set by exp field of x + bne 1f ! branch if normal or max exp + nop + mov 1,%o0 + ba 2f ! x is subnormal + nop +1: + cmp %o0,%o2 + bge 1f ! branch if x is max exp + nop + mov 2,%o0 + ba 2f ! x is normal + nop +1: + andn %o0,%o2,%o0 ! o0 gets msw significand field + orcc %o0,%o1,%g0 ! set cc by OR significand + bne 1f ! Branch if nan + nop + mov 3,%o0 + ba 2f ! x is infinity + nop +1: + sethi %hi(0x00080000),%o2 + andcc %o0,%o2,%g0 ! set cc by quiet/sig bit + be 1f ! Branch if signaling + nop + mov 4,%o0 ! x is quiet NaN + ba 2f + nop +1: + mov 5,%o0 ! 
x is signaling NaN +2: + .end + + .inline NAME(fp_classf),1 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + bne 1f + nop + mov 0,%o0 + ba 2f ! x is 0 + nop +1: + sethi %hi(0x7f800000),%o2 + andcc %o0,%o2,%g0 + bne 1f + nop + mov 1,%o0 + ba 2f ! x is subnormal + nop +1: + cmp %o0,%o2 + bge 1f + nop + mov 2,%o0 + ba 2f ! x is normal + nop +1: + bg 1f + nop + mov 3,%o0 + ba 2f ! x is infinity + nop +1: + sethi %hi(0x00400000),%o2 + andcc %o0,%o2,%g0 + mov 4,%o0 ! x is quiet NaN + bne 2f + nop + mov 5,%o0 ! x is signaling NaN +2: + .end + + .inline NAME(ir_fp_class_),1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + bne 1f + nop + mov 0,%o0 + ba 2f ! x is 0 + nop +1: + sethi %hi(0x7f800000),%o2 + andcc %o0,%o2,%g0 + bne 1f + nop + mov 1,%o0 + ba 2f ! x is subnormal + nop +1: + cmp %o0,%o2 + bge 1f + nop + mov 2,%o0 + ba 2f ! x is normal + nop +1: + bg 1f + nop + mov 3,%o0 + ba 2f ! x is infinity + nop +1: + sethi %hi(0x00400000),%o2 + andcc %o0,%o2,%g0 + mov 4,%o0 ! x is quiet NaN + bne 2f + nop + mov 5,%o0 ! 
x is signaling NaN
+2:
+	.end
+
+dnl! copysign(x, y): x in %o0:%o1, high word of y in %o2.  The sign bit
+dnl! of %o2 replaces the sign bit of %o0; the pair goes to %f0 via memory.
+	.inline	NAME(copysign),4
+	set	0x80000000,%o3
+	and	%o2,%o3,%o2
+	andn	%o0,%o3,%o0
+	or	%o0,%o2,%o0
+	std	%o0,[%sp+0x48]
+	ldd	[%sp+0x48],%f0
+	.end
+
+dnl! copysignf(x, y): x in %o0, y in %o1, result in %f0.
+	.inline	NAME(copysignf),2
+	set	0x80000000,%o2
+	andn	%o0,%o2,%o0
+	and	%o1,%o2,%o1
+	or	%o0,%o1,%o0
+	st	%o0,[%sp+0x44]
+	ld	[%sp+0x44],%f0
+	.end
+
+dnl! r_copysign_: as copysignf, but %o0 and %o1 hold addresses of the operands.
+	.inline	NAME(r_copysign_),2
+	ld	[%o0],%o0
+	ld	[%o1],%o1
+	set	0x80000000,%o2
+	andn	%o0,%o2,%o0
+	and	%o1,%o2,%o1
+	or	%o0,%o1,%o0
+	st	%o0,[%sp+0x44]
+	ld	[%sp+0x44],%f0
+	.end
+
+dnl! finite(x): 1 unless the exponent field of the high word is all ones.
+	.inline	NAME(finite),2
+	set	0x7ff00000,%o1
+	and	%o0,%o1,%o0
+	cmp	%o0,%o1
+	mov	1,%o0
+	bne	1f
+	nop
+	mov	0,%o0
+1:
+	.end
+
+dnl! finitef(x): single precision form; one word of arguments, declared
+dnl! the same way as isinff and signbitf.
+	.inline	NAME(finitef),1
+	set	0x7f800000,%o1
+	and	%o0,%o1,%o0
+	cmp	%o0,%o1
+	mov	1,%o0
+	bne	1f
+	nop
+	mov	0,%o0
+1:
+	.end
+
+dnl! ir_finite_: as finitef, but %o0 holds the address of the operand.
+	.inline	NAME(ir_finite_),1
+	ld	[%o0],%o0
+	set	0x7f800000,%o1
+	and	%o0,%o1,%o0
+	cmp	%o0,%o1
+	mov	1,%o0
+	bne	1f
+	nop
+	mov	0,%o0
+1:
+	.end
+
+dnl! signbit(x): top bit of the high word; two words of arguments,
+dnl! declared the same way as the other double precision predicates.
+	.inline	NAME(signbit),2
+	srl	%o0,31,%o0
+	.end
+
+	.inline	NAME(signbitf),1
+	srl	%o0,31,%o0
+	.end
+
+	.inline	NAME(ir_signbit_),1
+	ld	[%o0],%o0
+	srl	%o0,31,%o0
+	.end
+
+dnl! isinf(x): 1 only when the low word is zero and the high word with
+dnl! its sign bit cleared equals 0x7ff00000.
+	.inline	NAME(isinf),2
+	tst	%o1
+	sethi	%hi(0x80000000),%o2
+	bne	1f
+	nop
+	andn	%o0,%o2,%o0
+	sethi	%hi(0x7ff00000),%o2
+	cmp	%o0,%o2
+	mov	1,%o0
+	be	2f
+	nop
+1:
+	mov	0,%o0
+2:
+	.end
+
+	.inline	NAME(isinff),1
+	sethi	%hi(0x80000000),%o2
+	andn	%o0,%o2,%o0	! o0 gets abs(x)
+	sethi	%hi(0x7f800000),%o2
+	cmp	%o0,%o2
+	mov	0,%o0
+	bne	1f		! Branch if not inf.
+ nop + mov 1,%o0 +1: + .end + + .inline NAME(isnan),2 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 _C(mask off sign bit) + sub %g0,%o1,%o3 _C(sticky <- ((lo|-lo) >> 31)) + or %o1,%o3,%o1 + srl %o1,31,%o1 + or %o0,%o1,%o0 _C(hi <- hi | sticky) + sethi %hi(0x7ff00000),%o4 + sub %o4,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline NAME(isnanf),1 + sethi %hi(0x80000000),%o2 _C(mask off sign bit) + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o1 + sub %o1,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline NAME(ir_isnan_),1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 _C(mask off sign bit) + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o1 + sub %o1,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline NAME(isnormal),2 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7ff00000),%o2 + cmp %o0,%o2 + sethi %hi(0x00100000),%o2 + bge 1f + nop + cmp %o0,%o2 + mov 1,%o0 + bge 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline NAME(isnormalf),1 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o2 + cmp %o0,%o2 + sethi %hi(0x00800000),%o2 + bge 1f + nop + cmp %o0,%o2 + mov 1,%o0 + bge 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline NAME(ir_isnormal_),1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o2 + cmp %o0,%o2 + sethi %hi(0x00800000),%o2 + bge 1f + nop + cmp %o0,%o2 + mov 1,%o0 + bge 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline NAME(issubnormal),2 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0/o1 gets abs(x) + sethi %hi(0x00100000),%o2 ! o2 gets 00100000 + cmp %o0,%o2 + bge 1f ! branch if x norm or max exp + nop + orcc %o0,%o1,%g0 + be 1f ! Branch if x zero + nop + mov 1,%o0 ! x is subnormal + ba 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline NAME(issubnormalf),1 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0 gets abs(x) + sethi %hi(0x00800000),%o2 ! o2 gets 00800000 + cmp %o0,%o2 + bge 1f ! branch if x norm or max exp + nop + orcc %o0,%g0,%g0 + be 1f ! 
Branch if x zero + nop + mov 1,%o0 ! x is subnormal + ba 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline NAME(ir_issubnormal_),1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 ! o2 gets 80000000 + andn %o0,%o2,%o0 ! o0 gets abs(x) + sethi %hi(0x00800000),%o2 ! o2 gets 00800000 + cmp %o0,%o2 + bge 1f ! branch if x norm or max exp + nop + orcc %o0,%g0,%g0 + be 1f ! Branch if x zero + nop + mov 1,%o0 ! x is subnormal + ba 2f + nop +1: + mov 0,%o0 +2: + .end + + .inline NAME(iszero),2 + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o0 + orcc %o0,%o1,%g0 + mov 1,%o0 + be 1f + nop + mov 0,%o0 +1: + .end + + .inline NAME(iszerof),1 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + mov 1,%o0 + be 1f + nop + mov 0,%o0 +1: + .end + + .inline NAME(ir_iszero_),1 + ld [%o0],%o0 + sethi %hi(0x80000000),%o2 + andncc %o0,%o2,%o0 + mov 1,%o0 + be 1f + nop + mov 0,%o0 +1: + .end + + .inline NAME(abs),1 + sra %o0,31,%o1 + xor %o0,%o1,%o0 + sub %o0,%o1,%o0 + .end + + .inline NAME(fabs),2 + st %o0,[%sp+0x48] + st %o1,[%sp+0x4c] + ldd [%sp+0x48],%f0 +ifdef(`ARCH_v8plus', `dnl + fabsd %f0,%f0 +',`dnl + fabss %f0,%f0 +')dnl + .end + + .inline NAME(fabsf),1 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f0 + fabss %f0,%f0 + .end + + .inline NAME(r_fabs_),1 + ld [%o0],%f0 + fabss %f0,%f0 + .end + +! +! __nintf - f77 NINT(REAL*4) +! + .inline NAME(__nintf),1 + srl %o0,30-7,%g1 + sethi %hi(0x7fffff),%o2 + st %o0,[%sp+0x44] _C(prepare for reload if |x| >= 2^31) + and %g1,0xff,%g1 _C(%g1 := biased exponent) + or %o2,%lo(0x7fffff),%o2 _C(%o2 := 0x7fffff) + sethi %hi(1<<22),%o4 _C(%o4 := 0x400000) + subcc %g1,127+31,%g0 _C(< 0 iff |x| < 2^31) + and %o0,%o2,%o3 _C(%o3 := mantissa) + bl 0f + nop + sethi %hi(0xcf000000),%o2 _C(%o2 := -2^31 in floating point) + sethi %hi(0x80000000),%g1 _C(%g1 := -2^31 in fixed point) + subcc %o0,%o2,%g0 _C(x == -2^31?) 
+ or %g1,%g0,%o0 _C(return -2^31 if x == -2^31) + be 9f + nop + ld [%sp+0x44],%f0 + fstoi %f0,%f0 _C(return result and trigger fp_invalid) + st %f0,[%sp+0x44] + ld [%sp+0x44],%o0 + ba 9f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x800000) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + sra %o0,31-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,127,%g1 _C(%g1 := e) + srl %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + add %o3,%o4,%o3 _C(%o3 := mantissa + 0.5 in fixed point) + or %g0,23,%o0 _C(%o0 := 23) + subcc %o0,%g1,%o0 _C(%o0 := 23 - e) + bl 1f _C(if 0 <= e <= 23) + nop + srl %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) + ba 2f + nop +1: + sub %g0,%o0,%o0 _C(%o0 := e - 23) + sll %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + and %o2,1,%o2 _C(%o2 := 0/1) + add %o0,%o2,%o0 +9: + .end + +ifdef(`ARCH_v8plus', `dnl + .inline NAME(__il_nint),1 + ld [%o0],%o0 _C(%o0 := s*1.f*2^e) + sra %o0,0,%o0 _C(sign-extend to 64-bit %o0) + srlx %o0,31-8,%g1 + or %g0,1,%o2 + sllx %o2,23-1,%o4 _C(%o4 := 0x00000000 00400000) + and %g1,0xff,%g1 _C(%g1 := biased exponent) + sllx %o2,63-0,%o2 + subcc %g1,127+63,%g0 _C(>= 0 iff |x| >= 2^63) + bl 0f + nop + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + fstox %f0,%f0 + std %f0,[%sp+0x48] + ldx [%sp+0x48],%o1 + ba 9f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x00000000 00800000) + srax %o2,63-23,%o2 + sub %g1,127+23,%o1 _C(%o1 >= 0 iff |x| >= 2^23) + xnor %o2,%g0,%o2 _C(%o2 := 0x00000000 007fffff) + and %o0,%o2,%o3 _C(%o3 := mantissa) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + srax %o0,63-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,127,%g1 _C(%g1 := e) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + brlz,pt %o1,3f + nop _C(2^23 <= 
|x| < 2^63) + sub %g1,23,%o0 _C(%o0 := e - 23) + sllx %o3,%o0,%o0 _C(%o0 := int(|x|)) + ba 2f + nop +3: + srlx %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + add %o3,%o4,%o3 _C(%o3 := mantissa w/HLB + 0.5 in fixed point) + or %g0,23,%o0 _C(%o0 := 23) + sub %o0,%g1,%o0 _C(%o0 := 23 - e) + srlx %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o1 +9: + srlx %o1,32,%o0 + .end + +')dnl +! +! __i_dnnt - f77 NINT(REAL*8) +! +ifdef(`ARCH_v8plus', `dnl + .inline NAME(__i_dnnt),1 + ld [%o0],%o1 _C(we may not assume the address is DW-aligned) + sllx %o1,32,%o1 + ld [%o0+4],%o0 + or %o0,%o1,%o0 _C(%o0 := s*1.f*2^e) + srlx %o0,63-11,%g1 + or %g0,1,%o2 + stx %o0,[%sp+0x48] _C(prepare for reload if x is out of range) + sllx %o2,52-1,%o4 _C(%o4 := 0x00080000 00000000) + and %g1,0x7ff,%g1 _C(%g1 := biased exponent) + sllx %o2,63-0,%o2 + subcc %g1,1023+32,%g0 _C(>= 0 iff |x| >= 2^32) + bl 0f + nop + ldd [%sp+0x48],%f0 + ba 8f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x00100000 00000000) + srax %o2,63-52,%o2 + sub %g1,1023+30,%o1 _C(%o1 >= 0 iff |x| >= 2^30) + xnor %o2,%g0,%o2 _C(%o2 := 0x000fffff ffffffff) + and %o0,%o2,%o3 _C(%o3 := mantissa) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + srax %o0,63-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,1023,%g1 _C(%g1 := e) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + srlx %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + add %o3,%o4,%o3 _C(%o3 := mantissa w/HLB + 0.5 in fixed point) + or %g0,52,%o0 _C(%o0 := 52) + sub %o0,%g1,%o0 _C(%o0 := 52 - e) + srlx %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 + brlz,pt %o1,9f + nop + stx %o0,[%sp+0x48] _C(2^30 <= |x| < 2^32) + ldd [%sp+0x48],%f0 + fxtod %f0,%f0 +8: + fdtoi %f0,%f0 + st %f0,[%sp+0x44] + ld [%sp+0x44],%o0 +9: + .end + + .inline NAME(__il_dnnt),1 + ld [%o0],%o1 _C(we may not assume the 
address is DW-aligned) + sllx %o1,32,%o1 + ld [%o0+4],%o0 + or %o0,%o1,%o0 _C(%o0 := s*1.f*2^e) + srlx %o0,63-11,%g1 + or %g0,1,%o2 + sllx %o2,52-1,%o4 _C(%o4 := 0x00080000 00000000) + and %g1,0x7ff,%g1 _C(%g1 := biased exponent) + sllx %o2,63-0,%o2 + subcc %g1,1023+63,%g0 _C(>= 0 iff |x| >= 2^63) + bl 0f + nop + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + fdtox %f0,%f0 + std %f0,[%sp+0x48] + ldx [%sp+0x48],%o1 + ba 9f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x00100000 00000000) + srax %o2,63-52,%o2 + sub %g1,1023+52,%o1 _C(%o1 >= 0 iff |x| >= 2^52) + xnor %o2,%g0,%o2 _C(%o2 := 0x000fffff ffffffff) + and %o0,%o2,%o3 _C(%o3 := mantissa) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + srax %o0,63-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,1023,%g1 _C(%g1 := e) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + brlz,pt %o1,3f + nop _C(2^52 <= |x| < 2^63) + sub %g1,52,%o0 _C(%o0 := e - 52) + sllx %o3,%o0,%o0 _C(%o0 := int(|x|)) + ba 2f + nop +3: + srlx %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + add %o3,%o4,%o3 _C(%o3 := mantissa w/HLB + 0.5 in fixed point) + or %g0,52,%o0 _C(%o0 := 52) + sub %o0,%g1,%o0 _C(%o0 := 52 - e) + srlx %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o1 +9: + srlx %o1,32,%o0 + .end + +',`dnl +dnl! int __i_dnnt(const double *xp) { +dnl! unsigned long long u = *(unsigned long long *) xp; +dnl! unsigned v = 0x432 - (unsigned) ((u >> 52U) & 0x7ffU); +dnl! if (v < 53U) /* i.e. 0 <= v <= 52 */ +dnl! u += 1ULL << v; +dnl! return (int) *(double *) &u; +dnl! 
} + .inline NAME(__i_dnnt),1 + ld [%o0],%o4 + ld [%o0+4],%o5 _C(o4:5 := u) + srl %o4,20,%o3 + and %o3,0x7ff,%o3 + sub %o3,0x432,%o3 + sub %g0,%o3,%o3 _C(o3 := v = 0x432 - ((u >> 52) & 0x7ff)) + subcc %o3,53,%g0 + bcc 2f + nop + or %g0,1,%g1 + subcc %o3,32,%o2 _C(o2 = v - 32) + bl 1f + nop + sll %g1,%o2,%g1 _C(.5 falls in the high word) + add %o4,%g1,%o4 _C(u += 1 << v) + ba 2f + nop +1: + sll %g1,%o3,%g1 _C(.5 falls in the low word) + addcc %o5,%g1,%o5 + addx %o4,0,%o4 _C(u += 1 << v) +2: + st %o4,[%sp+0x48] + st %o5,[%sp+0x4c] + ldd [%sp+0x48],%f2 + fdtoi %f2,%f2 + st %f2,[%sp+0x44] + ld [%sp+0x44],%o0 + .end + +')dnl +ifdef(`ARCH_v8plusa', `dnl + .inline NAME(__aintf),1 + fzeros %f4 _C(0) + fnegs %f4,%f8 _C(-0) + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 _C(x) + fabss %f0,%f6 _C(|x|) + sethi %hi(0x4b000000),%o2 + st %o2,[%sp+0x48] + ld [%sp+0x48],%f2 _C(2^23) + fcmps %fcc0,%f6,%f2 + fmovsuge %fcc0,%f4,%f6 _C(|x| < 2^23 ? |x| : 0) + fstoi %f6,%f6 _C(truncate to integer) + fitos %f6,%f6 + fadds %f0,%f4,%f2 _C(x + 0) + fmovsuge %fcc0,%f2,%f6 _C(|x| < 2^23 ? truncf(|x|) : x + 0) + fands %f0,%f8,%f0 _C(copysignf(0, x)) + fors %f0,%f6,%f0 _C(restore sign of x) + .end + + .inline NAME(__aint),2 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + sllx %o0,32,%o0 + or %o0,%o1,%o0 + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 _C(x) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x48] + ldd [%sp+0x48],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f6 _C(|x| < 2^52 ? |x| : 0) + fdtox %f6,%f6 _C(truncate to integer) + fxtod %f6,%f6 + faddd %f0,%f4,%f2 _C(x + 0) + fmovduge %fcc0,%f2,%f6 _C(|x| < 2^52 ? trunc(|x|) : x + 0) + fand %f0,%f8,%f0 _C(copysign(0, x)) + for %f0,%f6,%f0 _C(restore sign of x) + .end + +')dnl +dnl!float +dnl!__anintf(float x) { +dnl! unsigned u = *(unsigned *) &x; +dnl! unsigned v = 0x95 - (unsigned) ((u >> 23U) & 0xffU); +dnl! unsigned t = 1U << v; +dnl! unsigned s = t - 1; +dnl! /* +dnl! * v := 22 - e +dnl! */ +dnl! 
if (v < 23U) { /* 0 <= e <= 22 */ +dnl! t &= u; +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (v == 23U) { /* e == -1 */ +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (*(int *) &v > 23) /* e <= -2 */ +dnl! u &= 0x80000000; +dnl! return *(float *) &u; +dnl!} + .inline NAME(__anintf),1 + or %g0,1,%o1 + srl %o0,23,%g1 + and %g1,0xff,%g1 + sub %g0,%g1,%g1 + add %g1,0x95,%g1 + subcc %g1,23,%g0 + sll %o1,%g1,%o1 + sub %o1,1,%o2 + bcs 1f + nop + be 2f + nop + bl 3f + nop + sethi %hi(0x80000000),%o1 + and %o0,%o1,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + .end + +ifdef(`ARCH_v8plus', `dnl +dnl!double +dnl!__anint(double x) { +dnl! unsigned long long u = *(unsigned long long *) &x; +dnl! unsigned v = 0x432 - (unsigned) ((u >> 52U) & 0x7ffU); +dnl! unsigned long long t = 1ULL << v; +dnl! unsigned long long s = t - 1; +dnl! /* +dnl! * v := 51 - e +dnl! */ +dnl! if (v < 52U) { /* 0 <= e <= 51 */ +dnl! t &= u; +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (v == 52U) { /* e == -1 */ +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (*(int *) &v > 52) /* e <= -2 */ +dnl! u = (u >> 63) << 63; +dnl! 
return *(double *) &u; +dnl!} + .inline NAME(__anint),2 + sllx %o0,32,%o0 + or %o0,%o1,%o0 + or %g0,1,%o1 + srlx %o0,52,%g1 + and %g1,0x7ff,%g1 + sub %g0,%g1,%g1 + add %g1,0x432,%g1 + subcc %g1,52,%g0 + sllx %o1,%g1,%o1 + sub %o1,1,%o2 + bcs,pt %icc,1f + nop + be,pt %icc,2f + nop + bl,pt %icc,3f + nop + srlx %o0,63,%o0 + sllx %o0,63,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + stx %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + +')dnl + .inline NAME(__Fz_minus),3 + ld [%o1],%f0 + ld [%o1+0x4],%f1 + ld [%o2],%f4 + ld [%o2+0x4],%f5 + fsubd %f0,%f4,%f0 + ld [%o1+8],%f2 + ld [%o1+0xc],%f3 + ld [%o2+8],%f6 + ld [%o2+0xc],%f7 + fsubd %f2,%f6,%f2 + st %f0,[%o0+0x0] + st %f1,[%o0+0x4] + st %f2,[%o0+0x8] + st %f3,[%o0+0xc] + .end + + .inline NAME(__Fz_add),3 + ld [%o1],%f0 + ld [%o1+0x4],%f1 + ld [%o2],%f4 + ld [%o2+0x4],%f5 + faddd %f0,%f4,%f0 + ld [%o1+8],%f2 + ld [%o1+0xc],%f3 + ld [%o2+8],%f6 + ld [%o2+0xc],%f7 + faddd %f2,%f6,%f2 + st %f0,[%o0+0x0] + st %f1,[%o0+0x4] + st %f2,[%o0+0x8] + st %f3,[%o0+0xc] + .end + + .inline NAME(__Fz_neg),2 + ld [%o1],%f0 + fnegs %f0,%f0 + ld [%o1+0x4],%f1 + st %f1,[%o0+0x4] + ld [%o1+8],%f2 + fnegs %f2,%f2 + ld [%o1+0xc],%f3 + st %f3,[%o0+0xc] + st %f0,[%o0] + st %f2,[%o0+0x8] + .end + + .inline NAME(__Ff_conv_z),2 + st %o1,[%sp+0x44] + ld [%sp+0x44],%f0 + fstod %f0,%f0 + st %g0,[%o0+0x8] + st %g0,[%o0+0xc] + st %f1,[%o0+0x4] + st %f0,[%o0] + .end + + .inline NAME(__Fz_conv_f),1 + ld [%o0],%f0 + ld [%o0+4],%f1 + fdtos %f0,%f0 + .end + + .inline NAME(__Fz_conv_i),1 + ld [%o0],%f0 + ld [%o0+4],%f1 + fdtoi %f0,%f0 + st %f0,[%sp+0x44] + ld [%sp+0x44],%o0 + .end + + .inline NAME(__Fi_conv_z),2 + st %o1,[%sp+0x44] + ld [%sp+0x44],%f0 + fitod %f0,%f0 + st %g0,[%o0+0x8] + st %g0,[%o0+0xc] + st %f1,[%o0+0x4] + st %f0,[%o0] + .end + + .inline NAME(__Fz_conv_d),1 + ld [%o0],%f0 + ld [%o0+4],%f1 + .end + + .inline NAME(__Fd_conv_z),3 + st %o1,[%o0] + st %o2,[%o0+0x4] + st %g0,[%o0+0x8] + st %g0,[%o0+0xc] + 
.end
+
+dnl! __Fz_conv_c: narrow the double complex at [%o1] to a single complex
+dnl! at [%o0].  The operand is read with word loads, as in the other
+dnl! __Fz_ templates, so [%o1] only has to be word aligned.
+	.inline	NAME(__Fz_conv_c),2
+	ld	[%o1],%f0
+	ld	[%o1+0x4],%f1
+	fdtos	%f0,%f0
+	st	%f0,[%o0]
+	ld	[%o1+0x8],%f2
+	ld	[%o1+0xc],%f3
+	fdtos	%f2,%f1
+	st	%f1,[%o0+0x4]
+	.end
+
+dnl! __Fz_eq: %o0 = 1 when the real parts and the imaginary parts of the
+dnl! double complex values at [%o0] and [%o1] both compare equal, else 0.
+	.inline	NAME(__Fz_eq),2
+	ld	[%o0],%f0
+	ld	[%o0+4],%f1
+	ld	[%o1],%f2
+	ld	[%o1+4],%f3
+	fcmpd	%f0,%f2
+	mov	%o0,%o2
+	mov	0,%o0
+	fbne	1f
+	nop
+	ld	[%o2+8],%f0
+	ld	[%o2+12],%f1
+	ld	[%o1+8],%f2
+	ld	[%o1+12],%f3
+	fcmpd	%f0,%f2
+	nop
+	fbne	1f
+	nop
+	mov	1,%o0
+1:
+	.end
+
+dnl! __Fz_ne: logical opposite of __Fz_eq.
+	.inline	NAME(__Fz_ne),2
+	ld	[%o0],%f0
+	ld	[%o0+4],%f1
+	ld	[%o1],%f2
+	ld	[%o1+4],%f3
+	fcmpd	%f0,%f2
+	mov	%o0,%o2
+	mov	1,%o0
+	fbne	1f
+	nop
+	ld	[%o2+8],%f0
+	ld	[%o2+12],%f1
+	ld	[%o1+8],%f2
+	ld	[%o1+12],%f3
+	fcmpd	%f0,%f2
+	nop
+	fbne	1f
+	nop
+	mov	0,%o0
+1:
+	.end
+
+dnl! __c_cmplx: store the word at [%o1] to [%o0] and the word at [%o2]
+dnl! to [%o0+4].
+	.inline	NAME(__c_cmplx),3
+	ld	[%o1],%o1
+	st	%o1,[%o0]
+	ld	[%o2],%o2
+	st	%o2,[%o0+4]
+	.end
+
+dnl! __d_cmplx: copy the doubles at [%o1] and [%o2] to [%o0] and [%o0+8].
+	.inline	NAME(__d_cmplx),3
+	ld	[%o1],%f0
+	st	%f0,[%o0]
+	ld	[%o1+4],%f1
+	st	%f1,[%o0+4]
+	ld	[%o2],%f0
+	st	%f0,[%o0+0x8]
+	ld	[%o2+4],%f1
+	st	%f1,[%o0+0xc]
+	.end
+
+dnl! __r_cnjg: copy the first word of [%o1] and store the second word
+dnl! with its sign flipped.
+	.inline	NAME(__r_cnjg),2
+	ld	[%o1+0x4],%f1
+	fnegs	%f1,%f1
+	ld	[%o1],%f0
+	st	%f0,[%o0]
+	st	%f1,[%o0+4]
+	.end
+
+dnl! __d_cnjg: double precision form; only the high word of the second
+dnl! double (offset 8) is negated.
+	.inline	NAME(__d_cnjg),2
+	ld	[%o1+0x8],%f0
+	fnegs	%f0,%f0
+	ld	[%o1+0xc],%f1
+	st	%f1,[%o0+0xc]
+	ld	[%o1+0x0],%f1
+	st	%f1,[%o0+0x0]
+	ld	[%o1+0x4],%f1
+	st	%f1,[%o0+0x4]
+	st	%f0,[%o0+0x8]
+	.end
+
+dnl! __r_dim: with a = [%o0] and b = [%o1], %f0 = a - b when a > b and
+dnl! zero when a <= b or the compare is unordered.
+	.inline	NAME(__r_dim),2
+ifdef(`ARCH_v8plus', `dnl
+ifdef(`ARCH_v8plusa', `dnl
+	fzeros	%f4
+',`dnl
+	st	%g0,[%sp+0x48]
+	ld	[%sp+0x48],%f4
+')dnl
+	ld	[%o0],%f0
+	ld	[%o1],%f2
+	fcmps	%fcc0,%f0,%f2
+	fmovsule %fcc0,%f4,%f2
+	fsubs	%f0,%f2,%f0
+	fmovsule %fcc0,%f4,%f0
+',`dnl
+	ld	[%o0],%f2
+	ld	[%o1],%f4
+	fcmps	%f2,%f4
+	st	%g0,[%sp+0x48]
+	ld	[%sp+0x48],%f0
+	fbule	1f
+	nop
+	fsubs	%f2,%f4,%f0
+1:
+')dnl
+	.end
+
+	.inline	NAME(__d_dim),2
+ifdef(`ARCH_v8plus', `dnl
+ifdef(`ARCH_v8plusa', `dnl
+	fzero	%f4
+',`dnl
+	stx	%g0,[%sp+0x48]
+	ldd	[%sp+0x48],%f4
+')dnl
+	ld	[%o0],%f0
+	ld	[%o0+4],%f1
+	ld	[%o1],%f2
+	ld	[%o1+4],%f3
+	fcmpd	%fcc0,%f0,%f2
+	fmovdule %fcc0,%f4,%f2
+	fsubd
%f0,%f2,%f0 + fmovdule %fcc0,%f4,%f0 +',`dnl + ld [%o0],%f2 + ld [%o0+4],%f3 + ld [%o1],%f4 + ld [%o1+4],%f5 + fcmpd %f2,%f4 + st %g0,[%sp+0x48] + ld [%sp+0x48],%f0 + ld [%sp+0x48],%f1 + fbule 1f + nop + fsubd %f2,%f4,%f0 +1: +')dnl + .end + + .inline NAME(__r_imag),1 + ld [%o0+4],%f0 + .end + + .inline NAME(__d_imag),1 + ld [%o0+8],%f0 + ld [%o0+0xc],%f1 + .end + +ifdef(`ARCH_v8plus', `dnl +ifdef(`ARCH_v8plusa', `dnl + .inline NAME(__f95_signf),2 + fzeros %f2 + fnegs %f2,%f2 + ld [%o0],%f0 + ld [%o1],%f1 + fabss %f0,%f0 + fands %f1,%f2,%f1 + fors %f0,%f1,%f0 + .end + + .inline NAME(__f95_sign),2 + fzero %f4 + fnegd %f4,%f4 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fabsd %f0,%f0 + fand %f2,%f4,%f2 + for %f0,%f2,%f0 + .end + +',`dnl + .inline NAME(__f95_signf),2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sra %o1,0,%o1 _C(sign-extend to 64-bit %o1) + fmovrslz %o1,%f1,%f0 + .end + + .inline NAME(__f95_sign),2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sra %o1,0,%o1 _C(sign-extend to 64-bit %o1) + fmovrdlz %o1,%f2,%f0 + .end + +')dnl + .inline NAME(__r_sign),2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sub %o1,1,%o0 + and %o1,%o0,%o1 _C(%o1 < 0 iff A2 is negative and not -0) + sra %o1,0,%o1 _C(sign-extend to 64-bit %o1) + fmovrslz %o1,%f1,%f0 + .end + + .inline NAME(__d_sign),2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%o0 + sllx %o0,32,%o0 + ld [%o1+4],%o1 + or %o1,%o0,%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sub %o1,1,%o0 + and %o1,%o0,%o1 _C(%o1 < 0 iff A2 is negative and not -0) + fmovrdlz %o1,%f2,%f0 + .end + +',`dnl + .inline NAME(__f95_signf),2 + ld [%o0],%f0 + fabss %f0,%f0 + ld [%o1],%o1 + orcc %g0,%o1,%g0 + bge 1f + nop + fnegs %f0,%f0 +1: + .end + + .inline NAME(__f95_sign),2 + ld [%o0],%f0 + fabss %f0,%f0 + ld [%o0+4],%f1 + ld [%o1],%o1 + orcc %g0,%o1,%g0 + bge 1f + nop + fnegs %f0,%f0 +1: + .end + + .inline NAME(__r_sign),2 + ld [%o0],%f0 + fabss %f0,%f0 + 
ld [%o1],%o2 + sethi %hi(0x80000000),%o3 + cmp %o2,%o3 + be 1f + nop + tst %o2 + bge 1f + nop + fnegs %f0,%f0 +1: + .end + + .inline NAME(__d_sign),2 + ld [%o0],%f0 + fabss %f0,%f0 + ld [%o0+4],%f1 + ld [%o1],%o2 + ld [%o1+4],%o3 + sethi %hi(0x80000000),%o4 + andn %o2,%o4,%o4 + orcc %o3,%o4,%g0 + be 1f + nop + tst %o2 + bge 1f + nop + fnegs %f0,%f0 +1: + .end + +')dnl + .inline NAME(__Fz_mult),3 + ld [%o1],%f0 + ld [%o1+0x4],%f1 + ld [%o2],%f4 + ld [%o2+0x4],%f5 + fmuld %f0,%f4,%f8 ! f8 = r1*r2 + ld [%o1+0x8],%f2 + ld [%o1+0xc],%f3 + ld [%o2+0x8],%f6 + ld [%o2+0xc],%f7 + fmuld %f2,%f6,%f10 ! f10= i1*i2 + fsubd %f8,%f10,%f12 ! f12= r1*r2-i1*i2 + st %f12,[%o0] + st %f13,[%o0+4] + fmuld %f0,%f6,%f14 ! f14= r1*i2 + fmuld %f2,%f4,%f16 ! f16= r2*i1 + faddd %f14,%f16,%f2 ! f2 = r1*i2+r2*i1 + st %f2,[%o0+8] + st %f3,[%o0+12] + .end + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_minus)(c, a, b) +! complex *c, *a, *b; +! { + .inline NAME(__Fc_minus),3 +! 30 c->real = a->real - b->real + + ld [%o1],%f0 + ld [%o2],%f1 + fsubs %f0,%f1,%f2 +! 31 c->imag = a->imag - b->imag + + ld [%o1+4],%f3 + ld [%o2+4],%f4 + fsubs %f3,%f4,%f5 + st %f2,[%o0] + st %f5,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_add)(c, a, b) +! complex *c, *a, *b; +! { + .inline NAME(__Fc_add),3 +! 39 c->real = a->real + b->real + + ld [%o1],%f0 + ld [%o2],%f1 + fadds %f0,%f1,%f2 +! 40 c->imag = a->imag + b->imag + + ld [%o1+4],%f3 + ld [%o2+4],%f4 + fadds %f3,%f4,%f5 + st %f2,[%o0] + st %f5,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_neg)(c, a) +! complex *c, *a; +! { + .inline NAME(__Fc_neg),2 +! 48 c->real = - a->real + + ld [%o1],%f0 + fnegs %f0,%f1 +! 49 c->imag = - a->imag + + ld [%o1+4],%f2 + fnegs %f2,%f3 + st %f1,[%o0] + st %f3,[%o0+4] + .end +! 
} + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Ff_conv_c)(c, x) +! complex *c; +! FLOATPARAMETER x; +! { + .inline NAME(__Ff_conv_c),2 +! 59 c->real = x + + st %o1,[%o0] +! 60 c->imag = 0.0 + + st %g0,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! FLOATFUNCTIONTYPE +! NAME(__Fc_conv_f)(c) +! complex *c; +! { + .inline NAME(__Fc_conv_f),1 +! 69 RETURNFLOAT(c->real) + + ld [%o0],%f0 + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! int +! NAME(__Fc_conv_i)(c) +! complex *c; +! { + .inline NAME(__Fc_conv_i),1 +! 78 return (int)c->real + + ld [%o0],%f0 + fstoi %f0,%f1 + st %f1,[%sp+68] + ld [%sp+68],%o0 + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fi_conv_c)(c, i) +! complex *c; +! int i; +! { + .inline NAME(__Fi_conv_c),2 +! 88 c->real = (float)i + + st %o1,[%sp+68] + ld [%sp+68],%f0 + fitos %f0,%f1 + st %f1,[%o0] +! 89 c->imag = 0.0 + + st %g0,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! double +! NAME(__Fc_conv_d)(c) +! complex *c; +! { + .inline NAME(__Fc_conv_d),1 +! 98 return (double)c->real + + ld [%o0],%f2 + fstod %f2,%f0 + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fd_conv_c)(c, x) +! complex *c; +! double x; +! { + .inline NAME(__Fd_conv_c),2 + st %o1,[%sp+72] + st %o2,[%sp+76] +! 109 c->real = (float)(x) + + ldd [%sp+72],%f0 + fdtos %f0,%f1 + st %f1,[%o0] +! 110 c->imag = 0.0 + + st %g0,[%o0+4] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! void +! NAME(__Fc_conv_z)(result, c) +! dcomplex *result; +! complex *c; +! { + .inline NAME(__Fc_conv_z),2 +! 120 result->dreal = (double)c->real + + ld [%o1],%f0 + fstod %f0,%f2 + st %f2,[%o0] + st %f3,[%o0+4] +! 
121 result->dimag = (double)c->imag + + ld [%o1+4],%f3 + fstod %f3,%f4 + st %f4,[%o0+8] + st %f5,[%o0+12] + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! int +! NAME(__Fc_eq)(x, y) +! complex *x, *y; +! { + .inline NAME(__Fc_eq),2 +! return (x->real == y->real) && (x->imag == y->imag); + ld [%o0],%f0 + ld [%o1],%f2 + mov %o0,%o2 + fcmps %f0,%f2 + mov 0,%o0 + fbne 1f + nop + ld [%o2+4],%f0 + ld [%o1+4],%f2 + fcmps %f0,%f2 + nop + fbne 1f + nop + mov 1,%o0 +1: + .end +! } + +!- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +! int +! NAME(__Fc_ne)(x, y) +! complex *x, *y; +! { + .inline NAME(__Fc_ne),2 +! return (x->real != y->real) || (x->imag != y->imag); + ld [%o0],%f0 + ld [%o1],%f2 + mov %o0,%o2 + fcmps %f0,%f2 + mov 1,%o0 + fbne 1f + nop + ld [%o2+4],%f0 + ld [%o1+4],%f2 + fcmps %f0,%f2 + nop + fbne 1f + nop + mov 0,%o0 +1: + .end +! } diff --git a/usr/src/libm/src/sparc/common/nextafter.S b/usr/src/libm/src/sparc/common/nextafter.S new file mode 100644 index 0000000..71d4851 --- /dev/null +++ b/usr/src/libm/src/sparc/common/nextafter.S @@ -0,0 +1,123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .ident "@(#)nextafter.S 1.23 06/01/23 SMI" + + .file "nextafter.S" + +#include "libm.h" +LIBM_ANSI_PRAGMA_WEAK(nextafter,function) + .weak _nextafter + .type _nextafter,#function +_nextafter = __nextafter +#include "libm_synonyms.h" +#include "libm_protos.h" + +#if defined(LIBM_BUILD) && !defined(ELFOBJ) +#define mENTRY(x) ENTRY(__libm/**/x) +#define mNAME(x) NAME(__libm/**/x) +#else +#define mENTRY(x) ENTRY(x) +#define mNAME(x) NAME(x) +#endif + + RO_DATA + .align 8 +.Lconstant: +two54 = 0x00 + .word 0x43500000,0x0 ! 2**54 +twom54 = 0x08 + .word 0x3c900000,0x0 ! 2**-54 +tiny = 0x10 + .word 0x00100000,0x0 ! tiny + +! variable using fp +x = -0x8 +y = -0x10 + + ENTRY(nextafter) + save %sp,-128,%sp + PIC_SETUP(l7) + std %i0,[%fp+x] + or %g0,%i0,%o0 ! save original arguments + or %g0,%i1,%o1 + std %i2,[%fp+y] + or %g0,%i2,%o2 + or %g0,%i3,%o3 + ldd [%fp+x],%f2 ! x + ldd [%fp+y],%f0 ! y + fcmpd %f2,%f0 ! x:y + PIC_SET(l7,.Lconstant,l0) + sethi %hi(0x80000000),%l1 + andn %i0,%l1,%l4 + fbe 9f ! next_return + nop + fbu,a 9f ! next_return + fmuld %f2,%f0,%f0 ! + -> * for Cheetah + orcc %i1,%l4,%g0 ! see if x is zero + bne 1f + tst %i0 + ! x is zero, return sign(y)*min + and %i2,%l1,%i0 + ba 4f ! next_final + mov 1,%i1 +1: bge 2f + nop + ! x is negative + fbl 1f ! next_subulp + nop + fbg 3f ! next_addulp + nop +2: + fbl 3f ! next_addulp + nop +1: ! next_subulp + subcc %i1,1,%i1 + ba 4f ! next_final + subx %i0,0,%i0 +3: ! next_addulp + addcc %i1,1,%i1 + addx %i0,0,%i0 +4: ! next_final + sethi %hi(0x7ff00000),%l3 + std %i0,[%fp+x] + andcc %i0,%l3,%i2 + be,a 1f ! xflow + ldd [%l0+tiny],%f2 + cmp %i2,%l3 + bne,a 9f ! 
next_return + ldd [%fp+x],%f0 + call mNAME(_SVID_libm_err) ! overflow + or %g0,46,%o4 + ba 9f + nop +1: ! xflow + fmuld %f2,%f2,%f2 + ldd [%fp+x],%f0 +9: ! next_return + ret + restore + + SET_SIZE(nextafter) diff --git a/usr/src/libm/src/sparc/v9/libm.m4 b/usr/src/libm/src/sparc/v9/libm.m4 new file mode 100644 index 0000000..bf5ec75 --- /dev/null +++ b/usr/src/libm/src/sparc/v9/libm.m4 @@ -0,0 +1,1278 @@ +! +! CDDL HEADER START +! +! The contents of this file are subject to the terms of the +! Common Development and Distribution License (the "License"). +! You may not use this file except in compliance with the License. +! +! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +! or http://www.opensolaris.org/os/licensing. +! See the License for the specific language governing permissions +! and limitations under the License. +! +! When distributing Covered Code, include this CDDL HEADER in each +! file and include the License file at usr/src/OPENSOLARIS.LICENSE. +! If applicable, add the following below this CDDL HEADER, with the +! fields enclosed by brackets "[]" replaced with your own identifying +! information: Portions Copyright [yyyy] [name of copyright owner] +! +! CDDL HEADER END +! +! Copyright 2006 Sun Microsystems, Inc. All rights reserved. +! Use is subject to license terms. +! +! @(#)libm.m4 1.28 06/01/31 SMI +! 
+undefine(`_C')dnl +define(`_C',`')dnl +ifdef(`ARCH_v9a', `define(ARCH_v9)')dnl +ifdef(`ARCH_v9b', `define(ARCH_v9)')dnl +ifdef(`ARCH_v9b', `define(ARCH_v9a)')dnl +ifdef(`LOCALLIBM', `dnl + .inline __ieee754_sqrt,1 + fsqrtd %f0,%f0 + .end + + .inline __inline_sqrtf,1 + fsqrts %f1,%f0 + .end + + .inline __inline_sqrt,1 + fsqrtd %f0,%f0 + .end + +')dnl + .inline sqrtf,1 + fsqrts %f1,%f0 + .end + + .inline sqrt,1 + fsqrtd %f0,%f0 + .end + +ifdef(`ARCH_v9b', `dnl + .inline ceil,1 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f2 _C(fiddle := |x| < 2^52 ? 2^52 : 0) + fand %f0,%f8,%f8 _C(copysign(0, x)) + for %f2,%f8,%f2 _C(copysign(fiddle, x)) + siam 6 + faddd %f0,%f2,%f0 _C(x + copysign(fiddle, x) rnd toward +Inf) + siam 4 + fsubd %f0,%f2,%f0 _C(" - copysign(fiddle, x) rnd to nearest) + siam 0 + for %f0,%f8,%f0 _C(in case previous fsubd gave +0) + .end + + .inline floor,1 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f2 _C(fiddle := |x| < 2^52 ? 
2^52 : 0) + fand %f0,%f8,%f8 _C(copysign(0, x)) + for %f2,%f8,%f2 _C(copysign(fiddle, x)) + siam 7 + faddd %f0,%f2,%f0 _C(x + copysign(fiddle, x) rounded down) + siam 4 + fsubd %f0,%f2,%f0 _C(" - copysign(fiddle, x) rnd to nearest) + siam 0 + for %f0,%f8,%f0 _C(in case previous fsubd gave +0) + .end + +',`dnl + .inline ceil,1 + sethi %hi(0x43300000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(f2:3 = 2^52) + fabsd %f0,%f4 _C(f4:5 = |x|) + fsubd %f2,%f2,%f6 _C(f6:7 = zero) + fcmpd %fcc0,%f4,%f2 + fbl,pt %fcc0,1f + nop + sethi %hi(0x3ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f6 _C(f6:7 = one) + fmuld %f0,%f6,%f0 _C(return x * one if |x| >= 2^52, NaN) + ba 4f + nop +1: + fcmpd %fcc1,%f0,%f6 _C(fcc1 = x : zero) + fbg,pt %fcc1,2f + nop + fbe,pn %fcc1,4f _C(return x if x is +/-zero) + nop + fnegd %f2,%f2 _C(L := f2:3 = copysign(2^52, x)) +2: + faddd %f0,%f2,%f4 _C(f4:5 = (x + L) rounded) + fsubd %f4,%f2,%f4 _C(t := f4:5 = (x + L) rounded - L) + fcmpd %fcc0,%f4,%f0 + fbge,pt %fcc0,3f + nop + sethi %hi(0x3ff00000),%o0 + st %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(f2:3 = one) + faddd %f4,%f2,%f4 _C(t = t + 1 if t < x) +3: + fabsd %f4,%f0 _C(f0:1 = |t|) + fbge,pt %fcc1,4f _C(at this point we know x is not +/-0) + nop + fnegd %f0,%f0 _C(return copysign(t, x)) +4: + .end + + .inline floor,1 + sethi %hi(0x43300000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(f2:3 = 2^52) + fabsd %f0,%f4 _C(f4:5 = |x|) + fsubd %f2,%f2,%f6 _C(f6:7 = zero) + fcmpd %fcc0,%f4,%f2 + fbl,pt %fcc0,1f + nop + sethi %hi(0x3ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f6 _C(f6:7 = one) + fmuld %f0,%f6,%f0 _C(return x * one if |x| >= 2^52) + ba 4f + nop +1: + fcmpd %fcc1,%f0,%f6 _C(fcc1 = x : zero) + fbg,pt %fcc1,2f + nop + fbe,pn %fcc1,4f _C(return x if x is +/-zero) + nop + fnegd %f2,%f2 _C(L := f2:3 = copysign(2^52, x)) +2: + faddd %f0,%f2,%f4 _C(f4:5 = (x + L) rounded) + fsubd %f4,%f2,%f4 _C(t := 
f4:5 = (x + L) rounded - L) + fcmpd %fcc0,%f4,%f0 + fble,pt %fcc0,3f + nop + sethi %hi(0x3ff00000),%o0 + st %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(f2:3 = one) + fsubd %f4,%f2,%f4 _C(t = t - 1 if t > x) +3: + fabsd %f4,%f0 _C(f0:1 = |t|) + fbge,pt %fcc1,4f _C(at this point we know x is not +/-0) + nop + fnegd %f0,%f0 _C(return copysign(t, x)) +4: + .end + +')dnl + .inline ilogb,1 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 _C(o1 = 0x7ff00000) + andcc %o0,%o1,%o0 + bne,pt %icc,2f + nop + sethi %hi(0x43500000),%o0 _C(x subnormal) + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(f2:3 = 2^54) + fmuld %f0,%f2,%f0 _C(scale x up by 2^54) + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + andcc %o0,%o1,%o0 + bne,pt %icc,1f + nop + sethi %hi(0x80000001),%o0 _C(return -(2^31 - 1) for x +/-0) + or %o0,%lo(0x80000001),%o0 + ba 4f + nop +1: + srl %o0,20,%o0 + sub %o0,0x435,%o0 + ba 4f + nop +2: + subcc %o1,%o0,%g0 + bne,pt %icc,3f + nop + sethi %hi(0x7fffffff),%o0 _C(return 2^31 - 1 for x +/-Inf or NaN) + or %o0,%lo(0x7fffffff),%o0 + ba 4f + nop +3: + srl %o0,20,%o0 + sub %o0,0x3ff,%o0 +4: + .end + +ifdef(`ARCH_v9a', `dnl + .inline rint,1 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f2 _C(fiddle := |x| < 2^52 ? 2^52 : 0) + fand %f0,%f8,%f8 _C(copysign(0, x)) + for %f2,%f8,%f2 _C(copysign(fiddle, x)) + faddd %f0,%f2,%f0 _C(x + copysign(fiddle, x)) + fsubd %f0,%f2,%f0 _C(" - copysign(fiddle, x)) + fabsd %f0,%f0 + for %f0,%f8,%f0 _C(in case previous fsubd gave wrong sign of 0) + .end + + .inline rintf,1 + fzeros %f4 _C(0) + fnegs %f4,%f8 _C(-0) + fabss %f1,%f6 _C(|x|) + sethi %hi(0x4b000000),%o2 + st %o2,[%sp+0x87f] + ld [%sp+0x87f],%f2 _C(2^23) + fcmps %fcc0,%f6,%f2 + fmovsuge %fcc0,%f4,%f2 _C(fiddle := |x| < 2^23 ? 
2^23 : 0) + fands %f1,%f8,%f8 _C(copysignf(0, x)) + fors %f2,%f8,%f2 _C(copysignf(fiddle, x)) + fadds %f1,%f2,%f0 _C(x + copysignf(fiddle, x)) + fsubs %f0,%f2,%f0 _C(" - copysignf(fiddle, x)) + fabss %f0,%f0 + fors %f0,%f8,%f0 _C(in case previous fsubs gave wrong sign of 0) + .end + +',`dnl + .inline rint,1 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 _C(x) + sethi %hi(0x80000000),%o2 + sllx %o2,32,%o2 + andn %o0,%o2,%o2 + sethi %hi(0x43300000),%o3 + sllx %o3,32,%o3 + stx %g0,[%sp+0x887] + subcc %o2,%o3,%g0 + bl,pt %xcc,1f + nop + sethi %hi(0x3ff00000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x887] + ldd [%sp+0x887],%f2 + fmuld %f0,%f2,%f0 _C(return x * one (raise flag if SNaN)) + ba 3f + nop +1: + orcc %o0,0,%g0 + stx %o3,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(L = copysign(two52, x)) + bge,pt %xcc,2f + nop + fnegd %f2,%f2 +2: + faddd %f0,%f2,%f0 _C((x + L) rounded) + fcmpd %f0,%f2 + fbne,pt %fcc0,0f + nop + ldd [%sp+0x887],%f0 _C(return copysign(zero, x)) + bge,pt %xcc,3f + nop + fnegd %f0,%f0 + ba 3f + nop +0: + fsubd %f0,%f2,%f0 _C(return (x + L) rounded - L) +3: + .end + + .inline rintf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 _C(x) + sethi %hi(0x80000000),%o2 + andn %o0,%o2,%o2 + sethi %hi(0x4b000000),%o3 + st %g0,[%sp+0x887] + subcc %o2,%o3,%g0 + bl 1f + nop + sethi %hi(0x3f800000),%o2 + st %o2,[%sp+0x887] + ld [%sp+0x887],%f2 + fmuls %f1,%f2,%f0 _C(return x * one (raise flag if SNaN)) + ba 3f + nop +1: + tst %o0 + st %o3,[%sp+0x87f] + ld [%sp+0x87f],%f2 _C(L = copysignf(two23, x)) + bge 2f + nop + fnegs %f2,%f2 +2: + fadds %f1,%f2,%f0 _C((x + L) rounded) + fcmps %f0,%f2 + fbne 0f + nop + ld [%sp+0x887],%f0 _C(return copysignf(zero, x)) + bge 3f + nop + fnegs %f0,%f0 + ba 3f + nop +0: + fsubs %f0,%f2,%f0 _C(return (x + L) rounded - L) +3: + .end + +')dnl + .inline min_subnormal,1 + or %g0,1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline min_subnormalf,1 + or %g0,1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline 
max_subnormal,1 + xnor %g0,%g0,%o0 + srlx %o0,12,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline max_subnormalf,1 + xnor %g0,%g0,%o0 + srl %o0,9,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline min_normal,1 + sethi %hi(0x00100000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline min_normalf,1 + sethi %hi(0x00800000),%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline max_normal,1 + sethi %hi(0x80100000),%o1 + sllx %o1,32,%o1 + xnor %g0,%g0,%o0 + andn %o0,%o1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline max_normalf,1 + sethi %hi(0x7f7ffc00),%o0 + or %o0,0x3ff,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline __infinity,1 + sethi %hi(0x7ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline infinity,1 + sethi %hi(0x7ff00000),%o0 + sllx %o0,32,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline infinityf,1 + sethi %hi(0x7f800000),%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline signaling_nan,1 + sethi %hi(0x7ff00000),%o0 + sllx %o0,32,%o0 + or %o0,0x1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline signaling_nanf,1 + sethi %hi(0x7f800000),%o0 + or %o0,1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline quiet_nan,1 + xnor %g0,%g0,%o0 + srlx %o0,1,%o0 + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline quiet_nanf,1 + xnor %g0,%g0,%o0 + srl %o0,1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline __swapEX,1 + and %o0,0x1f,%o1 + sll %o1,5,%o1 + .volatile + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + andn %o0,0x3e0,%o2 + or %o1,%o2,%o1 + st %o1,[%sp+0x87f] + ld [%sp+0x87f],%fsr + srl %o0,5,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline _QgetRD,0 + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,30,%o0 + .end + + .inline _QgetRP,0 + or %g0,%g0,%o0 + .end + + .inline __swapRD,1 + and %o0,0x3,%o0 + sll %o0,30,%o1 + 
.volatile + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0xc0000000),%o4 + andn %o0,%o4,%o2 + or %o1,%o2,%o1 + st %o1,[%sp+0x87f] + ld [%sp+0x87f],%fsr + srl %o0,30,%o0 + and %o0,0x3,%o0 + .nonvolatile + .end + +! +! On the SPARC, __swapRP is a no-op; always return 0 for backward compatibility +! + .inline __swapRP,1 + or %g0,%g0,%o0 + .end + + .inline __swapTE,1 + and %o0,0x1f,%o0 + sll %o0,23,%o1 + .volatile + st %fsr,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x0f800000),%o4 + andn %o0,%o4,%o2 + or %o1,%o2,%o1 + st %o1,[%sp+0x87f] + ld [%sp+0x87f],%fsr + srl %o0,23,%o0 + and %o0,0x1f,%o0 + .nonvolatile + .end + + .inline fp_class,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + orcc %g0,%o0,%g0 + be,pn %xcc,2f _C(x is +/-zero) + nop + sethi %hi(0x7ff00000),%o1 + sllx %o1,32,%o1 _C(o1 gets 7ff00000 00000000) + andcc %o0,%o1,%g0 _C(cc set by exp field of x) + bne,pt %xcc,1f _C(branch if normal or max exp) + nop + or %g0,1,%o0 + ba 2f _C(x is subnormal) + nop +1: + subcc %o0,%o1,%g0 + bge,pn %xcc,1f _C(branch if x is max exp) + nop + or %g0,2,%o0 + ba 2f _C(x is normal) + nop +1: + andncc %o0,%o1,%o0 _C(o0 gets significand) + bne,pn %xcc,1f _C(branch if NaN) + nop + or %g0,3,%o0 + ba 2f _C(x is infinity) + nop +1: + sethi %hi(0x00080000),%o1 + sllx %o1,32,%o1 + andcc %o0,%o1,%g0 _C(cc set by quiet/sig bit) + or %g0,4,%o0 _C(x is quiet NaN) + bne,pt %xcc,2f _C(branch if quiet bit is set) + nop + or %g0,5,%o0 _C(x is signaling NaN) +2: + .end + + .inline fp_classf,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + orcc %g0,%o0,%g0 + be,pn %icc,2f + nop +1: + sethi %hi(0x7f800000),%o1 + andcc %o0,%o1,%g0 + bne,pt %icc,1f + nop + or %g0,1,%o0 + ba 2f _C(x is subnormal) + nop +1: + subcc %o0,%o1,%g0 + bge,pn %icc,1f + nop + or %g0,2,%o0 + ba 2f _C(x is normal) + nop +1: + bg,pn %icc,1f + nop + or %g0,3,%o0 + ba 2f _C(x is infinity) + nop +1: + sethi %hi(0x00400000),%o1 + andcc %o0,%o1,%g0 + or %g0,4,%o0 _C(x is quiet NaN) + bne,pt %icc,2f + 
nop + or %g0,5,%o0 _C(x is signaling NaN) +2: + .end + + .inline copysign,2 + fabsd %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + st %f2,[%sp+0x887] + ld [%sp+0x887],%o1 + srl %o1,31,%o1 + sll %o1,31,%o1 + or %o0,%o1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 _C(f1 stays unchanged) + .end + + .inline copysignf,2 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + st %f3,[%sp+0x887] + ld [%sp+0x887],%o1 + srl %o1,31,%o1 + sll %o1,31,%o1 + or %o0,%o1,%o0 + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + + .inline finite,1 + fabsd %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + sub %o0,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline finitef,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7f800000),%o1 + sub %o0,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline signbit,1 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,31,%o0 + .end + + .inline signbitf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,31,%o0 + .end + + .inline isinf,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + sllx %o1,32,%o1 + sub %o0,%o1,%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srlx %o0,63,%o0 + .end + + .inline isinff,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7f800000),%o1 + sub %o0,%o1,%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srl %o0,31,%o0 + .end + + .inline isnan,1 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sllx %o0,1,%o0 _C(shift off sign bit; see 4837702) + srlx %o0,1,%o0 + sethi %hi(0x7ff00000),%o1 + sllx %o1,32,%o1 + sub %o1,%o0,%o0 + srlx %o0,63,%o0 + .end + + .inline isnanf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x80000000),%o2 _C(mask off sign bit) + andn %o0,%o2,%o0 + sethi %hi(0x7f800000),%o1 + sub %o1,%o0,%o0 + srl %o0,31,%o0 + .end + + .inline isnormal,1 + fabsd %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7ff00000),%o1 + sub %o0,%o1,%o2 _C(signbit(o2): 
finite) + sethi %hi(0x00100000),%o1 + sub %o0,%o1,%o1 _C(signbit(o1): subnormal or 0) + andn %o2,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline isnormalf,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x7f800000),%o1 + sub %o0,%o1,%o2 + sethi %hi(0x00800000),%o1 + sub %o0,%o1,%o1 + andn %o2,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline issubnormal,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sethi %hi(0x00100000),%o1 + sllx %o1,32,%o1 + sub %o0,%o1,%o1 + sub %g0,%o0,%o2 + or %o0,%o2,%o0 + and %o0,%o1,%o0 + srlx %o0,63,%o0 + .end + + .inline issubnormalf,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sethi %hi(0x00800000),%o1 + sub %o0,%o1,%o1 + sub %g0,%o0,%o2 + or %o0,%o2,%o0 + and %o0,%o1,%o0 + srl %o0,31,%o0 + .end + + .inline iszero,1 + fabsd %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srlx %o0,63,%o0 + .end + + .inline iszerof,1 + fabss %f1,%f1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sub %g0,%o0,%o1 + or %o0,%o1,%o0 + xnor %o0,%g0,%o0 + srl %o0,31,%o0 + .end + + .inline abs,1 + sra %o0,31,%o1 + xor %o0,%o1,%o0 + sub %o0,%o1,%o0 + sra %o0,0,%o0 _C(sign-extended 64-bit value) + .end + + .inline fabs,1 + fabsd %f0,%f0 + .end + + .inline fabsf,1 + fabss %f1,%f0 + .end + +! +! __nintf - f77 NINT(REAL*4) +! + .inline __nintf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + srl %o0,30-7,%g1 + sethi %hi(0x7fffff),%o2 + and %g1,0xff,%g1 _C(%g1 := biased exponent) + or %o2,%lo(0x7fffff),%o2 _C(%o2 := 0x7fffff) + sethi %hi(1<<22),%o4 _C(%o4 := 0x400000) + subcc %g1,127+31,%g0 _C(< 0 iff |x| < 2^31) + and %o0,%o2,%o3 _C(%o3 := mantissa) + bl 1f + nop + sethi %hi(0xcf000000),%o2 _C(%o2 := -2^31 in floating point) + sethi %hi(0x80000000),%g1 _C(%g1 := -2^31 in fixed point) + subcc %o0,%o2,%g0 _C(x == -2^31?) 
+ or %g1,%g0,%o0 _C(return -2^31 if x == -2^31) + be 0f + nop + fstoi %f1,%f0 _C(return result and trigger fp_invalid) + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 +0: + sra %o0,0,%o0 _C(%o0 := sign-extended 64-bit value) + ba 9f + nop +1: + add %o4,%o4,%o5 _C(%o5 := 0x800000) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + sra %o0,31-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,127,%g1 _C(%g1 := e) + srl %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + add %o3,%o4,%o3 _C(%o3 := mantissa + 0.5 in fixed point) + or %g0,23,%o0 _C(%o0 := 23) + subcc %o0,%g1,%o0 _C(%o0 := 23 - e) + bl 1f _C(if 0 <= e <= 23) + nop + srl %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) + ba 2f + nop +1: + sub %g0,%o0,%o0 _C(%o0 := e - 23) + sll %o3,%o0,%o0 +2: + xor %o0,%o2,%o0 + sra %o0,0,%o0 _C(%o0 := sign-extended 64-bit value) + and %o2,1,%o2 _C(%o2 := 0/1) + add %o0,%o2,%o0 +9: + .end + + .inline __il_nint,1 + ld [%o0],%o0 _C(%o0 := s*1.f*2^e) + sra %o0,0,%o0 _C(sign-extend to 64-bit %o0) + srlx %o0,31-8,%g1 + or %g0,1,%o2 + sllx %o2,23-1,%o4 _C(%o4 := 0x00000000 00400000) + and %g1,0xff,%g1 _C(%g1 := biased exponent) + sllx %o2,63-0,%o2 + subcc %g1,127+63,%g0 _C(>= 0 iff |x| >= 2^63) + bl 0f + nop + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + fstox %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + ba 9f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x00000000 00800000) + srax %o2,63-23,%o2 + sub %g1,127+23,%o1 _C(%o1 >= 0 iff |x| >= 2^23) + xnor %o2,%g0,%o2 _C(%o2 := 0x00000000 007fffff) + and %o0,%o2,%o3 _C(%o3 := mantissa) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + srax %o0,63-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,127,%g1 _C(%g1 := e) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 
0.5 <= |x| < 1) + ba 2f + nop +1: + brlz,pt %o1,3f + nop _C(2^23 <= |x| < 2^63) + sub %g1,23,%o0 _C(%o0 := e - 23) + sllx %o3,%o0,%o0 _C(%o0 := int(|x|)) + ba 2f + nop +3: + srlx %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + add %o3,%o4,%o3 _C(%o3 := mantissa w/HLB + 0.5 in fixed point) + or %g0,23,%o0 _C(%o0 := 23) + sub %o0,%g1,%o0 _C(%o0 := 23 - e) + srlx %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 +9: + .end + +! +! __i_dnnt - f77 NINT(REAL*8) +! + .inline __i_dnnt,1 + ldx [%o0],%o0 _C(%o0 := s*1.f*2^e) + srlx %o0,63-11,%g1 + or %g0,1,%o2 + stx %o0,[%sp+0x87f] _C(prepare for reload if x is out of range) + sllx %o2,52-1,%o4 _C(%o4 := 0x00080000 00000000) + and %g1,0x7ff,%g1 _C(%g1 := biased exponent) + sllx %o2,63-0,%o2 + subcc %g1,1023+32,%g0 _C(>= 0 iff |x| >= 2^32) + bl 0f + nop + ldd [%sp+0x87f],%f0 + ba 8f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x00100000 00000000) + srax %o2,63-52,%o2 + sub %g1,1023+30,%o1 _C(%o1 >= 0 iff |x| >= 2^30) + xnor %o2,%g0,%o2 _C(%o2 := 0x000fffff ffffffff) + and %o0,%o2,%o3 _C(%o3 := mantissa) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + srax %o0,63-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,1023,%g1 _C(%g1 := e) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + srlx %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + add %o3,%o4,%o3 _C(%o3 := mantissa w/HLB + 0.5 in fixed point) + or %g0,52,%o0 _C(%o0 := 52) + sub %o0,%g1,%o0 _C(%o0 := 52 - e) + srlx %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 + brlz,pt %o1,9f + nop + stx %o0,[%sp+0x87f] _C(2^30 <= |x| < 2^32) + ldd [%sp+0x87f],%f0 + fxtod %f0,%f0 +8: + fdtoi %f0,%f0 + st %f0,[%sp+0x87f] + ld [%sp+0x87f],%o0 + sra %o0,0,%o0 _C(%o0 := sign-extended 64-bit value) +9: + .end + + .inline __il_dnnt,1 + ldx [%o0],%o0 _C(%o0 := s*1.f*2^e) + srlx %o0,63-11,%g1 + or %g0,1,%o2 + 
sllx %o2,52-1,%o4 _C(%o4 := 0x00080000 00000000) + and %g1,0x7ff,%g1 _C(%g1 := biased exponent) + sllx %o2,63-0,%o2 + subcc %g1,1023+63,%g0 _C(>= 0 iff |x| >= 2^63) + bl 0f + nop + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + fdtox %f0,%f0 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + ba 9f + nop +0: + add %o4,%o4,%o5 _C(%o5 := 0x00100000 00000000) + srax %o2,63-52,%o2 + sub %g1,1023+52,%o1 _C(%o1 >= 0 iff |x| >= 2^52) + xnor %o2,%g0,%o2 _C(%o2 := 0x000fffff ffffffff) + and %o0,%o2,%o3 _C(%o3 := mantissa) + or %o3,%o5,%o3 _C(%o3 := mantissa w/hidden leading bit) + srax %o0,63-0,%o2 _C(%o2 := 0/-1 = copysign(0.5, x) - 0.5) + subcc %g1,1023,%g1 _C(%g1 := e) + bge 1f _C(if |x| >= 1.0) + nop + subcc %g1,-1,%g0 + or %g0,0,%o0 _C(return 0 if |x| < 0.5) + bne 2f + nop + or %g0,1,%o0 _C(return 1 if 0.5 <= |x| < 1) + ba 2f + nop +1: + brlz,pt %o1,3f + nop _C(2^52 <= |x| < 2^63) + sub %g1,52,%o0 _C(%o0 := e - 52) + sllx %o3,%o0,%o0 _C(%o0 := int(|x|)) + ba 2f + nop +3: + srlx %o4,%g1,%o4 _C(%o4 := 0.5 in fixed point) + add %o3,%o4,%o3 _C(%o3 := mantissa w/HLB + 0.5 in fixed point) + or %g0,52,%o0 _C(%o0 := 52) + sub %o0,%g1,%o0 _C(%o0 := 52 - e) + srlx %o3,%o0,%o0 _C(%o0 := int(|x| + 0.5)) +2: + xor %o0,%o2,%o0 + sub %o0,%o2,%o0 +9: + .end + +ifdef(`ARCH_v9a', `dnl + .inline __aintf,1 + fzeros %f4 _C(0) + fnegs %f4,%f8 _C(-0) + fabss %f1,%f6 _C(|x|) + sethi %hi(0x4b000000),%o2 + st %o2,[%sp+0x87f] + ld [%sp+0x87f],%f2 _C(2^23) + fcmps %fcc0,%f6,%f2 + fmovsuge %fcc0,%f4,%f6 _C(|x| < 2^23 ? |x| : 0) + fstoi %f6,%f6 _C(truncate to integer) + fitos %f6,%f6 + fadds %f1,%f4,%f2 _C(x + 0) + fmovsuge %fcc0,%f2,%f6 _C(|x| < 2^23 ? truncf(|x|) : x + 0) + fands %f1,%f8,%f0 _C(copysignf(0, x)) + fors %f0,%f6,%f0 _C(restore sign of x) + .end + + .inline __aint,1 + fzero %f4 _C(0) + fnegd %f4,%f8 _C(-0) + fabsd %f0,%f6 _C(|x|) + sethi %hi(0x43300000),%o2 + sllx %o2,32,%o2 + stx %o2,[%sp+0x87f] + ldd [%sp+0x87f],%f2 _C(2^52) + fcmpd %fcc0,%f6,%f2 + fmovduge %fcc0,%f4,%f6 _C(|x| < 2^52 ? 
|x| : 0) + fdtox %f6,%f6 _C(truncate to integer) + fxtod %f6,%f6 + faddd %f0,%f4,%f2 _C(x + 0) + fmovduge %fcc0,%f2,%f6 _C(|x| < 2^52 ? trunc(|x|) : x + 0) + fand %f0,%f8,%f0 _C(copysign(0, x)) + for %f0,%f6,%f0 _C(restore sign of x) + .end + +')dnl +dnl!float +dnl!__anintf(float x) { +dnl! unsigned u = *(unsigned *) &x; +dnl! unsigned v = 0x95 - (unsigned) ((u >> 23U) & 0xffU); +dnl! unsigned t = 1U << v; +dnl! unsigned s = t - 1; +dnl! /* +dnl! * v := 22 - e +dnl! */ +dnl! if (v < 23U) { /* 0 <= e <= 22 */ +dnl! t &= u; +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (v == 23U) { /* e == -1 */ +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (*(int *) &v > 23) /* e <= -2 */ +dnl! u &= 0x80000000; +dnl! return *(float *) &u; +dnl!} + .inline __anintf,1 + st %f1,[%sp+0x87f] + ld [%sp+0x87f],%o0 + or %g0,1,%o1 + srl %o0,23,%g1 + and %g1,0xff,%g1 + sub %g0,%g1,%g1 + add %g1,0x95,%g1 + subcc %g1,23,%g0 + sll %o1,%g1,%o1 + sub %o1,1,%o2 + bcs 1f + nop + be 2f + nop + bl 3f + nop + sethi %hi(0x80000000),%o1 + and %o0,%o1,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + st %o0,[%sp+0x87f] + ld [%sp+0x87f],%f0 + .end + +dnl!double +dnl!__anint(double x) { +dnl! unsigned long long u = *(unsigned long long *) &x; +dnl! unsigned v = 0x432 - (unsigned) ((u >> 52U) & 0x7ffU); +dnl! unsigned long long t = 1ULL << v; +dnl! unsigned long long s = t - 1; +dnl! /* +dnl! * v := 51 - e +dnl! */ +dnl! if (v < 52U) { /* 0 <= e <= 51 */ +dnl! t &= u; +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (v == 52U) { /* e == -1 */ +dnl! u += t; +dnl! u &= ~s; +dnl! } +dnl! else if (*(int *) &v > 52) /* e <= -2 */ +dnl! u = (u >> 63) << 63; +dnl! 
return *(double *) &u; +dnl!} + .inline __anint,1 + std %f0,[%sp+0x87f] + ldx [%sp+0x87f],%o0 + or %g0,1,%o1 + srlx %o0,52,%g1 + and %g1,0x7ff,%g1 + sub %g0,%g1,%g1 + add %g1,0x432,%g1 + subcc %g1,52,%g0 + sllx %o1,%g1,%o1 + sub %o1,1,%o2 + bcs,pt %icc,1f + nop + be,pt %icc,2f + nop + bl,pt %icc,3f + nop + srlx %o0,63,%o0 + sllx %o0,63,%o0 + ba 3f + nop +1: + and %o0,%o1,%o1 +2: + add %o0,%o1,%o0 + andn %o0,%o2,%o0 +3: + stx %o0,[%sp+0x87f] + ldd [%sp+0x87f],%f0 + .end + + .inline __r_dim,2 +ifdef(`ARCH_v9a', `dnl + fzeros %f4 +',`dnl + st %g0,[%sp+0x87f] + ld [%sp+0x87f],%f4 +')dnl + ld [%o0],%f0 + ld [%o1],%f2 + fcmps %fcc0,%f0,%f2 + fmovsule %fcc0,%f4,%f2 + fsubs %f0,%f2,%f0 + fmovsule %fcc0,%f4,%f0 + .end + + .inline __d_dim,2 +ifdef(`ARCH_v9a', `dnl + fzero %f4 +',`dnl + stx %g0,[%sp+0x87f] + ldd [%sp+0x87f],%f4 +')dnl + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fcmpd %fcc0,%f0,%f2 + fmovdule %fcc0,%f4,%f2 + fsubd %f0,%f2,%f0 + fmovdule %fcc0,%f4,%f0 + .end + +ifdef(`ARCH_v9a', `dnl + .inline __f95_signf,2 + fzeros %f2 + fnegs %f2,%f2 + ld [%o0],%f0 + ld [%o1],%f1 + fabss %f0,%f0 + fands %f1,%f2,%f1 + fors %f0,%f1,%f0 + .end + + .inline __f95_sign,2 + fzero %f4 + fnegd %f4,%f4 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%f2 + ld [%o1+4],%f3 + fabsd %f0,%f0 + fand %f2,%f4,%f2 + for %f0,%f2,%f0 + .end + +',`dnl + .inline __f95_signf,2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sra %o1,0,%o1 _C(sign-extend to 64-bit %o1) + fmovrslz %o1,%f1,%f0 + .end + + .inline __f95_sign,2 + ld [%o0],%f0 + ld [%o0+4],%f1 + ld [%o1],%o1 + fabsd %f0,%f0 + fnegd %f0,%f2 + sra %o1,0,%o1 _C(sign-extend to 64-bit %o1) + fmovrdlz %o1,%f2,%f0 + .end + +')dnl + .inline __r_sign,2 + ld [%o0],%f0 + ld [%o1],%o1 + fabss %f0,%f0 + fnegs %f0,%f1 + sub %o1,1,%o0 + and %o1,%o0,%o1 _C(%o1 < 0 iff A2 is negative and not -0) + sra %o1,0,%o1 _C(sign-extend to 64-bit %o1) + fmovrslz %o1,%f1,%f0 + .end + + .inline __d_sign,2 + ldd [%o0],%f0 + ldx [%o1],%o1 + 
fabsd %f0,%f0 + fnegd %f0,%f2 + sub %o1,1,%o0 + and %o1,%o0,%o1 _C(%o1 < 0 iff A2 is negative and not -0) + fmovrdlz %o1,%f2,%f0 + .end + +! +! complex __Fc_div_f(complex a, complex b); +! + .inline __Fc_div_f,0 +ifdef(`ARCH_v9a', `dnl + fzeros %f4 +',`dnl + st %g0,[%sp+0x87f] + ld [%sp+0x87f],%f4 +')dnl + fcmps %fcc0,%f3,%f4 _C(will trigger fp_invalid on SNaN) + fbne,pn %fcc0,1f + nop + fdivs %f0,%f2,%f0 + fdivs %f1,%f2,%f1 + ba 2f + nop +1: + sethi %hi(0x3ff00000),%o0 _C(the cg inliner circa Lionel FCS) + sllx %o0,32,%o0 _C([aka WS6U1 but not before] maps) + stx %o0,[%sp+0x87f] _C(the idiom to an LDDF of 1.0) + ldd [%sp+0x87f],%f16 _C(from a constant pool) + fsmuld %f2,%f2,%f4 _C(f4/5 gets reb**2) + fsmuld %f3,%f3,%f6 _C(f6/7 gets imb**2) + fsmuld %f1,%f3,%f8 _C(f8/9 gets ima*imb) + fsmuld %f0,%f2,%f10 _C(f10/11 gets rea*reb) + faddd %f6,%f4,%f6 _C(f6/7 gets reb**2+imb**2) + fdivd %f16,%f6,%f6 _C(f6/7 gets 1/(reb**2+imb**2)) + faddd %f10,%f8,%f10 _C(f10/11 gets rea*reb+ima*imb) + fsmuld %f1,%f2,%f12 _C(f12/13 gets ima*reb) + fmuld %f10,%f6,%f10 _C(f10/11 gets rec) + fsmuld %f0,%f3,%f14 _C(f14/15 gets rea*imb) + fsubd %f12,%f14,%f14 _C(f14/15 gets ima*reb-rea*imb) + fmuld %f14,%f6,%f6 _C(f6/7 gets imc) + fdtos %f10,%f0 _C(f0 gets rec) + fdtos %f6,%f1 _C(f1 gets imc) +2: + .end + diff --git a/usr/src/libm/wos/Integration.log b/usr/src/libm/wos/Integration.log new file mode 100644 index 0000000..7c99446 --- /dev/null +++ b/usr/src/libm/wos/Integration.log @@ -0,0 +1,140 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "@(#)Integration.log 1.80 06/01/31 SMI" +# +# This log documents *only* bug fixes for Solaris WOS. +# Use libm/Integration.log for bug fixes for the current compiler train. +# +# Integration.log format: +# +# Lines starting with # are comment lines +# +# Bug-fix lines have this format: +# BugID STATUS Synopsis +# +# Bugs in any Solaris build must have a valid BugId, otherwise BugID may be +# in the form of YYMMDDX where YYMMDD is the day the fix is integrated and +# X is an upper-case letter. +# +# The only STATUS we have so far is FIXED. 
+# +# Example: +# +# 4312345 FIXED test case 47 dumps core + +4905282 FIXED remquo can return quotient bits with the wrong sign +030817A FIXED hypot(5,12) raises inexact improperly +030819A FIXED prototype isnan only when !defined(_STDC_C99) +4910901 FIXED tgammal(-Inf) returned -Inf and failed to raise FE_INVALID +4910956 FIXED SUSv2 math routines should always set errno when required +030826A FIXED remquo and remquof should not call _SVID_* anymore +4916300 FIXED nextafterl(LDBL_MIN,0.0L) failed to raise FE_UNDERFLOW +030903A FIXED LD/{cos,sin}hl.c cleanup +030905A FIXED LD/{sin,cos,tan,__rem_pio2}l.c to use 64-bit table values & argred +030909A FIXED compile libm with -xarch=v8plus +030910A FIXED {LD,Q}/erfl.c minor accuracy and speed improvement +4933436 FIXED C99 math library header changes caused build failure +4934744 FIXED sparcv9/llib-lm does not belong +4936167 FIXED cacosh{,f,l} C99 Annex G violations +4936171 FIXED clogl C99 Annex G violations +4944556 FIXED cacos delivers wrong result +4947982 FIXED clog and clogl can lose accuracy +4948551 FIXED cacosl/casinl/cacoshl/casinhl sometimes returns chopped pi/2 +4947145 FIXED historic math routines should be moved out of libc. 
+4951175 FIXED log1p(QNaN) can raise invalid on x86 when built correctly +4951240 FIXED cpow family of routines can be more accurate +031121A FIXED isnand should be in llib-lm as per LSARC/2003/658 +4964166 FIXED libmvec should be part of Solaris +4964810 FIXED SUNWlibm pkg description is misleading +4961179 FIXED libmvec should not depend on libm +4966464 FIXED cpowl can be more accurate +4967960 FIXED libmvec vsin(-0) sometimes returns +0 in v8plus mode +4973671 FIXED typedef of float_t and double_t must honor __FLT_EVAL_METHOD__ +4975159 FIXED clog in <complex.h> should be redirected via redefine_extname +4980302 FIXED cpowl can produce bogus results +4986728 FIXED cpowf can deliver 0 or Inf real/imag part with the wrong sign +4987768 FIXED x86: cpowl delivered inaccurate results unexpectedly +5052359 FIXED *x86* frexp(any subnormal) gets the same wrong exponent -16446 +5052857 FIXED *x86* powl(-0.L,-1.0L) returned +Inf in error +5063618 FIXED "using std::abs;" missing from <math.h> +5064741 FIXED *x86* acosl(large) and asinl(large) raise spurious exceptions +5065197 FIXED *x86* pow(NAN,NAN) raises invalid exception in C99 mode in error +040317A FIXED *x86* libm.il: removed __swap{EX,TE,RD}; __swapRP->locallibm.il +040503A FIXED *x86* made __fenv_{get,set}fsr MXCSR-aware +040604A FIXED *x86* libm.il: __swapRD->__swap87RD->locallibm.il for LD functions +040604B FIXED log10_2hi in log10l.c wrong all along +040608A FIXED made libmvec C source either-endian +040615A FIXED removed reference to _lib_version from sqrtf +5067038 FIXED *x86* FE_DFL_ENV does not match actual start-up FP environment +5069838 FIXED *x86* libm.so references SUNWprivate libc symbols __swap{EX,RD,TE} +5074578 FIXED sys/ieeefp.h fenv.h iso/math_c99.h need to be made amd64-aware +5075799 FIXED *x86* expl(subnormal) raises spurious underflow +5075800 FIXED *x86* coshl(subnormal) raises spurious underflow +040727A FIXED *x86* made LD/{__{sin,cos,sincos,tan},cbrt}l.c 64-bit safe +4836936 FIXED assembly-coded routines 
should avoid raws +5093901 FIXED *x86* libmvec should be part of Solaris 10 +5106211 FIXED iso/math_c99.h violates C++ compiler rule about pragma placement +5106831 FIXED prototype mismatch in fenv.h when FEX_SIGNAL handler used +6179526 FIXED struct exception incompatible with class exception +6183512 FIXED math headers need to also key on __C99FEATURES__ for C99 features +6183572 FIXED *x86* SSE+SSE2 support needed in fex_* routines +6193239 FIXED *x86* vsincos can return bogus values +041121A FIXED added AMD copyright to SUNWlibmsr +041122A FIXED removed SSE2LINT; 32-bit x86 compile -xO2 -> -O +050113A FIXED retired log.S; updated fabs.S+atan2f.c+_TBL_log.c+log{,2,10}.c +050211A FIXED remainder(x,y) shouldn't set errno when x or y is NaN +050211A FIXED remainderf(x,y) should never set errno +050226A FIXED *x86* remainder must empty fp stack before calling _SVID_libm_err +050301A FIXED *x86* don't use fucomi in remainder +6234690 FIXED libm.so.2 expf(-100.0F) on US3 very slow even with -fns=yes +6231830 FIXED log functions raise spurious inexact exceptions +050521A FIXED make setting of errno match documentation +050525A FIXED *x86* clean up pow code; avoid some spurious inexact exceptions +050531A FIXED remove lorder/tsort; some _TBL_* objects are sparc-only +6231929 FIXED libm contains local symbols mdb dislikes +050621A FIXED nextafter.c: mask off sign bit for under/overflow detection +6285517 FIXED gcc 3.4.x/4.x c99 math intrinsics support needed +050622A FIXED __vhypot.c: fixed typo causing NaN to be returned in error +050622B FIXED __vrhypot.c: strip the sign of NaN returned +6290646 FIXED svvs/bsst - matherr failed due to wrong function name of 'lgamma' +050628A FIXED *amd64* anintl(-int+0.5L) rounds incorrectly +050630A FIXED __vrhypot.S: strip the sign of NaN returned +050708A FIXED rint.c: __swapRP is needed only when the x87 fpstack is in use +050824A FIXED undo 6233245 for non-sparc platforms +6333665 FIXED CDDL block needed in math headers +6333690 
FIXED Put faster trig functions into libm +6334568 FIXED porting error in sincos +6336688 FIXED log(x) commits rounding error exceeding 1 ulp +051205A FIXED improve performance of some single precision functions +051212B FIXED add assembly coded __vsqrtf to amd64 libmvec +051226A FIXED fix x86 RP mode problems +051231A FIXED remove __ppc, HP_UX, _LIBM_CONSTANTS +060104A FIXED expf: fix performance regression for some small arguments +060115A FIXED remove unnecessary internal inline templates +060115B FIXED hide internal labels in x86 assembly code +060122A FIXED performance improvements in atan2, exp, rint +6375880 FIXED add CDDL to libm and libmvec source +060124A FIXED accuracy improvement in exp, performance improvement in fmod +060126A FIXED clean up __libx_errno.c and remove it from libsunmath +060126B FIXED performance improvements in remainder, scalbn, isnan, ilogb diff --git a/usr/src/libm/wos/Makefile b/usr/src/libm/wos/Makefile new file mode 100644 index 0000000..2cb5654 --- /dev/null +++ b/usr/src/libm/wos/Makefile @@ -0,0 +1,1124 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "@(#)Makefile 1.76 06/01/31 SMI" +# + +MCPU:sh = uname -p + +DESTDIR = destdir-$(MCPU) + +CG_sparc = v8plus +CG_i386 = f80387 +CG = $(CG_$(MCPU)) +CHIP = ultra +XARCH = $(CG) + +VnA = sparcv8plus+vis +VnB = sparcv9+vis2 +IA1 = libmvec +ISA_DIRS_sparc = $(VnA) $(VnB) +ISA_DIRS_i386 = $(IA1) +ISA_DIRS = $(ISA_DIRS_$(MCPU)) +FLTRTYPE_sparc = -f +FLTRTYPE_i386 = -f +FLTRTYPE = $(FLTRTYPE_$(MCPU)) +FLTRPATH_sparc = $$ORIGIN/cpu/$$ISALIST/libmvec_isa.so.1 +FLTRPATH_i386 = $$ORIGIN/libmvec/$$HWCAP +FLTRPATH = $(FLTRPATH_$(MCPU)) + +# +# system commands +# + +CPP_CMD = $(CC) -E -Xs +INSTALL = /usr/sbin/install +INST_CMD = $(INSTALL) $(OWNED_BY) -m 644 -f $(@D) $< +INST_EXEC_CMD = $(INSTALL) $(OWNED_BY) -m 755 -f $(@D) $< +LD = /usr/ccs/bin/ld +M4 = /usr/ccs/bin/m4 +MCS = /usr/ccs/bin/mcs +STRIP = /usr/ccs/bin/strip +TR = /usr/bin/tr + +# +# compiler-related symbols and flags +# + +OSREL:sh = /usr/bin/uname -r +POUND_SIGN:sh = /usr/bin/printf "\\043" +BUILDDATE:sh = /usr/bin/date +%m/%d/%Y +BUILDNAME = Generic +PATCHID = +BUILDINFO = SunOS $(OSREL) $(BUILDNAME) $(PATCHID) +LIBM_VERSION = "@($(POUND_SIGN))RELEASE VERSION $(BUILDINFO) $(BUILDDATE)" + +FPDEF_sparc = -DCG89 -DARCH_v8plus +FPDEF_i386 = -D__i386 +FPDEF = $(FPDEF_$(MCPU)) + +# +# above and beyond what CDEF defines; so -DELFOBJ -DPIC is removed +# +ASDEF = -D_ASM $(FPDEF) + +ASOPT_sparc = -xarch=$(XARCH) -Kpic +ASOPT_i386 = +ASOPT = $(ASOPT_$(MCPU)) + +ASSUFFIX_sparc = S +ASSUFFIX_i386 = s +ASSUFFIX = $(ASSUFFIX_$(MCPU)) + +CDEF_sparc = +CDEF_i386 = +CDEF = -DELFOBJ -DPIC -D_STDC_C99= -D_REENTRANT \ + -DLIBM_MT_FEX_SYNC $(CDEF_$(MCPU)) + +CINC = -I../inc -I$(SRC)/C + +# +# __INLINE turns on the use of __inline_sqrt in sqrt.c +# +COPT_sparc = -D__INLINE -xO4 -xregs=no%appl -xarch=$(XARCH) -xchip=ultra3 +COPT_i386 = -D__INLINE -O +COPT = -Xa -Kpic -xstrconst $(COPT_$(MCPU)) $(LM_IL) + +# +# $(LDEND) must come *last* when link editing with versioning; otherwise +# _lib_version, isnan, logb, nextafter, 
scalb will disappear from version +# information due to the fact that they are also part of libc +# +MAPFMV1 = mapfiles/libmv1-common +MAPFMV2 = mapfiles/libmv2-$(MCPU) +MAPFMVEC = mapfiles/libmvec-common +MAPFMVECA = mapfiles/libmvec-v8plusa +MAPFMVECB = mapfiles/libmvec-v8plusb +MAPFMVEC1 = mapfiles/libmvec-ia1 +MAPFILES_sparc = $(MAPFMV1) $(MAPFMV2) $(MAPFMVEC) $(MAPFMVECA) $(MAPFMVECB) +MAPFILES_i386 = $(MAPFMV1) $(MAPFMV2) $(MAPFMVEC) $(MAPFMVEC1) +MAPFILES = $(MAPFILES_$(MCPU)) +LDOPT0 = -z redlocsym +LDOPT = -dy -G $(LDOPT0) -z combreloc -z text -z defs -h $(@F) +LDEND = -lc + +APPFLAGS = $(ASDEF) -Wp,-P +ASFLAGS = $(ASOPT) +CFLAGS = $(COPT) +CPPFLAGS = $(CDEF) $(CINC) +LINTFLAGS = -Xa -u + +OWNED_BY = -u root + +M4FLAGS = -D__STDC__ -DELFOBJ -DPIC + +# +# SVR4 hack +# +CMD.S = $(CPP_CMD) $(APPFLAGS) $(CPPFLAGS) $< > $(CG)/$(@F:%.o=%.s); \ + $(COMPILE.S) -o $@ $(CG)/$(@F:%.o=%.s); \ + $(RM) $(CG)/$(@F:%.o=%.s) + +# +# Directories +# + +MDIR = $(MCPU) +USR = $(DESTDIR)/usr +USRLIB = $(USR)/lib +ROOTLIB = $(DESTDIR)/lib +SRC = ../src + +LDBLDIR_sparc = Q +LDBLDIR_i386 = LD +LDBLDIR = $(LDBLDIR_$(MCPU)) + +# +# Symbols and flags +# + +LLIB_LM = llib-lm.ln +M_LN = $(LLIB_LM:%=$(CG)/misc/%) + +LIBM_IL_SRC = $(SRC)/$(MDIR)/common/libm.m4 + +LOCALLIBM_IL = locallibm.il +LM_IL = $(LOCALLIBM_IL:%=$(CG)/misc/%) + +M9X_IL = __fenv_$(MCPU).il +m9x_IL = $(M9X_IL:%=$(SRC)/m9x/%) + +COBJS_i386 = \ + #end + +COBJS_sparc = \ + _TBL_atan.o \ + _TBL_exp2.o \ + _TBL_log.o \ + _TBL_log2.o \ + _TBL_tan.o \ + __tan.o \ + __tanf.o \ + #end +# +# atan2pi.o and sincospi.o is for internal use only +# +COBJS = \ + $(COBJS_$(MCPU)) \ + __cos.o \ + __lgamma.o \ + __libx_errno.o \ + __rem_pio2.o \ + __rem_pio2m.o \ + __sin.o \ + __sincos.o \ + __xpg6.o \ + _lib_version.o \ + _SVID_error.o \ + _TBL_ipio2.o \ + _TBL_sin.o \ + acos.o \ + acosh.o \ + asin.o \ + asinh.o \ + atan.o \ + atan2.o \ + atan2pi.o \ + atanh.o \ + cbrt.o \ + ceil.o \ + cos.o \ + cosh.o \ + erf.o \ + exp10.o \ + exp2.o 
\ + expm1.o \ + floor.o \ + gamma.o \ + gamma_r.o \ + hypot.o \ + j0.o \ + j1.o \ + jn.o \ + lgamma.o \ + lgamma_r.o \ + log.o \ + log10.o \ + log1p.o \ + log2.o \ + logb.o \ + matherr.o \ + pow.o \ + scalb.o \ + signgam.o \ + significand.o \ + sin.o \ + sincos.o \ + sincospi.o \ + sinh.o \ + sqrt.o \ + tan.o \ + tanh.o \ + #end + +QOBJS_i386 = \ + #end + +# +# LSARC/2003/658 adds isnanl +# +QOBJS_sparc = \ + _TBL_atanl.o \ + _TBL_expl.o \ + _TBL_expm1l.o \ + _TBL_logl.o \ + finitel.o \ + isnanl.o \ + #end + +# +# atan2pil.o, ieee_funcl.o, rndintl.o, sinpil.o, sincospil.o +# are for internal use only +# +# LSARC/2003/279 adds the following: +# gammal.o 1 +# gammal_r.o 1 +# j0l.o 2 +# j1l.o 2 +# jnl.o 2 +# lgammal_r.o 1 +# scalbl.o 1 +# significandl.o 1 +# +QOBJS = \ + $(QOBJS_$(MCPU)) \ + __cosl.o \ + __lgammal.o \ + __poly_libmq.o \ + __rem_pio2l.o \ + __sincosl.o \ + __sinl.o \ + __tanl.o \ + _TBL_cosl.o \ + _TBL_ipio2l.o \ + _TBL_sinl.o \ + _TBL_tanl.o \ + acoshl.o \ + acosl.o \ + asinhl.o \ + asinl.o \ + atan2l.o \ + atan2pil.o \ + atanhl.o \ + atanl.o \ + cbrtl.o \ + copysignl.o \ + coshl.o \ + cosl.o \ + erfl.o \ + exp10l.o \ + exp2l.o \ + expl.o \ + expm1l.o \ + fabsl.o \ + floorl.o \ + fmodl.o \ + gammal.o \ + gammal_r.o \ + hypotl.o \ + ieee_funcl.o \ + ilogbl.o \ + j0l.o \ + j1l.o \ + jnl.o \ + lgammal.o \ + lgammal_r.o \ + log10l.o \ + log1pl.o \ + log2l.o \ + logbl.o \ + logl.o \ + nextafterl.o \ + powl.o \ + remainderl.o \ + rintl.o \ + rndintl.o \ + scalbl.o \ + scalbnl.o \ + signgaml.o \ + significandl.o \ + sincosl.o \ + sincospil.o \ + sinhl.o \ + sinl.o \ + sinpil.o \ + sqrtl.o \ + tanhl.o \ + tanl.o \ + #end + +ROBJS_i386 = \ + #end + +# +# LSARC/2003/658 adds isnanf +# +ROBJS_sparc = \ + __cosf.o \ + __sincosf.o \ + __sinf.o \ + isnanf.o \ + #end + +# +# atan2pif.o, sincosf.o, sincospif.o are for internal use only +# +# LSARC/2003/279 adds the following: +# besself.o 6 +# scalbf.o 1 +# gammaf.o 1 +# gammaf_r.o 1 +# lgammaf_r.o 1 +# 
significandf.o 1 +# +ROBJS = \ + $(ROBJS_$(MCPU)) \ + _TBL_r_atan_.o \ + acosf.o \ + acoshf.o \ + asinf.o \ + asinhf.o \ + atan2f.o \ + atan2pif.o \ + atanf.o \ + atanhf.o \ + besself.o \ + cbrtf.o \ + copysignf.o \ + cosf.o \ + coshf.o \ + erff.o \ + exp10f.o \ + exp2f.o \ + expf.o \ + expm1f.o \ + fabsf.o \ + floorf.o \ + fmodf.o \ + gammaf.o \ + gammaf_r.o \ + hypotf.o \ + ilogbf.o \ + lgammaf.o \ + lgammaf_r.o \ + log10f.o \ + log1pf.o \ + log2f.o \ + logbf.o \ + logf.o \ + nextafterf.o \ + powf.o \ + remainderf.o \ + rintf.o \ + scalbf.o \ + scalbnf.o \ + signgamf.o \ + significandf.o \ + sinf.o \ + sinhf.o \ + sincosf.o \ + sincospif.o \ + sqrtf.o \ + tanf.o \ + tanhf.o \ + #end + +# +# LSARC/2003/658 adds isnanf/isnanl +# +SOBJS_i386 = \ + __reduction.o \ + finitef.o \ + finitel.o \ + isnanf.o \ + isnanl.o \ + #end + +SOBJS_sparc = \ + #end + +SOBJS = \ + $(SOBJS_$(MCPU)) \ + copysign.o \ + exp.o \ + fabs.o \ + fmod.o \ + ilogb.o \ + isnan.o \ + nextafter.o \ + remainder.o \ + rint.o \ + scalbn.o \ + #end + +m9xsseOBJS_i386 = \ + __fex_hdlr.o \ + __fex_i386.o \ + __fex_sse.o \ + __fex_sym.o \ + fex_log.o \ + #end + +m9xsseOBJS_sparc = \ + #end + +m9xsseOBJS = $(m9xsseOBJS_$(MCPU)) + +m9xOBJS_i386 = \ + __fex_sse.o \ + feprec.o \ + #end + +m9xOBJS_sparc = \ + #end + +m9xOBJS = \ + $(m9xOBJS_$(MCPU)) \ + __fex_$(MCPU).o \ + __fex_hdlr.o \ + __fex_sym.o \ + fdim.o \ + fdimf.o \ + fdiml.o \ + feexcept.o \ + fenv.o \ + feround.o \ + fex_handler.o \ + fex_log.o \ + fma.o \ + fmaf.o \ + fmal.o \ + fmax.o \ + fmaxf.o \ + fmaxl.o \ + fmin.o \ + fminf.o \ + fminl.o \ + frexp.o \ + frexpf.o \ + frexpl.o \ + ldexp.o \ + ldexpf.o \ + ldexpl.o \ + llrint.o \ + llrintf.o \ + llrintl.o \ + llround.o \ + llroundf.o \ + llroundl.o \ + lrint.o \ + lrintf.o \ + lrintl.o \ + lround.o \ + lroundf.o \ + lroundl.o \ + modf.o \ + modff.o \ + modfl.o \ + nan.o \ + nanf.o \ + nanl.o \ + nearbyint.o \ + nearbyintf.o \ + nearbyintl.o \ + nexttoward.o \ + nexttowardf.o \ + nexttowardl.o 
\ + remquo.o \ + remquof.o \ + remquol.o \ + round.o \ + roundf.o \ + roundl.o \ + scalbln.o \ + scalblnf.o \ + scalblnl.o \ + tgamma.o \ + tgammaf.o \ + tgammal.o \ + trunc.o \ + truncf.o \ + truncl.o \ + #end + +complexOBJS = \ + cabs.o \ + cabsf.o \ + cabsl.o \ + cacos.o \ + cacosf.o \ + cacosh.o \ + cacoshf.o \ + cacoshl.o \ + cacosl.o \ + carg.o \ + cargf.o \ + cargl.o \ + casin.o \ + casinf.o \ + casinh.o \ + casinhf.o \ + casinhl.o \ + casinl.o \ + catan.o \ + catanf.o \ + catanh.o \ + catanhf.o \ + catanhl.o \ + catanl.o \ + ccos.o \ + ccosf.o \ + ccosh.o \ + ccoshf.o \ + ccoshl.o \ + ccosl.o \ + cexp.o \ + cexpf.o \ + cexpl.o \ + cimag.o \ + cimagf.o \ + cimagl.o \ + clog.o \ + clogf.o \ + clogl.o \ + conj.o \ + conjf.o \ + conjl.o \ + cpow.o \ + cpowf.o \ + cpowl.o \ + cproj.o \ + cprojf.o \ + cprojl.o \ + creal.o \ + crealf.o \ + creall.o \ + csin.o \ + csinf.o \ + csinh.o \ + csinhf.o \ + csinhl.o \ + csinl.o \ + csqrt.o \ + csqrtf.o \ + csqrtl.o \ + ctan.o \ + ctanf.o \ + ctanh.o \ + ctanhf.o \ + ctanhl.o \ + ctanl.o \ + k_atan2.o \ + k_atan2l.o \ + k_cexp.o \ + k_cexpl.o \ + k_clog_r.o \ + k_clog_rl.o \ + #end + +LIBMV1_SO_OBJS = \ + libmv1.o \ + #end + +LIBM_SO_OBJS = $(COBJS) $(ROBJS) $(QOBJS) $(SOBJS) $(m9xOBJS) $(complexOBJS) + +mvecOBJS = \ + __vTBL_atan1.o \ + __vTBL_atan2.o \ + __vTBL_rsqrt.o \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vatan.o \ + __vatan2.o \ + __vatan2f.o \ + __vatanf.o \ + __vc_abs.o \ + __vc_exp.o \ + __vc_log.o \ + __vc_pow.o \ + __vcos.o \ + __vcosbig.o \ + __vcosbigf.o \ + __vcosf.o \ + __vexp.o \ + __vexpf.o \ + __vhypot.o \ + __vhypotf.o \ + __vlog.o \ + __vlogf.o \ + __vpow.o \ + __vpowf.o \ + __vrem_pio2m.o \ + __vrhypot.o \ + __vrhypotf.o \ + __vrsqrt.o \ + __vrsqrtf.o \ + __vsin.o \ + __vsinbig.o \ + __vsinbigf.o \ + __vsincos.o \ + __vsincosbig.o \ + __vsincosbigf.o \ + __vsincosf.o \ + __vsinf.o \ + __vsqrt.o \ + __vsqrtf.o \ + __vz_abs.o \ + __vz_exp.o \ + __vz_log.o \ + __vz_pow.o \ + 
vatan2_.o \ + vatan2f_.o \ + vatan_.o \ + vatanf_.o \ + vc_abs_.o \ + vc_exp_.o \ + vc_log_.o \ + vc_pow_.o \ + vcos_.o \ + vcosf_.o \ + vexp_.o \ + vexpf_.o \ + vhypot_.o \ + vhypotf_.o \ + vlog_.o \ + vlogf_.o \ + vpow_.o \ + vpowf_.o \ + vrhypot_.o \ + vrhypotf_.o \ + vrsqrt_.o \ + vrsqrtf_.o \ + vsin_.o \ + vsincos_.o \ + vsincosf_.o \ + vsinf_.o \ + vsqrt_.o \ + vsqrtf_.o \ + vz_abs_.o \ + vz_exp_.o \ + vz_log_.o \ + vz_pow_.o \ + #end + +mvecaCOBJS = \ + __vTBL_atan1.o \ + __vTBL_atan2.o \ + __vTBL_rsqrt.o \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vcosbig.o \ + __vcosbigf.o \ + __vrem_pio2m.o \ + __vsinbig.o \ + __vsinbigf.o \ + __vsincosbig.o \ + __vsincosbigf.o \ + #end + +mvecaSOBJS = \ + __vatan.o \ + __vatan2.o \ + __vatan2f.o \ + __vatanf.o \ + __vcos.o \ + __vcosf.o \ + __vexp.o \ + __vexpf.o \ + __vhypot.o \ + __vhypotf.o \ + __vlog.o \ + __vlogf.o \ + __vpow.o \ + __vpowf.o \ + __vrhypot.o \ + __vrhypotf.o \ + __vrsqrt.o \ + __vrsqrtf.o \ + __vsin.o \ + __vsincos.o \ + __vsincosf.o \ + __vsinf.o \ + __vsqrt.o \ + __vsqrtf.o \ + #end + +mvecbCOBJS = \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vcosbig.o \ + __vcosbig_ultra3.o \ + __vrem_pio2m.o \ + __vsinbig.o \ + __vsinbig_ultra3.o \ + #end + +mvecbSOBJS = \ + __vcos_ultra3.o \ + __vlog_ultra3.o \ + __vsin_ultra3.o \ + __vsqrtf_ultra3.o \ + #end + +LIBMVEC_SO_OBJS = $(mvecOBJS) +LIBMVEC_VnA_OBJS = $(mvecaCOBJS) $(mvecaSOBJS) +LIBMVEC_VnB_OBJS = $(mvecbCOBJS) $(mvecbSOBJS) +LIBMVEC_IA1_OBJS = $(mvecOBJS) + +# +# Header files under $(SRC) +# + +CHDR = \ + libm.h \ + libm_macros.h \ + libm_protos.h \ + libm_synonyms.h \ + libm_thread.h \ + xpg6.h \ + #end + +QHDR = longdouble.h + +complexHDR = complex_wrapper.h + +m9xHDR = \ + fenv_synonyms.h \ + fex_handler.h \ + fma.h \ + regset.h \ + #end + +C_HDR = $(CHDR:%=$(SRC)/C/%) +Q_HDR = $(QHDR:%=$(SRC)/$(LDBLDIR)/%) +complex_HDR = $(complexHDR:%=$(SRC)/complex/%) +m9x_HDR = $(m9xHDR:%=$(SRC)/m9x/%) +HDRS = 
$(C_HDR) $(Q_HDR) $(complex_HDR) $(m9x_HDR) + +# +# Header files above and beyond +# + +_HEADERS = \ + complex.h \ + fenv.h \ + floatingpoint.h \ + iso/math_c99.h \ + iso/math_iso.h \ + math.h \ + sys/ieeefp.h \ + tgmath.h \ + #end + +HEADERS = $(_HEADERS:%=../inc/%) + +INCS = $(HEADERS) + +# +# Objects +# +LIBMV1_S = $(CG)/libm.so.1 +LIBM_P = +LIBM_S = $(CG)/libm.so.2 +LIBMS = $(LIBMV1_S) $(LIBM_S) + +LIBMVEC_S = $(CG)/libmvec.so.1 +LIBMVEC_VnA = $(VnA)/libmvec_isa.so.1 +LIBMVEC_VnB = $(VnB)/libmvec_isa.so.1 +LIBMVEC_IA1 = $(IA1)/libmvec_hwcap1.so.1 +LIBMVECS_sparc = $(LIBMVEC_S) $(LIBMVEC_VnA) $(LIBMVEC_VnB) +LIBMVECS_i386 = $(LIBMVEC_S) $(LIBMVEC_IA1) +LIBMVECS = $(LIBMVECS_$(MCPU)) + +LIBS = $(LIBMS) $(LIBMVECS) + +$(LIBM_P) := VARIANT = $(CG)/mobj_p +$(LIBMV1_S) := VARIANT = $(CG)/mobj_s +$(LIBM_S) := VARIANT = $(CG)/mobj_s + +$(LIBMVEC_S) := VARIANT = $(CG)/mvobj_s +$(LIBMVEC_VnA) := VARIANT = $(VnA)/mvobj_s +$(LIBMVEC_VnB) := VARIANT = $(VnB)/mvobj_s +$(LIBMVEC_IA1) := VARIANT = $(IA1)/mvobj_s + +OBJS_LIBMV1_SO = $(LIBMV1_SO_OBJS:%=$(VARIANT)/%) +OBJS_LIBM_SO = $(LIBM_SO_OBJS:%=$(VARIANT)/%) + +OBJS_LIBMVEC_SO = $(LIBMVEC_SO_OBJS:%=$(VARIANT)/%) +OBJS_LIBMVEC_VnA = $(LIBMVEC_VnA_OBJS:%=$(VARIANT)/%) +OBJS_LIBMVEC_VnB = $(LIBMVEC_VnB_OBJS:%=$(VARIANT)/%) +OBJS_LIBMVEC_IA1 = $(LIBMVEC_IA1_OBJS:%=$(VARIANT)/%) + +OBJS_M9XSSE = $(m9xsseOBJS:%=$(CG)/mobj_s/%) + +OBJS_LIB = \ + $(LIBM_SO_OBJS:%=$(CG)/mobj_p/%) \ + $(LIBM_SO_OBJS:%=$(CG)/mobj_s/%) \ + $(LIBMVEC_SO_OBJS:%=$(CG)/mvobj_s/%) \ + $(LIBMVEC_VnA_OBJS:%=$(VnA)/mvobj_s/%) \ + $(LIBMVEC_VnB_OBJS:%=$(VnB)/mvobj_s/%) \ + $(LIBMVEC_IA1_OBJS:%=$(IA1)/mvobj_s/%) \ + #end + +%.o := FPDEF_sparc += -DFPADD_TRAPS_INCOMPLETE_ON_NAN +%.o := CDEF_sparc += -DFPADD_TRAPS_INCOMPLETE_ON_NAN +%.o := CDEF_sparc += -DFDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE + +%.o := CDEF_i386 += -DCOMPARISON_MACRO_BUG + +$(CG)/mobj%.o := ASDEF += -DLIBM_BUILD +$(CG)/mobj%.o := CDEF += -DLIBM_BUILD +$(CG)/mobj%.o := COPT_sparc += -dalign + 
+$(CG)/mobj_p/%.o := ASDEF += -DPROF +$(CG)/mobj_p/%.o := CDEF += -DPROF +$(CG)/mobj_p/%.o := COPT += -p + +# +# __vatan.c, __vatan2.c, __vpow.c ,__vexp.c reference fabs; +# we need acomp to treat fabs as an intrinsic. +# +$(CG)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD +$(CG)/mv%.o := COPT += -W0,-xintrinsic +$(CG)/mv%.o := CDEF_i386 += -Dfabs=__fabs + +$(VnA)/mv%.o := CHIP = vis +$(VnA)/mv%.o := XARCH = v8plusa +$(VnA)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD + +$(VnB)/mv%.o := CHIP = vis +$(VnB)/mv%.o := XARCH = v8plusb +$(VnB)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD + +$(IA1)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD +$(IA1)/mv%.o := CDEF_i386 += -Dfabs=__fabs +$(IA1)/mv%.o := COPT_i386 += -xarch=sse2 + +$(OBJS_M9XSSE) := COPT_i386 = -xarch=sse2 + +dryrun := DR_BGN = -@echo ' +dryrun := DR_END = ' +dryrun := DR_NBGN = true || echo ' +dryrun := DR_NEND = ' + +CG_DIRS_i386 = mobj_p mobj_s mvobj_s misc +CG_DIRS_sparc = mobj_p mobj_s mvobj_s misc +CG_DIRS = $(CG_DIRS_$(MCPU)) + +DIRS_CG = \ + $(CG_DIRS:%=$(CG)/%) \ + $(ISA_DIRS:%=%/mvobj_s) \ + #end + +INST_HDIR = iso sys +INST_DIRS_sparc = $(ISA_DIRS:%=cpu/%) +INST_DIRS_i386 = $(ISA_DIRS) +INST_DIRS = . 
$(INST_DIRS_$(MCPU)) +DIRS_INST = $(INST_HDIR:%=$(USR)/include/%) $(INST_DIRS:%=$(ROOTLIB)/%) + +LIBS_INST_i386 = \ + $(LIBMS:$(CG)/%=$(ROOTLIB)/%) \ + $(LIBMVEC_S:$(CG)/%=$(ROOTLIB)/%) \ + $(LIBMVEC_IA1:%=$(ROOTLIB)/%) \ + #end +LIBS_INST_sparc = \ + $(LIBMS:$(CG)/%=$(ROOTLIB)/%) \ + $(LIBMVEC_S:$(CG)/%=$(ROOTLIB)/%) \ + $(LIBMVEC_VnA:$(VnA)/%=$(ROOTLIB)/cpu/$(VnA)/%) \ + $(LIBMVEC_VnB:$(VnB)/%=$(ROOTLIB)/cpu/$(VnB)/%) \ + #end +LIBS_INST = $(LIBS_INST_$(MCPU)) + +INST_FILES = $(LLIB_LM:%.ln=%) $(LLIB_LM) +FILES_INST = $(_HEADERS:%=$(USR)/include/%) $(INST_FILES:%=$(ROOTLIB)/%) + +# +# libm build rules +# + +.INIT: $(DIRS_CG) $(MAPFILES) + +.NO_PARALLEL: $(DIRS_CG) $(DIRS_INST) $(LIBS_INST) + +.PARALLEL: $(OBJS_LIB) + +.DONE: + -@echo $(CG) DONE + +all: $(DIRS_CG) .WAIT $(INCS) $(HDRS) $(M_LN) $(LIBS) + +dryrun: all + +install: all .WAIT $(DIRS_INST) .WAIT $(FILES_INST) $(LIBS_INST) + +clean: FRC + -$(RM) -r $(CG)/mobj_p $(CG)/mobj_s $(CG)/mvobj_s \ + $(ISA_DIRS:%=%/mvobj_s) $(LM_IL) + + +clobber: clean + -$(RM) -r $(CG) $(ISA_DIRS) + +$(CG)/misc/%.ln: % + $(LINT) -nvx $(LINTFLAGS) $(CPPFLAGS) -om$( $(CG)/$(@F).$(ASSUFFIX); \ + $(CPP_CMD) $(CPPFLAGS) $(CG)/$(@F).$(ASSUFFIX) | \ + sed -e 's/[ ]*$$//' -e '/^#/d' -e '/^$$/d' > $@; \ + $(RM) $(CG)/$(@F).$(ASSUFFIX) + -@echo $@ created with `wc -w < $@` words + +$(LIBM_P) $(LIBM_S): $(MAPFMV2) $$(OBJS_LIBM_SO) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMV2) \'; \ + echo -o '$(@:$(CG)/%=../%) \'; \ + echo $(OBJS_LIBM_SO:$(VARIANT)/%=%) | $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$, $(LDEND),'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMV2) \ + -o $(@:$(CG)/%=../%) \ + $(OBJS_LIBM_SO:$(VARIANT)/%=%) $(LDEND); \ + $(MCS) -d $(@:$(CG)/%=../%); $(STRIP) -x $(@:$(CG)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)$(DR_NEND) + -@echo BUILT $@ + 
+$(LIBMV1_S): $(MAPFMV1) $$(OBJS_LIBMV1_SO) + $(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) -M $${_d}/$(MAPFMV1) \ + -o $(@:$(CG)/%=../%) -F libm.so.2 $(OBJS_LIBMV1_SO:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(CG)/%=../%); $(STRIP) -x $(@:$(CG)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_S): $(MAPFMVEC) $$(OBJS_LIBMVEC_SO) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMVEC) \'; \ + echo $(FLTRTYPE) '$(FLTRPATH) \'; \ + echo -o '$(@:$(CG)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_SO:$(VARIANT)/%=%) | $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMVEC) \ + $(FLTRTYPE) '$(FLTRPATH)' \ + -o $(@:$(CG)/%=../%) \ + $(OBJS_LIBMVEC_SO:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(CG)/%=../%); $(STRIP) -x $(@:$(CG)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_VnA): $(MAPFMVECA) $$(OBJS_LIBMVEC_VnA) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo '-z endfiltee \'; \ + echo -M $${_d}/'$(MAPFMVECA) \'; \ + echo -o '$(@:$(VnA)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_VnA:$(VARIANT)/%=%) | \ + $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(VnA)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -z endfiltee \ + -M $${_d}/$(MAPFMVECA) \ + -o $(@:$(VnA)/%=../%) \ + $(OBJS_LIBMVEC_VnA:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(VnA)/%=../%); $(STRIP) -x $(@:$(VnA)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(VnA)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_VnB): $(MAPFMVECB) $$(OBJS_LIBMVEC_VnB) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMVECB) \'; \ + echo -o '$(@:$(VnB)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_VnB:$(VARIANT)/%=%) | \ + $(TR) -s ' ' 
'\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(VnB)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMVECB) \ + -o $(@:$(VnB)/%=../%) \ + $(OBJS_LIBMVEC_VnB:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(VnB)/%=../%); $(STRIP) -x $(@:$(VnB)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(VnB)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_IA1): $(MAPFMVEC) $(MAPFMVEC1) $$(OBJS_LIBMVEC_IA1) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMVEC1) \'; \ + echo -M $${_d}/'$(MAPFMVEC) \'; \ + echo -o '$(@:$(IA1)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_IA1:$(VARIANT)/%=%) | \ + $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(IA1)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMVEC1) \ + -M $${_d}/$(MAPFMVEC) \ + -o $(@:$(IA1)/%=../%) \ + $(OBJS_LIBMVEC_IA1:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(IA1)/%=../%); $(STRIP) -x $(@:$(IA1)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(IA1)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$$(VARIANT)/%.o: $(SRC)/mvec/$$(CHIP)/%.S + $(DR_BGN)$(CMD.S)$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/$(MDIR)/common/%.$(ASSUFFIX) $(C_HDR) + $(DR_BGN)$(CMD.S)$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/mvec/%.c $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/C/%.c $(C_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/$(LDBLDIR)/%.c $(C_HDR) $(Q_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/R/%.c $(C_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/complex/%.c $(C_HDR) $(complex_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/m9x/%.c $(C_HDR) $(m9x_HDR) $(HEADERS) $(m9x_IL) $(LM_IL) + $(DR_BGN)$(COMPILE.c) $(m9x_IL) 
-o $@ $<$(DR_END) + +$(DIRS_CG): + $(INSTALL) -d -m 0775 $@ + +$(DIRS_INST): + $(INSTALL) -d $(OWNED_BY) $@ + +$(USR)/include/%.h: ../inc/%.h + $(INST_CMD) + +$(ROOTLIB)/libm.so.%: $(CG)/libm.so.% + $(INST_EXEC_CMD) + @if [ $< = $(LIBM_S) ]; then \ + (cd $(@D); $(RM) libm.so; ln -s $(@F) libm.so) \ + fi + +$(ROOTLIB)/libmvec.so.%: $(CG)/libmvec.so.% + $(INST_EXEC_CMD) + @cd $(@D); $(RM) libmvec.so; ln -s $(@F) libmvec.so + +$(ROOTLIB)/cpu/%.so.1: %.so.1 + $(INST_EXEC_CMD) + +$(ROOTLIB)/$(IA1)/%.so.1: $(IA1)/%.so.1 + $(INST_EXEC_CMD) + +$(ROOTLIB)/%-lm: %-lm + $(INST_CMD) + +$(ROOTLIB)/%:: $(CG)/misc/% + $(INST_CMD) + +FRC: diff --git a/usr/src/libm/wos/llib-lm b/usr/src/libm/wos/llib-lm new file mode 100644 index 0000000..1b015f5 --- /dev/null +++ b/usr/src/libm/wos/llib-lm @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)llib-lm 1.8 05/10/08 SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#undef __PRAGMA_REDEFINE_EXTNAME +#include <math.h> +#if defined(_STDC_C99) +#undef isnan +extern int isnan(double); +extern int isnand(double); /* LSARC/2003/670 */ +typedef union _h_val { + unsigned long _i[2]; + double _d; +} _h_val; +extern const _h_val __huge_val; +#endif +#include <fenv.h> +#include <complex.h> +#undef clog /* drop any clog macro before declaring the function */ +extern double complex clog(double complex); diff --git a/usr/src/libm/wos/mapfiles/libmv1-common b/usr/src/libm/wos/mapfiles/libmv1-common new file mode 100644 index 0000000..d0b35d2 --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmv1-common @@ -0,0 +1,205 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmv1-common 1.4 06/01/31 SMI +# +# Interface definition for libm.so.1 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook.
Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". +# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + __atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +SUNWprivate_1.2 { + global: + __libm_errno; # SC3.0.1 
-lmopt +} SUNWprivate_1.1; + +SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/libm/wos/mapfiles/libmv2-i386 b/usr/src/libm/wos/mapfiles/libmv2-i386 new file mode 100644 index 0000000..3eead9d --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmv2-i386 @@ -0,0 +1,692 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmv2-i386 1.8 06/01/31 SMI +# +# Interface definition for libm.so.2 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook. Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". 
+# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +SUNW_1.3 { + global: + __isnanf; #LSARC/2003/658 + __isnanl; #LSARC/2003/658 + _isnan; #LSARC/2003/658 + _isnand; #LSARC/2003/658 + _isnanf; #LSARC/2003/658 + _logb; #LSARC/2003/658 + _modf; #LSARC/2003/658 + _modff; #LSARC/2003/658 + _nextafter; #LSARC/2003/658 + _scalb; #LSARC/2003/658 + isnand; #LSARC/2003/658 + isnanf; #LSARC/2003/658 + isnanl; #LSARC/2003/658 +} SUNW_1.2; + +SUNW_1.2 { + global: + __acoshf; + __acoshl; + __asinhf; + __asinhl; + __atanhf; + __atanhl; + __cabs; + __cabsf; + __cabsl; + __cacos; + __cacosf; + __cacosh; + __cacoshf; + __cacoshl; + __cacosl; + __carg; + __cargf; + __cargl; + __casin; + __casinf; + __casinh; + __casinhf; + __casinhl; + __casinl; + __catan; + __catanf; + __catanh; + __catanhf; + __catanhl; + __catanl; + __cbrtf; + __cbrtl; + __ccos; + __ccosf; + __ccosh; + __ccoshf; + __ccoshl; + __ccosl; + __cexp; + __cexpf; + __cexpl; + __cimag; + __cimagf; + __cimagl; + __clog; + __clogf; + __clogl; + __conj; + __conjf; + __conjl; + __copysignf; + __copysignl; + __cpow; + __cpowf; + __cpowl; + __cproj; + __cprojf; + __cprojl; + __creal; + __crealf; + __creall; + __csin; + __csinf; + __csinh; + __csinhf; + __csinhl; + __csinl; + __csqrt; + __csqrtf; + __csqrtl; + __ctan; + __ctanf; + __ctanh; + __ctanhf; + __ctanhl; + __ctanl; + __erfcf; + __erfcl; + __erff; + __erfl; + __exp2; + __exp2f; + __exp2l; + __expm1f; + __expm1l; + __fdim; + __fdimf; + __fdiml; + __feclearexcept; + __fegetenv; + __fegetexceptflag; + __fegetprec; #LSARC/1996/175 + __fegetround; + __feholdexcept; + __fenv_dfl_env; #LSARC/1996/175 + __feraiseexcept; + __fesetenv; + __fesetexceptflag; + __fesetprec; #LSARC/1996/175 + __fesetround; + __fetestexcept; + __feupdateenv; + 
__fex_get_handling; #LSARC/1996/175 + __fex_get_log; #LSARC/1996/175 + __fex_get_log_depth; #LSARC/1996/175 + __fex_getexcepthandler; #LSARC/1996/175 + __fex_log_entry; #LSARC/1996/175 + __fex_merge_flags; #LSARC/1996/175 + __fex_set_handling; #LSARC/1996/175 + __fex_set_log; #LSARC/1996/175 + __fex_set_log_depth; #LSARC/1996/175 + __fex_setexcepthandler; #LSARC/1996/175 + __fma; + __fmaf; + __fmal; + __fmax; + __fmaxf; + __fmaxl; + __fmin; + __fminf; + __fminl; + __frexp; + __gammaf; #LSARC/2003/279 + __gammaf_r; #LSARC/2003/279 + __gammal; #LSARC/2003/279 + __gammal_r; #LSARC/2003/279 + __hypotf; + __hypotl; + __ilogbf; + __ilogbl; + __j0f; #LSARC/2003/279 + __j0l; #LSARC/2003/279 + __j1f; #LSARC/2003/279 + __j1l; #LSARC/2003/279 + __jnf; #LSARC/2003/279 + __jnl; #LSARC/2003/279 + __ldexp; + __lgammaf; + __lgammaf_r; #LSARC/2003/279 + __lgammal; + __lgammal_r; #LSARC/2003/279 + __llrint; + __llrintf; + __llrintl; + __llround; + __llroundf; + __llroundl; + __log1pf; + __log1pl; + __log2; + __log2f; + __log2l; + __logbf; + __logbl; + __lrint; + __lrintf; + __lrintl; + __lround; + __lroundf; + __lroundl; + __modf; + __nan; + __nanf; + __nanl; + __nearbyint; + __nearbyintf; + __nearbyintl; + __nextafterf; + __nextafterl; + __nexttoward; + __nexttowardf; + __nexttowardl; + __remainderf; + __remainderl; + __remquo; + __remquof; + __remquol; + __rintf; + __rintl; + __round; + __roundf; + __roundl; + __scalbf; #LSARC/2003/279 + __scalbl; #LSARC/2003/279 + __scalbln; + __scalblnf; + __scalblnl; + __scalbnf; + __scalbnl; + __signgamf; #LSARC/2003/279 + __signgaml; #LSARC/2003/279 + __significandf; #LSARC/2003/279 + __significandl; #LSARC/2003/279 + __sincos; #LSARC/2003/279 + __sincosf; #LSARC/2003/279 + __sincosl; #LSARC/2003/279 + __tgamma; + __tgammaf; + __tgammal; + __trunc; + __truncf; + __truncl; + __xpg6; #private contract with libc group + __y0f; #LSARC/2003/279 + __y0l; #LSARC/2003/279 + __y1f; #LSARC/2003/279 + __y1l; #LSARC/2003/279 + __ynf; #LSARC/2003/279 + 
__ynl; #LSARC/2003/279 + acosf; + acoshf; + acoshl; + acosl; + asinf; + asinhf; + asinhl; + asinl; + atan2f; + atan2l; + atanf; + atanhf; + atanhl; + atanl; + cabs; + cabsf; + cabsl; + cacos; + cacosf; + cacosh; + cacoshf; + cacoshl; + cacosl; + carg; + cargf; + cargl; + casin; + casinf; + casinh; + casinhf; + casinhl; + casinl; + catan; + catanf; + catanh; + catanhf; + catanhl; + catanl; + cbrtf; + cbrtl; + ccos; + ccosf; + ccosh; + ccoshf; + ccoshl; + ccosl; + ceilf; + ceill; + cexp; + cexpf; + cexpl; + cimag; + cimagf; + cimagl; + clog; + clogf; + clogl; + conj; + conjf; + conjl; + copysignf; + copysignl; + cosf; + coshf; + coshl; + cosl; + cpow; + cpowf; + cpowl; + cproj; + cprojf; + cprojl; + creal; + crealf; + creall; + csin; + csinf; + csinh; + csinhf; + csinhl; + csinl; + csqrt; + csqrtf; + csqrtl; + ctan; + ctanf; + ctanh; + ctanhf; + ctanhl; + ctanl; + erfcf; + erfcl; + erff; + erfl; + exp2; + exp2f; + exp2l; + expf; + expl; + expm1f; + expm1l; + fabsf; + fabsl; + fdim; + fdimf; + fdiml; + feclearexcept; + fegetenv; + fegetexceptflag; + fegetprec; #LSARC/1996/175 + fegetround; + feholdexcept; + feraiseexcept; + fesetenv; + fesetexceptflag; + fesetprec; #LSARC/1996/175 + fesetround; + fetestexcept; + feupdateenv; + fex_get_handling; #LSARC/1996/175 + fex_get_log; #LSARC/1996/175 + fex_get_log_depth; #LSARC/1996/175 + fex_getexcepthandler; #LSARC/1996/175 + fex_log_entry; #LSARC/1996/175 + fex_merge_flags; #LSARC/1996/175 + fex_set_handling; #LSARC/1996/175 + fex_set_log; #LSARC/1996/175 + fex_set_log_depth; #LSARC/1996/175 + fex_setexcepthandler; #LSARC/1996/175 + floorf; + floorl; + fma; + fmaf; + fmal; + fmax; + fmaxf; + fmaxl; + fmin; + fminf; + fminl; + fmodf; + fmodl; + frexp; + frexpf; + frexpl; + gammaf; #LSARC/2003/279 + gammaf_r; #LSARC/2003/279 + gammal; #LSARC/2003/279 + gammal_r; #LSARC/2003/279 + hypotf; + hypotl; + ilogbf; + ilogbl; + j0f; #LSARC/2003/279 + j0l; #LSARC/2003/279 + j1f; #LSARC/2003/279 + j1l; #LSARC/2003/279 + jnf; 
#LSARC/2003/279 + jnl; #LSARC/2003/279 + ldexp; + ldexpf; + ldexpl; + lgammaf; + lgammaf_r; #LSARC/2003/279 + lgammal; + lgammal_r; #LSARC/2003/279 + llrint; + llrintf; + llrintl; + llround; + llroundf; + llroundl; + log10f; + log10l; + log1pf; + log1pl; + log2; + log2f; + log2l; + logbf; + logbl; + logf; + logl; + lrint; + lrintf; + lrintl; + lround; + lroundf; + lroundl; + modf; + modff; + modfl; + nan; + nanf; + nanl; + nearbyint; + nearbyintf; + nearbyintl; + nextafterf; + nextafterl; + nexttoward; + nexttowardf; + nexttowardl; + powf; + powl; + remainderf; + remainderl; + remquo; + remquof; + remquol; + rintf; + rintl; + round; + roundf; + roundl; + scalbf; #LSARC/2003/279 + scalbl; #LSARC/2003/279 + scalbln; + scalblnf; + scalblnl; + scalbnf; + scalbnl; + signgamf; #LSARC/2003/279 + signgaml; #LSARC/2003/279 + significandf; #LSARC/2003/279 + significandl; #LSARC/2003/279 + sincos; #LSARC/2003/279 + sincosf; #LSARC/2003/279 + sincosl; #LSARC/2003/279 + sinf; + sinhf; + sinhl; + sinl; + sqrtf; + sqrtl; + tanf; + tanhf; + tanhl; + tanl; + tgamma; + tgammaf; + tgammal; + trunc; + truncf; + truncl; + y0f; #LSARC/2003/279 + y0l; #LSARC/2003/279 + y1f; #LSARC/2003/279 + y1l; #LSARC/2003/279 + ynf; #LSARC/2003/279 + ynl; #LSARC/2003/279 +} SUNW_1.1.1; + +SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + __atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + 
__floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +SUNWprivate_1.3 { + global: + __libm_mt_fex_sync; # -lmtsk + __mt_fex_sync; # -lmtsk +} SUNWprivate_1.2; + +SUNWprivate_1.2 { + global: + __libm_errno; # SC3.0.1 -lmopt +} SUNWprivate_1.1; + +SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/libm/wos/mapfiles/libmv2-sparc b/usr/src/libm/wos/mapfiles/libmv2-sparc new file mode 100644 index 0000000..ffc86bb --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmv2-sparc @@ -0,0 +1,688 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmv2-sparc 1.9 06/01/31 SMI +# +# Interface definition for libm.so.2 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook. Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". +# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +SUNW_1.3 { + global: + __isnanf; #LSARC/2003/658 + __isnanl; #LSARC/2003/658 + _isnan; #LSARC/2003/658 + _isnand; #LSARC/2003/658 + _isnanf; #LSARC/2003/658 + _logb; #LSARC/2003/658 + _modf; #LSARC/2003/658 + _modff; #LSARC/2003/658 + _nextafter; #LSARC/2003/658 + _scalb; #LSARC/2003/658 + isnand; #LSARC/2003/658 + isnanf; #LSARC/2003/658 + isnanl; #LSARC/2003/658 +} SUNW_1.2; + +SUNW_1.2 { + global: + __acoshf; + __acoshl; + __asinhf; + __asinhl; + __atanhf; + __atanhl; + __cabs; + __cabsf; + __cabsl; + __cacos; + __cacosf; + __cacosh; + __cacoshf; + __cacoshl; + __cacosl; + __carg; + __cargf; + __cargl; + __casin; + __casinf; + __casinh; + __casinhf; + __casinhl; + __casinl; + __catan; + __catanf; + __catanh; + __catanhf; + __catanhl; + __catanl; + __cbrtf; + __cbrtl; + __ccos; + __ccosf; + __ccosh; + __ccoshf; + __ccoshl; + __ccosl; + __cexp; + __cexpf; + __cexpl; + __cimag; + __cimagf; + __cimagl; + __clog; + __clogf; + 
__clogl; + __conj; + __conjf; + __conjl; + __copysignf; + __copysignl; + __cpow; + __cpowf; + __cpowl; + __cproj; + __cprojf; + __cprojl; + __creal; + __crealf; + __creall; + __csin; + __csinf; + __csinh; + __csinhf; + __csinhl; + __csinl; + __csqrt; + __csqrtf; + __csqrtl; + __ctan; + __ctanf; + __ctanh; + __ctanhf; + __ctanhl; + __ctanl; + __erfcf; + __erfcl; + __erff; + __erfl; + __exp2; + __exp2f; + __exp2l; + __expm1f; + __expm1l; + __fdim; + __fdimf; + __fdiml; + __feclearexcept; + __fegetenv; + __fegetexceptflag; + __fegetround; + __feholdexcept; + __fenv_dfl_env; #LSARC/1996/175 + __feraiseexcept; + __fesetenv; + __fesetexceptflag; + __fesetround; + __fetestexcept; + __feupdateenv; + __fex_get_handling; #LSARC/1996/175 + __fex_get_log; #LSARC/1996/175 + __fex_get_log_depth; #LSARC/1996/175 + __fex_getexcepthandler; #LSARC/1996/175 + __fex_log_entry; #LSARC/1996/175 + __fex_merge_flags; #LSARC/1996/175 + __fex_set_handling; #LSARC/1996/175 + __fex_set_log; #LSARC/1996/175 + __fex_set_log_depth; #LSARC/1996/175 + __fex_setexcepthandler; #LSARC/1996/175 + __fma; + __fmaf; + __fmal; + __fmax; + __fmaxf; + __fmaxl; + __fmin; + __fminf; + __fminl; + __frexp; + __gammaf; #LSARC/2003/279 + __gammaf_r; #LSARC/2003/279 + __gammal; #LSARC/2003/279 + __gammal_r; #LSARC/2003/279 + __hypotf; + __hypotl; + __ilogbf; + __ilogbl; + __j0f; #LSARC/2003/279 + __j0l; #LSARC/2003/279 + __j1f; #LSARC/2003/279 + __j1l; #LSARC/2003/279 + __jnf; #LSARC/2003/279 + __jnl; #LSARC/2003/279 + __ldexp; + __lgammaf; + __lgammaf_r; #LSARC/2003/279 + __lgammal; + __lgammal_r; #LSARC/2003/279 + __llrint; + __llrintf; + __llrintl; + __llround; + __llroundf; + __llroundl; + __log1pf; + __log1pl; + __log2; + __log2f; + __log2l; + __logbf; + __logbl; + __lrint; + __lrintf; + __lrintl; + __lround; + __lroundf; + __lroundl; + __modf; + __nan; + __nanf; + __nanl; + __nearbyint; + __nearbyintf; + __nearbyintl; + __nextafterf; + __nextafterl; + __nexttoward; + __nexttowardf; + __nexttowardl; + 
__remainderf; + __remainderl; + __remquo; + __remquof; + __remquol; + __rintf; + __rintl; + __round; + __roundf; + __roundl; + __scalbf; #LSARC/2003/279 + __scalbl; #LSARC/2003/279 + __scalbln; + __scalblnf; + __scalblnl; + __scalbnf; + __scalbnl; + __signgamf; #LSARC/2003/279 + __signgaml; #LSARC/2003/279 + __significandf; #LSARC/2003/279 + __significandl; #LSARC/2003/279 + __sincos; #LSARC/2003/279 + __sincosf; #LSARC/2003/279 + __sincosl; #LSARC/2003/279 + __tgamma; + __tgammaf; + __tgammal; + __trunc; + __truncf; + __truncl; + __xpg6; #private contract with libc group + __y0f; #LSARC/2003/279 + __y0l; #LSARC/2003/279 + __y1f; #LSARC/2003/279 + __y1l; #LSARC/2003/279 + __ynf; #LSARC/2003/279 + __ynl; #LSARC/2003/279 + acosf; + acoshf; + acoshl; + acosl; + asinf; + asinhf; + asinhl; + asinl; + atan2f; + atan2l; + atanf; + atanhf; + atanhl; + atanl; + cabs; + cabsf; + cabsl; + cacos; + cacosf; + cacosh; + cacoshf; + cacoshl; + cacosl; + carg; + cargf; + cargl; + casin; + casinf; + casinh; + casinhf; + casinhl; + casinl; + catan; + catanf; + catanh; + catanhf; + catanhl; + catanl; + cbrtf; + cbrtl; + ccos; + ccosf; + ccosh; + ccoshf; + ccoshl; + ccosl; + ceilf; + ceill; + cexp; + cexpf; + cexpl; + cimag; + cimagf; + cimagl; + clog; + clogf; + clogl; + conj; + conjf; + conjl; + copysignf; + copysignl; + cosf; + coshf; + coshl; + cosl; + cpow; + cpowf; + cpowl; + cproj; + cprojf; + cprojl; + creal; + crealf; + creall; + csin; + csinf; + csinh; + csinhf; + csinhl; + csinl; + csqrt; + csqrtf; + csqrtl; + ctan; + ctanf; + ctanh; + ctanhf; + ctanhl; + ctanl; + erfcf; + erfcl; + erff; + erfl; + exp2; + exp2f; + exp2l; + expf; + expl; + expm1f; + expm1l; + fabsf; + fabsl; + fdim; + fdimf; + fdiml; + feclearexcept; + fegetenv; + fegetexceptflag; + fegetround; + feholdexcept; + feraiseexcept; + fesetenv; + fesetexceptflag; + fesetround; + fetestexcept; + feupdateenv; + fex_get_handling; #LSARC/1996/175 + fex_get_log; #LSARC/1996/175 + fex_get_log_depth; #LSARC/1996/175 + 
fex_getexcepthandler; #LSARC/1996/175 + fex_log_entry; #LSARC/1996/175 + fex_merge_flags; #LSARC/1996/175 + fex_set_handling; #LSARC/1996/175 + fex_set_log; #LSARC/1996/175 + fex_set_log_depth; #LSARC/1996/175 + fex_setexcepthandler; #LSARC/1996/175 + floorf; + floorl; + fma; + fmaf; + fmal; + fmax; + fmaxf; + fmaxl; + fmin; + fminf; + fminl; + fmodf; + fmodl; + frexp; + frexpf; + frexpl; + gammaf; #LSARC/2003/279 + gammaf_r; #LSARC/2003/279 + gammal; #LSARC/2003/279 + gammal_r; #LSARC/2003/279 + hypotf; + hypotl; + ilogbf; + ilogbl; + j0f; #LSARC/2003/279 + j0l; #LSARC/2003/279 + j1f; #LSARC/2003/279 + j1l; #LSARC/2003/279 + jnf; #LSARC/2003/279 + jnl; #LSARC/2003/279 + ldexp; + ldexpf; + ldexpl; + lgammaf; + lgammaf_r; #LSARC/2003/279 + lgammal; + lgammal_r; #LSARC/2003/279 + llrint; + llrintf; + llrintl; + llround; + llroundf; + llroundl; + log10f; + log10l; + log1pf; + log1pl; + log2; + log2f; + log2l; + logbf; + logbl; + logf; + logl; + lrint; + lrintf; + lrintl; + lround; + lroundf; + lroundl; + modf; + modff; + modfl; + nan; + nanf; + nanl; + nearbyint; + nearbyintf; + nearbyintl; + nextafterf; + nextafterl; + nexttoward; + nexttowardf; + nexttowardl; + powf; + powl; + remainderf; + remainderl; + remquo; + remquof; + remquol; + rintf; + rintl; + round; + roundf; + roundl; + scalbf; #LSARC/2003/279 + scalbl; #LSARC/2003/279 + scalbln; + scalblnf; + scalblnl; + scalbnf; + scalbnl; + signgamf; #LSARC/2003/279 + signgaml; #LSARC/2003/279 + significandf; #LSARC/2003/279 + significandl; #LSARC/2003/279 + sincos; #LSARC/2003/279 + sincosf; #LSARC/2003/279 + sincosl; #LSARC/2003/279 + sinf; + sinhf; + sinhl; + sinl; + sqrtf; + sqrtl; + tanf; + tanhf; + tanhl; + tanl; + tgamma; + tgammaf; + tgammal; + trunc; + truncf; + truncl; + y0f; #LSARC/2003/279 + y0l; #LSARC/2003/279 + y1f; #LSARC/2003/279 + y1l; #LSARC/2003/279 + ynf; #LSARC/2003/279 + ynl; #LSARC/2003/279 +} SUNW_1.1.1; + +SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + 
__atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +SUNWprivate_1.3 { + global: + __libm_mt_fex_sync; # -lmtsk + __mt_fex_sync; # -lmtsk +} SUNWprivate_1.2; + +SUNWprivate_1.2 { + global: + __libm_errno; # SC3.0.1 -lmopt +} SUNWprivate_1.1; + +SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/libm/wos/mapfiles/libmvec-common b/usr/src/libm/wos/mapfiles/libmvec-common new file mode 100644 index 0000000..cbb4b55 --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmvec-common @@ 
-0,0 +1,128 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-common 1.4 06/01/31 SMI +# +# Interface definition for libmvec.so.1 + +SUNW_1.1 { + global: + __vatan2; #LSARC/2003/737 + __vatan2_; #LSARC/2003/737 + __vatan2f; #LSARC/2003/737 + __vatan2f_; #LSARC/2003/737 + __vatan; #LSARC/2003/737 + __vatan_; #LSARC/2003/737 + __vatanf; #LSARC/2003/737 + __vatanf_; #LSARC/2003/737 + __vc_abs; #LSARC/2003/737 + __vc_abs_; #LSARC/2003/737 + __vc_exp; #LSARC/2003/737 + __vc_exp_; #LSARC/2003/737 + __vc_log; #LSARC/2003/737 + __vc_log_; #LSARC/2003/737 + __vc_pow; #LSARC/2003/737 + __vc_pow_; #LSARC/2003/737 + __vcos; #LSARC/2003/737 + __vcos_; #LSARC/2003/737 + __vcosf; #LSARC/2003/737 + __vcosf_; #LSARC/2003/737 + __vexp; #LSARC/2003/737 + __vexp_; #LSARC/2003/737 + __vexpf; #LSARC/2003/737 + __vexpf_; #LSARC/2003/737 + __vhypot; #LSARC/2003/737 + __vhypot_; #LSARC/2003/737 + __vhypotf; #LSARC/2003/737 + __vhypotf_; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vlog_; #LSARC/2003/737 + __vlogf; #LSARC/2003/737 + __vlogf_; #LSARC/2003/737 + __vpow; #LSARC/2003/737 + __vpow_; #LSARC/2003/737 + 
__vpowf; #LSARC/2003/737 + __vpowf_; #LSARC/2003/737 + __vrhypot; #LSARC/2003/737 + __vrhypot_; #LSARC/2003/737 + __vrhypotf; #LSARC/2003/737 + __vrhypotf_; #LSARC/2003/737 + __vrsqrt; #LSARC/2003/737 + __vrsqrt_; #LSARC/2003/737 + __vrsqrtf; #LSARC/2003/737 + __vrsqrtf_; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsin_; #LSARC/2003/737 + __vsincos; #LSARC/2003/737 + __vsincos_; #LSARC/2003/737 + __vsincosf; #LSARC/2003/737 + __vsincosf_; #LSARC/2003/737 + __vsinf; #LSARC/2003/737 + __vsinf_; #LSARC/2003/737 + __vsqrt; #LSARC/2003/737 + __vsqrt_; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + __vsqrtf_; #LSARC/2003/737 + __vz_abs; #LSARC/2003/737 + __vz_abs_; #LSARC/2003/737 + __vz_exp; #LSARC/2003/737 + __vz_exp_; #LSARC/2003/737 + __vz_log; #LSARC/2003/737 + __vz_log_; #LSARC/2003/737 + __vz_pow; #LSARC/2003/737 + __vz_pow_; #LSARC/2003/737 + vatan2_; #LSARC/2003/737 + vatan2f_; #LSARC/2003/737 + vatan_; #LSARC/2003/737 + vatanf_; #LSARC/2003/737 + vc_abs_; #LSARC/2003/737 + vc_exp_; #LSARC/2003/737 + vc_log_; #LSARC/2003/737 + vc_pow_; #LSARC/2003/737 + vcos_; #LSARC/2003/737 + vcosf_; #LSARC/2003/737 + vexp_; #LSARC/2003/737 + vexpf_; #LSARC/2003/737 + vhypot_; #LSARC/2003/737 + vhypotf_; #LSARC/2003/737 + vlog_; #LSARC/2003/737 + vlogf_; #LSARC/2003/737 + vpow_; #LSARC/2003/737 + vpowf_; #LSARC/2003/737 + vrhypot_; #LSARC/2003/737 + vrhypotf_; #LSARC/2003/737 + vrsqrt_; #LSARC/2003/737 + vrsqrtf_; #LSARC/2003/737 + vsin_; #LSARC/2003/737 + vsincos_; #LSARC/2003/737 + vsincosf_; #LSARC/2003/737 + vsinf_; #LSARC/2003/737 + vsqrt_; #LSARC/2003/737 + vsqrtf_; #LSARC/2003/737 + vz_abs_; #LSARC/2003/737 + vz_exp_; #LSARC/2003/737 + vz_log_; #LSARC/2003/737 + vz_pow_; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/libm/wos/mapfiles/libmvec-ia1 b/usr/src/libm/wos/mapfiles/libmvec-ia1 new file mode 100644 index 0000000..8a7eccc --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmvec-ia1 @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file 
are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-ia1 1.4 06/01/31 SMI +# + +hwcap_1 = fpu cmov sse sse2; diff --git a/usr/src/libm/wos/mapfiles/libmvec-v8plusa b/usr/src/libm/wos/mapfiles/libmvec-v8plusa new file mode 100644 index 0000000..ba5626a --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmvec-v8plusa @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-v8plusa 1.4 06/01/31 SMI +# +# Interface definition for cpu/sparcv8plus+vis/libmvec_isa.so.1 + +SUNW_1.1 { + global: + __vatan; #LSARC/2003/737 + __vatan2; #LSARC/2003/737 + __vatan2f; #LSARC/2003/737 + __vatanf; #LSARC/2003/737 + __vcos; #LSARC/2003/737 + __vcosf; #LSARC/2003/737 + __vexp; #LSARC/2003/737 + __vexpf; #LSARC/2003/737 + __vhypot; #LSARC/2003/737 + __vhypotf; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vlogf; #LSARC/2003/737 + __vpow; #LSARC/2003/737 + __vpowf; #LSARC/2003/737 + __vrhypot; #LSARC/2003/737 + __vrhypotf; #LSARC/2003/737 + __vrsqrt; #LSARC/2003/737 + __vrsqrtf; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsincos; #LSARC/2003/737 + __vsincosf; #LSARC/2003/737 + __vsinf; #LSARC/2003/737 + __vsqrt; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/libm/wos/mapfiles/libmvec-v8plusb b/usr/src/libm/wos/mapfiles/libmvec-v8plusb new file mode 100644 index 0000000..1ae5114 --- /dev/null +++ b/usr/src/libm/wos/mapfiles/libmvec-v8plusb @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-v8plusb 1.4 06/01/31 SMI +# +# Interface definition for cpu/sparcv9+vis2/libmvec_isa.so.1 + +SUNW_1.1 { + global: + __vcos; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/libm/wos64/Makefile b/usr/src/libm/wos64/Makefile new file mode 100644 index 0000000..bb39998 --- /dev/null +++ b/usr/src/libm/wos64/Makefile @@ -0,0 +1,1064 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "@(#)Makefile 1.38 06/01/31 SMI" +# + +SUNW_ISA:sh = \ + case "`uname -p`" in \ + sparc) echo sparcv9 ;; \ + i386) echo amd64 ;; \ + esac + +MCPU:sh = uname -p + +DESTDIR = destdir-$(SUNW_ISA) + +CG_sparc = v9 +CG_i386 = amd64 +CG = $(CG_$(MCPU)) +CHIP_sparc = ultra +CHIP_i386 = amd64 +CHIP = $(CHIP_$(MCPU)) +XARCH = $(CG) + +VnA = sparcv9+vis +VnB = sparcv9+vis2 +ISA_DIRS_sparc = $(VnA) $(VnB) +ISA_DIRS_i386 = +ISA_DIRS = $(ISA_DIRS_$(MCPU)) + +# +# system commands +# + +AR = /usr/ccs/bin/ar +CPP_CMD = $(CC) -E -Xs +INSTALL = /usr/sbin/install +INST_CMD = $(INSTALL) $(OWNED_BY) -m 644 -f $(@D) $< +INST_EXEC_CMD = $(INSTALL) $(OWNED_BY) -m 755 -f $(@D) $< +LD = /usr/ccs/bin/ld +M4 = /usr/ccs/bin/m4 +MCS = /usr/ccs/bin/mcs +STRIP = /usr/ccs/bin/strip +TR = /usr/bin/tr + +# +# compiler-related symbols and flags +# + +OSREL:sh = /usr/bin/uname -r +POUND_SIGN:sh = /usr/bin/printf "\\043" +BUILDDATE:sh = /usr/bin/date +%m/%d/%Y +BUILDNAME = Generic +PATCHID = +BUILDINFO = SunOS $(OSREL) $(BUILDNAME) $(PATCHID) +LIBM_VERSION = "@($(POUND_SIGN))RELEASE VERSION $(BUILDINFO) $(BUILDDATE)" + +FPDEF_sparc = -D__$(SUNW_ISA) -DARCH_$(CG) +FPDEF_i386 = -D__$(SUNW_ISA) -DARCH_$(CG) +FPDEF = $(FPDEF_$(MCPU)) + +# +# above and beyond what CDEF defines; so -DELFOBJ -DPIC is removed +# +ASDEF = -D_ASM $(FPDEF) + +ASOPT_sparc = -xarch=$(XARCH) -Kpic +ASOPT_i386 = $(ASOPT_sparc) +ASOPT = $(ASOPT_$(MCPU)) + +ASSUFFIX_sparc = S +ASSUFFIX_i386 = s +ASSUFFIX = $(ASSUFFIX_$(MCPU)) + +CDEF_sparc = +CDEF_i386 = -D__i386 ###-DAMD64_BRINGUP_WORKAROUND +CDEF = -DELFOBJ -DPIC -D_STDC_C99= -D_REENTRANT \ + -DLIBM_MT_FEX_SYNC $(CDEF_$(MCPU)) + +CINC = -I../inc -I$(SRC)/C + +# +# __INLINE turns on the use of __inline_sqrt in sqrt.c +# +OLVL_sparc = -xO4 +OLVL_i386 = -O +COPT_sparc = -D__INLINE -xregs=no%appl -xarch=$(XARCH) -xchip=ultra3 +COPT_i386 = -D__INLINE -xarch=$(XARCH) -Wu,-a +COPT = -Xa -Kpic -xstrconst $(OLVL_$(MCPU)) $(COPT_$(MCPU)) $(LM_IL) + +# +# $(LDEND) must come *last* 
when link editing with versioning; otherwise +# _lib_version, isnan, logb, nextafter, scalb will disappear from version +# information due to the fact that they are also part of libc +# +MAPFMV1 = mapfiles/libmv1-common +MAPFMV2 = mapfiles/libmv2-$(MCPU) +MAPFMVEC = mapfiles/libmvec-common +MAPFMVECA = mapfiles/libmvec-v9a +MAPFMVECB = mapfiles/libmvec-v9b +MAPFILES_sparc = $(MAPFMV1) $(MAPFMV2) $(MAPFMVEC) $(MAPFMVECA) $(MAPFMVECB) +MAPFILES_i386 = $(MAPFMV2) $(MAPFMVEC) +MAPFILES = $(MAPFILES_$(MCPU)) +LDOPT0 = -z redlocsym +LDOPT = -dy -G $(LDOPT0) -z combreloc -z text -z defs -h $(@F) \ + -YP,/lib/$(SUNW_ISA) +LDEND = -lc + +APPFLAGS = $(ASDEF) -Wp,-P +ASFLAGS = $(ASOPT) +CFLAGS = $(COPT) +CPPFLAGS = $(CDEF) $(CINC) +LINTFLAGS = -Xa -u -errchk=longptr64 -D__$(SUNW_ISA) + +OWNED_BY = -u root + +M4FLAGS = -D__STDC__ -DELFOBJ -DPIC + +# +# SVR4 hack +# +CMD.S = $(CPP_CMD) $(APPFLAGS) $(CPPFLAGS) $< > $(CG)/$(@F:%.o=%.s); \ + $(COMPILE.S) -o $@ $(CG)/$(@F:%.o=%.s); \ + $(RM) $(CG)/$(@F:%.o=%.s) + +# +# Directories +# +MDIR = $(MCPU) +ROOTLIB = $(DESTDIR)/lib +SRC = ../src + +LDBLDIR_sparc = Q +LDBLDIR_i386 = LD +LDBLDIR = $(LDBLDIR_$(MCPU)) + +# +# Symbols and flags +# + +LLIB_LM = llib-lm.ln +M_LN = $(LLIB_LM:%=$(CG)/misc/%) + +LIBM_IL_SRC = $(SRC)/$(MDIR)/$(CG)/libm.m4 +LOCALLIBM_IL = locallibm.il +LM_IL = $(LOCALLIBM_IL:%=$(CG)/misc/%) + +M9X_IL_sparcv9 = __fenv_sparc.il +M9X_IL_amd64 = __fenv_amd64.il +M9X_IL = $(M9X_IL_$(SUNW_ISA)) +m9x_IL = $(M9X_IL:%=$(SRC)/m9x/%) + +COBJS_sparc = \ + _TBL_atan.o \ + _TBL_exp2.o \ + _TBL_log.o \ + _TBL_log2.o \ + #end + +COBJS_i386 = $(COBJS_sparc) + +# +# atan2pi.o and sincospi.o is for internal use only +# +COBJS = \ + $(COBJS_$(MCPU)) \ + __cos.o \ + __lgamma.o \ + __rem_pio2.o \ + __rem_pio2m.o \ + __sin.o \ + __sincos.o \ + __tan.o \ + __tanf.o \ + __xpg6.o \ + _lib_version.o \ + _SVID_error.o \ + _TBL_ipio2.o \ + _TBL_sin.o \ + _TBL_tan.o \ + acos.o \ + acosh.o \ + asin.o \ + asinh.o \ + atan.o \ + atan2.o \ + 
atan2pi.o \ + atanh.o \ + cbrt.o \ + ceil.o \ + copysign.o \ + cos.o \ + cosh.o \ + erf.o \ + exp.o \ + exp10.o \ + exp2.o \ + expm1.o \ + fabs.o \ + floor.o \ + fmod.o \ + gamma.o \ + gamma_r.o \ + hypot.o \ + ilogb.o \ + isnan.o \ + j0.o \ + j1.o \ + jn.o \ + lgamma.o \ + lgamma_r.o \ + log.o \ + log10.o \ + log1p.o \ + log2.o \ + logb.o \ + matherr.o \ + nextafter.o \ + pow.o \ + remainder.o \ + rint.o \ + scalb.o \ + scalbn.o \ + signgam.o \ + significand.o \ + sin.o \ + sincos.o \ + sincospi.o \ + sinh.o \ + sqrt.o \ + tan.o \ + tanh.o \ + #end + +# +# LSARC/2003/658 adds isnanl +# +QOBJS_sparc = \ + _TBL_atanl.o \ + _TBL_expl.o \ + _TBL_expm1l.o \ + _TBL_logl.o \ + finitel.o \ + isnanl.o \ + #end + +QOBJS_i386 = \ + finitel.o \ + isnanl.o \ + #end + +# +# atan2pil.o, ieee_funcl.o, rndintl.o, sinpil.o, sincosl.o, sincospil.o +# are for internal use only +# +# LSARC/2003/279 adds the following: +# gammal.o 1 +# gammal_r.o 1 +# j0l.o 2 +# j1l.o 2 +# jnl.o 2 +# lgammal_r.o 1 +# scalbl.o 1 +# significandl.o 1 +# +QOBJS = \ + $(QOBJS_$(MCPU)) \ + __cosl.o \ + __lgammal.o \ + __poly_libmq.o \ + __rem_pio2l.o \ + __sincosl.o \ + __sinl.o \ + __tanl.o \ + _TBL_cosl.o \ + _TBL_ipio2l.o \ + _TBL_sinl.o \ + _TBL_tanl.o \ + acoshl.o \ + acosl.o \ + asinhl.o \ + asinl.o \ + atan2l.o \ + atan2pil.o \ + atanhl.o \ + atanl.o \ + cbrtl.o \ + copysignl.o \ + coshl.o \ + cosl.o \ + erfl.o \ + exp10l.o \ + exp2l.o \ + expl.o \ + expm1l.o \ + fabsl.o \ + floorl.o \ + fmodl.o \ + gammal.o \ + gammal_r.o \ + hypotl.o \ + ieee_funcl.o \ + ilogbl.o \ + j0l.o \ + j1l.o \ + jnl.o \ + lgammal.o \ + lgammal_r.o \ + log10l.o \ + log1pl.o \ + log2l.o \ + logbl.o \ + logl.o \ + nextafterl.o \ + powl.o \ + remainderl.o \ + rintl.o \ + rndintl.o \ + scalbl.o \ + scalbnl.o \ + signgaml.o \ + significandl.o \ + sincosl.o \ + sincospil.o \ + sinhl.o \ + sinl.o \ + sinpil.o \ + sqrtl.o \ + tanhl.o \ + tanl.o \ + #end + +# +# LSARC/2003/658 adds isnanf +# +ROBJS_sparc = \ + isnanf.o \ + #end + 
+ROBJS_i386 = $(ROBJS_sparc) + +# +# atan2pif.o, sincosf.o, sincospif.o are for internal use only +# +# LSARC/2003/279 adds the following: +# besself.o 6 +# scalbf.o 1 +# gammaf.o 1 +# gammaf_r.o 1 +# lgammaf_r.o 1 +# significandf.o 1 +# +ROBJS = \ + $(ROBJS_$(MCPU)) \ + _TBL_r_atan_.o \ + __cosf.o \ + __sincosf.o \ + __sinf.o \ + acosf.o \ + acoshf.o \ + asinf.o \ + asinhf.o \ + atan2f.o \ + atan2pif.o \ + atanf.o \ + atanhf.o \ + besself.o \ + cbrtf.o \ + copysignf.o \ + cosf.o \ + coshf.o \ + erff.o \ + exp10f.o \ + exp2f.o \ + expf.o \ + expm1f.o \ + fabsf.o \ + floorf.o \ + fmodf.o \ + gammaf.o \ + gammaf_r.o \ + hypotf.o \ + ilogbf.o \ + lgammaf.o \ + lgammaf_r.o \ + log10f.o \ + log1pf.o \ + log2f.o \ + logbf.o \ + logf.o \ + nextafterf.o \ + powf.o \ + remainderf.o \ + rintf.o \ + scalbf.o \ + scalbnf.o \ + signgamf.o \ + significandf.o \ + sinf.o \ + sinhf.o \ + sincosf.o \ + sincospif.o \ + sqrtf.o \ + tanf.o \ + tanhf.o \ + #end + +SOBJS_sparc = \ + #end + +SOBJS_i386 = \ + __swapFLAGS.o \ +# _xtoll.o \ +# _xtoull.o \ + #end + +SOBJS = \ + $(SOBJS_$(MCPU)) \ + #end + +m9xOBJS_amd64 = \ + __fex_sse.o \ + feprec.o \ + #end + +m9xOBJS_sparcv9 = \ + #end + +# +# lrint.o, lrintf.o, lrintl.o, lround.o, lroundf.o & lroundl.o are 32-bit only +# +m9xOBJS = \ + $(m9xOBJS_$(SUNW_ISA)) \ + __fex_$(MCPU).o \ + __fex_hdlr.o \ + __fex_sym.o \ + fdim.o \ + fdimf.o \ + fdiml.o \ + feexcept.o \ + fenv.o \ + feround.o \ + fex_handler.o \ + fex_log.o \ + fma.o \ + fmaf.o \ + fmal.o \ + fmax.o \ + fmaxf.o \ + fmaxl.o \ + fmin.o \ + fminf.o \ + fminl.o \ + frexp.o \ + frexpf.o \ + frexpl.o \ + ldexp.o \ + ldexpf.o \ + ldexpl.o \ + llrint.o \ + llrintf.o \ + llrintl.o \ + llround.o \ + llroundf.o \ + llroundl.o \ + modf.o \ + modff.o \ + modfl.o \ + nan.o \ + nanf.o \ + nanl.o \ + nearbyint.o \ + nearbyintf.o \ + nearbyintl.o \ + nexttoward.o \ + nexttowardf.o \ + nexttowardl.o \ + remquo.o \ + remquof.o \ + remquol.o \ + round.o \ + roundf.o \ + roundl.o \ + scalbln.o \ + 
scalblnf.o \ + scalblnl.o \ + tgamma.o \ + tgammaf.o \ + tgammal.o \ + trunc.o \ + truncf.o \ + truncl.o \ + #end + +complexOBJS = \ + cabs.o \ + cabsf.o \ + cabsl.o \ + cacos.o \ + cacosf.o \ + cacosh.o \ + cacoshf.o \ + cacoshl.o \ + cacosl.o \ + carg.o \ + cargf.o \ + cargl.o \ + casin.o \ + casinf.o \ + casinh.o \ + casinhf.o \ + casinhl.o \ + casinl.o \ + catan.o \ + catanf.o \ + catanh.o \ + catanhf.o \ + catanhl.o \ + catanl.o \ + ccos.o \ + ccosf.o \ + ccosh.o \ + ccoshf.o \ + ccoshl.o \ + ccosl.o \ + cexp.o \ + cexpf.o \ + cexpl.o \ + cimag.o \ + cimagf.o \ + cimagl.o \ + clog.o \ + clogf.o \ + clogl.o \ + conj.o \ + conjf.o \ + conjl.o \ + cpow.o \ + cpowf.o \ + cpowl.o \ + cproj.o \ + cprojf.o \ + cprojl.o \ + creal.o \ + crealf.o \ + creall.o \ + csin.o \ + csinf.o \ + csinh.o \ + csinhf.o \ + csinhl.o \ + csinl.o \ + csqrt.o \ + csqrtf.o \ + csqrtl.o \ + ctan.o \ + ctanf.o \ + ctanh.o \ + ctanhf.o \ + ctanhl.o \ + ctanl.o \ + k_atan2.o \ + k_atan2l.o \ + k_cexp.o \ + k_cexpl.o \ + k_clog_r.o \ + k_clog_rl.o \ + #end + +LIBMV1_SO_OBJS = \ + libmv1.o \ + #end + +LIBM_SO_OBJS = $(COBJS) $(ROBJS) $(QOBJS) $(SOBJS) $(m9xOBJS) $(complexOBJS) + +mvecOBJS = \ + __vTBL_atan1.o \ + __vTBL_atan2.o \ + __vTBL_rsqrt.o \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vatan.o \ + __vatan2.o \ + __vatan2f.o \ + __vatanf.o \ + __vc_abs.o \ + __vc_exp.o \ + __vc_log.o \ + __vc_pow.o \ + __vcos.o \ + __vcosbig.o \ + __vcosbigf.o \ + __vcosf.o \ + __vexp.o \ + __vexpf.o \ + __vhypot.o \ + __vhypotf.o \ + __vlog.o \ + __vlogf.o \ + __vpow.o \ + __vpowf.o \ + __vrem_pio2m.o \ + __vrhypot.o \ + __vrhypotf.o \ + __vrsqrt.o \ + __vrsqrtf.o \ + __vsin.o \ + __vsinbig.o \ + __vsinbigf.o \ + __vsincos.o \ + __vsincosbig.o \ + __vsincosbigf.o \ + __vsincosf.o \ + __vsinf.o \ + __vsqrt.o \ + __vsqrtf.o \ + __vz_abs.o \ + __vz_exp.o \ + __vz_log.o \ + __vz_pow.o \ + vatan2_.o \ + vatan2f_.o \ + vatan_.o \ + vatanf_.o \ + vc_abs_.o \ + vc_exp_.o \ + vc_log_.o \ + 
vc_pow_.o \ + vcos_.o \ + vcosf_.o \ + vexp_.o \ + vexpf_.o \ + vhypot_.o \ + vhypotf_.o \ + vlog_.o \ + vlogf_.o \ + vpow_.o \ + vpowf_.o \ + vrhypot_.o \ + vrhypotf_.o \ + vrsqrt_.o \ + vrsqrtf_.o \ + vsin_.o \ + vsincos_.o \ + vsincosf_.o \ + vsinf_.o \ + vsqrt_.o \ + vsqrtf_.o \ + vz_abs_.o \ + vz_exp_.o \ + vz_log_.o \ + vz_pow_.o \ + #end + +mvecaCOBJS = \ + __vTBL_atan1.o \ + __vTBL_atan2.o \ + __vTBL_rsqrt.o \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vcosbig.o \ + __vcosbigf.o \ + __vrem_pio2m.o \ + __vsinbig.o \ + __vsinbigf.o \ + __vsincosbig.o \ + __vsincosbigf.o \ + #end + +mvecaSOBJS = \ + __vatan.o \ + __vatan2.o \ + __vatan2f.o \ + __vatanf.o \ + __vcos.o \ + __vcosf.o \ + __vexp.o \ + __vexpf.o \ + __vhypot.o \ + __vhypotf.o \ + __vlog.o \ + __vlogf.o \ + __vpow.o \ + __vpowf.o \ + __vrhypot.o \ + __vrhypotf.o \ + __vrsqrt.o \ + __vrsqrtf.o \ + __vsin.o \ + __vsincos.o \ + __vsincosf.o \ + __vsinf.o \ + __vsqrt.o \ + __vsqrtf.o \ + #end + +mvecbCOBJS = \ + __vTBL_sincos.o \ + __vTBL_sincos2.o \ + __vTBL_sqrtf.o \ + __vcosbig.o \ + __vcosbig_ultra3.o \ + __vrem_pio2m.o \ + __vsinbig.o \ + __vsinbig_ultra3.o \ + #end + +mvecbSOBJS = \ + __vcos_ultra3.o \ + __vlog_ultra3.o \ + __vsin_ultra3.o \ + __vsqrtf_ultra3.o \ + #end + +LIBMVEC_SO_OBJS = $(mvecOBJS) +LIBMVEC_VnA_OBJS = $(mvecaCOBJS) $(mvecaSOBJS) +LIBMVEC_VnB_OBJS = $(mvecbCOBJS) $(mvecbSOBJS) + +# +# Header files under $(SRC) +# + +CHDR = \ + libm.h \ + libm_macros.h \ + libm_protos.h \ + libm_synonyms.h \ + libm_thread.h \ + xpg6.h \ + #end + +QHDR = longdouble.h + +complexHDR = complex_wrapper.h + +m9xHDR = \ + fenv_synonyms.h \ + fex_handler.h \ + fma.h \ + regset.h \ + #end + +C_HDR = $(CHDR:%=$(SRC)/C/%) +Q_HDR = $(QHDR:%=$(SRC)/$(LDBLDIR)/%) +complex_HDR = $(complexHDR:%=$(SRC)/complex/%) +m9x_HDR = $(m9xHDR:%=$(SRC)/m9x/%) +HDRS = $(C_HDR) $(Q_HDR) $(complex_HDR) $(m9x_HDR) + +# +# Header files above and beyond +# + +_HEADERS = \ + complex.h \ + fenv.h \ + 
floatingpoint.h \ + iso/math_c99.h \ + iso/math_iso.h \ + math.h \ + sys/ieeefp.h \ + tgmath.h \ + #end + +HEADERS = $(_HEADERS:%=../inc/%) + +INCS = $(HEADERS) + +# +# Objects +# +LIBMV1_S = $(CG)/libm.so.1 +LIBM_P = +LIBM_S = $(CG)/libm.so.2 +LIBMS = $(LIBMV1_S) $(LIBM_S) + +LIBMVEC_S = $(CG)/libmvec.so.1 +LIBMVEC_VnA = $(VnA)/libmvec_isa.so.1 +LIBMVEC_VnB = $(VnB)/libmvec_isa.so.1 +LIBMVECS_sparc = $(LIBMVEC_S) $(LIBMVEC_VnA) $(LIBMVEC_VnB) +LIBMVECS_i386 = $(LIBMVEC_S) +LIBMVECS = $(LIBMVECS_$(MCPU)) + +LIBS = $(LIBMS) $(LIBMVECS) + +$(LIBM_P) := VARIANT = $(CG)/mobj_p +$(LIBMV1_S) := VARIANT = $(CG)/mobj_s +$(LIBM_S) := VARIANT = $(CG)/mobj_s + +$(LIBMVEC_S) := VARIANT = $(CG)/mvobj_s +$(LIBMVEC_VnA) := VARIANT = $(VnA)/mvobj_s +$(LIBMVEC_VnB) := VARIANT = $(VnB)/mvobj_s + +OBJS_LIBMV1_SO = $(LIBMV1_SO_OBJS:%=$(VARIANT)/%) +OBJS_LIBM_SO = $(LIBM_SO_OBJS:%=$(VARIANT)/%) + +OBJS_LIBMVEC_SO = $(LIBMVEC_SO_OBJS:%=$(VARIANT)/%) +OBJS_LIBMVEC_VnA = $(LIBMVEC_VnA_OBJS:%=$(VARIANT)/%) +OBJS_LIBMVEC_VnB = $(LIBMVEC_VnB_OBJS:%=$(VARIANT)/%) + +OBJS_LIB = \ + $(LIBM_SO_OBJS:%=$(CG)/mobj_s/%) \ + $(LIBMVEC_SO_OBJS:%=$(CG)/mvobj_s/%) \ + $(LIBMVEC_VnA_OBJS:%=$(VnA)/mvobj_s/%) \ + $(LIBMVEC_VnB_OBJS:%=$(VnB)/mvobj_s/%) \ + #end + +%.o := FPDEF_sparc += -DFPADD_TRAPS_INCOMPLETE_ON_NAN +%.o := CDEF_sparc += -DFPADD_TRAPS_INCOMPLETE_ON_NAN +%.o := CDEF_sparc += -DFDTOS_TRAPS_INCOMPLETE_IN_FNS_MODE + +$(CG)/mobj%.o := ASDEF += -DLIBM_BUILD +$(CG)/mobj%.o := CDEF += -DLIBM_BUILD +$(CG)/mobj%.o := COPT_sparc += -dalign + +$(CG)/mobj_p/%.o := ASDEF += -DPROF +$(CG)/mobj_p/%.o := CDEF += -DPROF +$(CG)/mobj_p/%.o := COPT += -p + +# +# __vatan.c, __vatan2.c, __vpow.c ,__vexp.c reference fabs; +# we need acomp to treat fabs as an intrinsic. 
+# +$(CG)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD +$(CG)/mv%.o := CDEF_i386 += -Dfabs=__fabs +$(CG)/mv%.o := COPT_sparc += -W0,-xintrinsic + +$(VnA)/mv%.o := CHIP = vis +$(VnA)/mv%.o := XARCH = v9a +$(VnA)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD + +$(VnB)/mv%.o := CHIP = vis +$(VnB)/mv%.o := XARCH = v9b +$(VnB)/mv%.o := CDEF += -DLIBMVEC_SO_BUILD + +%/exp10.o %/exp2.o := CDEF_i386 += -D__anint=__round + +# +# AMD64 bringup workarounds +# +###%/__vexpf.o := OLVL_i386 = -xO1 + +dryrun := DR_BGN = -@echo ' +dryrun := DR_END = ' +dryrun := DR_NBGN = true || echo ' +dryrun := DR_NEND = ' + +CG_DIRS_sparc = mobj_p mobj_s mvobj_s misc +CG_DIRS_i386 = $(CG_DIRS_sparc) +CG_DIRS = $(CG_DIRS_$(MCPU)) + +DIRS_CG = \ + $(CG_DIRS:%=$(CG)/%) \ + $(ISA_DIRS:%=%/mvobj_s) \ + #end + +INST_DIRS = $(SUNW_ISA) $(ISA_DIRS:%=cpu/%/$(SUNW_ISA)) +DIRS_INST = $(INST_DIRS:%=$(ROOTLIB)/%) + +LIBS_INST_sparc = \ + $(LIBMS:$(CG)/%=$(ROOTLIB)/$(SUNW_ISA)/%) \ + $(LIBMVEC_S:$(CG)/%=$(ROOTLIB)/$(SUNW_ISA)/%) \ + $(LIBMVEC_VnA:$(VnA)/%=$(ROOTLIB)/cpu/$(VnA)/$(SUNW_ISA)/%) \ + $(LIBMVEC_VnB:$(VnB)/%=$(ROOTLIB)/cpu/$(VnB)/$(SUNW_ISA)/%) \ + #end +LIBS_INST_i386 = \ + $(LIBMS:$(CG)/%=$(ROOTLIB)/$(SUNW_ISA)/%) \ + $(LIBMVEC_S:$(CG)/%=$(ROOTLIB)/$(SUNW_ISA)/%) \ + #end +LIBS_INST = $(LIBS_INST_$(MCPU)) + +INST_FILES = $(LLIB_LM) +FILES_INST = $(INST_FILES:%=$(ROOTLIB)/$(SUNW_ISA)/%) + +# +# libm build rules +# + +.INIT: $(DIRS_CG) $(MAPFILES) + +.NO_PARALLEL: $(DIRS_CG) $(DIRS_INST) $(LIBS_INST) + +.PARALLEL: $(OBJS_LIB) + +.DONE: + -@echo $(CG) DONE + +all: $(DIRS_CG) .WAIT $(INCS) $(HDRS) $(M_LN) $(LIBS) + +dryrun: all + +install: all .WAIT $(DIRS_INST) .WAIT $(FILES_INST) $(LIBS_INST) + +clean: FRC + -$(RM) -r $(CG)/mobj_p $(CG)/mobj_s $(CG)/mvobj_s \ + $(ISA_DIRS:%=%/mvobj_s) $(LM_IL) + + +clobber: clean + -$(RM) -r $(CG) $(ISA_DIRS) + +$(CG)/misc/%.ln: % + $(LINT) -nvx $(LINTFLAGS) $(CPPFLAGS) -om$( $(CG)/$(@F).$(ASSUFFIX); \ + $(CPP_CMD) $(CPPFLAGS) $(CG)/$(@F).$(ASSUFFIX) | \ + sed -e 's/[ ]*$$//' -e 
'/^#/d' -e '/^$$/d' > $@; \ + $(RM) $(CG)/$(@F).$(ASSUFFIX) + -@echo $@ created with `wc -w < $@` words + +$(LIBM_P) $(LIBM_S): $(MAPFMV2) $$(OBJS_LIBM_SO) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMV2) \'; \ + echo -o '$(@:$(CG)/%=../%) \'; \ + echo $(OBJS_LIBM_SO:$(VARIANT)/%=%) | $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$, $(LDEND),'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMV2) \ + -o $(@:$(CG)/%=../%) \ + $(OBJS_LIBM_SO:$(VARIANT)/%=%) $(LDEND); \ + $(MCS) -d $(@:$(CG)/%=../%); $(STRIP) -x $(@:$(CG)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMV1_S): $(MAPFMV1) $$(OBJS_LIBMV1_SO) + $(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) -M $${_d}/$(MAPFMV1) \ + -o $(@:$(CG)/%=../%) -F libm.so.2 $(OBJS_LIBMV1_SO:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(CG)/%=../%); $(STRIP) -x $(@:$(CG)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_S): $(MAPFMVEC) $$(OBJS_LIBMVEC_SO) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMVEC) \'; \ + echo -f '$$ORIGIN/../cpu/$$ISALIST/$(SUNW_ISA)/libmvec_isa.so.1 \'; \ + echo -o '$(@:$(CG)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_SO:$(VARIANT)/%=%) | $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMVEC) \ + -f '$$ORIGIN/../cpu/$$ISALIST/$(SUNW_ISA)/libmvec_isa.so.1' \ + -o $(@:$(CG)/%=../%) \ + $(OBJS_LIBMVEC_SO:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(CG)/%=../%); $(STRIP) -x $(@:$(CG)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(CG)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_VnA): $(MAPFMVECA) $$(OBJS_LIBMVEC_VnA) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo '-z 
endfiltee \'; \ + echo -M $${_d}/'$(MAPFMVECA) \'; \ + echo -o '$(@:$(VnA)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_VnA:$(VARIANT)/%=%) | \ + $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(VnA)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -z endfiltee \ + -M $${_d}/$(MAPFMVECA) \ + -o $(@:$(VnA)/%=../%) \ + $(OBJS_LIBMVEC_VnA:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(VnA)/%=../%); $(STRIP) -x $(@:$(VnA)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(VnA)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$(LIBMVEC_VnB): $(MAPFMVECB) $$(OBJS_LIBMVEC_VnB) + -@_d=`pwd`; echo 'cd $(VARIANT); $(LD) $(LDOPT) \'; \ + echo -M $${_d}/'$(MAPFMVECB) \'; \ + echo -o '$(@:$(VnB)/%=../%) \'; \ + echo $(OBJS_LIBMVEC_VnB:$(VARIANT)/%=%) | \ + $(TR) -s ' ' '\012' | \ + fmt -64 | sed -e 's,^, ,' -e 's,$$, \\,' \ + -e '$$s, \\$$,,'; \ + echo '$(MCS) -a $(LIBM_VERSION) $(@:$(VnB)/%=../%)' + @$(DR_NBGN)_d=`pwd`; cd $(VARIANT); $(LD) $(LDOPT) \ + -M $${_d}/$(MAPFMVECB) \ + -o $(@:$(VnB)/%=../%) \ + $(OBJS_LIBMVEC_VnB:$(VARIANT)/%=%); \ + $(MCS) -d $(@:$(VnB)/%=../%); $(STRIP) -x $(@:$(VnB)/%=../%); \ + $(MCS) -a $(LIBM_VERSION) $(@:$(VnB)/%=../%)$(DR_NEND) + -@echo BUILT $@ + +$$(VARIANT)/%.o: $(SRC)/mvec/$$(CHIP)/%.S + $(DR_BGN)$(CMD.S)$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/$(MDIR)/$(CG)/%.$(ASSUFFIX) $(C_HDR) + $(DR_BGN)$(CMD.S)$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/mvec/%.c $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/C/%.c $(C_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/$(LDBLDIR)/%.c $(C_HDR) $(Q_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/R/%.c $(C_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: $(SRC)/complex/%.c $(C_HDR) $(complex_HDR) $(HEADERS) $(LM_IL) + $(DR_BGN)$(COMPILE.c) -o $@ $<$(DR_END) + +$$(VARIANT)/%.o: 
$(SRC)/m9x/%.c $(C_HDR) $(m9x_HDR) $(HEADERS) $(m9x_IL) $(LM_IL) + $(DR_BGN)$(COMPILE.c) $(m9x_IL) -o $@ $<$(DR_END) + +$(DIRS_CG): + $(INSTALL) -d -m 0775 $@ + +$(DIRS_INST): + $(INSTALL) -d $(OWNED_BY) $@ + +$(ROOTLIB)/$(SUNW_ISA)/libm.so.%: $(CG)/libm.so.% + $(INST_EXEC_CMD) + @if [ $< = $(LIBM_S) ]; then \ + (cd $(@D); $(RM) libm.so; ln -s $(@F) libm.so) \ + fi + +$(ROOTLIB)/$(SUNW_ISA)/libmvec.so.%: $(CG)/libmvec.so.% + $(INST_EXEC_CMD) + @cd $(@D); $(RM) libmvec.so; ln -s $(@F) libmvec.so + +$(ROOTLIB)/cpu/%/$(SUNW_ISA)/libmvec_isa.so.1: %/libmvec_isa.so.1 + $(INST_EXEC_CMD) + +$(ROOTLIB)/$(SUNW_ISA)/%:: $(CG)/misc/% + $(INST_CMD) + +FRC: diff --git a/usr/src/libm/wos64/llib-lm b/usr/src/libm/wos64/llib-lm new file mode 100644 index 0000000..db7161d --- /dev/null +++ b/usr/src/libm/wos64/llib-lm @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)llib-lm 1.5 05/10/08 SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#undef __PRAGMA_REDEFINE_EXTNAME +#include +#if defined(_STDC_C99) +#undef isnan +extern int isnan(double); +extern int isnand(double); /* LSARC/2003/670 */ +typedef union _h_val { + unsigned long _i[2]; + double _d; +} _h_val; +extern const _h_val __huge_val; +#endif +#include +#include +#undef clog +extern double complex clog(double complex); diff --git a/usr/src/libm/wos64/mapfiles/libmv1-common b/usr/src/libm/wos64/mapfiles/libmv1-common new file mode 100644 index 0000000..af7686b --- /dev/null +++ b/usr/src/libm/wos64/mapfiles/libmv1-common @@ -0,0 +1,200 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmv1-common 1.4 06/01/31 SMI +# +# Interface definition for libm.so.1 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook. 
Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". +# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + __atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +SUNWprivate_1.1 { + global: + _lib_version; + 
__libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/libm/wos64/mapfiles/libmv2-i386 b/usr/src/libm/wos64/mapfiles/libmv2-i386 new file mode 100644 index 0000000..0e4597f --- /dev/null +++ b/usr/src/libm/wos64/mapfiles/libmv2-i386 @@ -0,0 +1,687 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmv2-i386 1.4 06/01/31 SMI +# +# Interface definition for libm.so.2 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook. Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". 
+# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +SUNW_1.3 { + global: + __isnanf; #LSARC/2003/658 + __isnanl; #LSARC/2003/658 + _isnan; #LSARC/2003/658 + _isnand; #LSARC/2003/658 + _isnanf; #LSARC/2003/658 + _logb; #LSARC/2003/658 + _modf; #LSARC/2003/658 + _modff; #LSARC/2003/658 + _nextafter; #LSARC/2003/658 + _scalb; #LSARC/2003/658 + isnand; #LSARC/2003/658 + isnanf; #LSARC/2003/658 + isnanl; #LSARC/2003/658 +} SUNW_1.2; + +SUNW_1.2 { + global: + __acoshf; + __acoshl; + __asinhf; + __asinhl; + __atanhf; + __atanhl; + __cabs; + __cabsf; + __cabsl; + __cacos; + __cacosf; + __cacosh; + __cacoshf; + __cacoshl; + __cacosl; + __carg; + __cargf; + __cargl; + __casin; + __casinf; + __casinh; + __casinhf; + __casinhl; + __casinl; + __catan; + __catanf; + __catanh; + __catanhf; + __catanhl; + __catanl; + __cbrtf; + __cbrtl; + __ccos; + __ccosf; + __ccosh; + __ccoshf; + __ccoshl; + __ccosl; + __cexp; + __cexpf; + __cexpl; + __cimag; + __cimagf; + __cimagl; + __clog; + __clogf; + __clogl; + __conj; + __conjf; + __conjl; + __copysignf; + __copysignl; + __cpow; + __cpowf; + __cpowl; + __cproj; + __cprojf; + __cprojl; + __creal; + __crealf; + __creall; + __csin; + __csinf; + __csinh; + __csinhf; + __csinhl; + __csinl; + __csqrt; + __csqrtf; + __csqrtl; + __ctan; + __ctanf; + __ctanh; + __ctanhf; + __ctanhl; + __ctanl; + __erfcf; + __erfcl; + __erff; + __erfl; + __exp2; + __exp2f; + __exp2l; + __expm1f; + __expm1l; + __fdim; + __fdimf; + __fdiml; + __feclearexcept; + __fegetenv; + __fegetexceptflag; + __fegetprec; + __fegetround; + __feholdexcept; + __fenv_dfl_env; #LSARC/1996/175 + __feraiseexcept; + __fesetenv; + __fesetexceptflag; + __fesetprec; + __fesetround; + __fetestexcept; + __feupdateenv; + __fex_get_handling; #LSARC/1996/175 
+ __fex_get_log; #LSARC/1996/175 + __fex_get_log_depth; #LSARC/1996/175 + __fex_getexcepthandler; #LSARC/1996/175 + __fex_log_entry; #LSARC/1996/175 + __fex_merge_flags; #LSARC/1996/175 + __fex_set_handling; #LSARC/1996/175 + __fex_set_log; #LSARC/1996/175 + __fex_set_log_depth; #LSARC/1996/175 + __fex_setexcepthandler; #LSARC/1996/175 + __fma; + __fmaf; + __fmal; + __fmax; + __fmaxf; + __fmaxl; + __fmin; + __fminf; + __fminl; + __frexp; + __gammaf; #LSARC/2003/279 + __gammaf_r; #LSARC/2003/279 + __gammal; #LSARC/2003/279 + __gammal_r; #LSARC/2003/279 + __hypotf; + __hypotl; + __ilogbf; + __ilogbl; + __j0f; #LSARC/2003/279 + __j0l; #LSARC/2003/279 + __j1f; #LSARC/2003/279 + __j1l; #LSARC/2003/279 + __jnf; #LSARC/2003/279 + __jnl; #LSARC/2003/279 + __ldexp; + __lgammaf; + __lgammaf_r; #LSARC/2003/279 + __lgammal; + __lgammal_r; #LSARC/2003/279 + __llrint; + __llrintf; + __llrintl; + __llround; + __llroundf; + __llroundl; + __log1pf; + __log1pl; + __log2; + __log2f; + __log2l; + __logbf; + __logbl; + __lrint; + __lrintf; + __lrintl; + __lround; + __lroundf; + __lroundl; + __modf; + __nan; + __nanf; + __nanl; + __nearbyint; + __nearbyintf; + __nearbyintl; + __nextafterf; + __nextafterl; + __nexttoward; + __nexttowardf; + __nexttowardl; + __remainderf; + __remainderl; + __remquo; + __remquof; + __remquol; + __rintf; + __rintl; + __round; + __roundf; + __roundl; + __scalbf; #LSARC/2003/279 + __scalbl; #LSARC/2003/279 + __scalbln; + __scalblnf; + __scalblnl; + __scalbnf; + __scalbnl; + __signgamf; #LSARC/2003/279 + __signgaml; #LSARC/2003/279 + __significandf; #LSARC/2003/279 + __significandl; #LSARC/2003/279 + __sincos; #LSARC/2003/279 + __sincosf; #LSARC/2003/279 + __sincosl; #LSARC/2003/279 + __tgamma; + __tgammaf; + __tgammal; + __trunc; + __truncf; + __truncl; + __xpg6; #private contract with libc group + __y0f; #LSARC/2003/279 + __y0l; #LSARC/2003/279 + __y1f; #LSARC/2003/279 + __y1l; #LSARC/2003/279 + __ynf; #LSARC/2003/279 + __ynl; #LSARC/2003/279 + acosf; + 
acoshf; + acoshl; + acosl; + asinf; + asinhf; + asinhl; + asinl; + atan2f; + atan2l; + atanf; + atanhf; + atanhl; + atanl; + cabs; + cabsf; + cabsl; + cacos; + cacosf; + cacosh; + cacoshf; + cacoshl; + cacosl; + carg; + cargf; + cargl; + casin; + casinf; + casinh; + casinhf; + casinhl; + casinl; + catan; + catanf; + catanh; + catanhf; + catanhl; + catanl; + cbrtf; + cbrtl; + ccos; + ccosf; + ccosh; + ccoshf; + ccoshl; + ccosl; + ceilf; + ceill; + cexp; + cexpf; + cexpl; + cimag; + cimagf; + cimagl; + clog; + clogf; + clogl; + conj; + conjf; + conjl; + copysignf; + copysignl; + cosf; + coshf; + coshl; + cosl; + cpow; + cpowf; + cpowl; + cproj; + cprojf; + cprojl; + creal; + crealf; + creall; + csin; + csinf; + csinh; + csinhf; + csinhl; + csinl; + csqrt; + csqrtf; + csqrtl; + ctan; + ctanf; + ctanh; + ctanhf; + ctanhl; + ctanl; + erfcf; + erfcl; + erff; + erfl; + exp2; + exp2f; + exp2l; + expf; + expl; + expm1f; + expm1l; + fabsf; + fabsl; + fdim; + fdimf; + fdiml; + feclearexcept; + fegetenv; + fegetexceptflag; + fegetprec; + fegetround; + feholdexcept; + feraiseexcept; + fesetenv; + fesetexceptflag; + fesetprec; + fesetround; + fetestexcept; + feupdateenv; + fex_get_handling; #LSARC/1996/175 + fex_get_log; #LSARC/1996/175 + fex_get_log_depth; #LSARC/1996/175 + fex_getexcepthandler; #LSARC/1996/175 + fex_log_entry; #LSARC/1996/175 + fex_merge_flags; #LSARC/1996/175 + fex_set_handling; #LSARC/1996/175 + fex_set_log; #LSARC/1996/175 + fex_set_log_depth; #LSARC/1996/175 + fex_setexcepthandler; #LSARC/1996/175 + floorf; + floorl; + fma; + fmaf; + fmal; + fmax; + fmaxf; + fmaxl; + fmin; + fminf; + fminl; + fmodf; + fmodl; + frexp; + frexpf; + frexpl; + gammaf; #LSARC/2003/279 + gammaf_r; #LSARC/2003/279 + gammal; #LSARC/2003/279 + gammal_r; #LSARC/2003/279 + hypotf; + hypotl; + ilogbf; + ilogbl; + j0f; #LSARC/2003/279 + j0l; #LSARC/2003/279 + j1f; #LSARC/2003/279 + j1l; #LSARC/2003/279 + jnf; #LSARC/2003/279 + jnl; #LSARC/2003/279 + ldexp; + ldexpf; + ldexpl; + lgammaf; 
+ lgammaf_r; #LSARC/2003/279 + lgammal; + lgammal_r; #LSARC/2003/279 + llrint; + llrintf; + llrintl; + llround; + llroundf; + llroundl; + log10f; + log10l; + log1pf; + log1pl; + log2; + log2f; + log2l; + logbf; + logbl; + logf; + logl; + lrint; + lrintf; + lrintl; + lround; + lroundf; + lroundl; + modf; + modff; + modfl; + nan; + nanf; + nanl; + nearbyint; + nearbyintf; + nearbyintl; + nextafterf; + nextafterl; + nexttoward; + nexttowardf; + nexttowardl; + powf; + powl; + remainderf; + remainderl; + remquo; + remquof; + remquol; + rintf; + rintl; + round; + roundf; + roundl; + scalbf; #LSARC/2003/279 + scalbl; #LSARC/2003/279 + scalbln; + scalblnf; + scalblnl; + scalbnf; + scalbnl; + signgamf; #LSARC/2003/279 + signgaml; #LSARC/2003/279 + significandf; #LSARC/2003/279 + significandl; #LSARC/2003/279 + sincos; #LSARC/2003/279 + sincosf; #LSARC/2003/279 + sincosl; #LSARC/2003/279 + sinf; + sinhf; + sinhl; + sinl; + sqrtf; + sqrtl; + tanf; + tanhf; + tanhl; + tanl; + tgamma; + tgammaf; + tgammal; + trunc; + truncf; + truncl; + y0f; #LSARC/2003/279 + y0l; #LSARC/2003/279 + y1f; #LSARC/2003/279 + y1l; #LSARC/2003/279 + ynf; #LSARC/2003/279 + ynl; #LSARC/2003/279 +} SUNW_1.1.1; + +SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + __atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + 
__j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +SUNWprivate_1.2 { + global: + __libm_mt_fex_sync; # -lmtsk + __mt_fex_sync; # -lmtsk +} SUNWprivate_1.1; + +SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/libm/wos64/mapfiles/libmv2-sparc b/usr/src/libm/wos64/mapfiles/libmv2-sparc new file mode 100644 index 0000000..987c1fe --- /dev/null +++ b/usr/src/libm/wos64/mapfiles/libmv2-sparc @@ -0,0 +1,683 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmv2-sparc 1.7 06/01/31 SMI +# +# Interface definition for libm.so.2 +# +# For information regarding the establishment of versioned definitions see: +# The Linker and Libraries Manual (version 2.5 or greater) +# This is part of the Developers Guide in the Answerbook. Specifically refer +# to Chapter 2 under section "Defining Additional Symbols" through section +# "Reducing Symbol Scope", and Chapter 5 "Versioning". +# +# For specific rules for the modification (evolution) of these version +# definitions see: +# psarc_1995_14: Integration of Scoped Libraries +# (/shared/sac/PSARC/1995/014) +# Policy for Shared Library Version Names and Interface Definitions +# (/shared/ON/general_docs/scoping-rules.ps) + +SUNW_1.3 { + global: + __isnanf; #LSARC/2003/658 + __isnanl; #LSARC/2003/658 + _isnan; #LSARC/2003/658 + _isnand; #LSARC/2003/658 + _isnanf; #LSARC/2003/658 + _logb; #LSARC/2003/658 + _modf; #LSARC/2003/658 + _modff; #LSARC/2003/658 + _nextafter; #LSARC/2003/658 + _scalb; #LSARC/2003/658 + isnand; #LSARC/2003/658 + isnanf; #LSARC/2003/658 + isnanl; #LSARC/2003/658 +} SUNW_1.2; + +SUNW_1.2 { + global: + __acoshf; + __acoshl; + __asinhf; + __asinhl; + __atanhf; + __atanhl; + __cabs; + __cabsf; + __cabsl; + __cacos; + __cacosf; + __cacosh; + __cacoshf; + __cacoshl; + __cacosl; + __carg; + __cargf; + __cargl; + __casin; + __casinf; + __casinh; + __casinhf; + __casinhl; + __casinl; + __catan; + __catanf; + __catanh; + __catanhf; + __catanhl; + __catanl; + __cbrtf; + __cbrtl; + __ccos; + __ccosf; + __ccosh; + __ccoshf; + __ccoshl; + __ccosl; + __cexp; + __cexpf; + __cexpl; + __cimag; + __cimagf; + __cimagl; + __clog; + __clogf; + 
__clogl; + __conj; + __conjf; + __conjl; + __copysignf; + __copysignl; + __cpow; + __cpowf; + __cpowl; + __cproj; + __cprojf; + __cprojl; + __creal; + __crealf; + __creall; + __csin; + __csinf; + __csinh; + __csinhf; + __csinhl; + __csinl; + __csqrt; + __csqrtf; + __csqrtl; + __ctan; + __ctanf; + __ctanh; + __ctanhf; + __ctanhl; + __ctanl; + __erfcf; + __erfcl; + __erff; + __erfl; + __exp2; + __exp2f; + __exp2l; + __expm1f; + __expm1l; + __fdim; + __fdimf; + __fdiml; + __feclearexcept; + __fegetenv; + __fegetexceptflag; + __fegetround; + __feholdexcept; + __fenv_dfl_env; #LSARC/1996/175 + __feraiseexcept; + __fesetenv; + __fesetexceptflag; + __fesetround; + __fetestexcept; + __feupdateenv; + __fex_get_handling; #LSARC/1996/175 + __fex_get_log; #LSARC/1996/175 + __fex_get_log_depth; #LSARC/1996/175 + __fex_getexcepthandler; #LSARC/1996/175 + __fex_log_entry; #LSARC/1996/175 + __fex_merge_flags; #LSARC/1996/175 + __fex_set_handling; #LSARC/1996/175 + __fex_set_log; #LSARC/1996/175 + __fex_set_log_depth; #LSARC/1996/175 + __fex_setexcepthandler; #LSARC/1996/175 + __fma; + __fmaf; + __fmal; + __fmax; + __fmaxf; + __fmaxl; + __fmin; + __fminf; + __fminl; + __frexp; + __gammaf; #LSARC/2003/279 + __gammaf_r; #LSARC/2003/279 + __gammal; #LSARC/2003/279 + __gammal_r; #LSARC/2003/279 + __hypotf; + __hypotl; + __ilogbf; + __ilogbl; + __j0f; #LSARC/2003/279 + __j0l; #LSARC/2003/279 + __j1f; #LSARC/2003/279 + __j1l; #LSARC/2003/279 + __jnf; #LSARC/2003/279 + __jnl; #LSARC/2003/279 + __ldexp; + __lgammaf; + __lgammaf_r; #LSARC/2003/279 + __lgammal; + __lgammal_r; #LSARC/2003/279 + __llrint; + __llrintf; + __llrintl; + __llround; + __llroundf; + __llroundl; + __log1pf; + __log1pl; + __log2; + __log2f; + __log2l; + __logbf; + __logbl; + __lrint; + __lrintf; + __lrintl; + __lround; + __lroundf; + __lroundl; + __modf; + __nan; + __nanf; + __nanl; + __nearbyint; + __nearbyintf; + __nearbyintl; + __nextafterf; + __nextafterl; + __nexttoward; + __nexttowardf; + __nexttowardl; + 
__remainderf; + __remainderl; + __remquo; + __remquof; + __remquol; + __rintf; + __rintl; + __round; + __roundf; + __roundl; + __scalbf; #LSARC/2003/279 + __scalbl; #LSARC/2003/279 + __scalbln; + __scalblnf; + __scalblnl; + __scalbnf; + __scalbnl; + __signgamf; #LSARC/2003/279 + __signgaml; #LSARC/2003/279 + __significandf; #LSARC/2003/279 + __significandl; #LSARC/2003/279 + __sincos; #LSARC/2003/279 + __sincosf; #LSARC/2003/279 + __sincosl; #LSARC/2003/279 + __tgamma; + __tgammaf; + __tgammal; + __trunc; + __truncf; + __truncl; + __xpg6; #private contract with libc group + __y0f; #LSARC/2003/279 + __y0l; #LSARC/2003/279 + __y1f; #LSARC/2003/279 + __y1l; #LSARC/2003/279 + __ynf; #LSARC/2003/279 + __ynl; #LSARC/2003/279 + acosf; + acoshf; + acoshl; + acosl; + asinf; + asinhf; + asinhl; + asinl; + atan2f; + atan2l; + atanf; + atanhf; + atanhl; + atanl; + cabs; + cabsf; + cabsl; + cacos; + cacosf; + cacosh; + cacoshf; + cacoshl; + cacosl; + carg; + cargf; + cargl; + casin; + casinf; + casinh; + casinhf; + casinhl; + casinl; + catan; + catanf; + catanh; + catanhf; + catanhl; + catanl; + cbrtf; + cbrtl; + ccos; + ccosf; + ccosh; + ccoshf; + ccoshl; + ccosl; + ceilf; + ceill; + cexp; + cexpf; + cexpl; + cimag; + cimagf; + cimagl; + clog; + clogf; + clogl; + conj; + conjf; + conjl; + copysignf; + copysignl; + cosf; + coshf; + coshl; + cosl; + cpow; + cpowf; + cpowl; + cproj; + cprojf; + cprojl; + creal; + crealf; + creall; + csin; + csinf; + csinh; + csinhf; + csinhl; + csinl; + csqrt; + csqrtf; + csqrtl; + ctan; + ctanf; + ctanh; + ctanhf; + ctanhl; + ctanl; + erfcf; + erfcl; + erff; + erfl; + exp2; + exp2f; + exp2l; + expf; + expl; + expm1f; + expm1l; + fabsf; + fabsl; + fdim; + fdimf; + fdiml; + feclearexcept; + fegetenv; + fegetexceptflag; + fegetround; + feholdexcept; + feraiseexcept; + fesetenv; + fesetexceptflag; + fesetround; + fetestexcept; + feupdateenv; + fex_get_handling; #LSARC/1996/175 + fex_get_log; #LSARC/1996/175 + fex_get_log_depth; #LSARC/1996/175 + 
fex_getexcepthandler; #LSARC/1996/175 + fex_log_entry; #LSARC/1996/175 + fex_merge_flags; #LSARC/1996/175 + fex_set_handling; #LSARC/1996/175 + fex_set_log; #LSARC/1996/175 + fex_set_log_depth; #LSARC/1996/175 + fex_setexcepthandler; #LSARC/1996/175 + floorf; + floorl; + fma; + fmaf; + fmal; + fmax; + fmaxf; + fmaxl; + fmin; + fminf; + fminl; + fmodf; + fmodl; + frexp; + frexpf; + frexpl; + gammaf; #LSARC/2003/279 + gammaf_r; #LSARC/2003/279 + gammal; #LSARC/2003/279 + gammal_r; #LSARC/2003/279 + hypotf; + hypotl; + ilogbf; + ilogbl; + j0f; #LSARC/2003/279 + j0l; #LSARC/2003/279 + j1f; #LSARC/2003/279 + j1l; #LSARC/2003/279 + jnf; #LSARC/2003/279 + jnl; #LSARC/2003/279 + ldexp; + ldexpf; + ldexpl; + lgammaf; + lgammaf_r; #LSARC/2003/279 + lgammal; + lgammal_r; #LSARC/2003/279 + llrint; + llrintf; + llrintl; + llround; + llroundf; + llroundl; + log10f; + log10l; + log1pf; + log1pl; + log2; + log2f; + log2l; + logbf; + logbl; + logf; + logl; + lrint; + lrintf; + lrintl; + lround; + lroundf; + lroundl; + modf; + modff; + modfl; + nan; + nanf; + nanl; + nearbyint; + nearbyintf; + nearbyintl; + nextafterf; + nextafterl; + nexttoward; + nexttowardf; + nexttowardl; + powf; + powl; + remainderf; + remainderl; + remquo; + remquof; + remquol; + rintf; + rintl; + round; + roundf; + roundl; + scalbf; #LSARC/2003/279 + scalbl; #LSARC/2003/279 + scalbln; + scalblnf; + scalblnl; + scalbnf; + scalbnl; + signgamf; #LSARC/2003/279 + signgaml; #LSARC/2003/279 + significandf; #LSARC/2003/279 + significandl; #LSARC/2003/279 + sincos; #LSARC/2003/279 + sincosf; #LSARC/2003/279 + sincosl; #LSARC/2003/279 + sinf; + sinhf; + sinhl; + sinl; + sqrtf; + sqrtl; + tanf; + tanhf; + tanhl; + tanl; + tgamma; + tgammaf; + tgammal; + trunc; + truncf; + truncl; + y0f; #LSARC/2003/279 + y0l; #LSARC/2003/279 + y1f; #LSARC/2003/279 + y1l; #LSARC/2003/279 + ynf; #LSARC/2003/279 + ynl; #LSARC/2003/279 +} SUNW_1.1.1; + +SUNW_1.1.1 { + global: + __acosf; + __acosl; + __asinf; + __asinl; + __atan2f; + 
__atan2l; + __atanf; + __atanl; + __ceilf; + __ceill; + __cosf; + __coshf; + __coshl; + __cosl; + __expf; + __expl; + __fabsf; + __fabsl; + __floorf; + __floorl; + __fmodf; + __fmodl; + __frexpf; + __frexpl; + __ldexpf; + __ldexpl; + __log10f; + __log10l; + __logf; + __logl; + __modff; + __modfl; + __powf; + __powl; + __sinf; + __sinhf; + __sinhl; + __sinl; + __sqrtf; + __sqrtl; + __tanf; + __tanhf; + __tanhl; + __tanl; +} SUNW_1.1; + +SUNW_1.1 { + global: + __acos; + __acosh; + __asin; + __asinh; + __atan; + __atan2; + __atanh; + __cbrt; + __ceil; + __copysign; + __cos; + __cosh; + __erf; + __erfc; + __exp; + __expm1; + __fabs; + __floor; + __fmod; + __gamma; + __gamma_r; + __hypot; + __ilogb; + __isnan; + __j0; + __j1; + __jn; + __lgamma; + __lgamma_r; + __log; + __log10; + __log1p; + __logb; + __nextafter; + __pow; + __remainder; + __rint; + __scalb; + __scalbn; + __signgam; + __significand; + __sin; + __sinh; + __sqrt; + __tan; + __tanh; + __y0; + __y1; + __yn; + acos; + acosh; + asin; + asinh; + atan; + atan2; + atanh; + cbrt; + ceil; + copysign; + cos; + cosh; + erf; + erfc; + exp; + expm1; + fabs; + floor; + fmod; + gamma; + gamma_r; + hypot; + ilogb; + isnan; + j0; + j1; + jn; + lgamma; + lgamma_r; + log; + log10; + log1p; + logb; + matherr; + nextafter; + pow; + remainder; + rint; + scalb; + scalbn; + signgam; + significand; + sin; + sinh; + sqrt; + tan; + tanh; + y0; + y1; + yn; +}; + +SUNWprivate_1.2 { + global: + __libm_mt_fex_sync; # -lmtsk + __mt_fex_sync; # -lmtsk +} SUNWprivate_1.1; + +SUNWprivate_1.1 { + global: + _lib_version; + __libm__rem_pio2; + __libm__rem_pio2m; + # anything else is local + local: + *; # symbols not mentioned in this file are scoped out +}; diff --git a/usr/src/libm/wos64/mapfiles/libmvec-common b/usr/src/libm/wos64/mapfiles/libmvec-common new file mode 100644 index 0000000..c961f52 --- /dev/null +++ b/usr/src/libm/wos64/mapfiles/libmvec-common @@ -0,0 +1,128 @@ +# +# CDDL HEADER START +# +# The contents of this file are 
subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-common 1.4 06/01/31 SMI +# +# Interface definition for sparcv9/libmvec.so.1 + +SUNW_1.1 { + global: + __vatan2; #LSARC/2003/737 + __vatan2_; #LSARC/2003/737 + __vatan2f; #LSARC/2003/737 + __vatan2f_; #LSARC/2003/737 + __vatan; #LSARC/2003/737 + __vatan_; #LSARC/2003/737 + __vatanf; #LSARC/2003/737 + __vatanf_; #LSARC/2003/737 + __vc_abs; #LSARC/2003/737 + __vc_abs_; #LSARC/2003/737 + __vc_exp; #LSARC/2003/737 + __vc_exp_; #LSARC/2003/737 + __vc_log; #LSARC/2003/737 + __vc_log_; #LSARC/2003/737 + __vc_pow; #LSARC/2003/737 + __vc_pow_; #LSARC/2003/737 + __vcos; #LSARC/2003/737 + __vcos_; #LSARC/2003/737 + __vcosf; #LSARC/2003/737 + __vcosf_; #LSARC/2003/737 + __vexp; #LSARC/2003/737 + __vexp_; #LSARC/2003/737 + __vexpf; #LSARC/2003/737 + __vexpf_; #LSARC/2003/737 + __vhypot; #LSARC/2003/737 + __vhypot_; #LSARC/2003/737 + __vhypotf; #LSARC/2003/737 + __vhypotf_; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vlog_; #LSARC/2003/737 + __vlogf; #LSARC/2003/737 + __vlogf_; #LSARC/2003/737 + __vpow; #LSARC/2003/737 + __vpow_; #LSARC/2003/737 + __vpowf; #LSARC/2003/737 + __vpowf_; #LSARC/2003/737 + __vrhypot; 
#LSARC/2003/737 + __vrhypot_; #LSARC/2003/737 + __vrhypotf; #LSARC/2003/737 + __vrhypotf_; #LSARC/2003/737 + __vrsqrt; #LSARC/2003/737 + __vrsqrt_; #LSARC/2003/737 + __vrsqrtf; #LSARC/2003/737 + __vrsqrtf_; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsin_; #LSARC/2003/737 + __vsincos; #LSARC/2003/737 + __vsincos_; #LSARC/2003/737 + __vsincosf; #LSARC/2003/737 + __vsincosf_; #LSARC/2003/737 + __vsinf; #LSARC/2003/737 + __vsinf_; #LSARC/2003/737 + __vsqrt; #LSARC/2003/737 + __vsqrt_; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + __vsqrtf_; #LSARC/2003/737 + __vz_abs; #LSARC/2003/737 + __vz_abs_; #LSARC/2003/737 + __vz_exp; #LSARC/2003/737 + __vz_exp_; #LSARC/2003/737 + __vz_log; #LSARC/2003/737 + __vz_log_; #LSARC/2003/737 + __vz_pow; #LSARC/2003/737 + __vz_pow_; #LSARC/2003/737 + vatan2_; #LSARC/2003/737 + vatan2f_; #LSARC/2003/737 + vatan_; #LSARC/2003/737 + vatanf_; #LSARC/2003/737 + vc_abs_; #LSARC/2003/737 + vc_exp_; #LSARC/2003/737 + vc_log_; #LSARC/2003/737 + vc_pow_; #LSARC/2003/737 + vcos_; #LSARC/2003/737 + vcosf_; #LSARC/2003/737 + vexp_; #LSARC/2003/737 + vexpf_; #LSARC/2003/737 + vhypot_; #LSARC/2003/737 + vhypotf_; #LSARC/2003/737 + vlog_; #LSARC/2003/737 + vlogf_; #LSARC/2003/737 + vpow_; #LSARC/2003/737 + vpowf_; #LSARC/2003/737 + vrhypot_; #LSARC/2003/737 + vrhypotf_; #LSARC/2003/737 + vrsqrt_; #LSARC/2003/737 + vrsqrtf_; #LSARC/2003/737 + vsin_; #LSARC/2003/737 + vsincos_; #LSARC/2003/737 + vsincosf_; #LSARC/2003/737 + vsinf_; #LSARC/2003/737 + vsqrt_; #LSARC/2003/737 + vsqrtf_; #LSARC/2003/737 + vz_abs_; #LSARC/2003/737 + vz_exp_; #LSARC/2003/737 + vz_log_; #LSARC/2003/737 + vz_pow_; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/libm/wos64/mapfiles/libmvec-v9a b/usr/src/libm/wos64/mapfiles/libmvec-v9a new file mode 100644 index 0000000..ede1339 --- /dev/null +++ b/usr/src/libm/wos64/mapfiles/libmvec-v9a @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and 
Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-v9a 1.4 06/01/31 SMI +# +# Interface definition for cpu/sparcv9+vis/sparcv9/libmvec_isa.so.1 + +SUNW_1.1 { + global: + __vatan; #LSARC/2003/737 + __vatan2; #LSARC/2003/737 + __vatan2f; #LSARC/2003/737 + __vatanf; #LSARC/2003/737 + __vcos; #LSARC/2003/737 + __vcosf; #LSARC/2003/737 + __vexp; #LSARC/2003/737 + __vexpf; #LSARC/2003/737 + __vhypot; #LSARC/2003/737 + __vhypotf; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vlogf; #LSARC/2003/737 + __vpow; #LSARC/2003/737 + __vpowf; #LSARC/2003/737 + __vrhypot; #LSARC/2003/737 + __vrhypotf; #LSARC/2003/737 + __vrsqrt; #LSARC/2003/737 + __vrsqrtf; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsincos; #LSARC/2003/737 + __vsincosf; #LSARC/2003/737 + __vsinf; #LSARC/2003/737 + __vsqrt; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + local: + *; +}; diff --git a/usr/src/libm/wos64/mapfiles/libmvec-v9b b/usr/src/libm/wos64/mapfiles/libmvec-v9b new file mode 100644 index 0000000..d84c2bf --- /dev/null +++ b/usr/src/libm/wos64/mapfiles/libmvec-v9b @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# @(#)libmvec-v9b 1.4 06/01/31 SMI +# +# Interface definition for cpu/sparcv9+vis2/sparcv9/libmvec_isa.so.1 + +SUNW_1.1 { + global: + __vcos; #LSARC/2003/737 + __vlog; #LSARC/2003/737 + __vsin; #LSARC/2003/737 + __vsqrtf; #LSARC/2003/737 + local: + *; +}; -- cgit v1.2.3