summaryrefslogtreecommitdiff
path: root/usr/src/lib/libmvec/common/vis/__vexp.S
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libmvec/common/vis/__vexp.S')
-rw-r--r--usr/src/lib/libmvec/common/vis/__vexp.S1282
1 files changed, 1282 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/vis/__vexp.S b/usr/src/lib/libmvec/common/vis/__vexp.S
new file mode 100644
index 0000000000..fc11df08ee
--- /dev/null
+++ b/usr/src/lib/libmvec/common/vis/__vexp.S
@@ -0,0 +1,1282 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .file "__vexp.S"
+
+#include "libm.h"
+
+ RO_DATA
+
+/********************************************************************
+ * vexp() algorithm is from mopt:f_exp.c. Basics are included here
+ * to supplement comments within this file. vexp() has been unrolled
+ * to a depth of 3. Only element 0 is documented.
+ *
+ * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
+ * 2^44 to allow *2^k w/o shifting within the FP registers. These
+ * had to be removed for CHEETAH to avoid the fdtox of a very large
+ * number, which would trap to kernel (2^52).
+ *
+ * Let x = (k + j/256)*ln2 + r
+ * then exp(x) = exp((k+j/256)*ln2) * exp(r)
+ * = 2^k * 2^(j/256) * exp(r)
+ * where exp(r) is approximated by the polynomial
+ * exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
+ * = 1 + r*(1+r*(B1+r*(B2+r*B3)))
+ * let
+ * p = r*(1+r*(B1+r*(B2+r*B3))) ! notice, not quite exp(r)
+ * q = 2^(j/256) (high 64 bits)
+ * t = 2^(j/256) (extra precision) ! both from _TBL_exp_z[]
+ * then
+ * 2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
+ * then actual computation is 2^k * ( q + ( t + q*p ) )
+ *
+ ********************************************************************/
+
+ .align 16
+TBL: /* 256 entries x 16 bytes; entry j = 2^(j/256) as a double (q) followed by its extra-precision tail (t) */
+ .word 0x3ff00000,0x00000000
+ .word 0x00000000,0x00000000
+ .word 0x3ff00b1a,0xfa5abcbf
+ .word 0xbc84f6b2,0xa7609f71
+ .word 0x3ff0163d,0xa9fb3335
+ .word 0x3c9b6129,0x9ab8cdb7
+ .word 0x3ff02168,0x143b0281
+ .word 0xbc82bf31,0x0fc54eb6
+ .word 0x3ff02c9a,0x3e778061
+ .word 0xbc719083,0x535b085d
+ .word 0x3ff037d4,0x2e11bbcc
+ .word 0x3c656811,0xeeade11a
+ .word 0x3ff04315,0xe86e7f85
+ .word 0xbc90a31c,0x1977c96e
+ .word 0x3ff04e5f,0x72f654b1
+ .word 0x3c84c379,0x3aa0d08c
+ .word 0x3ff059b0,0xd3158574
+ .word 0x3c8d73e2,0xa475b465
+ .word 0x3ff0650a,0x0e3c1f89
+ .word 0xbc95cb7b,0x5799c396
+ .word 0x3ff0706b,0x29ddf6de
+ .word 0xbc8c91df,0xe2b13c26
+ .word 0x3ff07bd4,0x2b72a836
+ .word 0x3c832334,0x54458700
+ .word 0x3ff08745,0x18759bc8
+ .word 0x3c6186be,0x4bb284ff
+ .word 0x3ff092bd,0xf66607e0
+ .word 0xbc968063,0x800a3fd1
+ .word 0x3ff09e3e,0xcac6f383
+ .word 0x3c914878,0x18316136
+ .word 0x3ff0a9c7,0x9b1f3919
+ .word 0x3c85d16c,0x873d1d38
+ .word 0x3ff0b558,0x6cf9890f
+ .word 0x3c98a62e,0x4adc610a
+ .word 0x3ff0c0f1,0x45e46c85
+ .word 0x3c94f989,0x06d21cef
+ .word 0x3ff0cc92,0x2b7247f7
+ .word 0x3c901edc,0x16e24f71
+ .word 0x3ff0d83b,0x23395dec
+ .word 0xbc9bc14d,0xe43f316a
+ .word 0x3ff0e3ec,0x32d3d1a2
+ .word 0x3c403a17,0x27c57b53
+ .word 0x3ff0efa5,0x5fdfa9c5
+ .word 0xbc949db9,0xbc54021b
+ .word 0x3ff0fb66,0xaffed31b
+ .word 0xbc6b9bed,0xc44ebd7b
+ .word 0x3ff10730,0x28d7233e
+ .word 0x3c8d46eb,0x1692fdd5
+ .word 0x3ff11301,0xd0125b51
+ .word 0xbc96c510,0x39449b3a
+ .word 0x3ff11edb,0xab5e2ab6
+ .word 0xbc9ca454,0xf703fb72
+ .word 0x3ff12abd,0xc06c31cc
+ .word 0xbc51b514,0xb36ca5c7
+ .word 0x3ff136a8,0x14f204ab
+ .word 0xbc67108f,0xba48dcf0
+ .word 0x3ff1429a,0xaea92de0
+ .word 0xbc932fbf,0x9af1369e
+ .word 0x3ff14e95,0x934f312e
+ .word 0xbc8b91e8,0x39bf44ab
+ .word 0x3ff15a98,0xc8a58e51
+ .word 0x3c82406a,0xb9eeab0a
+ .word 0x3ff166a4,0x5471c3c2
+ .word 0x3c58f23b,0x82ea1a32
+ .word 0x3ff172b8,0x3c7d517b
+ .word 0xbc819041,0xb9d78a76
+ .word 0x3ff17ed4,0x8695bbc0
+ .word 0x3c709e3f,0xe2ac5a64
+ .word 0x3ff18af9,0x388c8dea
+ .word 0xbc911023,0xd1970f6c
+ .word 0x3ff19726,0x58375d2f
+ .word 0x3c94aadd,0x85f17e08
+ .word 0x3ff1a35b,0xeb6fcb75
+ .word 0x3c8e5b4c,0x7b4968e4
+ .word 0x3ff1af99,0xf8138a1c
+ .word 0x3c97bf85,0xa4b69280
+ .word 0x3ff1bbe0,0x84045cd4
+ .word 0xbc995386,0x352ef607
+ .word 0x3ff1c82f,0x95281c6b
+ .word 0x3c900977,0x8010f8c9
+ .word 0x3ff1d487,0x3168b9aa
+ .word 0x3c9e016e,0x00a2643c
+ .word 0x3ff1e0e7,0x5eb44027
+ .word 0xbc96fdd8,0x088cb6de
+ .word 0x3ff1ed50,0x22fcd91d
+ .word 0xbc91df98,0x027bb78c
+ .word 0x3ff1f9c1,0x8438ce4d
+ .word 0xbc9bf524,0xa097af5c
+ .word 0x3ff2063b,0x88628cd6
+ .word 0x3c8dc775,0x814a8494
+ .word 0x3ff212be,0x3578a819
+ .word 0x3c93592d,0x2cfcaac9
+ .word 0x3ff21f49,0x917ddc96
+ .word 0x3c82a97e,0x9494a5ee
+ .word 0x3ff22bdd,0xa27912d1
+ .word 0x3c8d34fb,0x5577d69e
+ .word 0x3ff2387a,0x6e756238
+ .word 0x3c99b07e,0xb6c70573
+ .word 0x3ff2451f,0xfb82140a
+ .word 0x3c8acfcc,0x911ca996
+ .word 0x3ff251ce,0x4fb2a63f
+ .word 0x3c8ac155,0xbef4f4a4
+ .word 0x3ff25e85,0x711ece75
+ .word 0x3c93e1a2,0x4ac31b2c
+ .word 0x3ff26b45,0x65e27cdd
+ .word 0x3c82bd33,0x9940e9d9
+ .word 0x3ff2780e,0x341ddf29
+ .word 0x3c9e067c,0x05f9e76c
+ .word 0x3ff284df,0xe1f56381
+ .word 0xbc9a4c3a,0x8c3f0d7e
+ .word 0x3ff291ba,0x7591bb70
+ .word 0xbc82cc72,0x28401cbc
+ .word 0x3ff29e9d,0xf51fdee1
+ .word 0x3c8612e8,0xafad1255
+ .word 0x3ff2ab8a,0x66d10f13
+ .word 0xbc995743,0x191690a7
+ .word 0x3ff2b87f,0xd0dad990
+ .word 0xbc410adc,0xd6381aa4
+ .word 0x3ff2c57e,0x39771b2f
+ .word 0xbc950145,0xa6eb5124
+ .word 0x3ff2d285,0xa6e4030b
+ .word 0x3c900247,0x54db41d5
+ .word 0x3ff2df96,0x1f641589
+ .word 0x3c9d16cf,0xfbbce198
+ .word 0x3ff2ecaf,0xa93e2f56
+ .word 0x3c71ca0f,0x45d52383
+ .word 0x3ff2f9d2,0x4abd886b
+ .word 0xbc653c55,0x532bda93
+ .word 0x3ff306fe,0x0a31b715
+ .word 0x3c86f46a,0xd23182e4
+ .word 0x3ff31432,0xedeeb2fd
+ .word 0x3c8959a3,0xf3f3fcd0
+ .word 0x3ff32170,0xfc4cd831
+ .word 0x3c8a9ce7,0x8e18047c
+ .word 0x3ff32eb8,0x3ba8ea32
+ .word 0xbc9c45e8,0x3cb4f318
+ .word 0x3ff33c08,0xb26416ff
+ .word 0x3c932721,0x843659a6
+ .word 0x3ff34962,0x66e3fa2d
+ .word 0xbc835a75,0x930881a4
+ .word 0x3ff356c5,0x5f929ff1
+ .word 0xbc8b5cee,0x5c4e4628
+ .word 0x3ff36431,0xa2de883b
+ .word 0xbc8c3144,0xa06cb85e
+ .word 0x3ff371a7,0x373aa9cb
+ .word 0xbc963aea,0xbf42eae2
+ .word 0x3ff37f26,0x231e754a
+ .word 0xbc99f5ca,0x9eceb23c
+ .word 0x3ff38cae,0x6d05d866
+ .word 0xbc9e958d,0x3c9904bd
+ .word 0x3ff39a40,0x1b7140ef
+ .word 0xbc99a9a5,0xfc8e2934
+ .word 0x3ff3a7db,0x34e59ff7
+ .word 0xbc75e436,0xd661f5e3
+ .word 0x3ff3b57f,0xbfec6cf4
+ .word 0x3c954c66,0xe26fff18
+ .word 0x3ff3c32d,0xc313a8e5
+ .word 0xbc9efff8,0x375d29c3
+ .word 0x3ff3d0e5,0x44ede173
+ .word 0x3c7fe8d0,0x8c284c71
+ .word 0x3ff3dea6,0x4c123422
+ .word 0x3c8ada09,0x11f09ebc
+ .word 0x3ff3ec70,0xdf1c5175
+ .word 0xbc8af663,0x7b8c9bca
+ .word 0x3ff3fa45,0x04ac801c
+ .word 0xbc97d023,0xf956f9f3
+ .word 0x3ff40822,0xc367a024
+ .word 0x3c8bddf8,0xb6f4d048
+ .word 0x3ff4160a,0x21f72e2a
+ .word 0xbc5ef369,0x1c309278
+ .word 0x3ff423fb,0x2709468a
+ .word 0xbc98462d,0xc0b314dd
+ .word 0x3ff431f5,0xd950a897
+ .word 0xbc81c7dd,0xe35f7998
+ .word 0x3ff43ffa,0x3f84b9d4
+ .word 0x3c8880be,0x9704c002
+ .word 0x3ff44e08,0x6061892d
+ .word 0x3c489b7a,0x04ef80d0
+ .word 0x3ff45c20,0x42a7d232
+ .word 0xbc686419,0x82fb1f8e
+ .word 0x3ff46a41,0xed1d0057
+ .word 0x3c9c944b,0xd1648a76
+ .word 0x3ff4786d,0x668b3237
+ .word 0xbc9c20f0,0xed445733
+ .word 0x3ff486a2,0xb5c13cd0
+ .word 0x3c73c1a3,0xb69062f0
+ .word 0x3ff494e1,0xe192aed2
+ .word 0xbc83b289,0x5e499ea0
+ .word 0x3ff4a32a,0xf0d7d3de
+ .word 0x3c99cb62,0xf3d1be56
+ .word 0x3ff4b17d,0xea6db7d7
+ .word 0xbc8125b8,0x7f2897f0
+ .word 0x3ff4bfda,0xd5362a27
+ .word 0x3c7d4397,0xafec42e2
+ .word 0x3ff4ce41,0xb817c114
+ .word 0x3c905e29,0x690abd5d
+ .word 0x3ff4dcb2,0x99fddd0d
+ .word 0x3c98ecdb,0xbc6a7833
+ .word 0x3ff4eb2d,0x81d8abff
+ .word 0xbc95257d,0x2e5d7a52
+ .word 0x3ff4f9b2,0x769d2ca7
+ .word 0xbc94b309,0xd25957e3
+ .word 0x3ff50841,0x7f4531ee
+ .word 0x3c7a249b,0x49b7465f
+ .word 0x3ff516da,0xa2cf6642
+ .word 0xbc8f7685,0x69bd93ee
+ .word 0x3ff5257d,0xe83f4eef
+ .word 0xbc7c998d,0x43efef71
+ .word 0x3ff5342b,0x569d4f82
+ .word 0xbc807abe,0x1db13cac
+ .word 0x3ff542e2,0xf4f6ad27
+ .word 0x3c87926d,0x192d5f7e
+ .word 0x3ff551a4,0xca5d920f
+ .word 0xbc8d689c,0xefede59a
+ .word 0x3ff56070,0xdde910d2
+ .word 0xbc90fb6e,0x168eebf0
+ .word 0x3ff56f47,0x36b527da
+ .word 0x3c99bb2c,0x011d93ad
+ .word 0x3ff57e27,0xdbe2c4cf
+ .word 0xbc90b98c,0x8a57b9c4
+ .word 0x3ff58d12,0xd497c7fd
+ .word 0x3c8295e1,0x5b9a1de8
+ .word 0x3ff59c08,0x27ff07cc
+ .word 0xbc97e2ce,0xe467e60f
+ .word 0x3ff5ab07,0xdd485429
+ .word 0x3c96324c,0x054647ad
+ .word 0x3ff5ba11,0xfba87a03
+ .word 0xbc9b77a1,0x4c233e1a
+ .word 0x3ff5c926,0x8a5946b7
+ .word 0x3c3c4b1b,0x816986a2
+ .word 0x3ff5d845,0x90998b93
+ .word 0xbc9cd6a7,0xa8b45642
+ .word 0x3ff5e76f,0x15ad2148
+ .word 0x3c9ba6f9,0x3080e65e
+ .word 0x3ff5f6a3,0x20dceb71
+ .word 0xbc89eadd,0xe3cdcf92
+ .word 0x3ff605e1,0xb976dc09
+ .word 0xbc93e242,0x9b56de47
+ .word 0x3ff6152a,0xe6cdf6f4
+ .word 0x3c9e4b3e,0x4ab84c27
+ .word 0x3ff6247e,0xb03a5585
+ .word 0xbc9383c1,0x7e40b497
+ .word 0x3ff633dd,0x1d1929fd
+ .word 0x3c984710,0xbeb964e5
+ .word 0x3ff64346,0x34ccc320
+ .word 0xbc8c483c,0x759d8932
+ .word 0x3ff652b9,0xfebc8fb7
+ .word 0xbc9ae3d5,0xc9a73e08
+ .word 0x3ff66238,0x82552225
+ .word 0xbc9bb609,0x87591c34
+ .word 0x3ff671c1,0xc70833f6
+ .word 0xbc8e8732,0x586c6134
+ .word 0x3ff68155,0xd44ca973
+ .word 0x3c6038ae,0x44f73e65
+ .word 0x3ff690f4,0xb19e9538
+ .word 0x3c8804bd,0x9aeb445c
+ .word 0x3ff6a09e,0x667f3bcd
+ .word 0xbc9bdd34,0x13b26456
+ .word 0x3ff6b052,0xfa75173e
+ .word 0x3c7a38f5,0x2c9a9d0e
+ .word 0x3ff6c012,0x750bdabf
+ .word 0xbc728956,0x67ff0b0d
+ .word 0x3ff6cfdc,0xddd47645
+ .word 0x3c9c7aa9,0xb6f17309
+ .word 0x3ff6dfb2,0x3c651a2f
+ .word 0xbc6bbe3a,0x683c88ab
+ .word 0x3ff6ef92,0x98593ae5
+ .word 0xbc90b974,0x9e1ac8b2
+ .word 0x3ff6ff7d,0xf9519484
+ .word 0xbc883c0f,0x25860ef6
+ .word 0x3ff70f74,0x66f42e87
+ .word 0x3c59d644,0xd45aa65f
+ .word 0x3ff71f75,0xe8ec5f74
+ .word 0xbc816e47,0x86887a99
+ .word 0x3ff72f82,0x86ead08a
+ .word 0xbc920aa0,0x2cd62c72
+ .word 0x3ff73f9a,0x48a58174
+ .word 0xbc90a8d9,0x6c65d53c
+ .word 0x3ff74fbd,0x35d7cbfd
+ .word 0x3c9047fd,0x618a6e1c
+ .word 0x3ff75feb,0x564267c9
+ .word 0xbc902459,0x57316dd3
+ .word 0x3ff77024,0xb1ab6e09
+ .word 0x3c9b7877,0x169147f8
+ .word 0x3ff78069,0x4fde5d3f
+ .word 0x3c9866b8,0x0a02162c
+ .word 0x3ff790b9,0x38ac1cf6
+ .word 0x3c9349a8,0x62aadd3e
+ .word 0x3ff7a114,0x73eb0187
+ .word 0xbc841577,0xee04992f
+ .word 0x3ff7b17b,0x0976cfdb
+ .word 0xbc9bebb5,0x8468dc88
+ .word 0x3ff7c1ed,0x0130c132
+ .word 0x3c9f124c,0xd1164dd6
+ .word 0x3ff7d26a,0x62ff86f0
+ .word 0x3c91bddb,0xfb72b8b4
+ .word 0x3ff7e2f3,0x36cf4e62
+ .word 0x3c705d02,0xba15797e
+ .word 0x3ff7f387,0x8491c491
+ .word 0xbc807f11,0xcf9311ae
+ .word 0x3ff80427,0x543e1a12
+ .word 0xbc927c86,0x626d972b
+ .word 0x3ff814d2,0xadd106d9
+ .word 0x3c946437,0x0d151d4d
+ .word 0x3ff82589,0x994cce13
+ .word 0xbc9d4c1d,0xd41532d8
+ .word 0x3ff8364c,0x1eb941f7
+ .word 0x3c999b9a,0x31df2bd5
+ .word 0x3ff8471a,0x4623c7ad
+ .word 0xbc88d684,0xa341cdfb
+ .word 0x3ff857f4,0x179f5b21
+ .word 0xbc5ba748,0xf8b216d0
+ .word 0x3ff868d9,0x9b4492ec
+ .word 0x3ca01c83,0xb21584a3
+ .word 0x3ff879ca,0xd931a436
+ .word 0x3c85d2d7,0xd2db47bc
+ .word 0x3ff88ac7,0xd98a6699
+ .word 0x3c9994c2,0xf37cb53a
+ .word 0x3ff89bd0,0xa478580f
+ .word 0x3c9d5395,0x4475202a
+ .word 0x3ff8ace5,0x422aa0db
+ .word 0x3c96e9f1,0x56864b27
+ .word 0x3ff8be05,0xbad61778
+ .word 0x3c9ecb5e,0xfc43446e
+ .word 0x3ff8cf32,0x16b5448c
+ .word 0xbc70d55e,0x32e9e3aa
+ .word 0x3ff8e06a,0x5e0866d9
+ .word 0xbc97114a,0x6fc9b2e6
+ .word 0x3ff8f1ae,0x99157736
+ .word 0x3c85cc13,0xa2e3976c
+ .word 0x3ff902fe,0xd0282c8a
+ .word 0x3c9592ca,0x85fe3fd2
+ .word 0x3ff9145b,0x0b91ffc6
+ .word 0xbc9dd679,0x2e582524
+ .word 0x3ff925c3,0x53aa2fe2
+ .word 0xbc83455f,0xa639db7f
+ .word 0x3ff93737,0xb0cdc5e5
+ .word 0xbc675fc7,0x81b57ebc
+ .word 0x3ff948b8,0x2b5f98e5
+ .word 0xbc8dc3d6,0x797d2d99
+ .word 0x3ff95a44,0xcbc8520f
+ .word 0xbc764b7c,0x96a5f039
+ .word 0x3ff96bdd,0x9a7670b3
+ .word 0xbc5ba596,0x7f19c896
+ .word 0x3ff97d82,0x9fde4e50
+ .word 0xbc9d185b,0x7c1b85d0
+ .word 0x3ff98f33,0xe47a22a2
+ .word 0x3c7cabda,0xa24c78ed
+ .word 0x3ff9a0f1,0x70ca07ba
+ .word 0xbc9173bd,0x91cee632
+ .word 0x3ff9b2bb,0x4d53fe0d
+ .word 0xbc9dd84e,0x4df6d518
+ .word 0x3ff9c491,0x82a3f090
+ .word 0x3c7c7c46,0xb071f2be
+ .word 0x3ff9d674,0x194bb8d5
+ .word 0xbc9516be,0xa3dd8233
+ .word 0x3ff9e863,0x19e32323
+ .word 0x3c7824ca,0x78e64c6e
+ .word 0x3ff9fa5e,0x8d07f29e
+ .word 0xbc84a9ce,0xaaf1face
+ .word 0x3ffa0c66,0x7b5de565
+ .word 0xbc935949,0x5d1cd533
+ .word 0x3ffa1e7a,0xed8eb8bb
+ .word 0x3c9c6618,0xee8be70e
+ .word 0x3ffa309b,0xec4a2d33
+ .word 0x3c96305c,0x7ddc36ab
+ .word 0x3ffa42c9,0x80460ad8
+ .word 0xbc9aa780,0x589fb120
+ .word 0x3ffa5503,0xb23e255d
+ .word 0xbc9d2f6e,0xdb8d41e1
+ .word 0x3ffa674a,0x8af46052
+ .word 0x3c650f56,0x30670366
+ .word 0x3ffa799e,0x1330b358
+ .word 0x3c9bcb7e,0xcac563c6
+ .word 0x3ffa8bfe,0x53c12e59
+ .word 0xbc94f867,0xb2ba15a8
+ .word 0x3ffa9e6b,0x5579fdbf
+ .word 0x3c90fac9,0x0ef7fd31
+ .word 0x3ffab0e5,0x21356eba
+ .word 0x3c889c31,0xdae94544
+ .word 0x3ffac36b,0xbfd3f37a
+ .word 0xbc8f9234,0xcae76cd0
+ .word 0x3ffad5ff,0x3a3c2774
+ .word 0x3c97ef3b,0xb6b1b8e4
+ .word 0x3ffae89f,0x995ad3ad
+ .word 0x3c97a1cd,0x345dcc81
+ .word 0x3ffafb4c,0xe622f2ff
+ .word 0xbc94b2fc,0x0f315ecc
+ .word 0x3ffb0e07,0x298db666
+ .word 0xbc9bdef5,0x4c80e425
+ .word 0x3ffb20ce,0x6c9a8952
+ .word 0x3c94dd02,0x4a0756cc
+ .word 0x3ffb33a2,0xb84f15fb
+ .word 0xbc62805e,0x3084d708
+ .word 0x3ffb4684,0x15b749b1
+ .word 0xbc7f763d,0xe9df7c90
+ .word 0x3ffb5972,0x8de5593a
+ .word 0xbc9c71df,0xbbba6de3
+ .word 0x3ffb6c6e,0x29f1c52a
+ .word 0x3c92a8f3,0x52883f6e
+ .word 0x3ffb7f76,0xf2fb5e47
+ .word 0xbc75584f,0x7e54ac3b
+ .word 0x3ffb928c,0xf22749e4
+ .word 0xbc9b7216,0x54cb65c6
+ .word 0x3ffba5b0,0x30a1064a
+ .word 0xbc9efcd3,0x0e54292e
+ .word 0x3ffbb8e0,0xb79a6f1f
+ .word 0xbc3f52d1,0xc9696205
+ .word 0x3ffbcc1e,0x904bc1d2
+ .word 0x3c823dd0,0x7a2d9e84
+ .word 0x3ffbdf69,0xc3f3a207
+ .word 0xbc3c2623,0x60ea5b52
+ .word 0x3ffbf2c2,0x5bd71e09
+ .word 0xbc9efdca,0x3f6b9c73
+ .word 0x3ffc0628,0x6141b33d
+ .word 0xbc8d8a5a,0xa1fbca34
+ .word 0x3ffc199b,0xdd85529c
+ .word 0x3c811065,0x895048dd
+ .word 0x3ffc2d1c,0xd9fa652c
+ .word 0xbc96e516,0x17c8a5d7
+ .word 0x3ffc40ab,0x5fffd07a
+ .word 0x3c9b4537,0xe083c60a
+ .word 0x3ffc5447,0x78fafb22
+ .word 0x3c912f07,0x2493b5af
+ .word 0x3ffc67f1,0x2e57d14b
+ .word 0x3c92884d,0xff483cad
+ .word 0x3ffc7ba8,0x8988c933
+ .word 0xbc8e76bb,0xbe255559
+ .word 0x3ffc8f6d,0x9406e7b5
+ .word 0x3c71acbc,0x48805c44
+ .word 0x3ffca340,0x5751c4db
+ .word 0xbc87f2be,0xd10d08f4
+ .word 0x3ffcb720,0xdcef9069
+ .word 0x3c7503cb,0xd1e949db
+ .word 0x3ffccb0f,0x2e6d1675
+ .word 0xbc7d220f,0x86009093
+ .word 0x3ffcdf0b,0x555dc3fa
+ .word 0xbc8dd83b,0x53829d72
+ .word 0x3ffcf315,0x5b5bab74
+ .word 0xbc9a08e9,0xb86dff57
+ .word 0x3ffd072d,0x4a07897c
+ .word 0xbc9cbc37,0x43797a9c
+ .word 0x3ffd1b53,0x2b08c968
+ .word 0x3c955636,0x219a36ee
+ .word 0x3ffd2f87,0x080d89f2
+ .word 0xbc9d487b,0x719d8578
+ .word 0x3ffd43c8,0xeacaa1d6
+ .word 0x3c93db53,0xbf5a1614
+ .word 0x3ffd5818,0xdcfba487
+ .word 0x3c82ed02,0xd75b3706
+ .word 0x3ffd6c76,0xe862e6d3
+ .word 0x3c5fe87a,0x4a8165a0
+ .word 0x3ffd80e3,0x16c98398
+ .word 0xbc911ec1,0x8beddfe8
+ .word 0x3ffd955d,0x71ff6075
+ .word 0x3c9a052d,0xbb9af6be
+ .word 0x3ffda9e6,0x03db3285
+ .word 0x3c9c2300,0x696db532
+ .word 0x3ffdbe7c,0xd63a8315
+ .word 0xbc9b76f1,0x926b8be4
+ .word 0x3ffdd321,0xf301b460
+ .word 0x3c92da57,0x78f018c2
+ .word 0x3ffde7d5,0x641c0658
+ .word 0xbc9ca552,0x8e79ba8f
+ .word 0x3ffdfc97,0x337b9b5f
+ .word 0xbc91a5cd,0x4f184b5c
+ .word 0x3ffe1167,0x6b197d17
+ .word 0xbc72b529,0xbd5c7f44
+ .word 0x3ffe2646,0x14f5a129
+ .word 0xbc97b627,0x817a1496
+ .word 0x3ffe3b33,0x3b16ee12
+ .word 0xbc99f4a4,0x31fdc68a
+ .word 0x3ffe502e,0xe78b3ff6
+ .word 0x3c839e89,0x80a9cc8f
+ .word 0x3ffe6539,0x24676d76
+ .word 0xbc863ff8,0x7522b734
+ .word 0x3ffe7a51,0xfbc74c83
+ .word 0x3c92d522,0xca0c8de2
+ .word 0x3ffe8f79,0x77cdb740
+ .word 0xbc910894,0x80b054b1
+ .word 0x3ffea4af,0xa2a490da
+ .word 0xbc9e9c23,0x179c2893
+ .word 0x3ffeb9f4,0x867cca6e
+ .word 0x3c94832f,0x2293e4f2
+ .word 0x3ffecf48,0x2d8e67f1
+ .word 0xbc9c93f3,0xb411ad8c
+ .word 0x3ffee4aa,0xa2188510
+ .word 0x3c91c68d,0xa487568d
+ .word 0x3ffefa1b,0xee615a27
+ .word 0x3c9dc7f4,0x86a4b6b0
+ .word 0x3fff0f9c,0x1cb6412a
+ .word 0xbc932200,0x65181d45
+ .word 0x3fff252b,0x376bba97
+ .word 0x3c93a1a5,0xbf0d8e43
+ .word 0x3fff3ac9,0x48dd7274
+ .word 0xbc795a5a,0x3ed837de
+ .word 0x3fff5076,0x5b6e4540
+ .word 0x3c99d3e1,0x2dd8a18b
+ .word 0x3fff6632,0x798844f8
+ .word 0x3c9fa37b,0x3539343e
+ .word 0x3fff7bfd,0xad9cbe14
+ .word 0xbc9dbb12,0xd006350a
+ .word 0x3fff91d8,0x02243c89
+ .word 0xbc612ea8,0xa779f689
+ .word 0x3fffa7c1,0x819e90d8
+ .word 0x3c874853,0xf3a5931e
+ .word 0x3fffbdba,0x3692d514
+ .word 0xbc796773,0x15098eb6
+ .word 0x3fffd3c2,0x2b8f71f1
+ .word 0x3c62eb74,0x966579e7
+ .word 0x3fffe9d9,0x6b2a23d9
+ .word 0x3c74a603,0x7442fde3
+
+ .align 16
+constants: /* loaded into %f36-%f62 at entry; byte offsets are the #defines that follow */
+ .word 0x3ef00000,0x00000000 /* ox3ef: added to the high word by fpadd32 (exponent field += 0x3ef = 1007) */
+ .word 0x40862e42,0xfefa39ef /* thresh: about 709.78; a larger x overflows */
+ .word 0x01000000,0x00000000 /* tiny: 2^-1007 */
+ .word 0x7f000000,0x00000000 /* huge: 2^1009 */
+ .word 0x80000000,0x00000000 /* signbit: sign-bit mask */
+ .word 0x43f00000,0x00000000 ! scaling 2^12 two96
+ .word 0xfff00000,0x00000000 /* neginf: -Inf; also used as a mask for the top 12 bits */
+ .word 0x3ff00000,0x00000000 /* one: 1.0 */
+ .word 0x3fdfffff,0xfffffff6 /* B1, approximately 1/2 */
+ .word 0x3fc55555,0x721a1d14 /* B2, approximately 1/6 */
+ .word 0x3fa55555,0x6e0896af /* B3, approximately 1/24 */
+ .word 0x41371547,0x652b82fe ! scaling 2^12 invln2_256
+ .word 0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h
+ .word 0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l
+
+ ! base set w/o scaling
+ ! .word 0x43300000,0x00000000 ! scaling two96
+ ! .word 0x40771547,0x652b82fe ! scaling invln2_256
+ ! .word 0x3f662e42,0xfee00000 ! scaling ln2_256h
+ ! .word 0x3d6a39ef,0x35793c76 ! scaling ln2_256l
+
+#define ox3ef 0x0 /* this and the 13 defines below: byte offsets of the doubles in the constants table */
+#define thresh 0x8
+#define tiny 0x10
+#define huge 0x18
+#define signbit 0x20
+#define two96 0x28
+#define neginf 0x30
+#define one 0x38
+#define B1OFF 0x40
+#define B2OFF 0x48
+#define B3OFF 0x50
+#define invln2_256 0x58
+#define ln2_256h 0x60
+#define ln2_256l 0x68
+
+! local storage indices
+
+#define m2 STACK_BIAS-0x4 /* %fp-relative word: integer image of element 2's rounded multiplier (fdtoi result) */
+#define m1 STACK_BIAS-0x8 /* same, element 1 */
+#define m0 STACK_BIAS-0xc /* same, element 0 */
+#define jnk STACK_BIAS-0x20 /* scratch slot that absorbs the result stores made before any real result exists */
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! g1 TBL
+
+! l0 m0
+! l1 m1
+! l2 m2
+! l3 j0,oy0
+! l4 j1,oy1
+! l5 j2,oy2
+! l6 0x3e300000
+! l7 0x40862e41
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 scratch
+! o4 scratch
+! o5 0x40874910
+! o7 0x7ff00000
+
+! f0 x0
+! f2
+! f4
+! f6
+! f8
+! f10 x1
+! f12
+! f14
+! f16
+! f18
+! f20 x2
+! f22
+! f24
+! f26
+! f28
+! f30
+! f32
+! f34
+! f36 0x3ef0...
+! f38 thresh
+! f40 tiny
+! f42 huge
+! f44 signbit
+! f46 two96
+! f48 neginf
+! f50 one
+! f52 B1
+! f54 B2
+! f56 B3
+! f58 invln2_256
+! f60 ln2_256h
+! f62 ln2_256l
+#define BOUNDRY %f36 /* the ox3ef constant; only used as an fpadd32 operand before a multiply by TINY */
+#define THRESH %f38
+#define TINY %f40
+#define HUGE %f42
+#define SIGNBIT %f44
+#define TWO96 %f46
+#define NEGINF %f48 /* doubles as the mask in the fand that isolates k from the fdtoi result */
+#define ONE %f50
+#define B1 %f52
+#define B2 %f54
+#define B3 %f56
+#define INVLN2_256 %f58
+#define LN2_256H %f60
+#define LN2_256L %f62
+
+ ENTRY(__vexp) /* vector exp: arguments n, x, stridex, y, stridey arrive in %i0-%i4 (see register table above) */
+ save %sp,-SA(MINFRAME)-tmps,%sp /* new frame with tmps bytes of scratch for m0-m2 and jnk */
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o3) /* presumably %o3 = &constants; it is the base of the ldd's below */
+ PIC_SET(l7,TBL,o0) /* presumably %o0 = &TBL */
+ mov %o0,%g1 /* %g1 = TBL base for the table lookups */
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+
+ sethi %hi(0x80000000),%i5 /* %i5 = sign-bit mask for a high word */
+ sethi %hi(0x3e300000),%l6 /* %l6 = lower bound on hx for the main path */
+ sethi %hi(0x40862e41),%l7 /* %l7 = upper bound on hx for the main path */
+ or %l7,%lo(0x40862e41),%l7
+ sethi %hi(0x40874910),%o5 /* %o5 = hx above this is treated as huge/Inf/NaN in the .range paths */
+ or %o5,%lo(0x40874910),%o5
+ sethi %hi(0x7ff00000),%o7 /* %o7 = smallest hx that is Inf or NaN */
+ ldd [%o3+ox3ef],BOUNDRY /* preload every constant into its dedicated FP register */
+ ldd [%o3+thresh],THRESH
+ ldd [%o3+tiny],TINY
+ ldd [%o3+huge],HUGE
+ ldd [%o3+signbit],SIGNBIT
+ ldd [%o3+two96],TWO96
+ ldd [%o3+neginf],NEGINF
+ ldd [%o3+one],ONE
+ ldd [%o3+B1OFF],B1
+ ldd [%o3+B2OFF],B2
+ ldd [%o3+B3OFF],B3
+ ldd [%o3+invln2_256],INVLN2_256
+ ldd [%o3+ln2_256h],LN2_256H
+ ldd [%o3+ln2_256l],LN2_256L
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4 /* strides are counted in 8-byte elements on entry */
+ add %fp,jnk,%l3 ! precondition loop
+ add %fp,jnk,%l4 /* all three deferred-store pointers start out aimed at the jnk slot */
+ add %fp,jnk,%l5
+ ld [%i1],%l0 ! hx = *x
+ ld [%i1],%f0 /* x0 high word; NOTE(review): ordinary load before any count check, so n >= 1 is presumably guaranteed by the caller */
+ ld [%i1+4],%f1 /* x0 low word */
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+ ba .loop0
+ add %i1,%i2,%i1 ! x += stridex
+
+ .align 16
+! -- 16 byte aligned
+.loop0: /* stage element 0: range-check hx0, preload element 1, start the multiply by invln2_256 */
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,%l6,%o3 /* negative if hx0 < 0x3e300000 */
+ sub %l7,%l0,%o4 /* negative if hx0 > 0x40862e41 */
+ fand %f0,SIGNBIT,%f2 ! get sign bit
+
+ lda [%i1]%asi,%f10 /* x1 high word, non-faulting */
+ orcc %o3,%o4,%g0 /* N set if either difference is negative */
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! if hx < 0x3e300000 or > 0x40862e41
+
+! delay slot
+ lda [%i1+4]%asi,%f11 /* x1 low word, non-faulting */
+ addcc %i0,-1,%i0 /* n-- */
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop1 /* element 0 was the last one */
+
+! delay slot
+ andn %l1,%i5,%l1 /* hx1 &= ~0x80000000 */
+ add %i1,%i2,%i1 ! x += stridex
+ for %f2,TWO96,%f2 ! used to strip least sig bits
+ fmuld %f0,INVLN2_256,%f4 ! x/ (ln2/256) , creating k
+
+.loop1: /* stage element 1: same steps as .loop0, using %l1, %f10-%f14 and py1 */
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,%l6,%o3 /* negative if hx1 < 0x3e300000 */
+ sub %l7,%l1,%o4 /* negative if hx1 > 0x40862e41 */
+ fand %f10,SIGNBIT,%f12 /* sign bit of x1 */
+
+ lda [%i1]%asi,%f20 /* x2 high word, non-faulting */
+ orcc %o3,%o4,%g0 /* N set if either difference is negative */
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! if hx < 0x3e300000 or > 0x40862e41
+
+! delay slot
+ lda [%i1+4]%asi,%f21 /* x2 low word, non-faulting */
+ addcc %i0,-1,%i0 /* n-- */
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop2 /* element 1 was the last one */
+
+! delay slot
+ andn %l2,%i5,%l2 /* hx2 &= ~0x80000000 */
+ add %i1,%i2,%i1 ! x += stridex
+ for %f12,TWO96,%f12 /* sign | two96: the rounding constant for element 1 */
+ fmuld %f10,INVLN2_256,%f14 /* x1 * invln2_256 */
+
+.loop2: /* stage element 2: range check only; the next argument is preloaded later, inside .cont */
+ sub %l2,%l6,%o3 /* negative if hx2 < 0x3e300000 */
+ sub %l7,%l2,%o4 /* negative if hx2 > 0x40862e41 */
+ fand %f20,SIGNBIT,%f22 /* sign bit of x2 */
+ fmuld %f20,INVLN2_256,%f24 ! okay to put this here; for alignment
+
+ orcc %o3,%o4,%g0 /* N set if either difference is negative */
+ bl,pn %icc,.range2 ! if hx < 0x3e300000 or > 0x40862e41
+! delay slot
+ for %f22,TWO96,%f22 /* sign | two96: the rounding constant for element 2 */
+ faddd %f4,%f2,%f4 ! creating k+j/256, sra to zero bits
+
+.cont: /* shared pipeline: finish range reduction, evaluate the polynomial and table lookup for elements 0, 1, 2 */
+ faddd %f14,%f12,%f14 /* element 1: rounding add, as done for element 0 just before .cont */
+ mov %i3,%o2 ! py2 = y
+
+ faddd %f24,%f22,%f24 /* element 2: rounding add */
+ add %i3,%i4,%i3 ! y += stridey
+
+ ! BUBBLE USIII
+
+ fsubd %f4,%f2,%f8 ! creating k+j/256: sll
+ st %f6,[%l3] ! store previous loop x0
+
+ fsubd %f14,%f12,%f18 /* element 1: rounded multiplier */
+ st %f7,[%l3+4] ! store previous loop x0
+
+ fsubd %f24,%f22,%f28 /* element 2: rounded multiplier */
+ st %f16,[%l4] /* deferred store of the previous pass's element 1 result (jnk slot on the first pass) */
+
+ ! BUBBLE USIII
+
+ fmuld %f8,LN2_256H,%f2 ! closest LN2_256 to x
+ st %f17,[%l4+4]
+
+ fmuld %f18,LN2_256H,%f12
+ st %f26,[%l5] /* deferred store of the previous pass's element 2 result */
+
+ fmuld %f28,LN2_256H,%f22
+ st %f27,[%l5+4]
+
+ ! BUBBLE USIII
+
+ fsubd %f0,%f2,%f0 ! r = x - p*LN2_256H
+ fmuld %f8,LN2_256L,%f4 ! closest LN2_256 to x , added prec
+
+ fsubd %f10,%f12,%f10
+ fmuld %f18,LN2_256L,%f14
+
+ fsubd %f20,%f22,%f20
+ fmuld %f28,LN2_256L,%f24
+
+ ! BUBBLE USIII
+
+ fsubd %f0,%f4,%f0 ! r -= p*LN2_256L
+
+ fsubd %f10,%f14,%f10
+
+ fsubd %f20,%f24,%f20
+
+!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here
+
+ ! Alternate polynomial grouping allowing non-sequential calc of p
+ ! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
+ ! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
+ !
+ ! let SLi Ri SRi be accumulators
+
+ fmuld %f0,B3,%f2 ! SR1 = r1 * B3
+ fdtoi %f8,%f8 ! convert k+j/256 to int
+ st %f8,[%fp+m0] ! store k, to shift return/use
+
+ fmuld %f10,B3,%f12 ! SR2 = r2 * B3
+ fdtoi %f18,%f18 ! convert k+j/256 to int
+ st %f18,[%fp+m1] ! store k, to shift return/use
+
+ fmuld %f20,B3,%f22 ! SR3 = r3 * B3
+ fdtoi %f28,%f28 ! convert k+j/256 to int
+ st %f28,[%fp+m2] ! store k, to shift return/use
+
+ fmuld %f0,%f0,%f4 ! R1 = r1 * r1
+
+ fmuld %f10,%f10,%f14 ! R2 = r2 * r2
+ faddd %f2,B2,%f2 ! SR1 += B2
+
+ fmuld %f20,%f20,%f24 ! R3 = r3 * r3
+ faddd %f12,B2,%f12 ! SR2 += B2
+
+ faddd %f22,B2,%f22 ! SR3 += B2
+ fmuld %f0,B1,%f6 ! SL1 = r1 * B1
+
+ fmuld %f10,B1,%f32 ! SL2 = r2 * B1
+ fand %f8,NEGINF,%f8 /* keep only the top 12 bits of the integer: k, aligned with a double's exponent field */
+ ! best here for RAW BYPASS
+ ld [%fp+m0],%l0 ! get nonshifted k into intreg
+
+ fmuld %f20,B1,%f34 ! SL3 = r3 * B1
+ fand %f18,NEGINF,%f18
+ ld [%fp+m1],%l1 ! get nonshifted k into intreg
+
+ fmuld %f4,%f2,%f4 ! R1 = R1 * SR1
+ fand %f28,NEGINF,%f28
+ ld [%fp+m2],%l2 ! get nonshifted k into intreg
+
+ fmuld %f14,%f12,%f14 ! R2 = R2 * SR2
+ faddd %f6,ONE,%f6 ! SL1 += 1
+
+ fmuld %f24,%f22,%f24 ! R3 = R3 * SR3
+ faddd %f32,ONE,%f32 ! SL2 += 1
+ sra %l0,8,%l3 ! shift k tobe offset 256-8byte
+
+ faddd %f34,ONE,%f34 ! SL3 += 1
+ sra %l1,8,%l4 ! shift k tobe offset 256-8byte
+ sra %l2,8,%l5 ! shift k tobe offset 256-8byte
+
+ ! BUBBLE in USIII
+ and %l3,0xff0,%l3 /* j*16: byte offset of entry j in TBL */
+ and %l4,0xff0,%l4
+
+
+
+ faddd %f6,%f4,%f6 ! R1 = SL1 + R1
+ ldd [%g1+%l3],%f4 ! tbl[j]
+ add %l3,8,%l3 ! inc j
+ and %l5,0xff0,%l5
+
+
+ faddd %f32,%f14,%f32 ! R2 = SL2 + R2
+ ldd [%g1+%l4],%f14 ! tbl[j]
+ add %l4,8,%l4 ! inc j
+ sra %l0,20,%o3 /* element 0: k */
+
+ faddd %f34,%f24,%f34 ! R3 = SL3 + R3
+ ldd [%g1+%l5],%f24 ! tbl[j]
+ add %l5,8,%l5 ! inc j
+ sra %l1,20,%l1 /* element 1: k */
+
+ ! BUBBLE in USIII
+ ldd [%g1+%l4],%f16 ! tbl[j+1]
+ add %o3,1021,%o3 ! inc j /* NOTE(review): this is k+1021, not j; its sign is tested below */
+
+ fmuld %f0,%f6,%f0 ! p1 = r1 * R1
+ ldd [%g1+%l3],%f6 ! tbl[j+1]
+ add %l1,1021,%l1 ! inc j /* NOTE(review): k+1021 for element 1 */
+ sra %l2,20,%l2 /* element 2: k */
+
+ fmuld %f10,%f32,%f10 ! p2 = r2 * R2
+ ldd [%g1+%l5],%f26 ! tbl[j+1]
+ add %l2,1021,%l2 ! inc j /* NOTE(review): k+1021 for element 2 */
+
+ fmuld %f20,%f34,%f20 ! p3 = r3 * R3
+
+
+
+
+
+!!!!!!!!!!!!!!!!!!! poly-reorder - ends here
+
+ fmuld %f0,%f4,%f0 ! start exp(x) = exp(r) * tbl[j]
+ mov %o0,%l3 /* latch py0: the result is stored on the next pass or in .endloop0 */
+
+ fmuld %f10,%f14,%f10
+ mov %o1,%l4 /* latch py1 */
+
+ fmuld %f20,%f24,%f20
+ mov %o2,%l5 /* latch py2 */
+
+ faddd %f0,%f6,%f6 ! cont exp(x) : apply tbl[j] high bits /* NOTE(review): %f6 was loaded from offset +8, the entry's second (tail) double */
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ faddd %f10,%f16,%f16
+ lda [%i1]%asi,%f0 /* next x0 high word, non-faulting */
+
+ faddd %f20,%f26,%f26
+ lda [%i1+4]%asi,%f1 /* next x0 low word */
+
+ faddd %f6,%f4,%f6 ! cont exp(x) : apply tbl[j+1] low bits /* NOTE(review): %f4 is the entry's first double */
+ add %i1,%i2,%i1 ! x += stridex
+
+ faddd %f16,%f14,%f16
+ andn %l0,%i5,%l0 /* hx0 &= ~0x80000000 for the next pass */
+ or %o3,%l1,%o4 /* merge the three k+1021 values: the result is negative if any of them is */
+
+! -- 16 byte aligned
+ orcc %o4,%l2,%o4
+ bl,pn %icc,.small /* some k+1021 < 0: take the rescaling path */
+! delay slot
+ faddd %f26,%f24,%f26
+
+ fpadd32 %f6,%f8,%f6 ! done exp(x) : apply 2^k
+ fpadd32 %f16,%f18,%f16
+
+
+ addcc %i0,-1,%i0 /* n-- */
+ bg,pn %icc,.loop0 /* more input: start the next group of three */
+! delay slot
+ fpadd32 %f26,%f28,%f26
+
+ ba,pt %icc,.endloop0 /* input exhausted: flush the three pending results */
+! delay slot
+ nop
+
+
+ .align 16
+.small: /* at least one element has k+1021 < 0: apply 2^k to all three, rescaling the affected ones through BOUNDRY and TINY */
+ tst %o3 /* element 0: k+1021 */
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f6,%f8,%f6 /* always executed: add k into the exponent field */
+ fpadd32 %f6,BOUNDRY,%f6 /* exponent field += 0x3ef (1007) */
+ fmuld %f6,TINY,%f6 /* multiply by 2^-1007 to undo the bias in floating point */
+1:
+ tst %l1 /* element 1: k+1021 */
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f16,%f18,%f16
+ fpadd32 %f16,BOUNDRY,%f16
+ fmuld %f16,TINY,%f16
+1:
+ tst %l2 /* element 2: k+1021 */
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f26,%f28,%f26
+ fpadd32 %f26,BOUNDRY,%f26
+ fmuld %f26,TINY,%f26
+1:
+ addcc %i0,-1,%i0 /* n--; same loop tail as at the end of .cont */
+ bg,pn %icc,.loop0
+! delay slot
+ nop
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+
+.endloop2: /* input ran out with element 1 staged: compute it unpipelined, store it, then fall into .endloop1 */
+ for %f12,TWO96,%f12 /* (re)do the setup ops; the branch into here may have skipped them */
+ fmuld %f10,INVLN2_256,%f14
+ faddd %f14,%f12,%f14 /* round by adding and subtracting sign|two96 */
+ fsubd %f14,%f12,%f18
+ fmuld %f18,LN2_256H,%f12
+ fsubd %f10,%f12,%f10 /* r = x - m*ln2_256h */
+ fmuld %f18,LN2_256L,%f14
+ fsubd %f10,%f14,%f10 /* r -= m*ln2_256l */
+ fmuld %f10,B3,%f12
+ fdtoi %f18,%f18 /* integer image of the rounded multiplier */
+ st %f18,[%fp+m1]
+ fmuld %f10,%f10,%f14
+ faddd %f12,B2,%f12
+ fmuld %f10,B1,%f32
+ fand %f18,NEGINF,%f18 /* keep the top 12 bits (k) for the fpadd32 below */
+ ld [%fp+m1],%l1
+ fmuld %f14,%f12,%f14
+ faddd %f32,ONE,%f32
+ sra %l1,8,%o4
+ and %o4,0xff0,%o4 /* j*16: byte offset into TBL */
+ faddd %f32,%f14,%f32
+ ldd [%g1+%o4],%f14 /* first double of entry j */
+ add %o4,8,%o4
+ sra %l1,20,%l1 /* k */
+ ldd [%g1+%o4],%f30 /* second double of entry j */
+ addcc %l1,1021,%l1 /* k+1021; the condition codes are consumed by the bge below */
+ fmuld %f10,%f32,%f10
+ fmuld %f10,%f14,%f10
+ faddd %f10,%f30,%f30
+ faddd %f30,%f14,%f30
+ bge,pt %icc,1f /* k+1021 >= 0: no rescaling needed */
+! delay slot
+ fpadd32 %f30,%f18,%f30 /* add k into the exponent field */
+ fpadd32 %f30,BOUNDRY,%f30 /* exponent field += 0x3ef */
+ fmuld %f30,TINY,%f30 /* multiply by 2^-1007 */
+1:
+ st %f30,[%o1] /* *py1 = result */
+ st %f31,[%o1+4]
+
+.endloop1: /* element 0 is staged but unprocessed: compute it unpipelined, store it, then fall into .endloop0 */
+ for %f2,TWO96,%f2 /* (re)do the setup ops; the branch into here may have skipped them */
+ fmuld %f0,INVLN2_256,%f4
+ faddd %f4,%f2,%f4 /* round by adding and subtracting sign|two96 */
+ fsubd %f4,%f2,%f8
+ fmuld %f8,LN2_256H,%f2
+ fsubd %f0,%f2,%f0 /* r = x - m*ln2_256h */
+ fmuld %f8,LN2_256L,%f4
+ fsubd %f0,%f4,%f0 /* r -= m*ln2_256l */
+ fmuld %f0,B3,%f2
+ fdtoi %f8,%f8 /* integer image of the rounded multiplier */
+ st %f8,[%fp+m0]
+ fmuld %f0,%f0,%f4
+ faddd %f2,B2,%f2
+ fmuld %f0,B1,%f32
+ fand %f8,NEGINF,%f8 /* keep the top 12 bits (k) for the fpadd32 below */
+ ld [%fp+m0],%l0
+ fmuld %f4,%f2,%f4
+ faddd %f32,ONE,%f32
+ sra %l0,8,%o4
+ and %o4,0xff0,%o4 /* j*16: byte offset into TBL */
+ faddd %f32,%f4,%f32
+ ldd [%g1+%o4],%f4 /* first double of entry j */
+ add %o4,8,%o4
+ sra %l0,20,%o3 /* k */
+ ldd [%g1+%o4],%f30 /* second double of entry j */
+ addcc %o3,1021,%o3 /* k+1021; the condition codes are consumed by the bge below */
+ fmuld %f0,%f32,%f0
+ fmuld %f0,%f4,%f0
+ faddd %f0,%f30,%f30
+ faddd %f30,%f4,%f30
+ bge,pt %icc,1f /* k+1021 >= 0: no rescaling needed */
+! delay slot
+ fpadd32 %f30,%f8,%f30 /* add k into the exponent field */
+ fpadd32 %f30,BOUNDRY,%f30 /* exponent field += 0x3ef */
+ fmuld %f30,TINY,%f30 /* multiply by 2^-1007 */
+1:
+ st %f30,[%o0] /* *py0 = result */
+ st %f31,[%o0+4]
+
+.endloop0: /* flush the results still held in %f6, %f16, %f26 to the addresses latched in %l3-%l5, then return */
+ st %f6,[%l3]
+ st %f7,[%l3+4]
+ st %f16,[%l4]
+ st %f17,[%l4+4]
+ st %f26,[%l5]
+ st %f27,[%l5+4]
+ ret
+ restore
+
+
+.range0: /* element 0 failed the range test: handle tiny, out-of-range, Inf and NaN here, or rejoin the loop */
+ cmp %l0,%l6 /* hx0 < 0x3e300000, i.e. |x| < 2^-28? */
+ bl,a,pt %icc,3f ! if x is tiny
+! delay slot, annulled if branch not taken
+ faddd %f0,ONE,%f4 /* result = x + 1 */
+
+ cmp %l0,%o5 /* hx0 > 0x40874910? */
+ bg,pt %icc,1f ! if x is huge, inf, nan
+! delay slot
+ nop
+
+ fcmpd %fcc0,%f0,THRESH /* x > thresh? */
+ fbg,a,pt %fcc0,3f ! if x is huge and positive
+! delay slot, annulled if branch not taken
+ fmuld HUGE,HUGE,%f4 /* result = huge*huge */
+
+! x is near the extremes but within range; return to the loop
+ addcc %i0,-1,%i0 /* replays the tail of .loop0 that the taken branch skipped */
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop1
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ for %f2,TWO96,%f2
+ ba,pt %icc,.loop1
+! delay slot
+ fmuld %f0,INVLN2_256,%f4
+
+1:
+ cmp %l0,%o7 /* hx0 < 0x7ff00000? */
+ bl,pn %icc,2f ! if x is finite
+! delay slot
+ nop
+ fzero %f4 /* Inf or NaN: start from zero, which is kept only for x == -Inf */
+ fcmpd %fcc0,%f0,NEGINF
+ fmovdne %fcc0,%f0,%f4 /* x != -Inf: use x itself */
+ ba,pt %icc,3f
+ fmuld %f4,%f4,%f4 ! x*x or zero*zero
+2:
+ fmovd HUGE,%f4 /* finite but out of range */
+ fcmpd %fcc0,%f0,ONE
+ fmovdl %fcc0,TINY,%f4 /* x < 1: use tiny instead */
+ fmuld %f4,%f4,%f4 ! huge*huge or tiny*tiny
+3:
+ st %f4,[%o0] /* *py0 = special-case result, high word */
+ andn %l1,%i5,%l0 /* the preloaded element becomes element 0: hx0 = hx1 & ~0x80000000 */
+ add %i1,%i2,%i1 ! x += stridex
+ fmovd %f10,%f0 /* x0 = preloaded x1 */
+ st %f5,[%o0+4] /* low word */
+ addcc %i0,-1,%i0 /* n-- */
+ bg,pt %icc,.loop0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ ba,pt %icc,.endloop0 /* input exhausted */
+! delay slot
+ nop
+
+
+.range1: /* element 1 failed the range test: same case analysis as .range0, using %l1, %f10/%f14 and py1 */
+ cmp %l1,%l6 /* hx1 < 0x3e300000? */
+ bl,a,pt %icc,3f ! if x is tiny
+! delay slot, annulled if branch not taken
+ faddd %f10,ONE,%f14 /* result = x + 1 */
+
+ cmp %l1,%o5 /* hx1 > 0x40874910? */
+ bg,pt %icc,1f ! if x is huge, inf, nan
+! delay slot
+ nop
+
+ fcmpd %fcc0,%f10,THRESH /* x > thresh? */
+ fbg,a,pt %fcc0,3f ! if x is huge and positive
+! delay slot, annulled if branch not taken
+ fmuld HUGE,HUGE,%f14 /* result = huge*huge */
+
+! x is near the extremes but within range; return to the loop
+ addcc %i0,-1,%i0 /* replays the tail of .loop1 that the taken branch skipped */
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop2
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ for %f12,TWO96,%f12
+ ba,pt %icc,.loop2
+! delay slot
+ fmuld %f10,INVLN2_256,%f14
+
+1:
+ cmp %l1,%o7 /* hx1 < 0x7ff00000? */
+ bl,pn %icc,2f ! if x is finite
+! delay slot
+ nop
+ fzero %f14 /* Inf or NaN: start from zero, which is kept only for x == -Inf */
+ fcmpd %fcc0,%f10,NEGINF
+ fmovdne %fcc0,%f10,%f14 /* x != -Inf: use x itself */
+ ba,pt %icc,3f
+ fmuld %f14,%f14,%f14 ! x*x or zero*zero
+2:
+ fmovd HUGE,%f14 /* finite but out of range */
+ fcmpd %fcc0,%f10,ONE
+ fmovdl %fcc0,TINY,%f14 /* x < 1: use tiny instead */
+ fmuld %f14,%f14,%f14 ! huge*huge or tiny*tiny
+3:
+ st %f14,[%o1] /* *py1 = special-case result, high word */
+ andn %l2,%i5,%l1 /* the preloaded element becomes element 1: hx1 = hx2 & ~0x80000000 */
+ add %i1,%i2,%i1 ! x += stridex
+ fmovd %f20,%f10 /* x1 = preloaded x2 */
+ st %f15,[%o1+4] /* low word */
+ addcc %i0,-1,%i0 /* n-- */
+ bg,pt %icc,.loop1
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ ba,pt %icc,.endloop1 /* input exhausted: element 0 is still pending */
+! delay slot
+ nop
+
+
+.range2: /* element 2 failed the range test: same case analysis as .range0, using %l2 and %f20/%f24 */
+ cmp %l2,%l6 /* hx2 < 0x3e300000? */
+ bl,a,pt %icc,3f ! if x is tiny
+! delay slot, annulled if branch not taken
+ faddd %f20,ONE,%f24 /* result = x + 1 */
+
+ cmp %l2,%o5 /* hx2 > 0x40874910? */
+ bg,pt %icc,1f ! if x is huge, inf, nan
+! delay slot
+ nop
+
+ fcmpd %fcc0,%f20,THRESH /* x > thresh? */
+ fbg,a,pt %fcc0,3f ! if x is huge and positive
+! delay slot, annulled if branch not taken
+ fmuld HUGE,HUGE,%f24 /* result = huge*huge */
+
+! x is near the extremes but within range; return to the loop
+ ba,pt %icc,.cont
+! delay slot
+ faddd %f4,%f2,%f4 /* the instruction of .loop2 that the taken branch skipped */
+
+1:
+ cmp %l2,%o7 /* hx2 < 0x7ff00000? */
+ bl,pn %icc,2f ! if x is finite
+! delay slot
+ nop
+ fzero %f24 /* Inf or NaN: start from zero, which is kept only for x == -Inf */
+ fcmpd %fcc0,%f20,NEGINF
+ fmovdne %fcc0,%f20,%f24 /* x != -Inf: use x itself */
+ ba,pt %icc,3f
+ fmuld %f24,%f24,%f24 ! x*x or zero*zero
+2:
+ fmovd HUGE,%f24 /* finite but out of range */
+ fcmpd %fcc0,%f20,ONE
+ fmovdl %fcc0,TINY,%f24 /* x < 1: use tiny instead */
+ fmuld %f24,%f24,%f24 ! huge*huge or tiny*tiny
+3:
+ st %f24,[%i3] /* py2 has not been latched yet (that happens in .cont), so store through y directly */
+ st %f25,[%i3+4]
+ lda [%i1]%asi,%l2 ! preload next argument
+ lda [%i1]%asi,%f20 /* the next argument becomes element 2 */
+ lda [%i1+4]%asi,%f21
+ andn %l2,%i5,%l2 /* hx2 &= ~0x80000000 */
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0 /* n-- */
+ bg,pt %icc,.loop2
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ ba,pt %icc,.endloop2 /* input exhausted: elements 1 and 0 are still pending */
+! delay slot
+ nop
+
+ SET_SIZE(__vexp)
+