/* path: root/usr/src/lib/libmvec/common/vis/__vexp.S (blob fc11df08ee0b8440a635223dd026961fea88b40d) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vexp.S"

#include "libm.h"

	RO_DATA

/********************************************************************
 * vexp() algorithm is from mopt:f_exp.c.  Basics are included here
 * to supplement comments within this file.  vexp() has been unrolled
 * to a depth of 3.  Only element 0 is documented.
 *
 * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
 *	2^44 to allow *2^k w/o shifting within the FP registers.  These
 *	had to be removed for CHEETAH to avoid the fdtox of a very large
 *	number, which would trap to kernel (2^52).
 *
 * Let 	x = (k + j/256)*ln2 + r
 * then	exp(x) = exp((k + j/256)*ln2) * exp(r)
 *	       = 2^k * 2^(j/256) * exp(r)
 * where exp(r) is evaluated by the polynomial approximation
 *	exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
 *	       = 1 + r*(1+r*(B1+r*(B2+r*B3)))
 *	let
 *	p = r*(1+r*(B1+r*(B2+r*B3)))	! notice, not quite exp(r)
 *	q = 2^(j/256) (high 64 bits)
 *	t = 2^(j/256) (extra precision)	! both from _TBL_exp_z[]
 *	then
 *	2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
 *	then actual computation is 2^k * ( q + ( t + q*p ) )
 * 
 ********************************************************************/

	.align	16
/*
 * TBL: 256 entries of 16 bytes each (512 .word pairs follow).  Every
 * entry is a pair of IEEE-754 doubles, each written high word first:
 *	+0	leading value of 2^(j/256)   (entry 0 is 0x3ff00000,0 = 1.0)
 *	+8	small correction term	     (entry 0 is 0.0)
 * for j = 0..255 (entry 1 starts 0x3ff00b1a..., i.e. ~2^(1/256)).
 * NOTE(review): these look like the q and t values of the algorithm
 * notes in this file (q + t ~ 2^(j/256)); confirm against the lookup
 * code, which indexes the table through %g1.
 */
TBL:
	.word	0x3ff00000,0x00000000
	.word	0x00000000,0x00000000
	.word	0x3ff00b1a,0xfa5abcbf
	.word	0xbc84f6b2,0xa7609f71
	.word	0x3ff0163d,0xa9fb3335
	.word	0x3c9b6129,0x9ab8cdb7
	.word	0x3ff02168,0x143b0281
	.word	0xbc82bf31,0x0fc54eb6
	.word	0x3ff02c9a,0x3e778061
	.word	0xbc719083,0x535b085d
	.word	0x3ff037d4,0x2e11bbcc
	.word	0x3c656811,0xeeade11a
	.word	0x3ff04315,0xe86e7f85
	.word	0xbc90a31c,0x1977c96e
	.word	0x3ff04e5f,0x72f654b1
	.word	0x3c84c379,0x3aa0d08c
	.word	0x3ff059b0,0xd3158574
	.word	0x3c8d73e2,0xa475b465
	.word	0x3ff0650a,0x0e3c1f89
	.word	0xbc95cb7b,0x5799c396
	.word	0x3ff0706b,0x29ddf6de
	.word	0xbc8c91df,0xe2b13c26
	.word	0x3ff07bd4,0x2b72a836
	.word	0x3c832334,0x54458700
	.word	0x3ff08745,0x18759bc8
	.word	0x3c6186be,0x4bb284ff
	.word	0x3ff092bd,0xf66607e0
	.word	0xbc968063,0x800a3fd1
	.word	0x3ff09e3e,0xcac6f383
	.word	0x3c914878,0x18316136
	.word	0x3ff0a9c7,0x9b1f3919
	.word	0x3c85d16c,0x873d1d38
	.word	0x3ff0b558,0x6cf9890f
	.word	0x3c98a62e,0x4adc610a
	.word	0x3ff0c0f1,0x45e46c85
	.word	0x3c94f989,0x06d21cef
	.word	0x3ff0cc92,0x2b7247f7
	.word	0x3c901edc,0x16e24f71
	.word	0x3ff0d83b,0x23395dec
	.word	0xbc9bc14d,0xe43f316a
	.word	0x3ff0e3ec,0x32d3d1a2
	.word	0x3c403a17,0x27c57b53
	.word	0x3ff0efa5,0x5fdfa9c5
	.word	0xbc949db9,0xbc54021b
	.word	0x3ff0fb66,0xaffed31b
	.word	0xbc6b9bed,0xc44ebd7b
	.word	0x3ff10730,0x28d7233e
	.word	0x3c8d46eb,0x1692fdd5
	.word	0x3ff11301,0xd0125b51
	.word	0xbc96c510,0x39449b3a
	.word	0x3ff11edb,0xab5e2ab6
	.word	0xbc9ca454,0xf703fb72
	.word	0x3ff12abd,0xc06c31cc
	.word	0xbc51b514,0xb36ca5c7
	.word	0x3ff136a8,0x14f204ab
	.word	0xbc67108f,0xba48dcf0
	.word	0x3ff1429a,0xaea92de0
	.word	0xbc932fbf,0x9af1369e
	.word	0x3ff14e95,0x934f312e
	.word	0xbc8b91e8,0x39bf44ab
	.word	0x3ff15a98,0xc8a58e51
	.word	0x3c82406a,0xb9eeab0a
	.word	0x3ff166a4,0x5471c3c2
	.word	0x3c58f23b,0x82ea1a32
	.word	0x3ff172b8,0x3c7d517b
	.word	0xbc819041,0xb9d78a76
	.word	0x3ff17ed4,0x8695bbc0
	.word	0x3c709e3f,0xe2ac5a64
	.word	0x3ff18af9,0x388c8dea
	.word	0xbc911023,0xd1970f6c
	.word	0x3ff19726,0x58375d2f
	.word	0x3c94aadd,0x85f17e08
	.word	0x3ff1a35b,0xeb6fcb75
	.word	0x3c8e5b4c,0x7b4968e4
	.word	0x3ff1af99,0xf8138a1c
	.word	0x3c97bf85,0xa4b69280
	.word	0x3ff1bbe0,0x84045cd4
	.word	0xbc995386,0x352ef607
	.word	0x3ff1c82f,0x95281c6b
	.word	0x3c900977,0x8010f8c9
	.word	0x3ff1d487,0x3168b9aa
	.word	0x3c9e016e,0x00a2643c
	.word	0x3ff1e0e7,0x5eb44027
	.word	0xbc96fdd8,0x088cb6de
	.word	0x3ff1ed50,0x22fcd91d
	.word	0xbc91df98,0x027bb78c
	.word	0x3ff1f9c1,0x8438ce4d
	.word	0xbc9bf524,0xa097af5c
	.word	0x3ff2063b,0x88628cd6
	.word	0x3c8dc775,0x814a8494
	.word	0x3ff212be,0x3578a819
	.word	0x3c93592d,0x2cfcaac9
	.word	0x3ff21f49,0x917ddc96
	.word	0x3c82a97e,0x9494a5ee
	.word	0x3ff22bdd,0xa27912d1
	.word	0x3c8d34fb,0x5577d69e
	.word	0x3ff2387a,0x6e756238
	.word	0x3c99b07e,0xb6c70573
	.word	0x3ff2451f,0xfb82140a
	.word	0x3c8acfcc,0x911ca996
	.word	0x3ff251ce,0x4fb2a63f
	.word	0x3c8ac155,0xbef4f4a4
	.word	0x3ff25e85,0x711ece75
	.word	0x3c93e1a2,0x4ac31b2c
	.word	0x3ff26b45,0x65e27cdd
	.word	0x3c82bd33,0x9940e9d9
	.word	0x3ff2780e,0x341ddf29
	.word	0x3c9e067c,0x05f9e76c
	.word	0x3ff284df,0xe1f56381
	.word	0xbc9a4c3a,0x8c3f0d7e
	.word	0x3ff291ba,0x7591bb70
	.word	0xbc82cc72,0x28401cbc
	.word	0x3ff29e9d,0xf51fdee1
	.word	0x3c8612e8,0xafad1255
	.word	0x3ff2ab8a,0x66d10f13
	.word	0xbc995743,0x191690a7
	.word	0x3ff2b87f,0xd0dad990
	.word	0xbc410adc,0xd6381aa4
	.word	0x3ff2c57e,0x39771b2f
	.word	0xbc950145,0xa6eb5124
	.word	0x3ff2d285,0xa6e4030b
	.word	0x3c900247,0x54db41d5
	.word	0x3ff2df96,0x1f641589
	.word	0x3c9d16cf,0xfbbce198
	.word	0x3ff2ecaf,0xa93e2f56
	.word	0x3c71ca0f,0x45d52383
	.word	0x3ff2f9d2,0x4abd886b
	.word	0xbc653c55,0x532bda93
	.word	0x3ff306fe,0x0a31b715
	.word	0x3c86f46a,0xd23182e4
	.word	0x3ff31432,0xedeeb2fd
	.word	0x3c8959a3,0xf3f3fcd0
	.word	0x3ff32170,0xfc4cd831
	.word	0x3c8a9ce7,0x8e18047c
	.word	0x3ff32eb8,0x3ba8ea32
	.word	0xbc9c45e8,0x3cb4f318
	.word	0x3ff33c08,0xb26416ff
	.word	0x3c932721,0x843659a6
	.word	0x3ff34962,0x66e3fa2d
	.word	0xbc835a75,0x930881a4
	.word	0x3ff356c5,0x5f929ff1
	.word	0xbc8b5cee,0x5c4e4628
	.word	0x3ff36431,0xa2de883b
	.word	0xbc8c3144,0xa06cb85e
	.word	0x3ff371a7,0x373aa9cb
	.word	0xbc963aea,0xbf42eae2
	.word	0x3ff37f26,0x231e754a
	.word	0xbc99f5ca,0x9eceb23c
	.word	0x3ff38cae,0x6d05d866
	.word	0xbc9e958d,0x3c9904bd
	.word	0x3ff39a40,0x1b7140ef
	.word	0xbc99a9a5,0xfc8e2934
	.word	0x3ff3a7db,0x34e59ff7
	.word	0xbc75e436,0xd661f5e3
	.word	0x3ff3b57f,0xbfec6cf4
	.word	0x3c954c66,0xe26fff18
	.word	0x3ff3c32d,0xc313a8e5
	.word	0xbc9efff8,0x375d29c3
	.word	0x3ff3d0e5,0x44ede173
	.word	0x3c7fe8d0,0x8c284c71
	.word	0x3ff3dea6,0x4c123422
	.word	0x3c8ada09,0x11f09ebc
	.word	0x3ff3ec70,0xdf1c5175
	.word	0xbc8af663,0x7b8c9bca
	.word	0x3ff3fa45,0x04ac801c
	.word	0xbc97d023,0xf956f9f3
	.word	0x3ff40822,0xc367a024
	.word	0x3c8bddf8,0xb6f4d048
	.word	0x3ff4160a,0x21f72e2a
	.word	0xbc5ef369,0x1c309278
	.word	0x3ff423fb,0x2709468a
	.word	0xbc98462d,0xc0b314dd
	.word	0x3ff431f5,0xd950a897
	.word	0xbc81c7dd,0xe35f7998
	.word	0x3ff43ffa,0x3f84b9d4
	.word	0x3c8880be,0x9704c002
	.word	0x3ff44e08,0x6061892d
	.word	0x3c489b7a,0x04ef80d0
	.word	0x3ff45c20,0x42a7d232
	.word	0xbc686419,0x82fb1f8e
	.word	0x3ff46a41,0xed1d0057
	.word	0x3c9c944b,0xd1648a76
	.word	0x3ff4786d,0x668b3237
	.word	0xbc9c20f0,0xed445733
	.word	0x3ff486a2,0xb5c13cd0
	.word	0x3c73c1a3,0xb69062f0
	.word	0x3ff494e1,0xe192aed2
	.word	0xbc83b289,0x5e499ea0
	.word	0x3ff4a32a,0xf0d7d3de
	.word	0x3c99cb62,0xf3d1be56
	.word	0x3ff4b17d,0xea6db7d7
	.word	0xbc8125b8,0x7f2897f0
	.word	0x3ff4bfda,0xd5362a27
	.word	0x3c7d4397,0xafec42e2
	.word	0x3ff4ce41,0xb817c114
	.word	0x3c905e29,0x690abd5d
	.word	0x3ff4dcb2,0x99fddd0d
	.word	0x3c98ecdb,0xbc6a7833
	.word	0x3ff4eb2d,0x81d8abff
	.word	0xbc95257d,0x2e5d7a52
	.word	0x3ff4f9b2,0x769d2ca7
	.word	0xbc94b309,0xd25957e3
	.word	0x3ff50841,0x7f4531ee
	.word	0x3c7a249b,0x49b7465f
	.word	0x3ff516da,0xa2cf6642
	.word	0xbc8f7685,0x69bd93ee
	.word	0x3ff5257d,0xe83f4eef
	.word	0xbc7c998d,0x43efef71
	.word	0x3ff5342b,0x569d4f82
	.word	0xbc807abe,0x1db13cac
	.word	0x3ff542e2,0xf4f6ad27
	.word	0x3c87926d,0x192d5f7e
	.word	0x3ff551a4,0xca5d920f
	.word	0xbc8d689c,0xefede59a
	.word	0x3ff56070,0xdde910d2
	.word	0xbc90fb6e,0x168eebf0
	.word	0x3ff56f47,0x36b527da
	.word	0x3c99bb2c,0x011d93ad
	.word	0x3ff57e27,0xdbe2c4cf
	.word	0xbc90b98c,0x8a57b9c4
	.word	0x3ff58d12,0xd497c7fd
	.word	0x3c8295e1,0x5b9a1de8
	.word	0x3ff59c08,0x27ff07cc
	.word	0xbc97e2ce,0xe467e60f
	.word	0x3ff5ab07,0xdd485429
	.word	0x3c96324c,0x054647ad
	.word	0x3ff5ba11,0xfba87a03
	.word	0xbc9b77a1,0x4c233e1a
	.word	0x3ff5c926,0x8a5946b7
	.word	0x3c3c4b1b,0x816986a2
	.word	0x3ff5d845,0x90998b93
	.word	0xbc9cd6a7,0xa8b45642
	.word	0x3ff5e76f,0x15ad2148
	.word	0x3c9ba6f9,0x3080e65e
	.word	0x3ff5f6a3,0x20dceb71
	.word	0xbc89eadd,0xe3cdcf92
	.word	0x3ff605e1,0xb976dc09
	.word	0xbc93e242,0x9b56de47
	.word	0x3ff6152a,0xe6cdf6f4
	.word	0x3c9e4b3e,0x4ab84c27
	.word	0x3ff6247e,0xb03a5585
	.word	0xbc9383c1,0x7e40b497
	.word	0x3ff633dd,0x1d1929fd
	.word	0x3c984710,0xbeb964e5
	.word	0x3ff64346,0x34ccc320
	.word	0xbc8c483c,0x759d8932
	.word	0x3ff652b9,0xfebc8fb7
	.word	0xbc9ae3d5,0xc9a73e08
	.word	0x3ff66238,0x82552225
	.word	0xbc9bb609,0x87591c34
	.word	0x3ff671c1,0xc70833f6
	.word	0xbc8e8732,0x586c6134
	.word	0x3ff68155,0xd44ca973
	.word	0x3c6038ae,0x44f73e65
	.word	0x3ff690f4,0xb19e9538
	.word	0x3c8804bd,0x9aeb445c
	.word	0x3ff6a09e,0x667f3bcd
	.word	0xbc9bdd34,0x13b26456
	.word	0x3ff6b052,0xfa75173e
	.word	0x3c7a38f5,0x2c9a9d0e
	.word	0x3ff6c012,0x750bdabf
	.word	0xbc728956,0x67ff0b0d
	.word	0x3ff6cfdc,0xddd47645
	.word	0x3c9c7aa9,0xb6f17309
	.word	0x3ff6dfb2,0x3c651a2f
	.word	0xbc6bbe3a,0x683c88ab
	.word	0x3ff6ef92,0x98593ae5
	.word	0xbc90b974,0x9e1ac8b2
	.word	0x3ff6ff7d,0xf9519484
	.word	0xbc883c0f,0x25860ef6
	.word	0x3ff70f74,0x66f42e87
	.word	0x3c59d644,0xd45aa65f
	.word	0x3ff71f75,0xe8ec5f74
	.word	0xbc816e47,0x86887a99
	.word	0x3ff72f82,0x86ead08a
	.word	0xbc920aa0,0x2cd62c72
	.word	0x3ff73f9a,0x48a58174
	.word	0xbc90a8d9,0x6c65d53c
	.word	0x3ff74fbd,0x35d7cbfd
	.word	0x3c9047fd,0x618a6e1c
	.word	0x3ff75feb,0x564267c9
	.word	0xbc902459,0x57316dd3
	.word	0x3ff77024,0xb1ab6e09
	.word	0x3c9b7877,0x169147f8
	.word	0x3ff78069,0x4fde5d3f
	.word	0x3c9866b8,0x0a02162c
	.word	0x3ff790b9,0x38ac1cf6
	.word	0x3c9349a8,0x62aadd3e
	.word	0x3ff7a114,0x73eb0187
	.word	0xbc841577,0xee04992f
	.word	0x3ff7b17b,0x0976cfdb
	.word	0xbc9bebb5,0x8468dc88
	.word	0x3ff7c1ed,0x0130c132
	.word	0x3c9f124c,0xd1164dd6
	.word	0x3ff7d26a,0x62ff86f0
	.word	0x3c91bddb,0xfb72b8b4
	.word	0x3ff7e2f3,0x36cf4e62
	.word	0x3c705d02,0xba15797e
	.word	0x3ff7f387,0x8491c491
	.word	0xbc807f11,0xcf9311ae
	.word	0x3ff80427,0x543e1a12
	.word	0xbc927c86,0x626d972b
	.word	0x3ff814d2,0xadd106d9
	.word	0x3c946437,0x0d151d4d
	.word	0x3ff82589,0x994cce13
	.word	0xbc9d4c1d,0xd41532d8
	.word	0x3ff8364c,0x1eb941f7
	.word	0x3c999b9a,0x31df2bd5
	.word	0x3ff8471a,0x4623c7ad
	.word	0xbc88d684,0xa341cdfb
	.word	0x3ff857f4,0x179f5b21
	.word	0xbc5ba748,0xf8b216d0
	.word	0x3ff868d9,0x9b4492ec
	.word	0x3ca01c83,0xb21584a3
	.word	0x3ff879ca,0xd931a436
	.word	0x3c85d2d7,0xd2db47bc
	.word	0x3ff88ac7,0xd98a6699
	.word	0x3c9994c2,0xf37cb53a
	.word	0x3ff89bd0,0xa478580f
	.word	0x3c9d5395,0x4475202a
	.word	0x3ff8ace5,0x422aa0db
	.word	0x3c96e9f1,0x56864b27
	.word	0x3ff8be05,0xbad61778
	.word	0x3c9ecb5e,0xfc43446e
	.word	0x3ff8cf32,0x16b5448c
	.word	0xbc70d55e,0x32e9e3aa
	.word	0x3ff8e06a,0x5e0866d9
	.word	0xbc97114a,0x6fc9b2e6
	.word	0x3ff8f1ae,0x99157736
	.word	0x3c85cc13,0xa2e3976c
	.word	0x3ff902fe,0xd0282c8a
	.word	0x3c9592ca,0x85fe3fd2
	.word	0x3ff9145b,0x0b91ffc6
	.word	0xbc9dd679,0x2e582524
	.word	0x3ff925c3,0x53aa2fe2
	.word	0xbc83455f,0xa639db7f
	.word	0x3ff93737,0xb0cdc5e5
	.word	0xbc675fc7,0x81b57ebc
	.word	0x3ff948b8,0x2b5f98e5
	.word	0xbc8dc3d6,0x797d2d99
	.word	0x3ff95a44,0xcbc8520f
	.word	0xbc764b7c,0x96a5f039
	.word	0x3ff96bdd,0x9a7670b3
	.word	0xbc5ba596,0x7f19c896
	.word	0x3ff97d82,0x9fde4e50
	.word	0xbc9d185b,0x7c1b85d0
	.word	0x3ff98f33,0xe47a22a2
	.word	0x3c7cabda,0xa24c78ed
	.word	0x3ff9a0f1,0x70ca07ba
	.word	0xbc9173bd,0x91cee632
	.word	0x3ff9b2bb,0x4d53fe0d
	.word	0xbc9dd84e,0x4df6d518
	.word	0x3ff9c491,0x82a3f090
	.word	0x3c7c7c46,0xb071f2be
	.word	0x3ff9d674,0x194bb8d5
	.word	0xbc9516be,0xa3dd8233
	.word	0x3ff9e863,0x19e32323
	.word	0x3c7824ca,0x78e64c6e
	.word	0x3ff9fa5e,0x8d07f29e
	.word	0xbc84a9ce,0xaaf1face
	.word	0x3ffa0c66,0x7b5de565
	.word	0xbc935949,0x5d1cd533
	.word	0x3ffa1e7a,0xed8eb8bb
	.word	0x3c9c6618,0xee8be70e
	.word	0x3ffa309b,0xec4a2d33
	.word	0x3c96305c,0x7ddc36ab
	.word	0x3ffa42c9,0x80460ad8
	.word	0xbc9aa780,0x589fb120
	.word	0x3ffa5503,0xb23e255d
	.word	0xbc9d2f6e,0xdb8d41e1
	.word	0x3ffa674a,0x8af46052
	.word	0x3c650f56,0x30670366
	.word	0x3ffa799e,0x1330b358
	.word	0x3c9bcb7e,0xcac563c6
	.word	0x3ffa8bfe,0x53c12e59
	.word	0xbc94f867,0xb2ba15a8
	.word	0x3ffa9e6b,0x5579fdbf
	.word	0x3c90fac9,0x0ef7fd31
	.word	0x3ffab0e5,0x21356eba
	.word	0x3c889c31,0xdae94544
	.word	0x3ffac36b,0xbfd3f37a
	.word	0xbc8f9234,0xcae76cd0
	.word	0x3ffad5ff,0x3a3c2774
	.word	0x3c97ef3b,0xb6b1b8e4
	.word	0x3ffae89f,0x995ad3ad
	.word	0x3c97a1cd,0x345dcc81
	.word	0x3ffafb4c,0xe622f2ff
	.word	0xbc94b2fc,0x0f315ecc
	.word	0x3ffb0e07,0x298db666
	.word	0xbc9bdef5,0x4c80e425
	.word	0x3ffb20ce,0x6c9a8952
	.word	0x3c94dd02,0x4a0756cc
	.word	0x3ffb33a2,0xb84f15fb
	.word	0xbc62805e,0x3084d708
	.word	0x3ffb4684,0x15b749b1
	.word	0xbc7f763d,0xe9df7c90
	.word	0x3ffb5972,0x8de5593a
	.word	0xbc9c71df,0xbbba6de3
	.word	0x3ffb6c6e,0x29f1c52a
	.word	0x3c92a8f3,0x52883f6e
	.word	0x3ffb7f76,0xf2fb5e47
	.word	0xbc75584f,0x7e54ac3b
	.word	0x3ffb928c,0xf22749e4
	.word	0xbc9b7216,0x54cb65c6
	.word	0x3ffba5b0,0x30a1064a
	.word	0xbc9efcd3,0x0e54292e
	.word	0x3ffbb8e0,0xb79a6f1f
	.word	0xbc3f52d1,0xc9696205
	.word	0x3ffbcc1e,0x904bc1d2
	.word	0x3c823dd0,0x7a2d9e84
	.word	0x3ffbdf69,0xc3f3a207
	.word	0xbc3c2623,0x60ea5b52
	.word	0x3ffbf2c2,0x5bd71e09
	.word	0xbc9efdca,0x3f6b9c73
	.word	0x3ffc0628,0x6141b33d
	.word	0xbc8d8a5a,0xa1fbca34
	.word	0x3ffc199b,0xdd85529c
	.word	0x3c811065,0x895048dd
	.word	0x3ffc2d1c,0xd9fa652c
	.word	0xbc96e516,0x17c8a5d7
	.word	0x3ffc40ab,0x5fffd07a
	.word	0x3c9b4537,0xe083c60a
	.word	0x3ffc5447,0x78fafb22
	.word	0x3c912f07,0x2493b5af
	.word	0x3ffc67f1,0x2e57d14b
	.word	0x3c92884d,0xff483cad
	.word	0x3ffc7ba8,0x8988c933
	.word	0xbc8e76bb,0xbe255559
	.word	0x3ffc8f6d,0x9406e7b5
	.word	0x3c71acbc,0x48805c44
	.word	0x3ffca340,0x5751c4db
	.word	0xbc87f2be,0xd10d08f4
	.word	0x3ffcb720,0xdcef9069
	.word	0x3c7503cb,0xd1e949db
	.word	0x3ffccb0f,0x2e6d1675
	.word	0xbc7d220f,0x86009093
	.word	0x3ffcdf0b,0x555dc3fa
	.word	0xbc8dd83b,0x53829d72
	.word	0x3ffcf315,0x5b5bab74
	.word	0xbc9a08e9,0xb86dff57
	.word	0x3ffd072d,0x4a07897c
	.word	0xbc9cbc37,0x43797a9c
	.word	0x3ffd1b53,0x2b08c968
	.word	0x3c955636,0x219a36ee
	.word	0x3ffd2f87,0x080d89f2
	.word	0xbc9d487b,0x719d8578
	.word	0x3ffd43c8,0xeacaa1d6
	.word	0x3c93db53,0xbf5a1614
	.word	0x3ffd5818,0xdcfba487
	.word	0x3c82ed02,0xd75b3706
	.word	0x3ffd6c76,0xe862e6d3
	.word	0x3c5fe87a,0x4a8165a0
	.word	0x3ffd80e3,0x16c98398
	.word	0xbc911ec1,0x8beddfe8
	.word	0x3ffd955d,0x71ff6075
	.word	0x3c9a052d,0xbb9af6be
	.word	0x3ffda9e6,0x03db3285
	.word	0x3c9c2300,0x696db532
	.word	0x3ffdbe7c,0xd63a8315
	.word	0xbc9b76f1,0x926b8be4
	.word	0x3ffdd321,0xf301b460
	.word	0x3c92da57,0x78f018c2
	.word	0x3ffde7d5,0x641c0658
	.word	0xbc9ca552,0x8e79ba8f
	.word	0x3ffdfc97,0x337b9b5f
	.word	0xbc91a5cd,0x4f184b5c
	.word	0x3ffe1167,0x6b197d17
	.word	0xbc72b529,0xbd5c7f44
	.word	0x3ffe2646,0x14f5a129
	.word	0xbc97b627,0x817a1496
	.word	0x3ffe3b33,0x3b16ee12
	.word	0xbc99f4a4,0x31fdc68a
	.word	0x3ffe502e,0xe78b3ff6
	.word	0x3c839e89,0x80a9cc8f
	.word	0x3ffe6539,0x24676d76
	.word	0xbc863ff8,0x7522b734
	.word	0x3ffe7a51,0xfbc74c83
	.word	0x3c92d522,0xca0c8de2
	.word	0x3ffe8f79,0x77cdb740
	.word	0xbc910894,0x80b054b1
	.word	0x3ffea4af,0xa2a490da
	.word	0xbc9e9c23,0x179c2893
	.word	0x3ffeb9f4,0x867cca6e
	.word	0x3c94832f,0x2293e4f2
	.word	0x3ffecf48,0x2d8e67f1
	.word	0xbc9c93f3,0xb411ad8c
	.word	0x3ffee4aa,0xa2188510
	.word	0x3c91c68d,0xa487568d
	.word	0x3ffefa1b,0xee615a27
	.word	0x3c9dc7f4,0x86a4b6b0
	.word	0x3fff0f9c,0x1cb6412a
	.word	0xbc932200,0x65181d45
	.word	0x3fff252b,0x376bba97
	.word	0x3c93a1a5,0xbf0d8e43
	.word	0x3fff3ac9,0x48dd7274
	.word	0xbc795a5a,0x3ed837de
	.word	0x3fff5076,0x5b6e4540
	.word	0x3c99d3e1,0x2dd8a18b
	.word	0x3fff6632,0x798844f8
	.word	0x3c9fa37b,0x3539343e
	.word	0x3fff7bfd,0xad9cbe14
	.word	0xbc9dbb12,0xd006350a
	.word	0x3fff91d8,0x02243c89
	.word	0xbc612ea8,0xa779f689
	.word	0x3fffa7c1,0x819e90d8
	.word	0x3c874853,0xf3a5931e
	.word	0x3fffbdba,0x3692d514
	.word	0xbc796773,0x15098eb6
	.word	0x3fffd3c2,0x2b8f71f1
	.word	0x3c62eb74,0x966579e7
	.word	0x3fffe9d9,0x6b2a23d9
	.word	0x3c74a603,0x7442fde3

	.align	16
/*
 * constants: fourteen IEEE-754 doubles (8 bytes each, high word first)
 * that __vexp loads into %f36..%f62 with ldd at entry.  The order must
 * match the byte offsets given by the ox3ef..ln2_256l #defines.
 *
 *	offset	name		value
 *	0x00	ox3ef		2^-16
 *	0x08	thresh		~709.78 (about 1024*ln2)
 *	0x10	tiny		2^-1007
 *	0x18	huge		2^1009
 *	0x20	signbit		-0.0 (only the sign bit set)
 *	0x28	two96		2^64  (= 2^52 * 2^12)
 *	0x30	neginf		-infinity
 *	0x38	one		1.0
 *	0x40	B1		~1/2
 *	0x48	B2		~1/6
 *	0x50	B3		~1/24
 *	0x58	invln2_256	(256/ln2) * 2^12
 *	0x60	ln2_256h	high part of (ln2/256) * 2^-12
 *	0x68	ln2_256l	low part of  (ln2/256) * 2^-12
 *
 * B1..B3 are the polynomial coefficients named in the algorithm notes.
 * NOTE(review): thresh is presumably the exp() overflow threshold, and
 * the name two96 presumably dates from the original 2^44 scaling
 * (2^52 * 2^44 = 2^96); the stored value is 2^64.  Confirm both.
 */
constants:
	.word	0x3ef00000,0x00000000
	.word	0x40862e42,0xfefa39ef
	.word	0x01000000,0x00000000
	.word	0x7f000000,0x00000000
	.word	0x80000000,0x00000000
	.word	0x43f00000,0x00000000 ! scaling 2^12 two96
	.word	0xfff00000,0x00000000
	.word	0x3ff00000,0x00000000
	.word	0x3fdfffff,0xfffffff6
	.word	0x3fc55555,0x721a1d14
	.word	0x3fa55555,0x6e0896af
	.word	0x41371547,0x652b82fe ! scaling 2^12 invln2_256
	.word	0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h
	.word	0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l

/*
 * The commented-out words below are the same four values without the
 * 2^12 / 2^(-12) scaling (two96 = 2^52, invln2_256 = 256/ln2, and the
 * high/low parts of ln2/256).  Kept for reference only.
 */
	! base set w/o scaling
	! .word	0x43300000,0x00000000 ! scaling  two96
	! .word	0x40771547,0x652b82fe ! scaling  invln2_256
	! .word	0x3f662e42,0xfee00000 ! scaling  ln2_256h
	! .word	0x3d6a39ef,0x35793c76 ! scaling  ln2_256l

/*
 * Byte offsets of each double within the `constants` table, in table
 * order (8 bytes apart).  __vexp uses them as [%o3+<name>] in the ldd
 * sequence that fills %f36..%f62, so they must stay in step with the
 * .word pairs under `constants`.  B1OFF..B3OFF carry the OFF suffix
 * because B1..B3 themselves are #defined as register names.
 */
#define ox3ef		0x0
#define thresh		0x8
#define tiny		0x10
#define huge		0x18
#define signbit		0x20
#define two96		0x28
#define neginf		0x30
#define one		0x38
#define B1OFF		0x40
#define B2OFF		0x48
#define B3OFF		0x50
#define invln2_256	0x58
#define ln2_256h	0x60
#define ln2_256l	0x68

! local storage indices

/*
 * Offsets of scratch slots in the tmps-byte temporary area that the
 * `save %sp,-SA(MINFRAME)-tmps,%sp` in __vexp reserves.
 *   m0, m1, m2  three adjacent 4-byte slots at -0xc, -0x8, -0x4
 *		 (presumably addressed off %fp like jnk; the uses are
 *		 outside the part of the routine documented here)
 *   jnk	 lowest address of the area (-0x20 == -tmps); __vexp
 *		 points %l3, %l4 and %l5 at %fp+jnk before entering the
 *		 loop ("precondition loop")
 * All offsets include STACK_BIAS.
 */
#define m2		STACK_BIAS-0x4
#define m1		STACK_BIAS-0x8
#define m0		STACK_BIAS-0xc
#define jnk		STACK_BIAS-0x20
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! g1  TBL

! l0  m0
! l1  m1
! l2  m2
! l3  j0,oy0
! l4  j1,oy1
! l5  j2,oy2
! l6  0x3e300000
! l7  0x40862e41

! o0  py0
! o1  py1
! o2  py2
! o3  scratch
! o4  scratch
! o5  0x40874910
! o7  0x7ff00000

! f0  x0
! f2  
! f4  
! f6  
! f8  
! f10 x1
! f12 
! f14 
! f16 
! f18 
! f20 x2
! f22 
! f24 
! f26 
! f28 
! f30 
! f32 
! f34 
! f36 0x3ef0...
! f38 thresh
! f40 tiny
! f42 huge
! f44 signbit
! f46 two96
! f48 neginf
! f50 one
! f52 B1
! f54 B2
! f56 B3
! f58 invln2_256
! f60 ln2_256h
! f62 ln2_256l
/*
 * Symbolic names for the double-precision FP registers %f36..%f62.
 * __vexp loads each one from the matching slot of `constants` at entry
 * (BOUNDRY from ox3ef, THRESH from thresh, ... LN2_256L from ln2_256l),
 * in the same order as the table.
 */
#define BOUNDRY %f36
#define THRESH %f38
#define TINY %f40
#define HUGE %f42
#define SIGNBIT %f44
#define TWO96 %f46
#define NEGINF %f48
#define ONE %f50
#define B1 %f52
#define B2 %f54
#define B3 %f56
#define INVLN2_256 %f58
#define LN2_256H %f60
#define LN2_256L %f62

	! __vexp entry and setup.  Per the register-use list:
	! %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey.
	! Loads every table constant into %f36-%f62, builds the integer range
	! limits, converts the strides to byte offsets, preloads the first
	! argument and enters the three-way unrolled loop at .loop0.
	! NOTE(review): x[0] is read before n is examined and the tail code
	! always stores at least once, so this appears to assume n >= 1 -
	! confirm against callers.
	ENTRY(__vexp)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	! NOTE(review): PIC_SETUP and PIC_SET are macros defined outside this
	! chunk; presumably they leave the address of constants in %o3 and the
	! address of TBL in %o0 - confirm.
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o3)
	PIC_SET(l7,TBL,o0)
	mov	%o0,%g1			! %g1 = table base used by the ldd [%g1+...] lookups
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads

	sethi	%hi(0x80000000),%i5	! sign-bit mask for the high word of x
	sethi	%hi(0x3e300000),%l6	! lower bound on |hx| for the main path
	sethi	%hi(0x40862e41),%l7	! upper bound on |hx| for the main path
	or	%l7,%lo(0x40862e41),%l7
	sethi	%hi(0x40874910),%o5	! |hx| above this is treated as huge, inf or nan
	or	%o5,%lo(0x40874910),%o5
	sethi	%hi(0x7ff00000),%o7	! |hx| below this is finite
	ldd	[%o3+ox3ef],BOUNDRY
	ldd	[%o3+thresh],THRESH
	ldd	[%o3+tiny],TINY
	ldd	[%o3+huge],HUGE
	ldd	[%o3+signbit],SIGNBIT
	ldd	[%o3+two96],TWO96
	ldd	[%o3+neginf],NEGINF
	ldd	[%o3+one],ONE
	ldd	[%o3+B1OFF],B1
	ldd	[%o3+B2OFF],B2
	ldd	[%o3+B3OFF],B3
	ldd	[%o3+invln2_256],INVLN2_256
	ldd	[%o3+ln2_256h],LN2_256H
	ldd	[%o3+ln2_256l],LN2_256L
	sll	%i2,3,%i2		! scale strides (multiply by 8, the size of a double)
	sll	%i4,3,%i4
	! precondition loop: .cont stores the results of the previous pass
	! through %l3-%l5, so point them at the scratch slot for the first pass
	add	%fp,jnk,%l3		! precondition loop
	add	%fp,jnk,%l4
	add	%fp,jnk,%l5
	ld	[%i1],%l0		! hx = *x
	ld	[%i1],%f0		! x0 high word
	ld	[%i1+4],%f1		! x0 low word
	andn	%l0,%i5,%l0		! hx &= ~0x80000000
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex (delay slot)

	! Staging for the three elements handled per pass.  Each stage
	!   - preloads the next argument with non-faulting loads (it may lie
	!     past the end of the input array),
	!   - screens the current |hx| against [0x3e300000, 0x40862e41] and
	!     leaves for .rangeN when it is outside,
	!   - records the output pointer and advances y,
	!   - decrements n and leaves for the scalar tail when it runs out,
	!   - starts the argument reduction for the current element.
	! Element 0 uses %f0-%f8, element 1 %f10-%f18, element 2 %f20-%f28.
	.align	16
! -- 16 byte aligned
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%l6,%o3		! negative if hx < 0x3e300000
	sub	%l7,%l0,%o4		! negative if hx > 0x40862e41
	fand	%f0,SIGNBIT,%f2		! get sign bit

	lda	[%i1]%asi,%f10
	orcc	%o3,%o4,%g0		! sign set if either difference is negative
	mov	%i3,%o0			! py0 = y
	bl,pn	%icc,.range0		! if hx < 0x3e300000 or > 0x40862e41

! delay slot
	lda	[%i1+4]%asi,%f11
	addcc	%i0,-1,%i0		! one element consumed
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop1		! x0 was the last element

! delay slot
	andn	%l1,%i5,%l1		! hx1 &= ~0x80000000
	add	%i1,%i2,%i1		! x += stridex
	for	%f2,TWO96,%f2		! used to strip least sig bits
	fmuld	%f0,INVLN2_256,%f4	! x/ (ln2/256)  , creating k

.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%l6,%o3
	sub	%l7,%l1,%o4
	fand	%f10,SIGNBIT,%f12	! get sign bit of x1

	lda	[%i1]%asi,%f20
	orcc	%o3,%o4,%g0
	mov	%i3,%o1			! py1 = y
	bl,pn	%icc,.range1		! if hx < 0x3e300000 or > 0x40862e41

! delay slot
	lda	[%i1+4]%asi,%f21
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop2		! x1 was the last element

! delay slot
	andn	%l2,%i5,%l2		! hx2 &= ~0x80000000
	add	%i1,%i2,%i1		! x += stridex
	for	%f12,TWO96,%f12		! sign of x1 combined with the rounding constant
	fmuld	%f10,INVLN2_256,%f14

.loop2:
	! the third element needs no preload here: the next x0 is fetched
	! inside .cont, and py2 / n are updated there as well
	sub	%l2,%l6,%o3
	sub	%l7,%l2,%o4
	fand	%f20,SIGNBIT,%f22
	fmuld	%f20,INVLN2_256,%f24		! okay to put this here; for alignment

	orcc	%o3,%o4,%g0
	bl,pn	%icc,.range2		! if hx < 0x3e300000 or > 0x40862e41
! delay slot
	for	%f22,TWO96,%f22
	! not part of the delay slot: .range2 repeats this add itself before
	! it re-enters at .cont
	faddd	%f4,%f2,%f4		! creating k+j/256, sra to zero bits

.cont:
	! Pipelined core for three in-range arguments x0 (%f0), x1 (%f10) and
	! x2 (%f20).  The same steps are issued for each element in turn:
	!   n = x*INVLN2_256 rounded by adding and subtracting +-TWO96
	!   r = x - n*LN2_256H - n*LN2_256L
	!   p = r * ((1 + r*B1) + r*r*(B2 + r*B3))
	!   result = p*T0 + T1 + T0, where T0 and T1 are the two doubles of
	!            the table entry selected by bits of n
	!   the upper bits of n are then added to the exponent field
	! The results of the previous pass are stored at the top of this pass.
	faddd	%f14,%f12,%f14
	mov	%i3,%o2			! py2 = y

	faddd	%f24,%f22,%f24
	add	%i3,%i4,%i3		! y += stridey

	! BUBBLE USIII

	fsubd	%f4,%f2,%f8		! remove the rounding constant again: f8 = rounded n0
	st	%f6,[%l3]		! store previous loop x0
	! (on the first pass %l3-%l5 point at the scratch slot)

	fsubd	%f14,%f12,%f18		! f18 = rounded n1
	st	%f7,[%l3+4]		! store previous loop x0

	fsubd	%f24,%f22,%f28		! f28 = rounded n2
	st	%f16,[%l4]		! store previous loop x1

	! BUBBLE USIII

	fmuld	%f8,LN2_256H,%f2	! closest LN2_256 to x
	st	%f17,[%l4+4]

	fmuld	%f18,LN2_256H,%f12
	st	%f26,[%l5]		! store previous loop x2

	fmuld	%f28,LN2_256H,%f22
	st	%f27,[%l5+4]

	! BUBBLE USIII

	fsubd	%f0,%f2,%f0		! r = x - p*LN2_256H
	fmuld	%f8,LN2_256L,%f4	! closest LN2_256 to x , added prec

	fsubd	%f10,%f12,%f10
	fmuld	%f18,LN2_256L,%f14

	fsubd	%f20,%f22,%f20
	fmuld	%f28,LN2_256L,%f24

	! BUBBLE USIII

	fsubd	%f0,%f4,%f0		! r -= p*LN2_256L

	fsubd	%f10,%f14,%f10

	fsubd	%f20,%f24,%f20

!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here

	! Alternate polynomial grouping allowing non-sequential calc of p
	! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
	! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3 ) ]
	!
	! let               SLi        Ri           SRi		be accumulators

	fmuld	%f0,B3,%f2	! SR1 = r1 * B3
	fdtoi	%f8,%f8				! convert k+j/256 to int
	st      %f8,[%fp+m0]			! store k, to shift return/use

	fmuld	%f10,B3,%f12	! SR2 = r2 * B3
	fdtoi	%f18,%f18			! convert k+j/256 to int
	st      %f18,[%fp+m1]			! store k, to shift return/use

	fmuld	%f20,B3,%f22	! SR3 = r3 * B3
	fdtoi	%f28,%f28			! convert k+j/256 to int
	st      %f28,[%fp+m2]			! store k, to shift return/use

	fmuld	%f0,%f0,%f4	! R1 = r1 * r1

	fmuld	%f10,%f10,%f14	! R2 = r2 * r2
	faddd	%f2,B2,%f2	! SR1 += B2

	fmuld	%f20,%f20,%f24	! R3 = r3 * r3
	faddd	%f12,B2,%f12	! SR2 += B2

	faddd	%f22,B2,%f22	! SR3 += B2
	fmuld	%f0,B1,%f6	! SL1 = r1 * B1

	fmuld	%f10,B1,%f32	! SL2 = r2 * B1
	! keep only the top 12 bits of the integer (mask 0xfff00000,0x00000000);
	! this is the amount added to the exponent field by fpadd32 below
	fand	%f8,NEGINF,%f8
	! best here for RAW BYPASS
	ld	[%fp+m0],%l0			! get nonshifted k into intreg

	fmuld	%f20,B1,%f34	! SL3 = r3 * B1
	fand	%f18,NEGINF,%f18
	ld	[%fp+m1],%l1			! get nonshifted k into intreg

	fmuld	%f4,%f2,%f4	! R1 = R1 * SR1
	fand	%f28,NEGINF,%f28
	ld	[%fp+m2],%l2			! get nonshifted k into intreg

	fmuld	%f14,%f12,%f14	! R2 = R2 * SR2
	faddd	%f6,ONE,%f6	! SL1 += 1

	fmuld	%f24,%f22,%f24	! R3 = R3 * SR3
	faddd	%f32,ONE,%f32	! SL2 += 1
	sra	%l0,8,%l3			! shift k tobe offset 256-8byte

	faddd	%f34,ONE,%f34	! SL3 += 1
	sra	%l1,8,%l4			! shift k tobe offset 256-8byte
	sra	%l2,8,%l5			! shift k tobe offset 256-8byte

	! BUBBLE in USIII
	! (n >> 8) & 0xff0 = 16 * (bits 12-19 of n): byte offset of a
	! 16-byte table entry (two doubles)
	and	%l3,0xff0,%l3
	and	%l4,0xff0,%l4



	faddd	%f6,%f4,%f6	! R1 = SL1 + R1
	ldd     [%g1+%l3],%f4			! tbl[j]
	add     %l3,8,%l3			! inc j
	and	%l5,0xff0,%l5


	faddd	%f32,%f14,%f32	! R2 = SL2 + R2
	ldd     [%g1+%l4],%f14			! tbl[j]
	add     %l4,8,%l4			! inc j
	sra	%l0,20,%o3			! exponent part of n0

	faddd	%f34,%f24,%f34	! R3 = SL3 + R3
	ldd     [%g1+%l5],%f24			! tbl[j]
	add     %l5,8,%l5			! inc j
	sra	%l1,20,%l1			! exponent part of n1

	! BUBBLE in USIII
	ldd     [%g1+%l4],%f16		! tbl[j+1]
	add     %o3,1021,%o3		! negative => result 0 needs the .small fix-up

	fmuld	%f0,%f6,%f0	! p1 = r1 * R1
	ldd     [%g1+%l3],%f6           ! tbl[j+1]
	add     %l1,1021,%l1		! negative => result 1 needs the .small fix-up
	sra	%l2,20,%l2			! exponent part of n2

	fmuld	%f10,%f32,%f10	! p2 = r2 * R2
	ldd     [%g1+%l5],%f26		! tbl[j+1]
	add     %l2,1021,%l2		! negative => result 2 needs the .small fix-up

	fmuld	%f20,%f34,%f20	! p3 = r3 * R3

 
 


!!!!!!!!!!!!!!!!!!! poly-reorder - ends here

	fmuld	%f0,%f4,%f0		! start exp(x) = exp(r) * tbl[j]
	mov	%o0,%l3			! %l3-%l5 now hold py0-py2 for the deferred stores

	fmuld	%f10,%f14,%f10
	mov	%o1,%l4

	fmuld	%f20,%f24,%f20
	mov	%o2,%l5

	! NOTE(review): which table word holds the high and which the low
	! part cannot be told from this chunk; described by register here.
	faddd	%f0,%f6,%f6		! cont exp(x) : add the second table word (offset+8)
	lda	[%i1]%asi,%l0		! preload next argument

	faddd	%f10,%f16,%f16
	lda	[%i1]%asi,%f0

	faddd	%f20,%f26,%f26
	lda	[%i1+4]%asi,%f1

	faddd	%f6,%f4,%f6		! cont exp(x) : add the first table word (offset+0)
	add	%i1,%i2,%i1		! x += stridex

	faddd	%f16,%f14,%f16
	andn	%l0,%i5,%l0		! hx &= ~0x80000000 for the next x0
	or	%o3,%l1,%o4		! combine the three exponent checks

! -- 16 byte aligned
	orcc	%o4,%l2,%o4		! sign set if any of the three is negative
	bl,pn	%icc,.small
! delay slot
	faddd	%f26,%f24,%f26

	fpadd32	%f6,%f8,%f6		! done exp(x) : apply 2^k
	fpadd32	%f16,%f18,%f16


	addcc	%i0,-1,%i0		! third element of this pass consumed
	bg,pn	%icc,.loop0
! delay slot
	fpadd32	%f26,%f28,%f26

	ba,pt	%icc,.endloop0		! nothing left: flush the pending results
! delay slot
	nop


	! Reached from .cont when at least one of the three exponent checks
	! ((n >> 20) + 1021, held in %o3, %l1, %l2) is negative.  Every result
	! still gets its exponent adjustment by fpadd32; a result whose check
	! is negative additionally has BOUNDRY added to its high word and is
	! then multiplied by TINY (table entry 0x01000000,0x00000000).
	! NOTE(review): the BOUNDRY table entry lies outside this chunk; the
	! register-use list gives it as 0x3ef0..., presumably chosen so that
	! the multiply by TINY undoes it while producing a correctly rounded
	! tiny result - confirm.
	.align	16
.small:
	tst	%o3
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f6,%f8,%f6		! always executed: apply 2^k to result 0
	fpadd32	%f6,BOUNDRY,%f6
	fmuld	%f6,TINY,%f6
1:
	tst	%l1
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f16,%f18,%f16		! always executed: apply 2^k to result 1
	fpadd32	%f16,BOUNDRY,%f16
	fmuld	%f16,TINY,%f16
1:
	tst	%l2
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f26,%f28,%f26		! always executed: apply 2^k to result 2
	fpadd32	%f26,BOUNDRY,%f26
	fmuld	%f26,TINY,%f26
1:
	addcc	%i0,-1,%i0		! same loop-closing test as the end of .cont
	bg,pn	%icc,.loop0
! delay slot
	nop
	ba,pt	%icc,.endloop0
! delay slot
	nop


! Scalar tail used when the input runs out with x0 and x1 staged.  Computes
! exp(x1) from %f10 with the same steps as .cont and stores it through %o1,
! then falls through to .endloop1 for x0.  Only %f30 and %f32 are used as
! extra temporaries, so the results of the previous pass still pending in
! %f6, %f16 and %f26 survive until .endloop0 stores them.
.endloop2:
	for	%f12,TWO96,%f12		! repeated from the loop; the OR is idempotent
	fmuld	%f10,INVLN2_256,%f14
	faddd	%f14,%f12,%f14
	fsubd	%f14,%f12,%f18		! f18 = rounded n1
	fmuld	%f18,LN2_256H,%f12
	fsubd	%f10,%f12,%f10		! r = x1 - n1*LN2_256H
	fmuld	%f18,LN2_256L,%f14
	fsubd	%f10,%f14,%f10		! r -= n1*LN2_256L
	fmuld	%f10,B3,%f12		! SR = r * B3
	fdtoi	%f18,%f18
	st      %f18,[%fp+m1]
	fmuld	%f10,%f10,%f14		! R = r * r
	faddd	%f12,B2,%f12		! SR += B2
	fmuld	%f10,B1,%f32		! SL = r * B1
	fand	%f18,NEGINF,%f18	! keep the exponent part of n1 for fpadd32
	ld	[%fp+m1],%l1
	fmuld	%f14,%f12,%f14		! R = R * SR
	faddd	%f32,ONE,%f32		! SL += 1
	sra	%l1,8,%o4	
	and	%o4,0xff0,%o4		! byte offset of the 16-byte table entry
	faddd	%f32,%f14,%f32		! R = SL + R
	ldd     [%g1+%o4],%f14		! first table word
	add     %o4,8,%o4
	sra	%l1,20,%l1
	ldd     [%g1+%o4],%f30		! second table word
	addcc	%l1,1021,%l1		! sets icc for the bge below (FP ops leave icc alone)
	fmuld	%f10,%f32,%f10		! p = r * R
	fmuld	%f10,%f14,%f10
	faddd	%f10,%f30,%f30
	faddd	%f30,%f14,%f30
	bge,pt	%icc,1f			! skip the tiny-result fix-up when not negative
! delay slot
	fpadd32	%f30,%f18,%f30		! always executed: apply 2^k
	fpadd32	%f30,BOUNDRY,%f30
	fmuld	%f30,TINY,%f30
1:
	st	%f30,[%o1]
	st	%f31,[%o1+4]

! Scalar tail used when the input runs out with only x0 staged (also the
! fall-through target of .endloop2).  Computes exp(x0) from %f0 with the
! same steps as .cont, stores it through %o0 and falls through to .endloop0.
! As in .endloop2, only %f30 and %f32 are used as extra temporaries so the
! pending results in %f6, %f16 and %f26 are preserved.
.endloop1:
	for	%f2,TWO96,%f2		! repeated from the loop; the OR is idempotent
	fmuld	%f0,INVLN2_256,%f4
	faddd	%f4,%f2,%f4
	fsubd	%f4,%f2,%f8		! f8 = rounded n0
	fmuld	%f8,LN2_256H,%f2
	fsubd	%f0,%f2,%f0		! r = x0 - n0*LN2_256H
	fmuld	%f8,LN2_256L,%f4
	fsubd	%f0,%f4,%f0		! r -= n0*LN2_256L
	fmuld	%f0,B3,%f2		! SR = r * B3
	fdtoi	%f8,%f8
	st	%f8,[%fp+m0]
	fmuld	%f0,%f0,%f4		! R = r * r
	faddd	%f2,B2,%f2		! SR += B2
	fmuld	%f0,B1,%f32		! SL = r * B1
	fand	%f8,NEGINF,%f8		! keep the exponent part of n0 for fpadd32
	ld	[%fp+m0],%l0
	fmuld	%f4,%f2,%f4		! R = R * SR
	faddd	%f32,ONE,%f32		! SL += 1
	sra	%l0,8,%o4	
	and	%o4,0xff0,%o4		! byte offset of the 16-byte table entry
	faddd	%f32,%f4,%f32		! R = SL + R
	ldd     [%g1+%o4],%f4		! first table word
	add     %o4,8,%o4
	sra	%l0,20,%o3
	ldd     [%g1+%o4],%f30		! second table word
	addcc	%o3,1021,%o3		! sets icc for the bge below (FP ops leave icc alone)
	fmuld	%f0,%f32,%f0		! p = r * R
	fmuld	%f0,%f4,%f0
	faddd	%f0,%f30,%f30
	faddd	%f30,%f4,%f30
	bge,pt	%icc,1f			! skip the tiny-result fix-up when not negative
! delay slot
	fpadd32	%f30,%f8,%f30		! always executed: apply 2^k
	fpadd32	%f30,BOUNDRY,%f30
	fmuld	%f30,TINY,%f30
1:
	st	%f30,[%o0]
	st	%f31,[%o0+4]

! Common exit.  Stores the three results still pending from the last full
! pass through %l3-%l5 and returns.  When no full pass has run, %l3-%l5
! still point at the scratch slot set up at entry, so these stores touch
! only the local frame.
.endloop0:
	st	%f6,[%l3]
	st	%f7,[%l3+4]
	st	%f16,[%l4]
	st	%f17,[%l4+4]
	st	%f26,[%l5]
	st	%f27,[%l5+4]
	ret
	restore				! delay slot of ret: pop the register window


! x0 failed the range screen in .loop0 (|hx| in %l0, x0 in %f0, py0 in %o0;
! n and y have not yet been advanced for this element).
!   |hx| < 0x3e300000                -> result is 1 + x
!   |hx| <= 0x40874910, x <= THRESH  -> still computable: rejoin at .loop1
!   |hx| <= 0x40874910, x >  THRESH  -> HUGE*HUGE
!   |hx| >  0x40874910, finite       -> HUGE*HUGE if x >= 1, else TINY*TINY
!   inf or nan                       -> x*x, except 0*0 when x equals NEGINF
! A special result is stored at once; the preloaded x1 then becomes x0 and
! the loop restarts at .loop0.
.range0:
	cmp	%l0,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f0,ONE,%f4

	cmp	%l0,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f0,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f4

! x is near the extremes but within range; return to the loop
! (the next eight instructions repeat the tail of the .loop0 stage)
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop1
! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	for	%f2,TWO96,%f2
	ba,pt	%icc,.loop1
! delay slot
	fmuld	%f0,INVLN2_256,%f4

1:
	cmp	%l0,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f4
	fcmpd	%fcc0,%f0,NEGINF
	fmovdne	%fcc0,%f0,%f4		! keep x unless it compares equal to NEGINF
	ba,pt	%icc,3f
	fmuld	%f4,%f4,%f4		! x*x or zero*zero
2:
	fmovd	HUGE,%f4
	fcmpd	%fcc0,%f0,ONE
	fmovdl	%fcc0,TINY,%f4		! x < 1 here means large and negative
	fmuld	%f4,%f4,%f4		! huge*huge or tiny*tiny
3:
	st	%f4,[%o0]
	andn	%l1,%i5,%l0		! the preloaded x1 becomes the new x0: hx
	add	%i1,%i2,%i1		! x += stridex
	fmovd	%f10,%f0		! ... and its value
	st	%f5,[%o0+4]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop0
! delay slot
	nop


! x1 failed the range screen in .loop1 (|hx| in %l1, x1 in %f10, py1 in
! %o1).  Same case analysis as .range0.  A within-range argument rejoins
! at .loop2; after a special result the preloaded x2 becomes x1 and the
! loop restarts at .loop1, or finishes x0 through .endloop1.
.range1:
	cmp	%l1,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f10,ONE,%f14

	cmp	%l1,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f10,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f14

! x is near the extremes but within range; return to the loop
! (the next eight instructions repeat the tail of the .loop1 stage)
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop2
! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	for	%f12,TWO96,%f12
	ba,pt	%icc,.loop2
! delay slot
	fmuld	%f10,INVLN2_256,%f14

1:
	cmp	%l1,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f14
	fcmpd	%fcc0,%f10,NEGINF
	fmovdne	%fcc0,%f10,%f14		! keep x unless it compares equal to NEGINF
	ba,pt	%icc,3f
	fmuld	%f14,%f14,%f14		! x*x or zero*zero
2:
	fmovd	HUGE,%f14
	fcmpd	%fcc0,%f10,ONE
	fmovdl	%fcc0,TINY,%f14		! x < 1 here means large and negative
	fmuld	%f14,%f14,%f14		! huge*huge or tiny*tiny
3:
	st	%f14,[%o1]
	andn	%l2,%i5,%l1		! the preloaded x2 becomes the new x1: hx
	add	%i1,%i2,%i1		! x += stridex
	fmovd	%f20,%f10		! ... and its value
	st	%f15,[%o1+4]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop1
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop1
! delay slot
	nop


! x2 failed the range screen in .loop2 (|hx| in %l2, x2 in %f20).  Same
! case analysis as .range0.  py2 has not been recorded yet at this point,
! so a special result is stored through %i3 directly.  A within-range
! argument rejoins at .cont; after a special result the next argument is
! loaded here and the stage restarts at .loop2, or the two staged
! elements are finished through .endloop2.
.range2:
	cmp	%l2,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f20,ONE,%f24

	cmp	%l2,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f20,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f24

! x is near the extremes but within range; return to the loop
	ba,pt	%icc,.cont
! delay slot
	faddd	%f4,%f2,%f4		! the add that .loop2 skipped when it branched here

1:
	cmp	%l2,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f24
	fcmpd	%fcc0,%f20,NEGINF
	fmovdne	%fcc0,%f20,%f24		! keep x unless it compares equal to NEGINF
	ba,pt	%icc,3f
	fmuld	%f24,%f24,%f24		! x*x or zero*zero
2:
	fmovd	HUGE,%f24
	fcmpd	%fcc0,%f20,ONE
	fmovdl	%fcc0,TINY,%f24		! x < 1 here means large and negative
	fmuld	%f24,%f24,%f24		! huge*huge or tiny*tiny
3:
	st	%f24,[%i3]
	st	%f25,[%i3+4]
	lda	[%i1]%asi,%l2		! preload next argument
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	add	%i1,%i2,%i1		! x += stridex
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop2
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop2
! delay slot
	nop

	SET_SIZE(__vexp)