summaryrefslogtreecommitdiff
path: root/usr/src/lib/libmvec/common/vis/__vlog.S
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libmvec/common/vis/__vlog.S')
-rw-r--r--usr/src/lib/libmvec/common/vis/__vlog.S671
1 files changed, 671 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/vis/__vlog.S b/usr/src/lib/libmvec/common/vis/__vlog.S
new file mode 100644
index 0000000000..9229323d7b
--- /dev/null
+++ b/usr/src/lib/libmvec/common/vis/__vlog.S
@@ -0,0 +1,671 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .file "__vlog.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 32
+TBL:
+ .word 0xbfd522ae, 0x0738a000
+ .word 0xbd2ebe70, 0x8164c759
+ .word 0xbfd3c252, 0x77333000
+ .word 0xbd183b54, 0xb606bd5c
+ .word 0xbfd26962, 0x1134e000
+ .word 0x3d31b61f, 0x10522625
+ .word 0xbfd1178e, 0x8227e000
+ .word 0xbd31ef78, 0xce2d07f2
+ .word 0xbfcf991c, 0x6cb3c000
+ .word 0x3d390d04, 0xcd7cc834
+ .word 0xbfcd1037, 0xf2656000
+ .word 0x3d084a7e, 0x75b6f6e4
+ .word 0xbfca93ed, 0x3c8ae000
+ .word 0x3d287243, 0x50562169
+ .word 0xbfc823c1, 0x6551a000
+ .word 0xbd1e0ddb, 0x9a631e83
+ .word 0xbfc5bf40, 0x6b544000
+ .word 0x3d127023, 0xeb68981c
+ .word 0xbfc365fc, 0xb015a000
+ .word 0x3d3fd3a0, 0xafb9691b
+ .word 0xbfc1178e, 0x8227e000
+ .word 0xbd21ef78, 0xce2d07f2
+ .word 0xbfbda727, 0x63844000
+ .word 0xbd1a8940, 0x1fa71733
+ .word 0xbfb9335e, 0x5d594000
+ .word 0xbd23115c, 0x3abd47da
+ .word 0xbfb4d311, 0x5d208000
+ .word 0x3cf53a25, 0x82f4e1ef
+ .word 0xbfb08598, 0xb59e4000
+ .word 0x3d17e5dd, 0x7009902c
+ .word 0xbfa894aa, 0x149f8000
+ .word 0xbd39a19a, 0x8be97661
+ .word 0xbfa0415d, 0x89e78000
+ .word 0x3d3dddc7, 0xf461c516
+ .word 0xbf902056, 0x58930000
+ .word 0xbd3611d2, 0x7c8e8417
+ .word 0x00000000, 0x00000000
+ .word 0x00000000, 0x00000000
+ .word 0x3f9f829b, 0x0e780000
+ .word 0x3d298026, 0x7c7e09e4
+ .word 0x3faf0a30, 0xc0110000
+ .word 0x3d48a998, 0x5f325c5c
+ .word 0x3fb6f0d2, 0x8ae58000
+ .word 0xbd34b464, 0x1b664613
+ .word 0x3fbe2707, 0x6e2b0000
+ .word 0xbd2a342c, 0x2af0003c
+ .word 0x3fc29552, 0xf8200000
+ .word 0xbd35b967, 0xf4471dfc
+ .word 0x3fc5ff30, 0x70a78000
+ .word 0x3d43d3c8, 0x73e20a07
+ .word 0x3fc9525a, 0x9cf44000
+ .word 0x3d46b476, 0x41307539
+ .word 0x3fcc8ff7, 0xc79a8000
+ .word 0x3d4a21ac, 0x25d81ef3
+ .word 0x3fcfb918, 0x6d5e4000
+ .word 0xbd0d572a, 0xab993c87
+ .word 0x3fd1675c, 0xababa000
+ .word 0x3d38380e, 0x731f55c4
+ .word 0x3fd2e8e2, 0xbae12000
+ .word 0xbd267b1e, 0x99b72bd8
+ .word 0x3fd4618b, 0xc21c6000
+ .word 0xbd13d82f, 0x484c84cc
+ .word 0x3fd5d1bd, 0xbf580000
+ .word 0x3d4394a1, 0x1b1c1ee4
+! constants:
+ .word 0x40000000,0x00000000
+ .word 0x3fe55555,0x555571da
+ .word 0x3fd99999,0x8702be3a
+ .word 0x3fd24af7,0x3f4569b1
+ .word 0x3ea62e42,0xfee00000 ! scaled by 2**-20
+ .word 0x3caa39ef,0x35793c76 ! scaled by 2**-20
+ .word 0xffff8000,0x00000000
+ .word 0x43200000
+ .word 0xfff00000
+ .word 0xc0194000
+ .word 0x4000
+
+#define two 0x200
+#define A1 0x208
+#define A2 0x210
+#define A3 0x218
+#define ln2hi 0x220
+#define ln2lo 0x228
+#define mask 0x230
+#define ox43200000 0x238
+#define oxfff00000 0x23c
+#define oxc0194000 0x240
+#define ox4000 0x244
+
+! local storage indices
+
+#define jnk STACK_BIAS-0x8
+#define tmp2 STACK_BIAS-0x10
+#define tmp1 STACK_BIAS-0x18
+#define tmp0 STACK_BIAS-0x20
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5
+
+! g1 TBL
+
+! l0 j0
+! l1 j1
+! l2 j2
+! l3
+! l4 0x94000
+! l5
+! l6 0x000fffff
+! l7 0x7ff00000
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3
+! o4
+! o5
+! o7
+
+! f0 u0,q0
+! f2 v0,(two-v0)-u0,z0
+! f4 n0,f0,q0
+! f6 s0
+! f8 q
+! f10 u1,q1
+! f12 v1,(two-v1)-u1,z1
+! f14 n1,f1,q1
+! f16 s1
+! f18 t
+! f20 u2,q2
+! f22 v2,(two-v2)-u2,q2
+! f24 n2,f2,q2
+! f26 s2
+! f28 0xfff00000
+! f29 0x43200000
+! f30 0x4000
+! f31 0xc0194000
+! f32 t0
+! f34 h0,f0-(c0-h0)
+! f36 c0
+! f38 A1
+! f40 two
+! f42 t1
+! f44 h1,f1-(c1-h1)
+! f46 c1
+! f48 A2
+! f50 0xffff8000...
+! f52 t2
+! f54 h2,f2-(c2-h2)
+! f56 c2
+! f58 A3
+! f60 ln2hi
+! f62 ln2lo
+
+ ENTRY(__vlog)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,TBL,o0)
+ mov %o0,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+ sethi %hi(0x94000),%l4
+ sethi %hi(0x000fffff),%l6
+ or %l6,%lo(0x000fffff),%l6
+ sethi %hi(0x7ff00000),%l7
+ ldd [%g1+two],%f40
+ ldd [%g1+A1],%f38
+ ldd [%g1+A2],%f48
+ ldd [%g1+A3],%f58
+ ldd [%g1+ln2hi],%f60
+ ldd [%g1+ln2lo],%f62
+ ldd [%g1+mask],%f50
+ ld [%g1+ox43200000],%f29
+ ld [%g1+oxfff00000],%f28
+ ld [%g1+oxc0194000],%f31
+ ld [%g1+ox4000],%f30
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,jnk,%o0 ! precondition loop
+ add %fp,jnk,%o1
+ add %fp,jnk,%o2
+ fzero %f2
+ fzero %f6
+ fzero %f18
+ fzero %f36
+ fzero %f12
+ fzero %f14
+ fzero %f16
+ fzero %f42
+ fzero %f44
+ fzero %f46
+ std %f46,[%fp+tmp1]
+ fzero %f24
+ fzero %f26
+ fzero %f52
+ fzero %f54
+ std %f54,[%fp+tmp2]
+ sub %i3,%i4,%i3
+ ld [%i1],%l0 ! ix
+ ld [%i1],%f0 ! u.l[0] = *x
+ ba .loop0
+ ld [%i1+4],%f1 ! u.l[1] = *(1+x)
+
+ .align 16
+! -- 16 byte aligned
+.loop0:
+ sub %l0,%l7,%o3
+ sub %l6,%l0,%o4
+ fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000
+ fmuld %f6,%f2,%f8 ! (previous iteration)
+
+ andcc %o3,%o4,%o4
+ bge,pn %icc,.range0 ! ix <= 0x000fffff or >= 0x7ff00000
+! delay slot
+ fands %f4,%f28,%f4
+
+ add %i1,%i2,%i1 ! x += stridex
+ add %i3,%i4,%i3 ! y += stridey
+ fpsub32s %f0,%f4,%f0 ! u.l[0] -= n
+
+.cont0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ add %l0,%l4,%l0 ! j = ix + 0x94000
+ fpadd32s %f0,%f30,%f2 ! v.l[0] = u.l[0] + 0x4000
+
+ lda [%i1]%asi,%f10
+ srl %l0,11,%l0 ! j = (j >> 11) & 0x1f0
+ fand %f2,%f50,%f2 ! v.l &= 0xffff8000...
+
+ lda [%i1+4]%asi,%f11
+ and %l0,0x1f0,%l0
+ fitod %f4,%f32 ! (double) n
+
+ add %l0,8,%l3
+ fsubd %f0,%f2,%f4 ! f = u.d - v.d
+
+ faddd %f0,%f2,%f6 ! s = f / (u.d + v.d)
+
+ fsubd %f40,%f2,%f2 ! two - v.d
+ fmuld %f32,%f60,%f34 ! h = n * ln2hi + TBL[j]
+
+ faddd %f8,%f18,%f8 ! y = c + (t + q)
+ fmuld %f32,%f62,%f32 ! t = n * ln2lo + TBL[j+1]
+
+ fdivd %f4,%f6,%f6
+
+ faddd %f54,%f24,%f56 ! c = h + f
+ fmuld %f26,%f26,%f22 ! z = s * s
+
+ faddd %f8,%f36,%f8
+ st %f8,[%o0]
+
+ st %f9,[%o0+4]
+ mov %i3,%o0
+ faddd %f14,%f38,%f14
+
+ fsubd %f56,%f54,%f54 ! t += f - (c - h)
+ fmuld %f22,%f58,%f20 ! q = ...
+
+ fsubd %f2,%f0,%f2 ! (two - v.d) - u.d
+ ldd [%g1+%l0],%f36
+
+ faddd %f42,%f44,%f18
+ fmuld %f12,%f14,%f14
+ ldd [%fp+tmp1],%f12
+
+ faddd %f20,%f48,%f20
+ nop
+
+ faddd %f34,%f36,%f34
+ ldd [%g1+%l3],%f0
+
+ faddd %f14,%f12,%f12
+
+ fsubd %f24,%f54,%f54
+ fmuld %f22,%f20,%f24
+
+ std %f2,[%fp+tmp0]
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop0
+! delay slot
+ faddd %f32,%f0,%f32
+
+! -- 16 byte aligned
+.loop1:
+ sub %l1,%l7,%o3
+ sub %l6,%l1,%o4
+ fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000
+ fmuld %f16,%f12,%f8 ! (previous iteration)
+
+ andcc %o3,%o4,%o4
+ bge,pn %icc,.range1 ! ix <= 0x000fffff or >= 0x7ff00000
+! delay slot
+ fands %f14,%f28,%f14
+
+ add %i1,%i2,%i1 ! x += stridex
+ add %i3,%i4,%i3 ! y += stridey
+ fpsub32s %f10,%f14,%f10 ! u.l[0] -= n
+
+.cont1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ add %l1,%l4,%l1 ! j = ix + 0x94000
+ fpadd32s %f10,%f30,%f12 ! v.l[0] = u.l[0] + 0x4000
+
+ lda [%i1]%asi,%f20
+ srl %l1,11,%l1 ! j = (j >> 11) & 0x1f0
+ fand %f12,%f50,%f12 ! v.l &= 0xffff8000...
+
+ lda [%i1+4]%asi,%f21
+ and %l1,0x1f0,%l1
+ fitod %f14,%f42 ! (double) n
+
+ add %l1,8,%l3
+ fsubd %f10,%f12,%f14 ! f = u.d - v.d
+
+ faddd %f10,%f12,%f16 ! s = f / (u.d + v.d)
+
+ fsubd %f40,%f12,%f12 ! two - v.d
+ fmuld %f42,%f60,%f44 ! h = n * ln2hi + TBL[j]
+
+ faddd %f8,%f18,%f8 ! y = c + (t + q)
+ fmuld %f42,%f62,%f42 ! t = n * ln2lo + TBL[j+1]
+
+ fdivd %f14,%f16,%f16
+
+ faddd %f34,%f4,%f36 ! c = h + f
+ fmuld %f6,%f6,%f2 ! z = s * s
+
+ faddd %f8,%f46,%f8
+ st %f8,[%o1]
+
+ st %f9,[%o1+4]
+ mov %i3,%o1
+ faddd %f24,%f38,%f24
+
+ fsubd %f36,%f34,%f34 ! t += f - (c - h)
+ fmuld %f2,%f58,%f0 ! q = ...
+
+ fsubd %f12,%f10,%f12 ! (two - v.d) - u.d
+ ldd [%g1+%l1],%f46
+
+ faddd %f52,%f54,%f18
+ fmuld %f22,%f24,%f24
+ ldd [%fp+tmp2],%f22
+
+ faddd %f0,%f48,%f0
+ nop
+
+ faddd %f44,%f46,%f44
+ ldd [%g1+%l3],%f10
+
+ faddd %f24,%f22,%f22
+
+ fsubd %f4,%f34,%f34
+ fmuld %f2,%f0,%f4
+
+ std %f12,[%fp+tmp1]
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop1
+! delay slot
+ faddd %f42,%f10,%f42
+
+! -- 16 byte aligned
+.loop2:
+ sub %l2,%l7,%o3
+ sub %l6,%l2,%o4
+ fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000
+ fmuld %f26,%f22,%f8 ! (previous iteration)
+
+ andcc %o3,%o4,%o4
+ bge,pn %icc,.range2 ! ix <= 0x000fffff or >= 0x7ff00000
+! delay slot
+ fands %f24,%f28,%f24
+
+ add %i1,%i2,%i1 ! x += stridex
+ add %i3,%i4,%i3 ! y += stridey
+ fpsub32s %f20,%f24,%f20 ! u.l[0] -= n
+
+.cont2:
+ lda [%i1]%asi,%l0 ! preload next argument
+ add %l2,%l4,%l2 ! j = ix + 0x94000
+ fpadd32s %f20,%f30,%f22 ! v.l[0] = u.l[0] + 0x4000
+
+ lda [%i1]%asi,%f0
+ srl %l2,11,%l2 ! j = (j >> 11) & 0x1f0
+ fand %f22,%f50,%f22 ! v.l &= 0xffff8000...
+
+ lda [%i1+4]%asi,%f1
+ and %l2,0x1f0,%l2
+ fitod %f24,%f52 ! (double) n
+
+ add %l2,8,%l3
+ fsubd %f20,%f22,%f24 ! f = u.d - v.d
+
+ faddd %f20,%f22,%f26 ! s = f / (u.d + v.d)
+
+ fsubd %f40,%f22,%f22 ! two - v.d
+ fmuld %f52,%f60,%f54 ! h = n * ln2hi + TBL[j]
+
+ faddd %f8,%f18,%f8 ! y = c + (t + q)
+ fmuld %f52,%f62,%f52 ! t = n * ln2lo + TBL[j+1]
+
+ fdivd %f24,%f26,%f26
+
+ faddd %f44,%f14,%f46 ! c = h + f
+ fmuld %f16,%f16,%f12 ! z = s * s
+
+ faddd %f8,%f56,%f8
+ st %f8,[%o2]
+
+ st %f9,[%o2+4]
+ mov %i3,%o2
+ faddd %f4,%f38,%f4
+
+ fsubd %f46,%f44,%f44 ! t += f - (c - h)
+ fmuld %f12,%f58,%f10 ! q = ...
+
+ fsubd %f22,%f20,%f22 ! (two - v.d) - u.d
+ ldd [%g1+%l2],%f56
+
+ faddd %f32,%f34,%f18
+ fmuld %f2,%f4,%f4
+ ldd [%fp+tmp0],%f2
+
+ faddd %f10,%f48,%f10
+ nop
+
+ faddd %f54,%f56,%f54
+ ldd [%g1+%l3],%f20
+
+ faddd %f4,%f2,%f2
+
+ fsubd %f14,%f44,%f44
+ fmuld %f12,%f10,%f14
+
+ std %f22,[%fp+tmp2]
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ faddd %f52,%f20,%f52
+
+
+! Once we get to the last element, we loop three more times to finish
+! the computations in progress. This means we will load past the end
+! of the argument vector, but since we use non-faulting loads and never
+! use the data, the only potential problem is cache miss. (Note that
+! when the argument is 2, the only exception that occurs in the compu-
+! tation is an inexact result in the final addition, and we break out
+! of the "extra" iterations before then.)
+.endloop2:
+ sethi %hi(0x40000000),%l0 ! "next argument" = two
+ cmp %i0,-3
+ bg,a,pt %icc,.loop0
+! delay slot
+ fmovd %f40,%f0
+ ret
+ restore
+
+ .align 16
+.endloop0:
+ sethi %hi(0x40000000),%l1 ! "next argument" = two
+ cmp %i0,-3
+ bg,a,pt %icc,.loop1
+! delay slot
+ fmovd %f40,%f10
+ ret
+ restore
+
+ .align 16
+.endloop1:
+ sethi %hi(0x40000000),%l2 ! "next argument" = two
+ cmp %i0,-3
+ bg,a,pt %icc,.loop2
+! delay slot
+ fmovd %f40,%f20
+ ret
+ restore
+
+
+ .align 16
+.range0:
+ cmp %l0,%l7
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [%i1+4],%o5
+ fxtod %f0,%f0 ! scale by 2**1074 w/o trapping
+ st %f0,[%fp+tmp0]
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l0,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands %f4,%f28,%f4
+ fpsub32s %f0,%f4,%f0 ! u.l[0] -= n
+ ld [%fp+tmp0],%l0
+ ba,pt %icc,.cont0
+! delay slot
+ fpsub32s %f4,%f29,%f4 ! n -= 0x43200000
+1:
+ fdivs %f29,%f1,%f4 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st %f28,[%i3] ! store -inf
+2:
+ sll %l0,1,%l0 ! lop off sign bit
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l0,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fabsd %f0,%f4 ! *y = (x + |x|) * inf
+ faddd %f0,%f4,%f0
+ fand %f28,%f50,%f4
+ fnegd %f4,%f4
+ fmuld %f0,%f4,%f0
+ st %f0,[%i3]
+3:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop2
+! delay slot
+ st %f1,[%i3+4]
+ ld [%i1],%l0 ! get next argument
+ ld [%i1],%f0
+ ba,pt %icc,.loop0
+! delay slot
+ ld [%i1+4],%f1
+
+
+ .align 16
+.range1:
+ cmp %l1,%l7
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [%i1+4],%o5
+ fxtod %f10,%f10 ! scale by 2**1074 w/o trapping
+ st %f10,[%fp+tmp1]
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l1,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands %f14,%f28,%f14
+ fpsub32s %f10,%f14,%f10 ! u.l[0] -= n
+ ld [%fp+tmp1],%l1
+ ba,pt %icc,.cont1
+! delay slot
+ fpsub32s %f14,%f29,%f14 ! n -= 0x43200000
+1:
+ fdivs %f29,%f11,%f14 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st %f28,[%i3] ! store -inf
+2:
+ sll %l1,1,%l1 ! lop off sign bit
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l1,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fabsd %f10,%f14 ! *y = (x + |x|) * inf
+ faddd %f10,%f14,%f10
+ fand %f28,%f50,%f14
+ fnegd %f14,%f14
+ fmuld %f10,%f14,%f10
+ st %f10,[%i3]
+3:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop0
+! delay slot
+ st %f11,[%i3+4]
+ ld [%i1],%l1 ! get next argument
+ ld [%i1],%f10
+ ba,pt %icc,.loop1
+! delay slot
+ ld [%i1+4],%f11
+
+
+ .align 16
+.range2:
+ cmp %l2,%l7
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [%i1+4],%o5
+ fxtod %f20,%f20 ! scale by 2**1074 w/o trapping
+ st %f20,[%fp+tmp2]
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l2,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands %f24,%f28,%f24
+ fpsub32s %f20,%f24,%f20 ! u.l[0] -= n
+ ld [%fp+tmp2],%l2
+ ba,pt %icc,.cont2
+! delay slot
+ fpsub32s %f24,%f29,%f24 ! n -= 0x43200000
+1:
+ fdivs %f29,%f21,%f24 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st %f28,[%i3] ! store -inf
+2:
+ sll %l2,1,%l2 ! lop off sign bit
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l2,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fabsd %f20,%f24 ! *y = (x + |x|) * inf
+ faddd %f20,%f24,%f20
+ fand %f28,%f50,%f24
+ fnegd %f24,%f24
+ fmuld %f20,%f24,%f20
+ st %f20,[%i3]
+3:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop1
+! delay slot
+ st %f21,[%i3+4]
+ ld [%i1],%l2 ! get next argument
+ ld [%i1],%f20
+ ba,pt %icc,.loop2
+! delay slot
+ ld [%i1+4],%f21
+
+ SET_SIZE(__vlog)
+