summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles L. Dorian <cldorian@gmail.com>2010-06-30 14:44:27 -0700
committerCharles L. Dorian <cldorian@gmail.com>2010-06-30 14:44:27 -0700
commit16234d0566433f78d7c8fee2f6200685f5250a9b (patch)
tree140c9176dc8c481df69baec02f0e891e210da311
parentd9d5009aac5a61a08fd9e2125321197e4cf55976 (diff)
downloadgolang-16234d0566433f78d7c8fee2f6200685f5250a9b.tar.gz
math: amd64 versions of exp and fabs
Benchmark of exp to 28 ns/op from 64 ns/op, on 2.53GHz Intel Core 2 Duo. R=rsc CC=golang-dev http://codereview.appspot.com/1594041 Committer: Russ Cox <rsc@golang.org>
-rw-r--r--src/pkg/math/Makefile3
-rw-r--r--src/pkg/math/all_test.go18
-rw-r--r--src/pkg/math/exp.go5
-rw-r--r--src/pkg/math/exp2.go10
-rw-r--r--src/pkg/math/exp_amd64.s104
-rw-r--r--src/pkg/math/fabs_amd64.s12
6 files changed, 141 insertions, 11 deletions
diff --git a/src/pkg/math/Makefile b/src/pkg/math/Makefile
index 1447fc11d..a2d11e43d 100644
--- a/src/pkg/math/Makefile
+++ b/src/pkg/math/Makefile
@@ -7,6 +7,8 @@ include ../../Make.$(GOARCH)
TARG=math
OFILES_amd64=\
+ exp_amd64.$O\
+ fabs_amd64.$O\
fdim_amd64.$O\
sqrt_amd64.$O\
@@ -48,6 +50,7 @@ ALLGOFILES=\
copysign.go\
erf.go\
exp.go\
+ exp2.go\
expm1.go\
fabs.go\
fdim.go\
diff --git a/src/pkg/math/all_test.go b/src/pkg/math/all_test.go
index 9a4801b00..18a3f1b31 100644
--- a/src/pkg/math/all_test.go
+++ b/src/pkg/math/all_test.go
@@ -2343,9 +2343,9 @@ func BenchmarkExp2(b *testing.B) {
}
}
-func BenchmarkFloor(b *testing.B) {
+func BenchmarkFabs(b *testing.B) {
for i := 0; i < b.N; i++ {
- Floor(.5)
+ Fabs(.5)
}
}
@@ -2355,6 +2355,12 @@ func BenchmarkFdim(b *testing.B) {
}
}
+func BenchmarkFloor(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Floor(.5)
+ }
+}
+
func BenchmarkFmax(b *testing.B) {
for i := 0; i < b.N; i++ {
Fmax(10, 3)
@@ -2445,15 +2451,15 @@ func BenchmarkLogb(b *testing.B) {
}
}
-func BenchmarkLog10(b *testing.B) {
+func BenchmarkLog1p(b *testing.B) {
for i := 0; i < b.N; i++ {
- Log10(.5)
+ Log1p(.5)
}
}
-func BenchmarkLog1p(b *testing.B) {
+func BenchmarkLog10(b *testing.B) {
for i := 0; i < b.N; i++ {
- Log1p(.5)
+ Log10(.5)
}
}
diff --git a/src/pkg/math/exp.go b/src/pkg/math/exp.go
index b801d58ac..90409c341 100644
--- a/src/pkg/math/exp.go
+++ b/src/pkg/math/exp.go
@@ -139,8 +139,3 @@ func Exp(x float64) float64 {
// TODO(rsc): make sure Ldexp can handle boundary k
return Ldexp(y, k)
}
-
-// Exp2 returns 2**x, the base-2 exponential of x.
-//
-// Special cases are the same as Exp.
-func Exp2(x float64) float64 { return Exp(x * Ln2) }
diff --git a/src/pkg/math/exp2.go b/src/pkg/math/exp2.go
new file mode 100644
index 000000000..1e67f29eb
--- /dev/null
+++ b/src/pkg/math/exp2.go
@@ -0,0 +1,10 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+// Exp2 returns 2**x, the base-2 exponential of x.
+//
+// Special cases are the same as Exp.
+func Exp2(x float64) float64 { return Exp(x * Ln2) }
diff --git a/src/pkg/math/exp_amd64.s b/src/pkg/math/exp_amd64.s
new file mode 100644
index 000000000..844b5c923
--- /dev/null
+++ b/src/pkg/math/exp_amd64.s
@@ -0,0 +1,104 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The method is based on a paper by Naoki Shibata: "Efficient evaluation
+// methods of elementary functions suitable for SIMD computation", Proc.
+// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32
+// (May 2010). The paper is available at
+// http://www.springerlink.com/content/340228x165742104/
+//
+// The original code and the constants below are from the author's
+// implementation available at http://freshmeat.net/projects/sleef.
+// The README file says, "The software is in public domain.
+// You can use the software without any obligation."
+//
+// This code is a simplified version of the original.
+
+#define LN2 0.6931471805599453094172321214581766 // log_e(2)
+#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2
+#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2
+#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2
+
+// func Exp(x float64) float64
+TEXT ·Exp(SB),7,$0
+// test bits for not-finite
+ MOVQ x+0(FP), AX
+ MOVQ $0x7ff0000000000000, BX
+ ANDQ BX, AX
+ CMPQ BX, AX
+ JEQ not_finite
+ MOVSD x+0(FP), X0
+ MOVSD $LOG2E, X1
+ MULSD X0, X1
+ CVTTSD2SQ X1, BX // BX = exponent
+ CVTSQ2SD BX, X1
+ MOVSD $LN2U, X2
+ MULSD X1, X2
+ SUBSD X2, X0
+ MOVSD $LN2L, X2
+ MULSD X1, X2
+ SUBSD X2, X0
+ // reduce argument
+ MOVSD $0.0625, X1
+ MULSD X1, X0
+ // Taylor series evaluation
+ MOVSD $2.4801587301587301587e-5, X1
+ MULSD X0, X1
+ MOVSD $1.9841269841269841270e-4, X2
+ ADDSD X2, X1
+ MULSD X0, X1
+ MOVSD $1.3888888888888888889e-3, X2
+ ADDSD X2, X1
+ MULSD X0, X1
+ MOVSD $8.3333333333333333333e-3, X2
+ ADDSD X2, X1
+ MULSD X0, X1
+ MOVSD $4.1666666666666666667e-2, X2
+ ADDSD X2, X1
+ MULSD X0, X1
+ MOVSD $1.6666666666666666667e-1, X2
+ ADDSD X2, X1
+ MULSD X0, X1
+ MOVSD $0.5, X2
+ ADDSD X2, X1
+ MULSD X0, X1
+ MOVSD $1.0, X2
+ ADDSD X2, X1
+ MULSD X1, X0
+ MOVSD $2.0, X1
+ ADDSD X0, X1
+ MULSD X1, X0
+ MOVSD $2.0, X1
+ ADDSD X0, X1
+ MULSD X1, X0
+ MOVSD $2.0, X1
+ ADDSD X0, X1
+ MULSD X1, X0
+ MOVSD $2.0, X1
+ ADDSD X0, X1
+ MULSD X1, X0
+ MOVSD $1.0, X1
+ ADDSD X1, X0
+ // return ldexp(fr, exp)
+ MOVQ $0x3ff, AX // bias + 1
+ ADDQ AX, BX
+ MOVQ BX, X1
+ MOVQ $52, AX // shift
+ MOVQ AX, X2
+ PSLLQ X2, X1
+ MULSD X1, X0
+ MOVSD X0, r+8(FP)
+ RET
+not_finite:
+// test bits for -Inf
+ MOVQ x+0(FP), AX
+ MOVQ $0xfff0000000000000, BX
+ CMPQ BX, AX
+ JNE not_neginf
+ XORQ AX, AX
+ MOVQ AX, r+8(FP)
+ RET
+not_neginf:
+ MOVQ AX, r+8(FP)
+ RET
diff --git a/src/pkg/math/fabs_amd64.s b/src/pkg/math/fabs_amd64.s
new file mode 100644
index 000000000..8a9aedbd7
--- /dev/null
+++ b/src/pkg/math/fabs_amd64.s
@@ -0,0 +1,12 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// func Fabs(x float64) float64
+TEXT ·Fabs(SB),7,$0
+ MOVQ $(1<<63), BX
+ MOVQ BX, X0 // movsd $(-0.0), x0
+ MOVSD x+0(FP), X1
+ ANDNPD X1, X0
+ MOVSD X0, r+8(FP)
+ RET