diff options
author | Charles L. Dorian <cldorian@gmail.com> | 2010-06-30 14:44:27 -0700 |
---|---|---|
committer | Charles L. Dorian <cldorian@gmail.com> | 2010-06-30 14:44:27 -0700 |
commit | 16234d0566433f78d7c8fee2f6200685f5250a9b (patch) | |
tree | 140c9176dc8c481df69baec02f0e891e210da311 | |
parent | d9d5009aac5a61a08fd9e2125321197e4cf55976 (diff) | |
download | golang-16234d0566433f78d7c8fee2f6200685f5250a9b.tar.gz |
math: amd64 versions of exp and fabs
Benchmark of exp to 28 ns/op from 64 ns/op,
on 2.53GHz Intel Core 2 Duo.
R=rsc
CC=golang-dev
http://codereview.appspot.com/1594041
Committer: Russ Cox <rsc@golang.org>
-rw-r--r-- | src/pkg/math/Makefile | 3 | ||||
-rw-r--r-- | src/pkg/math/all_test.go | 18 | ||||
-rw-r--r-- | src/pkg/math/exp.go | 5 | ||||
-rw-r--r-- | src/pkg/math/exp2.go | 10 | ||||
-rw-r--r-- | src/pkg/math/exp_amd64.s | 104 | ||||
-rw-r--r-- | src/pkg/math/fabs_amd64.s | 12 |
6 files changed, 141 insertions, 11 deletions
diff --git a/src/pkg/math/Makefile b/src/pkg/math/Makefile index 1447fc11d..a2d11e43d 100644 --- a/src/pkg/math/Makefile +++ b/src/pkg/math/Makefile @@ -7,6 +7,8 @@ include ../../Make.$(GOARCH) TARG=math OFILES_amd64=\ + exp_amd64.$O\ + fabs_amd64.$O\ fdim_amd64.$O\ sqrt_amd64.$O\ @@ -48,6 +50,7 @@ ALLGOFILES=\ copysign.go\ erf.go\ exp.go\ + exp2.go\ expm1.go\ fabs.go\ fdim.go\ diff --git a/src/pkg/math/all_test.go b/src/pkg/math/all_test.go index 9a4801b00..18a3f1b31 100644 --- a/src/pkg/math/all_test.go +++ b/src/pkg/math/all_test.go @@ -2343,9 +2343,9 @@ func BenchmarkExp2(b *testing.B) { } } -func BenchmarkFloor(b *testing.B) { +func BenchmarkFabs(b *testing.B) { for i := 0; i < b.N; i++ { - Floor(.5) + Fabs(.5) } } @@ -2355,6 +2355,12 @@ func BenchmarkFdim(b *testing.B) { } } +func BenchmarkFloor(b *testing.B) { + for i := 0; i < b.N; i++ { + Floor(.5) + } +} + func BenchmarkFmax(b *testing.B) { for i := 0; i < b.N; i++ { Fmax(10, 3) @@ -2445,15 +2451,15 @@ func BenchmarkLogb(b *testing.B) { } } -func BenchmarkLog10(b *testing.B) { +func BenchmarkLog1p(b *testing.B) { for i := 0; i < b.N; i++ { - Log10(.5) + Log1p(.5) } } -func BenchmarkLog1p(b *testing.B) { +func BenchmarkLog10(b *testing.B) { for i := 0; i < b.N; i++ { - Log1p(.5) + Log10(.5) } } diff --git a/src/pkg/math/exp.go b/src/pkg/math/exp.go index b801d58ac..90409c341 100644 --- a/src/pkg/math/exp.go +++ b/src/pkg/math/exp.go @@ -139,8 +139,3 @@ func Exp(x float64) float64 { // TODO(rsc): make sure Ldexp can handle boundary k return Ldexp(y, k) } - -// Exp2 returns 2**x, the base-2 exponential of x. -// -// Special cases are the same as Exp. -func Exp2(x float64) float64 { return Exp(x * Ln2) } diff --git a/src/pkg/math/exp2.go b/src/pkg/math/exp2.go new file mode 100644 index 000000000..1e67f29eb --- /dev/null +++ b/src/pkg/math/exp2.go @@ -0,0 +1,10 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Exp2 returns 2**x, the base-2 exponential of x. +// +// Special cases are the same as Exp. +func Exp2(x float64) float64 { return Exp(x * Ln2) } diff --git a/src/pkg/math/exp_amd64.s b/src/pkg/math/exp_amd64.s new file mode 100644 index 000000000..844b5c923 --- /dev/null +++ b/src/pkg/math/exp_amd64.s @@ -0,0 +1,104 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The method is based on a paper by Naoki Shibata: "Efficient evaluation +// methods of elementary functions suitable for SIMD computation", Proc. +// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32 +// (May 2010). The paper is available at +// http://www.springerlink.com/content/340228x165742104/ +// +// The original code and the constants below are from the author's +// implementation available at http://freshmeat.net/projects/sleef. +// The README file says, "The software is in public domain. +// You can use the software without any obligation." +// +// This code is a simplified version of the original. + +#define LN2 0.6931471805599453094172321214581766 // log_e(2) +#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2 +#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2 +#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2 + +// func Exp(x float64) float64 +TEXT ·Exp(SB),7,$0 +// test bits for not-finite + MOVQ x+0(FP), AX + MOVQ $0x7ff0000000000000, BX + ANDQ BX, AX + CMPQ BX, AX + JEQ not_finite + MOVSD x+0(FP), X0 + MOVSD $LOG2E, X1 + MULSD X0, X1 + CVTTSD2SQ X1, BX // BX = exponent + CVTSQ2SD BX, X1 + MOVSD $LN2U, X2 + MULSD X1, X2 + SUBSD X2, X0 + MOVSD $LN2L, X2 + MULSD X1, X2 + SUBSD X2, X0 + // reduce argument + MOVSD $0.0625, X1 + MULSD X1, X0 + // Taylor series evaluation + MOVSD $2.4801587301587301587e-5, X1 + MULSD X0, X1 + MOVSD $1.9841269841269841270e-4, X2 + ADDSD X2, X1 + MULSD X0, X1 + MOVSD $1.3888888888888888889e-3, X2 + ADDSD X2, X1 + MULSD X0, X1 + MOVSD $8.3333333333333333333e-3, X2 + ADDSD X2, X1 + MULSD X0, X1 + MOVSD $4.1666666666666666667e-2, X2 + ADDSD X2, X1 + MULSD X0, X1 + MOVSD $1.6666666666666666667e-1, X2 + ADDSD X2, X1 + MULSD X0, X1 + MOVSD $0.5, X2 + ADDSD X2, X1 + MULSD X0, X1 + MOVSD $1.0, X2 + ADDSD X2, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $1.0, X1 + ADDSD X1, X0 + // return ldexp(fr, exp) + MOVQ $0x3ff, AX // bias + 1 + ADDQ AX, BX + MOVQ BX, X1 + MOVQ $52, AX // shift + MOVQ AX, X2 + PSLLQ X2, X1 + MULSD X1, X0 + MOVSD X0, r+8(FP) + RET +not_finite: +// test bits for -Inf + MOVQ x+0(FP), AX + MOVQ $0xfff0000000000000, BX + CMPQ BX, AX + JNE not_neginf + XORQ AX, AX + MOVQ AX, r+8(FP) + RET +not_neginf: + MOVQ AX, r+8(FP) + RET diff --git a/src/pkg/math/fabs_amd64.s b/src/pkg/math/fabs_amd64.s new file mode 100644 index 000000000..8a9aedbd7 --- /dev/null +++ b/src/pkg/math/fabs_amd64.s @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// func Fabs(x float64) float64 +TEXT ·Fabs(SB),7,$0 + MOVQ $(1<<63), BX + MOVQ BX, X0 // movsd $(-0.0), x0 + MOVSD x+0(FP), X1 + ANDNPD X1, X0 + MOVSD X0, r+8(FP) + RET |