summaryrefslogtreecommitdiff
path: root/src/pkg/math
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/math')
-rw-r--r--src/pkg/math/abs_arm.s7
-rw-r--r--src/pkg/math/acosh.go2
-rw-r--r--src/pkg/math/all_test.go22
-rw-r--r--src/pkg/math/asinh.go2
-rw-r--r--src/pkg/math/atan.go103
-rw-r--r--src/pkg/math/atanh.go2
-rw-r--r--src/pkg/math/big/arith_386.s18
-rw-r--r--src/pkg/math/big/arith_amd64.s331
-rw-r--r--src/pkg/math/big/arith_arm.s18
-rw-r--r--src/pkg/math/big/arith_test.go86
-rw-r--r--src/pkg/math/big/calibrate_test.go42
-rw-r--r--src/pkg/math/big/gcd_test.go47
-rw-r--r--src/pkg/math/big/int.go124
-rw-r--r--src/pkg/math/big/int_test.go205
-rw-r--r--src/pkg/math/big/nat.go309
-rw-r--r--src/pkg/math/big/nat_test.go135
-rw-r--r--src/pkg/math/big/rat.go269
-rw-r--r--src/pkg/math/big/rat_test.go499
-rw-r--r--src/pkg/math/bits.go2
-rw-r--r--src/pkg/math/cbrt.go2
-rw-r--r--src/pkg/math/copysign.go2
-rw-r--r--src/pkg/math/dim_amd64.s2
-rw-r--r--src/pkg/math/erf.go4
-rw-r--r--src/pkg/math/floor_amd64.s70
-rw-r--r--src/pkg/math/frexp_386.s12
-rw-r--r--src/pkg/math/gamma.go27
-rw-r--r--src/pkg/math/hypot.go2
-rw-r--r--src/pkg/math/hypot_386.s46
-rw-r--r--src/pkg/math/hypot_amd64.s14
-rw-r--r--src/pkg/math/ldexp_386.s6
-rw-r--r--src/pkg/math/log10.go3
-rw-r--r--src/pkg/math/log_amd64.s8
-rw-r--r--src/pkg/math/logb.go4
-rw-r--r--src/pkg/math/modf_386.s14
-rw-r--r--src/pkg/math/rand/example_test.go69
-rw-r--r--src/pkg/math/rand/rand_test.go13
-rw-r--r--src/pkg/math/remainder.go2
-rw-r--r--src/pkg/math/sincos.go2
-rw-r--r--src/pkg/math/sincos_386.s8
-rw-r--r--src/pkg/math/sincos_amd64.s2
-rw-r--r--src/pkg/math/tanh.go89
41 files changed, 2063 insertions, 561 deletions
diff --git a/src/pkg/math/abs_arm.s b/src/pkg/math/abs_arm.s
index 23f6a2a2d..37a1459fe 100644
--- a/src/pkg/math/abs_arm.s
+++ b/src/pkg/math/abs_arm.s
@@ -3,4 +3,9 @@
// license that can be found in the LICENSE file.
TEXT ·Abs(SB),7,$0
- B ·abs(SB)
+ MOVW x+0(FP), R0
+ MOVW x+4(FP), R1
+ AND $((1<<31)-1), R1
+ MOVW R0, r+8(FP)
+ MOVW R1, r+12(FP)
+ RET
diff --git a/src/pkg/math/acosh.go b/src/pkg/math/acosh.go
index c6c8645e1..e394008b0 100644
--- a/src/pkg/math/acosh.go
+++ b/src/pkg/math/acosh.go
@@ -33,7 +33,7 @@ package math
// acosh(NaN) is NaN without signal.
//
-// Acosh(x) calculates the inverse hyperbolic cosine of x.
+// Acosh returns the inverse hyperbolic cosine of x.
//
// Special cases are:
// Acosh(+Inf) = +Inf
diff --git a/src/pkg/math/all_test.go b/src/pkg/math/all_test.go
index ed66a42fb..0d8b10f67 100644
--- a/src/pkg/math/all_test.go
+++ b/src/pkg/math/all_test.go
@@ -1128,11 +1128,11 @@ var vfgammaSC = []float64{
NaN(),
}
var gammaSC = []float64{
+ NaN(),
+ NaN(),
Inf(-1),
Inf(1),
Inf(1),
- Inf(1),
- Inf(1),
NaN(),
}
@@ -1693,6 +1693,17 @@ func alike(a, b float64) bool {
return false
}
+func TestNaN(t *testing.T) {
+ f64 := NaN()
+ if f64 == f64 {
+ t.Fatalf("NaN() returns %g, expected NaN", f64)
+ }
+ f32 := float32(f64)
+ if f32 == f32 {
+ t.Fatalf("float32(NaN()) is %g, expected NaN", f32)
+ }
+}
+
func TestAcos(t *testing.T) {
for i := 0; i < len(vf); i++ {
a := vf[i] / 10
@@ -2270,6 +2281,13 @@ func TestLog2(t *testing.T) {
t.Errorf("Log2(%g) = %g, want %g", vflogSC[i], f, logSC[i])
}
}
+ for i := -1074; i <= 1023; i++ {
+ f := Ldexp(1, i)
+ l := Log2(f)
+ if l != float64(i) {
+ t.Errorf("Log2(2**%d) = %g, want %d", i, l, i)
+ }
+ }
}
func TestModf(t *testing.T) {
diff --git a/src/pkg/math/asinh.go b/src/pkg/math/asinh.go
index 0defbb9be..ff2de0215 100644
--- a/src/pkg/math/asinh.go
+++ b/src/pkg/math/asinh.go
@@ -30,7 +30,7 @@ package math
// := sign(x)*log1p(|x| + x**2/(1 + sqrt(1+x**2)))
//
-// Asinh(x) calculates the inverse hyperbolic sine of x.
+// Asinh returns the inverse hyperbolic sine of x.
//
// Special cases are:
// Asinh(±0) = ±0
diff --git a/src/pkg/math/atan.go b/src/pkg/math/atan.go
index d424a2be4..c107d388d 100644
--- a/src/pkg/math/atan.go
+++ b/src/pkg/math/atan.go
@@ -6,51 +6,92 @@ package math
/*
Floating-point arctangent.
-
- Atan returns the value of the arctangent of its
- argument in the range [-pi/2,pi/2].
- There are no error returns.
- Coefficients are #5077 from Hart & Cheney. (19.56D)
*/
-// xatan evaluates a series valid in the
-// range [-0.414...,+0.414...]. (tan(pi/8))
-func xatan(arg float64) float64 {
+// The original C code, the long comment, and the constants below were
+// from http://netlib.sandia.gov/cephes/cmath/atan.c, available from
+// http://www.netlib.org/cephes/cmath.tgz.
+// The go code is a version of the original C.
+//
+// atan.c
+// Inverse circular tangent (arctangent)
+//
+// SYNOPSIS:
+// double x, y, atan();
+// y = atan( x );
+//
+// DESCRIPTION:
+// Returns radian angle between -pi/2 and +pi/2 whose tangent is x.
+//
+// Range reduction is from three intervals into the interval from zero to 0.66.
+// The approximant uses a rational function of degree 4/5 of the form
+// x + x**3 P(x)/Q(x).
+//
+// ACCURACY:
+// Relative error:
+// arithmetic domain # trials peak rms
+// DEC -10, 10 50000 2.4e-17 8.3e-18
+// IEEE -10, 10 10^6 1.8e-16 5.0e-17
+//
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+// Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+// The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+// Stephen L. Moshier
+// moshier@na-net.ornl.gov
+
+// xatan evaluates a series valid in the range [0, 0.66].
+func xatan(x float64) float64 {
const (
- P4 = .161536412982230228262e2
- P3 = .26842548195503973794141e3
- P2 = .11530293515404850115428136e4
- P1 = .178040631643319697105464587e4
- P0 = .89678597403663861959987488e3
- Q4 = .5895697050844462222791e2
- Q3 = .536265374031215315104235e3
- Q2 = .16667838148816337184521798e4
- Q1 = .207933497444540981287275926e4
- Q0 = .89678597403663861962481162e3
+ P0 = -8.750608600031904122785e-01
+ P1 = -1.615753718733365076637e+01
+ P2 = -7.500855792314704667340e+01
+ P3 = -1.228866684490136173410e+02
+ P4 = -6.485021904942025371773e+01
+ Q0 = +2.485846490142306297962e+01
+ Q1 = +1.650270098316988542046e+02
+ Q2 = +4.328810604912902668951e+02
+ Q3 = +4.853903996359136964868e+02
+ Q4 = +1.945506571482613964425e+02
)
- sq := arg * arg
- value := ((((P4*sq+P3)*sq+P2)*sq+P1)*sq + P0)
- value = value / (((((sq+Q4)*sq+Q3)*sq+Q2)*sq+Q1)*sq + Q0)
- return value * arg
+ z := x * x
+ z = z * ((((P0*z+P1)*z+P2)*z+P3)*z + P4) / (((((z+Q0)*z+Q1)*z+Q2)*z+Q3)*z + Q4)
+ z = x*z + x
+ return z
}
// satan reduces its argument (known to be positive)
-// to the range [0,0.414...] and calls xatan.
-func satan(arg float64) float64 {
- if arg < Sqrt2-1 {
- return xatan(arg)
+// to the range [0, 0.66] and calls xatan.
+func satan(x float64) float64 {
+ const (
+ Morebits = 6.123233995736765886130e-17 // pi/2 = PIO2 + Morebits
+ Tan3pio8 = 2.41421356237309504880 // tan(3*pi/8)
+ )
+ if x <= 0.66 {
+ return xatan(x)
}
- if arg > Sqrt2+1 {
- return Pi/2 - xatan(1/arg)
+ if x > Tan3pio8 {
+ return Pi/2 - xatan(1/x) + Morebits
}
- return Pi/4 + xatan((arg-1)/(arg+1))
+ return Pi/4 + xatan((x-1)/(x+1)) + 0.5*Morebits
}
// Atan returns the arctangent of x.
//
// Special cases are:
-// Atan(±0) = ±0
-// Atan(±Inf) = ±Pi/2
+// Atan(±0) = ±0
+// Atan(±Inf) = ±Pi/2
func Atan(x float64) float64
func atan(x float64) float64 {
diff --git a/src/pkg/math/atanh.go b/src/pkg/math/atanh.go
index 5b5d46855..113d5c103 100644
--- a/src/pkg/math/atanh.go
+++ b/src/pkg/math/atanh.go
@@ -36,7 +36,7 @@ package math
// atanh(+-1) is +-INF with signal.
//
-// Atanh(x) calculates the inverse hyperbolic tangent of x.
+// Atanh returns the inverse hyperbolic tangent of x.
//
// Special cases are:
// Atanh(1) = +Inf
diff --git a/src/pkg/math/big/arith_386.s b/src/pkg/math/big/arith_386.s
index f1262c651..c62483317 100644
--- a/src/pkg/math/big/arith_386.s
+++ b/src/pkg/math/big/arith_386.s
@@ -29,7 +29,7 @@ TEXT ·addVV(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), CX
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
MOVL $0, DX // c = 0
JMP E1
@@ -54,7 +54,7 @@ TEXT ·subVV(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), CX
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
MOVL $0, DX // c = 0
JMP E2
@@ -78,7 +78,7 @@ TEXT ·addVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), AX // c = y
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
JMP E3
@@ -100,7 +100,7 @@ TEXT ·subVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), AX // c = y
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
MOVL $0, BX // i = 0
JMP E4
@@ -120,7 +120,7 @@ E4: CMPL BX, BP // i < n
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),7,$0
- MOVL n+4(FP), BX // i = n
+ MOVL z+4(FP), BX // i = z
SUBL $1, BX // i--
JL X8b // i < 0 (n <= 0)
@@ -155,7 +155,7 @@ X8b: MOVL $0, c+28(FP)
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),7,$0
- MOVL n+4(FP), BP
+ MOVL z+4(FP), BP
SUBL $1, BP // n--
JL X9b // n < 0 (n <= 0)
@@ -196,7 +196,7 @@ TEXT ·mulAddVWW(SB),7,$0
MOVL x+12(FP), SI
MOVL y+24(FP), BP
MOVL r+28(FP), CX // c = r
- MOVL n+4(FP), BX
+ MOVL z+4(FP), BX
LEAL (DI)(BX*4), DI
LEAL (SI)(BX*4), SI
NEGL BX // i = -n
@@ -222,7 +222,7 @@ TEXT ·addMulVVW(SB),7,$0
MOVL z+0(FP), DI
MOVL x+12(FP), SI
MOVL y+24(FP), BP
- MOVL n+4(FP), BX
+ MOVL z+4(FP), BX
LEAL (DI)(BX*4), DI
LEAL (SI)(BX*4), SI
NEGL BX // i = -n
@@ -251,7 +251,7 @@ TEXT ·divWVW(SB),7,$0
MOVL xn+12(FP), DX // r = xn
MOVL x+16(FP), SI
MOVL y+28(FP), CX
- MOVL n+4(FP), BX // i = n
+ MOVL z+4(FP), BX // i = z
JMP E7
L7: MOVL (SI)(BX*4), AX
diff --git a/src/pkg/math/big/arith_amd64.s b/src/pkg/math/big/arith_amd64.s
index 54f647322..d85964502 100644
--- a/src/pkg/math/big/arith_amd64.s
+++ b/src/pkg/math/big/arith_amd64.s
@@ -5,7 +5,15 @@
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
-// TODO(gri) - experiment with unrolled loops for faster execution
+// Literal instruction for MOVQ $0, CX.
+// (MOVQ $0, reg is translated to XORQ reg, reg and clears CF.)
+#define ZERO_CX BYTE $0x48; \
+ BYTE $0xc7; \
+ BYTE $0xc1; \
+ BYTE $0x00; \
+ BYTE $0x00; \
+ BYTE $0x00; \
+ BYTE $0x00
// func mulWW(x, y Word) (z1, z0 Word)
TEXT ·mulWW(SB),7,$0
@@ -28,114 +36,231 @@ TEXT ·divWW(SB),7,$0
// func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),7,$0
+ MOVQ z+8(FP), DI
+ MOVQ x+24(FP), R8
+ MOVQ y+48(FP), R9
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVQ y+32(FP), R9
- MOVL n+8(FP), R11
- MOVQ $0, BX // i = 0
- MOVQ $0, DX // c = 0
- JMP E1
-
-L1: MOVQ (R8)(BX*8), AX
- RCRQ $1, DX
- ADCQ (R9)(BX*8), AX
- RCLQ $1, DX
- MOVQ AX, (R10)(BX*8)
- ADDL $1, BX // i++
-E1: CMPQ BX, R11 // i < n
- JL L1
-
- MOVQ DX, c+48(FP)
+ MOVQ $0, CX // c = 0
+ MOVQ $0, SI // i = 0
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUBQ $4, DI // n -= 4
+ JL V1 // if n < 0 goto V1
+
+U1: // n >= 0
+ // regular loop body unrolled 4x
+ RCRQ $1, CX // CF = c
+ MOVQ 0(R8)(SI*8), R11
+ MOVQ 8(R8)(SI*8), R12
+ MOVQ 16(R8)(SI*8), R13
+ MOVQ 24(R8)(SI*8), R14
+ ADCQ 0(R9)(SI*8), R11
+ ADCQ 8(R9)(SI*8), R12
+ ADCQ 16(R9)(SI*8), R13
+ ADCQ 24(R9)(SI*8), R14
+ MOVQ R11, 0(R10)(SI*8)
+ MOVQ R12, 8(R10)(SI*8)
+ MOVQ R13, 16(R10)(SI*8)
+ MOVQ R14, 24(R10)(SI*8)
+ RCLQ $1, CX // c = CF
+
+ ADDQ $4, SI // i += 4
+ SUBQ $4, DI // n -= 4
+ JGE U1 // if n >= 0 goto U1
+
+V1: ADDQ $4, DI // n += 4
+ JLE E1 // if n <= 0 goto E1
+
+L1: // n > 0
+ RCRQ $1, CX // CF = c
+ MOVQ 0(R8)(SI*8), R11
+ ADCQ 0(R9)(SI*8), R11
+ MOVQ R11, 0(R10)(SI*8)
+ RCLQ $1, CX // c = CF
+
+ ADDQ $1, SI // i++
+ SUBQ $1, DI // n--
+ JG L1 // if n > 0 goto L1
+
+E1: MOVQ CX, c+72(FP) // return c
RET
// func subVV(z, x, y []Word) (c Word)
-// (same as addVV_s except for SBBQ instead of ADCQ and label names)
+// (same as addVV except for SBBQ instead of ADCQ and label names)
TEXT ·subVV(SB),7,$0
+ MOVQ z+8(FP), DI
+ MOVQ x+24(FP), R8
+ MOVQ y+48(FP), R9
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVQ y+32(FP), R9
- MOVL n+8(FP), R11
- MOVQ $0, BX // i = 0
- MOVQ $0, DX // c = 0
- JMP E2
-
-L2: MOVQ (R8)(BX*8), AX
- RCRQ $1, DX
- SBBQ (R9)(BX*8), AX
- RCLQ $1, DX
- MOVQ AX, (R10)(BX*8)
- ADDL $1, BX // i++
-
-E2: CMPQ BX, R11 // i < n
- JL L2
- MOVQ DX, c+48(FP)
+ MOVQ $0, CX // c = 0
+ MOVQ $0, SI // i = 0
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUBQ $4, DI // n -= 4
+ JL V2 // if n < 0 goto V2
+
+U2: // n >= 0
+ // regular loop body unrolled 4x
+ RCRQ $1, CX // CF = c
+ MOVQ 0(R8)(SI*8), R11
+ MOVQ 8(R8)(SI*8), R12
+ MOVQ 16(R8)(SI*8), R13
+ MOVQ 24(R8)(SI*8), R14
+ SBBQ 0(R9)(SI*8), R11
+ SBBQ 8(R9)(SI*8), R12
+ SBBQ 16(R9)(SI*8), R13
+ SBBQ 24(R9)(SI*8), R14
+ MOVQ R11, 0(R10)(SI*8)
+ MOVQ R12, 8(R10)(SI*8)
+ MOVQ R13, 16(R10)(SI*8)
+ MOVQ R14, 24(R10)(SI*8)
+ RCLQ $1, CX // c = CF
+
+ ADDQ $4, SI // i += 4
+ SUBQ $4, DI // n -= 4
+ JGE U2 // if n >= 0 goto U2
+
+V2: ADDQ $4, DI // n += 4
+ JLE E2 // if n <= 0 goto E2
+
+L2: // n > 0
+ RCRQ $1, CX // CF = c
+ MOVQ 0(R8)(SI*8), R11
+ SBBQ 0(R9)(SI*8), R11
+ MOVQ R11, 0(R10)(SI*8)
+ RCLQ $1, CX // c = CF
+
+ ADDQ $1, SI // i++
+ SUBQ $1, DI // n--
+ JG L2 // if n > 0 goto L2
+
+E2: MOVQ CX, c+72(FP) // return c
RET
// func addVW(z, x []Word, y Word) (c Word)
TEXT ·addVW(SB),7,$0
+ MOVQ z+8(FP), DI
+ MOVQ x+24(FP), R8
+ MOVQ y+48(FP), CX // c = y
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVQ y+32(FP), AX // c = y
- MOVL n+8(FP), R11
- MOVQ $0, BX // i = 0
- JMP E3
-L3: ADDQ (R8)(BX*8), AX
- MOVQ AX, (R10)(BX*8)
- RCLQ $1, AX
- ANDQ $1, AX
- ADDL $1, BX // i++
-
-E3: CMPQ BX, R11 // i < n
- JL L3
-
- MOVQ AX, c+40(FP)
+ MOVQ $0, SI // i = 0
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUBQ $4, DI // n -= 4
+ JL V3 // if n < 4 goto V3
+
+U3: // n >= 0
+ // regular loop body unrolled 4x
+ MOVQ 0(R8)(SI*8), R11
+ MOVQ 8(R8)(SI*8), R12
+ MOVQ 16(R8)(SI*8), R13
+ MOVQ 24(R8)(SI*8), R14
+ ADDQ CX, R11
+ ZERO_CX
+ ADCQ $0, R12
+ ADCQ $0, R13
+ ADCQ $0, R14
+ SETCS CX // c = CF
+ MOVQ R11, 0(R10)(SI*8)
+ MOVQ R12, 8(R10)(SI*8)
+ MOVQ R13, 16(R10)(SI*8)
+ MOVQ R14, 24(R10)(SI*8)
+
+ ADDQ $4, SI // i += 4
+ SUBQ $4, DI // n -= 4
+ JGE U3 // if n >= 0 goto U3
+
+V3: ADDQ $4, DI // n += 4
+ JLE E3 // if n <= 0 goto E3
+
+L3: // n > 0
+ ADDQ 0(R8)(SI*8), CX
+ MOVQ CX, 0(R10)(SI*8)
+ ZERO_CX
+ RCLQ $1, CX // c = CF
+
+ ADDQ $1, SI // i++
+ SUBQ $1, DI // n--
+ JG L3 // if n > 0 goto L3
+
+E3: MOVQ CX, c+56(FP) // return c
RET
// func subVW(z, x []Word, y Word) (c Word)
+// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
TEXT ·subVW(SB),7,$0
+ MOVQ z+8(FP), DI
+ MOVQ x+24(FP), R8
+ MOVQ y+48(FP), CX // c = y
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVQ y+32(FP), AX // c = y
- MOVL n+8(FP), R11
- MOVQ $0, BX // i = 0
- JMP E4
-
-L4: MOVQ (R8)(BX*8), DX // TODO(gri) is there a reverse SUBQ?
- SUBQ AX, DX
- MOVQ DX, (R10)(BX*8)
- RCLQ $1, AX
- ANDQ $1, AX
- ADDL $1, BX // i++
-
-E4: CMPQ BX, R11 // i < n
- JL L4
-
- MOVQ AX, c+40(FP)
+
+ MOVQ $0, SI // i = 0
+
+ // s/JL/JMP/ below to disable the unrolled loop
+ SUBQ $4, DI // n -= 4
+ JL V4 // if n < 4 goto V4
+
+U4: // n >= 0
+ // regular loop body unrolled 4x
+ MOVQ 0(R8)(SI*8), R11
+ MOVQ 8(R8)(SI*8), R12
+ MOVQ 16(R8)(SI*8), R13
+ MOVQ 24(R8)(SI*8), R14
+ SUBQ CX, R11
+ ZERO_CX
+ SBBQ $0, R12
+ SBBQ $0, R13
+ SBBQ $0, R14
+ SETCS CX // c = CF
+ MOVQ R11, 0(R10)(SI*8)
+ MOVQ R12, 8(R10)(SI*8)
+ MOVQ R13, 16(R10)(SI*8)
+ MOVQ R14, 24(R10)(SI*8)
+
+ ADDQ $4, SI // i += 4
+ SUBQ $4, DI // n -= 4
+ JGE U4 // if n >= 0 goto U4
+
+V4: ADDQ $4, DI // n += 4
+ JLE E4 // if n <= 0 goto E4
+
+L4: // n > 0
+ MOVQ 0(R8)(SI*8), R11
+ SUBQ CX, R11
+ MOVQ R11, 0(R10)(SI*8)
+ ZERO_CX
+ RCLQ $1, CX // c = CF
+
+ ADDQ $1, SI // i++
+ SUBQ $1, DI // n--
+ JG L4 // if n > 0 goto L4
+
+E4: MOVQ CX, c+56(FP) // return c
RET
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),7,$0
- MOVL n+8(FP), BX // i = n
- SUBL $1, BX // i--
+ MOVQ z+8(FP), BX // i = z
+ SUBQ $1, BX // i--
JL X8b // i < 0 (n <= 0)
// n > 0
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVL s+32(FP), CX
+ MOVQ x+24(FP), R8
+ MOVQ s+48(FP), CX
MOVQ (R8)(BX*8), AX // w1 = x[n-1]
MOVQ $0, DX
SHLQ CX, DX:AX // w1>>ŝ
- MOVQ DX, c+40(FP)
+ MOVQ DX, c+56(FP)
- CMPL BX, $0
+ CMPQ BX, $0
JLE X8a // i <= 0
// i > 0
@@ -143,7 +268,7 @@ L8: MOVQ AX, DX // w = w1
MOVQ -8(R8)(BX*8), AX // w1 = x[i-1]
SHLQ CX, DX:AX // w<<s | w1>>ŝ
MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ
- SUBL $1, BX // i--
+ SUBQ $1, BX // i--
JG L8 // i > 0
// i <= 0
@@ -151,24 +276,24 @@ X8a: SHLQ CX, AX // w1<<s
MOVQ AX, (R10) // z[0] = w1<<s
RET
-X8b: MOVQ $0, c+40(FP)
+X8b: MOVQ $0, c+56(FP)
RET
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),7,$0
- MOVL n+8(FP), R11
- SUBL $1, R11 // n--
+ MOVQ z+8(FP), R11
+ SUBQ $1, R11 // n--
JL X9b // n < 0 (n <= 0)
// n > 0
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVL s+32(FP), CX
+ MOVQ x+24(FP), R8
+ MOVQ s+48(FP), CX
MOVQ (R8), AX // w1 = x[0]
MOVQ $0, DX
SHRQ CX, DX:AX // w1<<ŝ
- MOVQ DX, c+40(FP)
+ MOVQ DX, c+56(FP)
MOVQ $0, BX // i = 0
JMP E9
@@ -178,7 +303,7 @@ L9: MOVQ AX, DX // w = w1
MOVQ 8(R8)(BX*8), AX // w1 = x[i+1]
SHRQ CX, DX:AX // w>>s | w1<<ŝ
MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ
- ADDL $1, BX // i++
+ ADDQ $1, BX // i++
E9: CMPQ BX, R11
JL L9 // i < n-1
@@ -188,17 +313,17 @@ X9a: SHRQ CX, AX // w1>>s
MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s
RET
-X9b: MOVQ $0, c+40(FP)
+X9b: MOVQ $0, c+56(FP)
RET
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
TEXT ·mulAddVWW(SB),7,$0
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVQ y+32(FP), R9
- MOVQ r+40(FP), CX // c = r
- MOVL n+8(FP), R11
+ MOVQ x+24(FP), R8
+ MOVQ y+48(FP), R9
+ MOVQ r+56(FP), CX // c = r
+ MOVQ z+8(FP), R11
MOVQ $0, BX // i = 0
JMP E5
@@ -208,21 +333,21 @@ L5: MOVQ (R8)(BX*8), AX
ADCQ $0, DX
MOVQ AX, (R10)(BX*8)
MOVQ DX, CX
- ADDL $1, BX // i++
+ ADDQ $1, BX // i++
E5: CMPQ BX, R11 // i < n
JL L5
- MOVQ CX, c+48(FP)
+ MOVQ CX, c+64(FP)
RET
// func addMulVVW(z, x []Word, y Word) (c Word)
TEXT ·addMulVVW(SB),7,$0
MOVQ z+0(FP), R10
- MOVQ x+16(FP), R8
- MOVQ y+32(FP), R9
- MOVL n+8(FP), R11
+ MOVQ x+24(FP), R8
+ MOVQ y+48(FP), R9
+ MOVQ z+8(FP), R11
MOVQ $0, BX // i = 0
MOVQ $0, CX // c = 0
JMP E6
@@ -234,41 +359,41 @@ L6: MOVQ (R8)(BX*8), AX
ADDQ AX, (R10)(BX*8)
ADCQ $0, DX
MOVQ DX, CX
- ADDL $1, BX // i++
+ ADDQ $1, BX // i++
E6: CMPQ BX, R11 // i < n
JL L6
- MOVQ CX, c+40(FP)
+ MOVQ CX, c+56(FP)
RET
// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
TEXT ·divWVW(SB),7,$0
MOVQ z+0(FP), R10
- MOVQ xn+16(FP), DX // r = xn
- MOVQ x+24(FP), R8
- MOVQ y+40(FP), R9
- MOVL n+8(FP), BX // i = n
+ MOVQ xn+24(FP), DX // r = xn
+ MOVQ x+32(FP), R8
+ MOVQ y+56(FP), R9
+ MOVQ z+8(FP), BX // i = z
JMP E7
L7: MOVQ (R8)(BX*8), AX
DIVQ R9
MOVQ AX, (R10)(BX*8)
-E7: SUBL $1, BX // i--
+E7: SUBQ $1, BX // i--
JGE L7 // i >= 0
- MOVQ DX, r+48(FP)
+ MOVQ DX, r+64(FP)
RET
// func bitLen(x Word) (n int)
TEXT ·bitLen(SB),7,$0
BSRQ x+0(FP), AX
JZ Z1
- INCL AX
- MOVL AX, n+8(FP)
+ ADDQ $1, AX
+ MOVQ AX, n+8(FP)
RET
-Z1: MOVL $0, n+8(FP)
+Z1: MOVQ $0, n+8(FP)
RET
diff --git a/src/pkg/math/big/arith_arm.s b/src/pkg/math/big/arith_arm.s
index dbf3360b5..64610f915 100644
--- a/src/pkg/math/big/arith_arm.s
+++ b/src/pkg/math/big/arith_arm.s
@@ -13,7 +13,7 @@ TEXT ·addVV(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
B E1
@@ -41,7 +41,7 @@ TEXT ·subVV(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
B E2
@@ -68,7 +68,7 @@ TEXT ·addVW(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
CMP R1, R4
@@ -102,7 +102,7 @@ TEXT ·subVW(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R4
+ MOVW z+4(FP), R4
MOVW R4<<2, R4
ADD R1, R4
CMP R1, R4
@@ -134,7 +134,7 @@ E4:
// func shlVU(z, x []Word, s uint) (c Word)
TEXT ·shlVU(SB),7,$0
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
CMP $0, R5
BEQ X7
@@ -183,7 +183,7 @@ X7:
// func shrVU(z, x []Word, s uint) (c Word)
TEXT ·shrVU(SB),7,$0
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
CMP $0, R5
BEQ X6
@@ -238,7 +238,7 @@ TEXT ·mulAddVWW(SB),7,$0
MOVW x+12(FP), R2
MOVW y+24(FP), R3
MOVW r+28(FP), R4
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
MOVW R5<<2, R5
ADD R1, R5
B E8
@@ -265,7 +265,7 @@ TEXT ·addMulVVW(SB),7,$0
MOVW z+0(FP), R1
MOVW x+12(FP), R2
MOVW y+24(FP), R3
- MOVW n+4(FP), R5
+ MOVW z+4(FP), R5
MOVW R5<<2, R5
ADD R1, R5
MOVW $0, R4
@@ -314,7 +314,7 @@ TEXT ·mulWW(SB),7,$0
// func bitLen(x Word) (n int)
TEXT ·bitLen(SB),7,$0
MOVW x+0(FP), R0
- WORD $0xe16f0f10 // CLZ R0, R0 (count leading zeros)
+ CLZ R0, R0
MOVW $32, R1
SUB.S R0, R1
MOVW R1, n+4(FP)
diff --git a/src/pkg/math/big/arith_test.go b/src/pkg/math/big/arith_test.go
index c7e3d284c..3615a659c 100644
--- a/src/pkg/math/big/arith_test.go
+++ b/src/pkg/math/big/arith_test.go
@@ -4,7 +4,10 @@
package big
-import "testing"
+import (
+ "math/rand"
+ "testing"
+)
type funWW func(x, y, c Word) (z1, z0 Word)
type argWW struct {
@@ -100,6 +103,43 @@ func TestFunVV(t *testing.T) {
}
}
+// Always the same seed for reproducible results.
+var rnd = rand.New(rand.NewSource(0))
+
+func rndW() Word {
+ return Word(rnd.Int63()<<1 | rnd.Int63n(2))
+}
+
+func rndV(n int) []Word {
+ v := make([]Word, n)
+ for i := range v {
+ v[i] = rndW()
+ }
+ return v
+}
+
+func benchmarkFunVV(b *testing.B, f funVV, n int) {
+ x := rndV(n)
+ y := rndV(n)
+ z := make([]Word, n)
+ b.SetBytes(int64(n * _W))
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ f(z, x, y)
+ }
+}
+
+func BenchmarkAddVV_1(b *testing.B) { benchmarkFunVV(b, addVV, 1) }
+func BenchmarkAddVV_2(b *testing.B) { benchmarkFunVV(b, addVV, 2) }
+func BenchmarkAddVV_3(b *testing.B) { benchmarkFunVV(b, addVV, 3) }
+func BenchmarkAddVV_4(b *testing.B) { benchmarkFunVV(b, addVV, 4) }
+func BenchmarkAddVV_5(b *testing.B) { benchmarkFunVV(b, addVV, 5) }
+func BenchmarkAddVV_1e1(b *testing.B) { benchmarkFunVV(b, addVV, 1e1) }
+func BenchmarkAddVV_1e2(b *testing.B) { benchmarkFunVV(b, addVV, 1e2) }
+func BenchmarkAddVV_1e3(b *testing.B) { benchmarkFunVV(b, addVV, 1e3) }
+func BenchmarkAddVV_1e4(b *testing.B) { benchmarkFunVV(b, addVV, 1e4) }
+func BenchmarkAddVV_1e5(b *testing.B) { benchmarkFunVV(b, addVV, 1e5) }
+
type funVW func(z, x []Word, y Word) (c Word)
type argVW struct {
z, x nat
@@ -210,6 +250,28 @@ func TestFunVW(t *testing.T) {
}
}
+func benchmarkFunVW(b *testing.B, f funVW, n int) {
+ x := rndV(n)
+ y := rndW()
+ z := make([]Word, n)
+ b.SetBytes(int64(n * _W))
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ f(z, x, y)
+ }
+}
+
+func BenchmarkAddVW_1(b *testing.B) { benchmarkFunVW(b, addVW, 1) }
+func BenchmarkAddVW_2(b *testing.B) { benchmarkFunVW(b, addVW, 2) }
+func BenchmarkAddVW_3(b *testing.B) { benchmarkFunVW(b, addVW, 3) }
+func BenchmarkAddVW_4(b *testing.B) { benchmarkFunVW(b, addVW, 4) }
+func BenchmarkAddVW_5(b *testing.B) { benchmarkFunVW(b, addVW, 5) }
+func BenchmarkAddVW_1e1(b *testing.B) { benchmarkFunVW(b, addVW, 1e1) }
+func BenchmarkAddVW_1e2(b *testing.B) { benchmarkFunVW(b, addVW, 1e2) }
+func BenchmarkAddVW_1e3(b *testing.B) { benchmarkFunVW(b, addVW, 1e3) }
+func BenchmarkAddVW_1e4(b *testing.B) { benchmarkFunVW(b, addVW, 1e4) }
+func BenchmarkAddVW_1e5(b *testing.B) { benchmarkFunVW(b, addVW, 1e5) }
+
type funVWW func(z, x []Word, y, r Word) (c Word)
type argVWW struct {
z, x nat
@@ -334,6 +396,28 @@ func TestMulAddWWW(t *testing.T) {
}
}
+func benchmarkAddMulVVW(b *testing.B, n int) {
+ x := rndV(n)
+ y := rndW()
+ z := make([]Word, n)
+ b.SetBytes(int64(n * _W))
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ addMulVVW(z, x, y)
+ }
+}
+
+func BenchmarkAddMulVVW_1(b *testing.B) { benchmarkAddMulVVW(b, 1) }
+func BenchmarkAddMulVVW_2(b *testing.B) { benchmarkAddMulVVW(b, 2) }
+func BenchmarkAddMulVVW_3(b *testing.B) { benchmarkAddMulVVW(b, 3) }
+func BenchmarkAddMulVVW_4(b *testing.B) { benchmarkAddMulVVW(b, 4) }
+func BenchmarkAddMulVVW_5(b *testing.B) { benchmarkAddMulVVW(b, 5) }
+func BenchmarkAddMulVVW_1e1(b *testing.B) { benchmarkAddMulVVW(b, 1e1) }
+func BenchmarkAddMulVVW_1e2(b *testing.B) { benchmarkAddMulVVW(b, 1e2) }
+func BenchmarkAddMulVVW_1e3(b *testing.B) { benchmarkAddMulVVW(b, 1e3) }
+func BenchmarkAddMulVVW_1e4(b *testing.B) { benchmarkAddMulVVW(b, 1e4) }
+func BenchmarkAddMulVVW_1e5(b *testing.B) { benchmarkAddMulVVW(b, 1e5) }
+
func testWordBitLen(t *testing.T, fname string, f func(Word) int) {
for i := 0; i <= _W; i++ {
x := Word(1) << uint(i-1) // i == 0 => x == 0
diff --git a/src/pkg/math/big/calibrate_test.go b/src/pkg/math/big/calibrate_test.go
index efe1837bb..f69ffbf5c 100644
--- a/src/pkg/math/big/calibrate_test.go
+++ b/src/pkg/math/big/calibrate_test.go
@@ -21,15 +21,17 @@ import (
var calibrate = flag.Bool("calibrate", false, "run calibration test")
-// measure returns the time to run f
-func measure(f func()) time.Duration {
- const N = 100
- start := time.Now()
- for i := N; i > 0; i-- {
- f()
- }
- stop := time.Now()
- return stop.Sub(start) / N
+func karatsubaLoad(b *testing.B) {
+ BenchmarkMul(b)
+}
+
+// measureKaratsuba returns the time to run a Karatsuba-relevant benchmark
+// given Karatsuba threshold th.
+func measureKaratsuba(th int) time.Duration {
+ th, karatsubaThreshold = karatsubaThreshold, th
+ res := testing.Benchmark(karatsubaLoad)
+ karatsubaThreshold = th
+ return time.Duration(res.NsPerOp())
}
func computeThresholds() {
@@ -37,35 +39,33 @@ func computeThresholds() {
fmt.Printf("(run repeatedly for good results)\n")
// determine Tk, the work load execution time using basic multiplication
- karatsubaThreshold = 1e9 // disable karatsuba
- Tb := measure(benchmarkMulLoad)
- fmt.Printf("Tb = %dns\n", Tb)
+ Tb := measureKaratsuba(1e9) // th == 1e9 => Karatsuba multiplication disabled
+ fmt.Printf("Tb = %10s\n", Tb)
// thresholds
- n := 8 // any lower values for the threshold lead to very slow multiplies
+ th := 4
th1 := -1
th2 := -1
var deltaOld time.Duration
- for count := -1; count != 0; count-- {
+ for count := -1; count != 0 && th < 128; count-- {
// determine Tk, the work load execution time using Karatsuba multiplication
- karatsubaThreshold = n // enable karatsuba
- Tk := measure(benchmarkMulLoad)
+ Tk := measureKaratsuba(th)
// improvement over Tb
delta := (Tb - Tk) * 100 / Tb
- fmt.Printf("n = %3d Tk = %8dns %4d%%", n, Tk, delta)
+ fmt.Printf("th = %3d Tk = %10s %4d%%", th, Tk, delta)
// determine break-even point
if Tk < Tb && th1 < 0 {
- th1 = n
+ th1 = th
fmt.Print(" break-even point")
}
// determine diminishing return
if 0 < delta && delta < deltaOld && th2 < 0 {
- th2 = n
+ th2 = th
fmt.Print(" diminishing return")
}
deltaOld = delta
@@ -74,10 +74,10 @@ func computeThresholds() {
// trigger counter
if th1 >= 0 && th2 >= 0 && count < 0 {
- count = 20 // this many extra measurements after we got both thresholds
+ count = 10 // this many extra measurements after we got both thresholds
}
- n++
+ th++
}
}
diff --git a/src/pkg/math/big/gcd_test.go b/src/pkg/math/big/gcd_test.go
new file mode 100644
index 000000000..c0b9f5830
--- /dev/null
+++ b/src/pkg/math/big/gcd_test.go
@@ -0,0 +1,47 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements a GCD benchmark.
+// Usage: go test math/big -test.bench GCD
+
+package big
+
+import (
+ "math/rand"
+ "testing"
+)
+
+// randInt returns a pseudo-random Int in the range [1<<(size-1), (1<<size) - 1]
+func randInt(r *rand.Rand, size uint) *Int {
+ n := new(Int).Lsh(intOne, size-1)
+ x := new(Int).Rand(r, n)
+ return x.Add(x, n) // make sure result > 1<<(size-1)
+}
+
+func runGCD(b *testing.B, aSize, bSize uint) {
+ b.StopTimer()
+ var r = rand.New(rand.NewSource(1234))
+ aa := randInt(r, aSize)
+ bb := randInt(r, bSize)
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ new(Int).GCD(nil, nil, aa, bb)
+ }
+}
+
+func BenchmarkGCD10x10(b *testing.B) { runGCD(b, 10, 10) }
+func BenchmarkGCD10x100(b *testing.B) { runGCD(b, 10, 100) }
+func BenchmarkGCD10x1000(b *testing.B) { runGCD(b, 10, 1000) }
+func BenchmarkGCD10x10000(b *testing.B) { runGCD(b, 10, 10000) }
+func BenchmarkGCD10x100000(b *testing.B) { runGCD(b, 10, 100000) }
+func BenchmarkGCD100x100(b *testing.B) { runGCD(b, 100, 100) }
+func BenchmarkGCD100x1000(b *testing.B) { runGCD(b, 100, 1000) }
+func BenchmarkGCD100x10000(b *testing.B) { runGCD(b, 100, 10000) }
+func BenchmarkGCD100x100000(b *testing.B) { runGCD(b, 100, 100000) }
+func BenchmarkGCD1000x1000(b *testing.B) { runGCD(b, 1000, 1000) }
+func BenchmarkGCD1000x10000(b *testing.B) { runGCD(b, 1000, 10000) }
+func BenchmarkGCD1000x100000(b *testing.B) { runGCD(b, 1000, 100000) }
+func BenchmarkGCD10000x10000(b *testing.B) { runGCD(b, 10000, 10000) }
+func BenchmarkGCD10000x100000(b *testing.B) { runGCD(b, 10000, 100000) }
+func BenchmarkGCD100000x100000(b *testing.B) { runGCD(b, 100000, 100000) }
diff --git a/src/pkg/math/big/int.go b/src/pkg/math/big/int.go
index cd2cd0e2d..bf2fd2009 100644
--- a/src/pkg/math/big/int.go
+++ b/src/pkg/math/big/int.go
@@ -51,6 +51,13 @@ func (z *Int) SetInt64(x int64) *Int {
return z
}
+// SetUint64 sets z to x and returns z.
+func (z *Int) SetUint64(x uint64) *Int {
+ z.abs = z.abs.setUint64(uint64(x))
+ z.neg = false
+ return z
+}
+
// NewInt allocates and returns a new Int set to x.
func NewInt(x int64) *Int {
return new(Int).SetInt64(x)
@@ -412,7 +419,7 @@ func (x *Int) Format(s fmt.State, ch rune) {
if precisionSet {
switch {
case len(digits) < precision:
- zeroes = precision - len(digits) // count of zero padding
+ zeroes = precision - len(digits) // count of zero padding
case digits == "0" && precision == 0:
return // print nothing if zero value (x == 0) and zero precision ("." or ".0")
}
@@ -519,6 +526,19 @@ func (x *Int) Int64() int64 {
return v
}
+// Uint64 returns the uint64 representation of x.
+// If x cannot be represented in an uint64, the result is undefined.
+func (x *Int) Uint64() uint64 {
+ if len(x.abs) == 0 {
+ return 0
+ }
+ v := uint64(x.abs[0])
+ if _W == 32 && len(x.abs) > 1 {
+ v |= uint64(x.abs[1]) << 32
+ }
+ return v
+}
+
// SetString sets z to the value of s, interpreted in the given base,
// and returns z and a boolean indicating success. If SetString fails,
// the value of z is undefined but the returned value is nil.
@@ -561,19 +581,18 @@ func (x *Int) BitLen() int {
return x.abs.bitLen()
}
-// Exp sets z = x**y mod m and returns z. If m is nil, z = x**y.
+// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z.
+// If y <= 0, the result is 1; if m == nil or m == 0, z = x**y.
// See Knuth, volume 2, section 4.6.3.
func (z *Int) Exp(x, y, m *Int) *Int {
if y.neg || len(y.abs) == 0 {
- neg := x.neg
- z.SetInt64(1)
- z.neg = neg
- return z
+ return z.SetInt64(1)
}
+ // y > 0
var mWords nat
if m != nil {
- mWords = m.abs
+ mWords = m.abs // m.abs may be nil for m == 0
}
z.abs = z.abs.expNN(x.abs, y.abs, mWords)
@@ -581,12 +600,12 @@ func (z *Int) Exp(x, y, m *Int) *Int {
return z
}
-// GCD sets z to the greatest common divisor of a and b, which must be
-// positive numbers, and returns z.
+// GCD sets z to the greatest common divisor of a and b, which both must
+// be > 0, and returns z.
// If x and y are not nil, GCD sets x and y such that z = a*x + b*y.
-// If either a or b is not positive, GCD sets z = x = y = 0.
+// If either a or b is <= 0, GCD sets z = x = y = 0.
func (z *Int) GCD(x, y, a, b *Int) *Int {
- if a.neg || b.neg {
+ if a.Sign() <= 0 || b.Sign() <= 0 {
z.SetInt64(0)
if x != nil {
x.SetInt64(0)
@@ -596,6 +615,9 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
}
return z
}
+ if x == nil && y == nil {
+ return z.binaryGCD(a, b)
+ }
A := new(Int).Set(a)
B := new(Int).Set(b)
@@ -640,6 +662,63 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
return z
}
+// binaryGCD sets z to the greatest common divisor of a and b, which both must
+// be > 0, and returns z.
+// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm B.
+func (z *Int) binaryGCD(a, b *Int) *Int {
+ u := z
+ v := new(Int)
+
+ // use one Euclidean iteration to ensure that u and v are approx. the same size
+ switch {
+ case len(a.abs) > len(b.abs):
+ u.Set(b)
+ v.Rem(a, b)
+ case len(a.abs) < len(b.abs):
+ u.Set(a)
+ v.Rem(b, a)
+ default:
+ u.Set(a)
+ v.Set(b)
+ }
+
+ // v might be 0 now
+ if len(v.abs) == 0 {
+ return u
+ }
+ // u > 0 && v > 0
+
+ // determine largest k such that u = u' << k, v = v' << k
+ k := u.abs.trailingZeroBits()
+ if vk := v.abs.trailingZeroBits(); vk < k {
+ k = vk
+ }
+ u.Rsh(u, k)
+ v.Rsh(v, k)
+
+ // determine t (we know that u > 0)
+ t := new(Int)
+ if u.abs[0]&1 != 0 {
+ // u is odd
+ t.Neg(v)
+ } else {
+ t.Set(u)
+ }
+
+ for len(t.abs) > 0 {
+ // reduce t
+ t.Rsh(t, t.abs.trailingZeroBits())
+ if t.neg {
+ v.Neg(t)
+ } else {
+ u.Set(t)
+ }
+ t.Sub(u, v)
+ }
+
+ return u.Lsh(u, k)
+}
+
// ProbablyPrime performs n Miller-Rabin tests to check whether x is prime.
// If it returns true, x is prime with probability 1 - 1/4^n.
// If it returns false, x is not prime.
@@ -697,6 +776,13 @@ func (z *Int) Rsh(x *Int, n uint) *Int {
// Bit returns the value of the i'th bit of x. That is, it
// returns (x>>i)&1. The bit index i must be >= 0.
func (x *Int) Bit(i int) uint {
+ if i == 0 {
+ // optimization for common case: odd/even test of x
+ if len(x.abs) > 0 {
+ return uint(x.abs[0] & 1) // bit 0 is same for -x
+ }
+ return 0
+ }
if i < 0 {
panic("negative bit index")
}
@@ -894,3 +980,19 @@ func (z *Int) GobDecode(buf []byte) error {
z.abs = z.abs.setBytes(buf[1:])
return nil
}
+
+// MarshalJSON implements the json.Marshaler interface.
+func (x *Int) MarshalJSON() ([]byte, error) {
+ // TODO(gri): get rid of the []byte/string conversions
+ return []byte(x.String()), nil
+}
+
+// UnmarshalJSON implements the json.Unmarshaler interface.
+func (z *Int) UnmarshalJSON(x []byte) error {
+ // TODO(gri): get rid of the []byte/string conversions
+ _, ok := z.SetString(string(x), 0)
+ if !ok {
+ return fmt.Errorf("math/big: cannot unmarshal %s into a *big.Int", x)
+ }
+ return nil
+}
diff --git a/src/pkg/math/big/int_test.go b/src/pkg/math/big/int_test.go
index 9700a9b5a..6c981e775 100644
--- a/src/pkg/math/big/int_test.go
+++ b/src/pkg/math/big/int_test.go
@@ -8,6 +8,7 @@ import (
"bytes"
"encoding/gob"
"encoding/hex"
+ "encoding/json"
"fmt"
"math/rand"
"testing"
@@ -642,7 +643,7 @@ func TestSetBytes(t *testing.T) {
func checkBytes(b []byte) bool {
b2 := new(Int).SetBytes(b).Bytes()
- return bytes.Compare(b, b2) == 0
+ return bytes.Equal(b, b2)
}
func TestBytes(t *testing.T) {
@@ -766,8 +767,10 @@ var expTests = []struct {
x, y, m string
out string
}{
+ {"5", "-7", "", "1"},
+ {"-5", "-7", "", "1"},
{"5", "0", "", "1"},
- {"-5", "0", "", "-1"},
+ {"-5", "0", "", "1"},
{"5", "1", "", "5"},
{"-5", "1", "", "-5"},
{"-2", "3", "2", "0"},
@@ -778,6 +781,7 @@ var expTests = []struct {
{"0x8000000000000000", "3", "6719", "5447"},
{"0x8000000000000000", "1000", "6719", "1603"},
{"0x8000000000000000", "1000000", "6719", "3199"},
+ {"0x8000000000000000", "-1000000", "6719", "1"},
{
"2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347",
"298472983472983471903246121093472394872319615612417471234712061",
@@ -806,25 +810,33 @@ func TestExp(t *testing.T) {
continue
}
- z := y.Exp(x, y, m)
- if !isNormalized(z) {
- t.Errorf("#%d: %v is not normalized", i, *z)
+ z1 := new(Int).Exp(x, y, m)
+ if !isNormalized(z1) {
+ t.Errorf("#%d: %v is not normalized", i, *z1)
}
- if z.Cmp(out) != 0 {
- t.Errorf("#%d: got %s want %s", i, z, out)
+ if z1.Cmp(out) != 0 {
+ t.Errorf("#%d: got %s want %s", i, z1, out)
+ }
+
+ if m == nil {
+ // the result should be the same as for m == 0;
+ // specifically, there should be no div-zero panic
+ m = &Int{abs: nat{}} // m != nil && len(m.abs) == 0
+ z2 := new(Int).Exp(x, y, m)
+ if z2.Cmp(z1) != 0 {
+ t.Errorf("#%d: got %s want %s", i, z1, z2)
+ }
}
}
}
func checkGcd(aBytes, bBytes []byte) bool {
- a := new(Int).SetBytes(aBytes)
- b := new(Int).SetBytes(bBytes)
-
x := new(Int)
y := new(Int)
- d := new(Int)
+ a := new(Int).SetBytes(aBytes)
+ b := new(Int).SetBytes(bBytes)
- d.GCD(x, y, a, b)
+ d := new(Int).GCD(x, y, a, b)
x.Mul(x, a)
y.Mul(y, b)
x.Add(x, y)
@@ -833,32 +845,70 @@ func checkGcd(aBytes, bBytes []byte) bool {
}
var gcdTests = []struct {
- a, b int64
- d, x, y int64
+ d, x, y, a, b string
}{
- {120, 23, 1, -9, 47},
-}
-
-func TestGcd(t *testing.T) {
- for i, test := range gcdTests {
- a := NewInt(test.a)
- b := NewInt(test.b)
+ // a <= 0 || b <= 0
+ {"0", "0", "0", "0", "0"},
+ {"0", "0", "0", "0", "7"},
+ {"0", "0", "0", "11", "0"},
+ {"0", "0", "0", "-77", "35"},
+ {"0", "0", "0", "64515", "-24310"},
+ {"0", "0", "0", "-64515", "-24310"},
+
+ {"1", "-9", "47", "120", "23"},
+ {"7", "1", "-2", "77", "35"},
+ {"935", "-3", "8", "64515", "24310"},
+ {"935000000000000000", "-3", "8", "64515000000000000000", "24310000000000000000"},
+ {"1", "-221", "22059940471369027483332068679400581064239780177629666810348940098015901108344", "98920366548084643601728869055592650835572950932266967461790948584315647051443", "991"},
+
+ // test early exit (after one Euclidean iteration) in binaryGCD
+ {"1", "", "", "1", "98920366548084643601728869055592650835572950932266967461790948584315647051443"},
+}
+
+func testGcd(t *testing.T, d, x, y, a, b *Int) {
+ var X *Int
+ if x != nil {
+ X = new(Int)
+ }
+ var Y *Int
+ if y != nil {
+ Y = new(Int)
+ }
- x := new(Int)
- y := new(Int)
- d := new(Int)
+ D := new(Int).GCD(X, Y, a, b)
+ if D.Cmp(d) != 0 {
+ t.Errorf("GCD(%s, %s): got d = %s, want %s", a, b, D, d)
+ }
+ if x != nil && X.Cmp(x) != 0 {
+ t.Errorf("GCD(%s, %s): got x = %s, want %s", a, b, X, x)
+ }
+ if y != nil && Y.Cmp(y) != 0 {
+ t.Errorf("GCD(%s, %s): got y = %s, want %s", a, b, Y, y)
+ }
- expectedX := NewInt(test.x)
- expectedY := NewInt(test.y)
- expectedD := NewInt(test.d)
+ // binaryGCD requires a > 0 && b > 0
+ if a.Sign() <= 0 || b.Sign() <= 0 {
+ return
+ }
- d.GCD(x, y, a, b)
+ D.binaryGCD(a, b)
+ if D.Cmp(d) != 0 {
+ t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, D, d)
+ }
+}
- if expectedX.Cmp(x) != 0 ||
- expectedY.Cmp(y) != 0 ||
- expectedD.Cmp(d) != 0 {
- t.Errorf("#%d got (%s %s %s) want (%s %s %s)", i, x, y, d, expectedX, expectedY, expectedD)
- }
+func TestGcd(t *testing.T) {
+ for _, test := range gcdTests {
+ d, _ := new(Int).SetString(test.d, 0)
+ x, _ := new(Int).SetString(test.x, 0)
+ y, _ := new(Int).SetString(test.y, 0)
+ a, _ := new(Int).SetString(test.a, 0)
+ b, _ := new(Int).SetString(test.b, 0)
+
+ testGcd(t, d, nil, nil, a, b)
+ testGcd(t, d, x, nil, a, b)
+ testGcd(t, d, nil, y, a, b)
+ testGcd(t, d, x, y, a, b)
}
quick.Check(checkGcd, nil)
@@ -1085,6 +1135,36 @@ func TestInt64(t *testing.T) {
}
}
+var uint64Tests = []uint64{
+ 0,
+ 1,
+ 4294967295,
+ 4294967296,
+ 8589934591,
+ 8589934592,
+ 9223372036854775807,
+ 9223372036854775808,
+ 18446744073709551615, // 1<<64 - 1
+}
+
+func TestUint64(t *testing.T) {
+ in := new(Int)
+ for i, testVal := range uint64Tests {
+ in.SetUint64(testVal)
+ out := in.Uint64()
+
+ if out != testVal {
+ t.Errorf("#%d got %d want %d", i, out, testVal)
+ }
+
+ str := fmt.Sprint(testVal)
+ strOut := in.String()
+ if strOut != str {
+ t.Errorf("#%d.String got %s want %s", i, strOut, str)
+ }
+ }
+}
+
var bitwiseTests = []struct {
x, y string
and, or, xor, andNot string
@@ -1368,8 +1448,12 @@ func TestModInverse(t *testing.T) {
}
}
-// used by TestIntGobEncoding and TestRatGobEncoding
-var gobEncodingTests = []string{
+var encodingTests = []string{
+ "-539345864568634858364538753846587364875430589374589",
+ "-678645873",
+ "-100",
+ "-2",
+ "-1",
"0",
"1",
"2",
@@ -1383,26 +1467,37 @@ func TestIntGobEncoding(t *testing.T) {
var medium bytes.Buffer
enc := gob.NewEncoder(&medium)
dec := gob.NewDecoder(&medium)
- for i, test := range gobEncodingTests {
- for j := 0; j < 2; j++ {
- medium.Reset() // empty buffer for each test case (in case of failures)
- stest := test
- if j != 0 {
- // negative numbers
- stest = "-" + test
- }
- var tx Int
- tx.SetString(stest, 10)
- if err := enc.Encode(&tx); err != nil {
- t.Errorf("#%d%c: encoding failed: %s", i, 'a'+j, err)
- }
- var rx Int
- if err := dec.Decode(&rx); err != nil {
- t.Errorf("#%d%c: decoding failed: %s", i, 'a'+j, err)
- }
- if rx.Cmp(&tx) != 0 {
- t.Errorf("#%d%c: transmission failed: got %s want %s", i, 'a'+j, &rx, &tx)
- }
+ for _, test := range encodingTests {
+ medium.Reset() // empty buffer for each test case (in case of failures)
+ var tx Int
+ tx.SetString(test, 10)
+ if err := enc.Encode(&tx); err != nil {
+ t.Errorf("encoding of %s failed: %s", &tx, err)
+ }
+ var rx Int
+ if err := dec.Decode(&rx); err != nil {
+ t.Errorf("decoding of %s failed: %s", &tx, err)
+ }
+ if rx.Cmp(&tx) != 0 {
+ t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx)
+ }
+ }
+}
+
+func TestIntJSONEncoding(t *testing.T) {
+ for _, test := range encodingTests {
+ var tx Int
+ tx.SetString(test, 10)
+ b, err := json.Marshal(&tx)
+ if err != nil {
+ t.Errorf("marshaling of %s failed: %s", &tx, err)
+ }
+ var rx Int
+ if err := json.Unmarshal(b, &rx); err != nil {
+ t.Errorf("unmarshaling of %s failed: %s", &tx, err)
+ }
+ if rx.Cmp(&tx) != 0 {
+ t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx)
}
}
}
diff --git a/src/pkg/math/big/nat.go b/src/pkg/math/big/nat.go
index eaa6ff066..9d09f97b7 100644
--- a/src/pkg/math/big/nat.go
+++ b/src/pkg/math/big/nat.go
@@ -236,7 +236,7 @@ func karatsubaSub(z, x nat, n int) {
// Operands that are shorter than karatsubaThreshold are multiplied using
// "grade school" multiplication; for longer operands the Karatsuba algorithm
// is used.
-var karatsubaThreshold int = 32 // computed by calibrate.go
+var karatsubaThreshold int = 40 // computed by calibrate.go
// karatsuba multiplies x and y and leaves the result in z.
// Both x and y must have the same length n and n must be a
@@ -342,7 +342,7 @@ func alias(x, y nat) bool {
return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1]
}
-// addAt implements z += x*(1<<(_W*i)); z must be long enough.
+// addAt implements z += x<<(_W*i); z must be long enough.
// (we don't use nat.add because we need z to stay the same
// slice, and we don't need to normalize z after each addition)
func addAt(z, x nat, i int) {
@@ -396,7 +396,7 @@ func (z nat) mul(x, y nat) nat {
}
// use basic multiplication if the numbers are small
- if n < karatsubaThreshold || n < 2 {
+ if n < karatsubaThreshold {
z = z.make(m + n)
basicMul(z, x, y)
return z.norm()
@@ -405,8 +405,8 @@ func (z nat) mul(x, y nat) nat {
// determine Karatsuba length k such that
//
- // x = x1*b + x0
- // y = y1*b + y0 (and k <= len(y), which implies k <= len(x))
+ // x = xh*b + x0 (0 <= x0 < b)
+ // y = yh*b + y0 (0 <= y0 < b)
// b = 1<<(_W*k) ("base" of digits xi, yi)
//
k := karatsubaLen(n)
@@ -417,27 +417,44 @@ func (z nat) mul(x, y nat) nat {
y0 := y[0:k] // y0 is not normalized
z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y
karatsuba(z, x0, y0)
- z = z[0 : m+n] // z has final length but may be incomplete, upper portion is garbage
+ z = z[0 : m+n] // z has final length but may be incomplete
+ z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m)
- // If x1 and/or y1 are not 0, add missing terms to z explicitly:
+ // If xh != 0 or yh != 0, add the missing terms to z. For
+ //
+ // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b)
+ // yh = y1*b (0 <= y1 < b)
+ //
+ // the missing terms are
+ //
+ // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0
//
- // m+n 2*k 0
- // z = [ ... | x0*y0 ]
- // + [ x1*y1 ]
- // + [ x1*y0 ]
- // + [ x0*y1 ]
+ // since all the yi for i > 1 are 0 by choice of k: If any of them
+ // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would
+ // be a larger valid threshold contradicting the assumption about k.
//
if k < n || m != n {
- x1 := x[k:] // x1 is normalized because x is
- y1 := y[k:] // y1 is normalized because y is
var t nat
- t = t.mul(x1, y1)
- copy(z[2*k:], t)
- z[2*k+len(t):].clear() // upper portion of z is garbage
- t = t.mul(x1, y0.norm())
- addAt(z, t, k)
- t = t.mul(x0.norm(), y1)
+
+ // add x0*y1*b
+ x0 := x0.norm()
+ y1 := y[k:] // y1 is normalized because y is
+ t = t.mul(x0, y1) // update t so we don't lose t's underlying array
addAt(z, t, k)
+
+ // add xi*y0<<i, xi*y1*b<<(i+k)
+ y0 := y0.norm()
+ for i := k; i < len(x); i += k {
+ xi := x[i:]
+ if len(xi) > k {
+ xi = xi[:k]
+ }
+ xi = xi.norm()
+ t = t.mul(xi, y0)
+ addAt(z, t, i)
+ t = t.mul(xi, y1)
+ addAt(z, t, i+k)
+ }
}
return z.norm()
@@ -493,14 +510,9 @@ func (z nat) div(z2, u, v nat) (q, r nat) {
}
if len(v) == 1 {
- var rprime Word
- q, rprime = z.divW(u, v[0])
- if rprime > 0 {
- r = z2.make(1)
- r[0] = rprime
- } else {
- r = z2.make(0)
- }
+ var r2 Word
+ q, r2 = z.divW(u, v[0])
+ r = z2.setWord(r2)
return
}
@@ -740,7 +752,7 @@ func (x nat) string(charset string) string {
// convert power of two and non power of two bases separately
if b == b&-b {
// shift is base-b digit size in bits
- shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
+ shift := trailingZeroBits(b) // shift > 0 because b >= 2
mask := Word(1)<<shift - 1
w := x[0]
nbits := uint(_W) // number of unprocessed bits in w
@@ -814,18 +826,18 @@ func (x nat) string(charset string) string {
// Convert words of q to base b digits in s. If q is large, it is recursively "split in half"
// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using
-// repeated nat/Word divison.
+// repeated nat/Word division.
//
-// The iterative method processes n Words by n divW() calls, each of which visits every Word in the
-// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
-// Recursive conversion divides q by its approximate square root, yielding two parts, each half
+// The iterative method processes n Words by n divW() calls, each of which visits every Word in the
+// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s.
+// Recursive conversion divides q by its approximate square root, yielding two parts, each half
// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s
// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and
-// is made better by splitting the subblocks recursively. Best is to split blocks until one more
-// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
-// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
-// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
-// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
+// is made better by splitting the subblocks recursively. Best is to split blocks until one more
+// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the
+// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the
+// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and
+// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for
// specific hardware.
//
func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) {
@@ -908,8 +920,10 @@ type divisor struct {
ndigits int // digit length of divisor in terms of output base digits
}
-var cacheBase10 [64]divisor // cached divisors for base 10
-var cacheLock sync.Mutex // protects cacheBase10
+var cacheBase10 struct {
+ sync.Mutex
+ table [64]divisor // cached divisors for base 10
+}
// expWW computes x**y
func (z nat) expWW(x, y Word) nat {
@@ -925,34 +939,28 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
// determine k where (bb**leafSize)**(2**k) >= sqrt(x)
k := 1
- for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 {
+ for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
k++
}
- // create new table of divisors or extend and reuse existing table as appropriate
- var table []divisor
- var cached bool
- switch b {
- case 10:
- table = cacheBase10[0:k] // reuse old table for this conversion
- cached = true
- default:
- table = make([]divisor, k) // new table for this conversion
+ // reuse and extend existing table of divisors or create new table as appropriate
+ var table []divisor // for b == 10, table overlaps with cacheBase10.table
+ if b == 10 {
+ cacheBase10.Lock()
+ table = cacheBase10.table[0:k] // reuse old table for this conversion
+ } else {
+ table = make([]divisor, k) // create new table for this conversion
}
// extend table
if table[k-1].ndigits == 0 {
- if cached {
- cacheLock.Lock() // begin critical section
- }
-
// add new entries as needed
var larger nat
for i := 0; i < k; i++ {
if table[i].ndigits == 0 {
if i == 0 {
- table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
- table[i].ndigits = ndigits * leafSize
+ table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
+ table[0].ndigits = ndigits * leafSize
} else {
table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
table[i].ndigits = 2 * table[i-1].ndigits
@@ -968,10 +976,10 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
table[i].nbits = table[i].bbb.bitLen()
}
}
+ }
- if cached {
- cacheLock.Unlock() // end critical section
- }
+ if b == 10 {
+ cacheBase10.Unlock()
}
return table
@@ -993,10 +1001,9 @@ var deBruijn64Lookup = []byte{
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
-// trailingZeroBits returns the number of consecutive zero bits on the right
-// side of the given Word.
-// See Knuth, volume 4, section 7.3.1
-func trailingZeroBits(x Word) int {
+// trailingZeroBits returns the number of consecutive least significant zero
+// bits of x.
+func trailingZeroBits(x Word) uint {
// x & -x leaves only the right-most bit set in the word. Let k be the
// index of that bit. Since only a single bit is set, the value is two
// to the power of k. Multiplying by a power of two is equivalent to
@@ -1005,18 +1012,33 @@ func trailingZeroBits(x Word) int {
// Therefore, if we have a left shifted version of this constant we can
// find by how many bits it was shifted by looking at which six bit
// substring ended up at the top of the word.
+ // (Knuth, volume 4, section 7.3.1)
switch _W {
case 32:
- return int(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
+ return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27])
case 64:
- return int(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
+ return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58])
default:
- panic("Unknown word size")
+ panic("unknown word size")
}
return 0
}
+// trailingZeroBits returns the number of consecutive least significant zero
+// bits of x.
+func (x nat) trailingZeroBits() uint {
+ if len(x) == 0 {
+ return 0
+ }
+ var i uint
+ for x[i] == 0 {
+ i++
+ }
+ // x[i] != 0
+ return i*_W + trailingZeroBits(x[i])
+}
+
// z = x << s
func (z nat) shl(x nat, s uint) nat {
m := len(x)
@@ -1169,29 +1191,6 @@ func (x nat) modW(d Word) (r Word) {
return divWVW(q, 0, x, d)
}
-// powersOfTwoDecompose finds q and k with x = q * 1<<k and q is odd, or q and k are 0.
-func (x nat) powersOfTwoDecompose() (q nat, k int) {
- if len(x) == 0 {
- return x, 0
- }
-
- // One of the words must be non-zero by definition,
- // so this loop will terminate with i < len(x), and
- // i is the number of 0 words.
- i := 0
- for x[i] == 0 {
- i++
- }
- n := trailingZeroBits(x[i]) // x[i] != 0
-
- q = make(nat, len(x)-i)
- shrVU(q, x[i:], uint(n))
-
- q = q.norm()
- k = i*_W + n
- return
-}
-
// random creates a random integer in [0..limit), using the space in z if
// possible. n is the bit length of limit.
func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
@@ -1207,17 +1206,19 @@ func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
mask := Word((1 << bitLengthOfMSW) - 1)
for {
- for i := range z {
- switch _W {
- case 32:
+ switch _W {
+ case 32:
+ for i := range z {
z[i] = Word(rand.Uint32())
- case 64:
+ }
+ case 64:
+ for i := range z {
z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32
}
+ default:
+ panic("unknown word size")
}
-
z[len(limit)-1] &= mask
-
if z.cmp(limit) < 0 {
break
}
@@ -1226,11 +1227,11 @@ func (z nat) random(rand *rand.Rand, limit nat, n int) nat {
return z.norm()
}
-// If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It
-// reuses the storage of z if possible.
+// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m;
+// otherwise it sets z to x**y. The result is the value of z.
func (z nat) expNN(x, y, m nat) nat {
if alias(z, x) || alias(z, y) {
- // We cannot allow in place modification of x or y.
+ // We cannot allow in-place modification of x or y.
z = nil
}
@@ -1239,15 +1240,24 @@ func (z nat) expNN(x, y, m nat) nat {
z[0] = 1
return z
}
+ // y > 0
- if m != nil {
+ if len(m) != 0 {
// We likely end up being as long as the modulus.
z = z.make(len(m))
}
z = z.set(x)
- v := y[len(y)-1]
- // It's invalid for the most significant word to be zero, therefore we
- // will find a one bit.
+
+ // If the base is non-trivial and the exponent is large, we use
+ // 4-bit, windowed exponentiation. This involves precomputing 14 values
+ // (x^2...x^15) but then reduces the number of multiply-reduces by a
+ // third. Even for a 32-bit exponent, this reduces the number of
+ // operations.
+ if len(x) > 1 && len(y) > 1 && len(m) > 0 {
+ return z.expNNWindowed(x, y, m)
+ }
+
+ v := y[len(y)-1] // v > 0 because y is normalized and y > 0
shift := leadingZeros(v) + 1
v <<= shift
var q nat
@@ -1259,15 +1269,21 @@ func (z nat) expNN(x, y, m nat) nat {
// we also multiply by x, thus adding one to the power.
w := _W - int(shift)
+ // zz and r are used to avoid allocating in mul and div as
+ // otherwise the arguments would alias.
+ var zz, r nat
for j := 0; j < w; j++ {
- z = z.mul(z, z)
+ zz = zz.mul(z, z)
+ zz, z = z, zz
if v&mask != 0 {
- z = z.mul(z, x)
+ zz = zz.mul(z, x)
+ zz, z = z, zz
}
- if m != nil {
- q, z = q.div(z, z, m)
+ if len(m) != 0 {
+ zz, r = zz.div(r, z, m)
+ zz, r, q, z = q, z, zz, r
}
v <<= 1
@@ -1277,14 +1293,17 @@ func (z nat) expNN(x, y, m nat) nat {
v = y[i]
for j := 0; j < _W; j++ {
- z = z.mul(z, z)
+ zz = zz.mul(z, z)
+ zz, z = z, zz
if v&mask != 0 {
- z = z.mul(z, x)
+ zz = zz.mul(z, x)
+ zz, z = z, zz
}
- if m != nil {
- q, z = q.div(z, z, m)
+ if len(m) != 0 {
+ zz, r = zz.div(r, z, m)
+ zz, r, q, z = q, z, zz, r
}
v <<= 1
@@ -1294,6 +1313,69 @@ func (z nat) expNN(x, y, m nat) nat {
return z.norm()
}
+// expNNWindowed calculates x**y mod m using a fixed, 4-bit window.
+func (z nat) expNNWindowed(x, y, m nat) nat {
+ // zz and r are used to avoid allocating in mul and div as otherwise
+ // the arguments would alias.
+ var zz, r nat
+
+ const n = 4
+ // powers[i] contains x^i.
+ var powers [1 << n]nat
+ powers[0] = natOne
+ powers[1] = x
+ for i := 2; i < 1<<n; i += 2 {
+ p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
+ *p = p.mul(*p2, *p2)
+ zz, r = zz.div(r, *p, m)
+ *p, r = r, *p
+ *p1 = p1.mul(*p, x)
+ zz, r = zz.div(r, *p1, m)
+ *p1, r = r, *p1
+ }
+
+ z = z.setWord(1)
+
+ for i := len(y) - 1; i >= 0; i-- {
+ yi := y[i]
+ for j := 0; j < _W; j += n {
+ if i != len(y)-1 || j != 0 {
+ // Unrolled loop for significant performance
+ // gain. Use go test -bench=".*" in crypto/rsa
+ // to check performance before making changes.
+ zz = zz.mul(z, z)
+ zz, z = z, zz
+ zz, r = zz.div(r, z, m)
+ z, r = r, z
+
+ zz = zz.mul(z, z)
+ zz, z = z, zz
+ zz, r = zz.div(r, z, m)
+ z, r = r, z
+
+ zz = zz.mul(z, z)
+ zz, z = z, zz
+ zz, r = zz.div(r, z, m)
+ z, r = r, z
+
+ zz = zz.mul(z, z)
+ zz, z = z, zz
+ zz, r = zz.div(r, z, m)
+ z, r = r, z
+ }
+
+ zz = zz.mul(z, powers[yi>>(_W-n)])
+ zz, z = z, zz
+ zz, r = zz.div(r, z, m)
+ z, r = r, z
+
+ yi <<= n
+ }
+ }
+
+ return z.norm()
+}
+
// probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
// If it returns true, n is prime with probability 1 - 1/4^reps.
// If it returns false, n is not prime.
@@ -1343,8 +1425,9 @@ func (n nat) probablyPrime(reps int) bool {
}
nm1 := nat(nil).sub(n, natOne)
- // 1<<k * q = nm1;
- q, k := nm1.powersOfTwoDecompose()
+ // determine q, k such that nm1 = q << k
+ k := nm1.trailingZeroBits()
+ q := nat(nil).shr(nm1, k)
nm3 := nat(nil).sub(nm1, natTwo)
rand := rand.New(rand.NewSource(int64(n[0])))
@@ -1360,7 +1443,7 @@ NextRandom:
if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 {
continue
}
- for j := 1; j < k; j++ {
+ for j := uint(1); j < k; j++ {
y = y.mul(y, y)
quotient, y = quotient.div(y, y, n)
if y.cmp(nm1) == 0 {
diff --git a/src/pkg/math/big/nat_test.go b/src/pkg/math/big/nat_test.go
index becde5d17..2dd7bf639 100644
--- a/src/pkg/math/big/nat_test.go
+++ b/src/pkg/math/big/nat_test.go
@@ -6,6 +6,7 @@ package big
import (
"io"
+ "runtime"
"strings"
"testing"
)
@@ -62,6 +63,36 @@ var prodNN = []argNN{
{nat{0, 0, 991 * 991}, nat{0, 991}, nat{0, 991}},
{nat{1 * 991, 2 * 991, 3 * 991, 4 * 991}, nat{1, 2, 3, 4}, nat{991}},
{nat{4, 11, 20, 30, 20, 11, 4}, nat{1, 2, 3, 4}, nat{4, 3, 2, 1}},
+ // 3^100 * 3^28 = 3^128
+ {
+ natFromString("11790184577738583171520872861412518665678211592275841109096961"),
+ natFromString("515377520732011331036461129765621272702107522001"),
+ natFromString("22876792454961"),
+ },
+ // z = 111....1 (70000 digits)
+ // x = 10^(99*700) + ... + 10^1400 + 10^700 + 1
+ // y = 111....1 (700 digits, larger than Karatsuba threshold on 32-bit and 64-bit)
+ {
+ natFromString(strings.Repeat("1", 70000)),
+ natFromString("1" + strings.Repeat(strings.Repeat("0", 699)+"1", 99)),
+ natFromString(strings.Repeat("1", 700)),
+ },
+ // z = 111....1 (20000 digits)
+ // x = 10^10000 + 1
+ // y = 111....1 (10000 digits)
+ {
+ natFromString(strings.Repeat("1", 20000)),
+ natFromString("1" + strings.Repeat("0", 9999) + "1"),
+ natFromString(strings.Repeat("1", 10000)),
+ },
+}
+
+func natFromString(s string) nat {
+ x, _, err := nat(nil).scan(strings.NewReader(s), 0)
+ if err != nil {
+ panic(err)
+ }
+ return x
}
func TestSet(t *testing.T) {
@@ -135,26 +166,43 @@ func TestMulRangeN(t *testing.T) {
}
}
-var mulArg, mulTmp nat
-
-func init() {
- const n = 1000
- mulArg = make(nat, n)
- for i := 0; i < n; i++ {
- mulArg[i] = _M
+// allocBytes returns the number of bytes allocated by invoking f.
+func allocBytes(f func()) uint64 {
+ var stats runtime.MemStats
+ runtime.ReadMemStats(&stats)
+ t := stats.TotalAlloc
+ f()
+ runtime.ReadMemStats(&stats)
+ return stats.TotalAlloc - t
+}
+
+// TestMulUnbalanced tests that multiplying numbers of different lengths
+// does not cause deep recursion and in turn allocate too much memory.
+// Test case for issue 3807.
+func TestMulUnbalanced(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+ x := rndNat(50000)
+ y := rndNat(40)
+ allocSize := allocBytes(func() {
+ nat(nil).mul(x, y)
+ })
+ inputSize := uint64(len(x)+len(y)) * _S
+ if ratio := allocSize / uint64(inputSize); ratio > 10 {
+ t.Errorf("multiplication uses too much memory (%d > %d times the size of inputs)", allocSize, ratio)
}
}
-func benchmarkMulLoad() {
- for j := 1; j <= 10; j++ {
- x := mulArg[0 : j*100]
- mulTmp.mul(x, x)
- }
+func rndNat(n int) nat {
+ return nat(rndV(n)).norm()
}
func BenchmarkMul(b *testing.B) {
+ mulx := rndNat(1e4)
+ muly := rndNat(1e4)
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
- benchmarkMulLoad()
+ var z nat
+ z.mul(mulx, muly)
}
}
@@ -362,6 +410,20 @@ func TestScanPi(t *testing.T) {
}
}
+func TestScanPiParallel(t *testing.T) {
+ const n = 2
+ c := make(chan int)
+ for i := 0; i < n; i++ {
+ go func() {
+ TestScanPi(t)
+ c <- 0
+ }()
+ }
+ for i := 0; i < n; i++ {
+ <-c
+ }
+}
+
func BenchmarkScanPi(b *testing.B) {
for i := 0; i < b.N; i++ {
var x nat
@@ -369,6 +431,28 @@ func BenchmarkScanPi(b *testing.B) {
}
}
+func BenchmarkStringPiParallel(b *testing.B) {
+ var x nat
+ x, _, _ = x.scan(strings.NewReader(pi), 0)
+ if x.decimalString() != pi {
+ panic("benchmark incorrect: conversion failed")
+ }
+ n := runtime.GOMAXPROCS(0)
+ m := b.N / n // n*m <= b.N due to flooring, but the error is neglibible (n is not very large)
+ c := make(chan int, n)
+ for i := 0; i < n; i++ {
+ go func() {
+ for j := 0; j < m; j++ {
+ x.decimalString()
+ }
+ c <- 0
+ }()
+ }
+ for i := 0; i < n; i++ {
+ <-c
+ }
+}
+
func BenchmarkScan10Base2(b *testing.B) { ScanHelper(b, 2, 10, 10) }
func BenchmarkScan100Base2(b *testing.B) { ScanHelper(b, 2, 10, 100) }
func BenchmarkScan1000Base2(b *testing.B) { ScanHelper(b, 2, 10, 1000) }
@@ -463,13 +547,13 @@ func BenchmarkLeafSize13(b *testing.B) { LeafSizeHelper(b, 10, 13) }
func BenchmarkLeafSize14(b *testing.B) { LeafSizeHelper(b, 10, 14) }
func BenchmarkLeafSize15(b *testing.B) { LeafSizeHelper(b, 10, 15) }
func BenchmarkLeafSize16(b *testing.B) { LeafSizeHelper(b, 10, 16) }
-func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths
+func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths
func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) }
func LeafSizeHelper(b *testing.B, base Word, size int) {
b.StopTimer()
originalLeafSize := leafSize
- resetTable(cacheBase10[:])
+ resetTable(cacheBase10.table[:])
leafSize = size
b.StartTimer()
@@ -486,7 +570,7 @@ func LeafSizeHelper(b *testing.B, base Word, size int) {
}
b.StopTimer()
- resetTable(cacheBase10[:])
+ resetTable(cacheBase10.table[:])
leafSize = originalLeafSize
b.StartTimer()
}
@@ -616,14 +700,23 @@ func TestModW(t *testing.T) {
}
func TestTrailingZeroBits(t *testing.T) {
- var x Word
- x--
- for i := 0; i < _W; i++ {
- if trailingZeroBits(x) != i {
- t.Errorf("Failed at step %d: x: %x got: %d", i, x, trailingZeroBits(x))
+ x := Word(1)
+ for i := uint(0); i <= _W; i++ {
+ n := trailingZeroBits(x)
+ if n != i%_W {
+ t.Errorf("got trailingZeroBits(%#x) = %d; want %d", x, n, i%_W)
}
x <<= 1
}
+
+ y := nat(nil).set(natOne)
+ for i := uint(0); i <= 3*_W; i++ {
+ n := y.trailingZeroBits()
+ if n != i {
+ t.Errorf("got 0x%s.trailingZeroBits() = %d; want %d", y.string(lowercaseDigits[0:16]), n, i)
+ }
+ y = y.shl(y, 1)
+ }
}
var expNNTests = []struct {
diff --git a/src/pkg/math/big/rat.go b/src/pkg/math/big/rat.go
index 7bd83fc0f..3e6473d92 100644
--- a/src/pkg/math/big/rat.go
+++ b/src/pkg/math/big/rat.go
@@ -10,14 +10,17 @@ import (
"encoding/binary"
"errors"
"fmt"
+ "math"
"strings"
)
// A Rat represents a quotient a/b of arbitrary precision.
// The zero value for a Rat represents the value 0.
type Rat struct {
- a Int
- b nat // len(b) == 0 acts like b == 1
+ // To make zero values for Rat work w/o initialization,
+ // a zero value of b (len(b) == 0) acts like b == 1.
+ // a.neg determines the sign of the Rat, b.neg is ignored.
+ a, b Int
}
// NewRat creates a new Rat with numerator a and denominator b.
@@ -25,6 +28,156 @@ func NewRat(a, b int64) *Rat {
return new(Rat).SetFrac64(a, b)
}
+// SetFloat64 sets z to exactly f and returns z.
+// If f is not finite, SetFloat returns nil.
+func (z *Rat) SetFloat64(f float64) *Rat {
+ const expMask = 1<<11 - 1
+ bits := math.Float64bits(f)
+ mantissa := bits & (1<<52 - 1)
+ exp := int((bits >> 52) & expMask)
+ switch exp {
+ case expMask: // non-finite
+ return nil
+ case 0: // denormal
+ exp -= 1022
+ default: // normal
+ mantissa |= 1 << 52
+ exp -= 1023
+ }
+
+ shift := 52 - exp
+
+ // Optimisation (?): partially pre-normalise.
+ for mantissa&1 == 0 && shift > 0 {
+ mantissa >>= 1
+ shift--
+ }
+
+ z.a.SetUint64(mantissa)
+ z.a.neg = f < 0
+ z.b.Set(intOne)
+ if shift > 0 {
+ z.b.Lsh(&z.b, uint(shift))
+ } else {
+ z.a.Lsh(&z.a, uint(-shift))
+ }
+ return z.norm()
+}
+
+// isFinite reports whether f represents a finite rational value.
+// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0).
+func isFinite(f float64) bool {
+ return math.Abs(f) <= math.MaxFloat64
+}
+
+// low64 returns the least significant 64 bits of natural number z.
+func low64(z nat) uint64 {
+ if len(z) == 0 {
+ return 0
+ }
+ if _W == 32 && len(z) > 1 {
+ return uint64(z[1])<<32 | uint64(z[0])
+ }
+ return uint64(z[0])
+}
+
+// quotToFloat returns the non-negative IEEE 754 double-precision
+// value nearest to the quotient a/b, using round-to-even in halfway
+// cases. It does not mutate its arguments.
+// Preconditions: b is non-zero; a and b have no common factors.
+func quotToFloat(a, b nat) (f float64, exact bool) {
+ // TODO(adonovan): specialize common degenerate cases: 1.0, integers.
+ alen := a.bitLen()
+ if alen == 0 {
+ return 0, true
+ }
+ blen := b.bitLen()
+ if blen == 0 {
+ panic("division by zero")
+ }
+
+ // 1. Left-shift A or B such that quotient A/B is in [1<<53, 1<<55).
+ // (54 bits if A<B when they are left-aligned, 55 bits if A>=B.)
+ // This is 2 or 3 more than the float64 mantissa field width of 52:
+ // - the optional extra bit is shifted away in step 3 below.
+ // - the high-order 1 is omitted in float64 "normal" representation;
+ // - the low-order 1 will be used during rounding then discarded.
+ exp := alen - blen
+ var a2, b2 nat
+ a2 = a2.set(a)
+ b2 = b2.set(b)
+ if shift := 54 - exp; shift > 0 {
+ a2 = a2.shl(a2, uint(shift))
+ } else if shift < 0 {
+ b2 = b2.shl(b2, uint(-shift))
+ }
+
+ // 2. Compute quotient and remainder (q, r). NB: due to the
+ // extra shift, the low-order bit of q is logically the
+ // high-order bit of r.
+ var q nat
+ q, r := q.div(a2, a2, b2) // (recycle a2)
+ mantissa := low64(q)
+ haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half
+
+ // 3. If quotient didn't fit in 54 bits, re-do division by b2<<1
+ // (in effect---we accomplish this incrementally).
+ if mantissa>>54 == 1 {
+ if mantissa&1 == 1 {
+ haveRem = true
+ }
+ mantissa >>= 1
+ exp++
+ }
+ if mantissa>>53 != 1 {
+ panic("expected exactly 54 bits of result")
+ }
+
+ // 4. Rounding.
+ if -1022-52 <= exp && exp <= -1022 {
+ // Denormal case; lose 'shift' bits of precision.
+ shift := uint64(-1022 - (exp - 1)) // [1..53)
+ lostbits := mantissa & (1<<shift - 1)
+ haveRem = haveRem || lostbits != 0
+ mantissa >>= shift
+ exp = -1023 + 2
+ }
+ // Round q using round-half-to-even.
+ exact = !haveRem
+ if mantissa&1 != 0 {
+ exact = false
+ if haveRem || mantissa&2 != 0 {
+ if mantissa++; mantissa >= 1<<54 {
+ // Complete rollover 11...1 => 100...0, so shift is safe
+ mantissa >>= 1
+ exp++
+ }
+ }
+ }
+ mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 2^53.
+
+ f = math.Ldexp(float64(mantissa), exp-53)
+ if math.IsInf(f, 0) {
+ exact = false
+ }
+ return
+}
+
+// Float64 returns the nearest float64 value to z.
+// If z is exactly representable as a float64, Float64 returns exact=true.
+// If z is negative, so too is f, even if f==0.
+func (z *Rat) Float64() (f float64, exact bool) {
+ b := z.b.abs
+ if len(b) == 0 {
+ b = b.set(natOne) // materialize denominator
+ }
+ f, exact = quotToFloat(z.a.abs, b)
+ if z.a.neg {
+ f = -f
+ }
+ return
+}
+
// SetFrac sets z to a/b and returns z.
func (z *Rat) SetFrac(a, b *Int) *Rat {
z.a.neg = a.neg != b.neg
@@ -36,7 +189,7 @@ func (z *Rat) SetFrac(a, b *Int) *Rat {
babs = nat(nil).set(babs) // make a copy
}
z.a.abs = z.a.abs.set(a.abs)
- z.b = z.b.set(babs)
+ z.b.abs = z.b.abs.set(babs)
return z.norm()
}
@@ -50,21 +203,21 @@ func (z *Rat) SetFrac64(a, b int64) *Rat {
b = -b
z.a.neg = !z.a.neg
}
- z.b = z.b.setUint64(uint64(b))
+ z.b.abs = z.b.abs.setUint64(uint64(b))
return z.norm()
}
// SetInt sets z to x (by making a copy of x) and returns z.
func (z *Rat) SetInt(x *Int) *Rat {
z.a.Set(x)
- z.b = z.b.make(0)
+ z.b.abs = z.b.abs.make(0)
return z
}
// SetInt64 sets z to x and returns z.
func (z *Rat) SetInt64(x int64) *Rat {
z.a.SetInt64(x)
- z.b = z.b.make(0)
+ z.b.abs = z.b.abs.make(0)
return z
}
@@ -72,7 +225,7 @@ func (z *Rat) SetInt64(x int64) *Rat {
func (z *Rat) Set(x *Rat) *Rat {
if z != x {
z.a.Set(&x.a)
- z.b = z.b.set(x.b)
+ z.b.Set(&x.b)
}
return z
}
@@ -97,15 +250,15 @@ func (z *Rat) Inv(x *Rat) *Rat {
panic("division by zero")
}
z.Set(x)
- a := z.b
+ a := z.b.abs
if len(a) == 0 {
- a = a.setWord(1) // materialize numerator
+ a = a.set(natOne) // materialize numerator
}
b := z.a.abs
if b.cmp(natOne) == 0 {
b = b.make(0) // normalize denominator
}
- z.a.abs, z.b = a, b // sign doesn't change
+ z.a.abs, z.b.abs = a, b // sign doesn't change
return z
}
@@ -121,38 +274,26 @@ func (x *Rat) Sign() int {
// IsInt returns true if the denominator of x is 1.
func (x *Rat) IsInt() bool {
- return len(x.b) == 0 || x.b.cmp(natOne) == 0
+ return len(x.b.abs) == 0 || x.b.abs.cmp(natOne) == 0
}
// Num returns the numerator of x; it may be <= 0.
// The result is a reference to x's numerator; it
-// may change if a new value is assigned to x.
+// may change if a new value is assigned to x, and vice versa.
+// The sign of the numerator corresponds to the sign of x.
func (x *Rat) Num() *Int {
return &x.a
}
// Denom returns the denominator of x; it is always > 0.
// The result is a reference to x's denominator; it
-// may change if a new value is assigned to x.
+// may change if a new value is assigned to x, and vice versa.
func (x *Rat) Denom() *Int {
- if len(x.b) == 0 {
- return &Int{abs: nat{1}}
+ x.b.neg = false // the result is always >= 0
+ if len(x.b.abs) == 0 {
+ x.b.abs = x.b.abs.set(natOne) // materialize denominator
}
- return &Int{abs: x.b}
-}
-
-func gcd(x, y nat) nat {
- // Euclidean algorithm.
- var a, b nat
- a = a.set(x)
- b = b.set(y)
- for len(b) != 0 {
- var q, r nat
- _, r = q.div(r, a, b)
- a = b
- b = r
- }
- return a
+ return &x.b
}
func (z *Rat) norm() *Rat {
@@ -160,17 +301,25 @@ func (z *Rat) norm() *Rat {
case len(z.a.abs) == 0:
// z == 0 - normalize sign and denominator
z.a.neg = false
- z.b = z.b.make(0)
- case len(z.b) == 0:
+ z.b.abs = z.b.abs.make(0)
+ case len(z.b.abs) == 0:
// z is normalized int - nothing to do
- case z.b.cmp(natOne) == 0:
+ case z.b.abs.cmp(natOne) == 0:
// z is int - normalize denominator
- z.b = z.b.make(0)
+ z.b.abs = z.b.abs.make(0)
default:
- if f := gcd(z.a.abs, z.b); f.cmp(natOne) != 0 {
- z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f)
- z.b, _ = z.b.div(nil, z.b, f)
+ neg := z.a.neg
+ z.a.neg = false
+ z.b.neg = false
+ if f := NewInt(0).binaryGCD(&z.a, &z.b); f.Cmp(intOne) != 0 {
+ z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs)
+ z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs)
+ if z.b.abs.cmp(natOne) == 0 {
+ // z is int - normalize denominator
+ z.b.abs = z.b.abs.make(0)
+ }
}
+ z.a.neg = neg
}
return z
}
@@ -207,31 +356,31 @@ func scaleDenom(x *Int, f nat) *Int {
// +1 if x > y
//
func (x *Rat) Cmp(y *Rat) int {
- return scaleDenom(&x.a, y.b).Cmp(scaleDenom(&y.a, x.b))
+ return scaleDenom(&x.a, y.b.abs).Cmp(scaleDenom(&y.a, x.b.abs))
}
// Add sets z to the sum x+y and returns z.
func (z *Rat) Add(x, y *Rat) *Rat {
- a1 := scaleDenom(&x.a, y.b)
- a2 := scaleDenom(&y.a, x.b)
+ a1 := scaleDenom(&x.a, y.b.abs)
+ a2 := scaleDenom(&y.a, x.b.abs)
z.a.Add(a1, a2)
- z.b = mulDenom(z.b, x.b, y.b)
+ z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs)
return z.norm()
}
// Sub sets z to the difference x-y and returns z.
func (z *Rat) Sub(x, y *Rat) *Rat {
- a1 := scaleDenom(&x.a, y.b)
- a2 := scaleDenom(&y.a, x.b)
+ a1 := scaleDenom(&x.a, y.b.abs)
+ a2 := scaleDenom(&y.a, x.b.abs)
z.a.Sub(a1, a2)
- z.b = mulDenom(z.b, x.b, y.b)
+ z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs)
return z.norm()
}
// Mul sets z to the product x*y and returns z.
func (z *Rat) Mul(x, y *Rat) *Rat {
z.a.Mul(&x.a, &y.a)
- z.b = mulDenom(z.b, x.b, y.b)
+ z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs)
return z.norm()
}
@@ -241,10 +390,10 @@ func (z *Rat) Quo(x, y *Rat) *Rat {
if len(y.a.abs) == 0 {
panic("division by zero")
}
- a := scaleDenom(&x.a, y.b)
- b := scaleDenom(&y.a, x.b)
+ a := scaleDenom(&x.a, y.b.abs)
+ b := scaleDenom(&y.a, x.b.abs)
z.a.abs = a.abs
- z.b = b.abs
+ z.b.abs = b.abs
z.a.neg = a.neg != b.neg
return z.norm()
}
@@ -286,7 +435,7 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
}
s = s[sep+1:]
var err error
- if z.b, _, err = z.b.scan(strings.NewReader(s), 10); err != nil {
+ if z.b.abs, _, err = z.b.abs.scan(strings.NewReader(s), 10); err != nil {
return nil, false
}
return z.norm(), true
@@ -317,11 +466,11 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
}
powTen := nat(nil).expNN(natTen, exp.abs, nil)
if exp.neg {
- z.b = powTen
+ z.b.abs = powTen
z.norm()
} else {
z.a.abs = z.a.abs.mul(z.a.abs, powTen)
- z.b = z.b.make(0)
+ z.b.abs = z.b.abs.make(0)
}
return z, true
@@ -330,8 +479,8 @@ func (z *Rat) SetString(s string) (*Rat, bool) {
// String returns a string representation of z in the form "a/b" (even if b == 1).
func (x *Rat) String() string {
s := "/1"
- if len(x.b) != 0 {
- s = "/" + x.b.decimalString()
+ if len(x.b.abs) != 0 {
+ s = "/" + x.b.abs.decimalString()
}
return x.a.String() + s
}
@@ -355,9 +504,9 @@ func (x *Rat) FloatString(prec int) string {
}
return s
}
- // x.b != 0
+ // x.b.abs != 0
- q, r := nat(nil).div(nat(nil), x.a.abs, x.b)
+ q, r := nat(nil).div(nat(nil), x.a.abs, x.b.abs)
p := natOne
if prec > 0 {
@@ -365,11 +514,11 @@ func (x *Rat) FloatString(prec int) string {
}
r = r.mul(r, p)
- r, r2 := r.div(nat(nil), r, x.b)
+ r, r2 := r.div(nat(nil), r, x.b.abs)
// see if we need to round up
r2 = r2.add(r2, r2)
- if x.b.cmp(r2) <= 0 {
+ if x.b.abs.cmp(r2) <= 0 {
r = r.add(r, natOne)
if r.cmp(p) >= 0 {
q = nat(nil).add(q, natOne)
@@ -396,8 +545,8 @@ const ratGobVersion byte = 1
// GobEncode implements the gob.GobEncoder interface.
func (x *Rat) GobEncode() ([]byte, error) {
- buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b))*_S) // extra bytes for version and sign bit (1), and numerator length (4)
- i := x.b.bytes(buf)
+ buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4)
+ i := x.b.abs.bytes(buf)
j := x.a.abs.bytes(buf[0:i])
n := i - j
if int(uint32(n)) != n {
@@ -427,6 +576,6 @@ func (z *Rat) GobDecode(buf []byte) error {
i := j + binary.BigEndian.Uint32(buf[j-4:j])
z.a.neg = b&1 != 0
z.a.abs = z.a.abs.setBytes(buf[j:i])
- z.b = z.b.setBytes(buf[i:])
+ z.b.abs = z.b.abs.setBytes(buf[i:])
return nil
}
diff --git a/src/pkg/math/big/rat_test.go b/src/pkg/math/big/rat_test.go
index f7f31ae1a..462dfb723 100644
--- a/src/pkg/math/big/rat_test.go
+++ b/src/pkg/math/big/rat_test.go
@@ -8,6 +8,9 @@ import (
"bytes"
"encoding/gob"
"fmt"
+ "math"
+ "strconv"
+ "strings"
"testing"
)
@@ -387,30 +390,19 @@ func TestRatGobEncoding(t *testing.T) {
var medium bytes.Buffer
enc := gob.NewEncoder(&medium)
dec := gob.NewDecoder(&medium)
- for i, test := range gobEncodingTests {
- for j := 0; j < 4; j++ {
- medium.Reset() // empty buffer for each test case (in case of failures)
- stest := test
- if j&1 != 0 {
- // negative numbers
- stest = "-" + test
- }
- if j%2 != 0 {
- // fractions
- stest = stest + "." + test
- }
- var tx Rat
- tx.SetString(stest)
- if err := enc.Encode(&tx); err != nil {
- t.Errorf("#%d%c: encoding failed: %s", i, 'a'+j, err)
- }
- var rx Rat
- if err := dec.Decode(&rx); err != nil {
- t.Errorf("#%d%c: decoding failed: %s", i, 'a'+j, err)
- }
- if rx.Cmp(&tx) != 0 {
- t.Errorf("#%d%c: transmission failed: got %s want %s", i, 'a'+j, &rx, &tx)
- }
+ for _, test := range encodingTests {
+ medium.Reset() // empty buffer for each test case (in case of failures)
+ var tx Rat
+ tx.SetString(test + ".14159265")
+ if err := enc.Encode(&tx); err != nil {
+ t.Errorf("encoding of %s failed: %s", &tx, err)
+ }
+ var rx Rat
+ if err := dec.Decode(&rx); err != nil {
+ t.Errorf("decoding of %s failed: %s", &tx, err)
+ }
+ if rx.Cmp(&tx) != 0 {
+ t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx)
}
}
}
@@ -454,3 +446,462 @@ func TestIssue2379(t *testing.T) {
t.Errorf("5) got %s want %s", x, q)
}
}
+
+func TestIssue3521(t *testing.T) {
+ a := new(Int)
+ b := new(Int)
+ a.SetString("64375784358435883458348587", 0)
+ b.SetString("4789759874531", 0)
+
+ // 0) a raw zero value has 1 as denominator
+ zero := new(Rat)
+ one := NewInt(1)
+ if zero.Denom().Cmp(one) != 0 {
+ t.Errorf("0) got %s want %s", zero.Denom(), one)
+ }
+
+ // 1a) a zero value remains zero independent of denominator
+ x := new(Rat)
+ x.Denom().Set(new(Int).Neg(b))
+ if x.Cmp(zero) != 0 {
+ t.Errorf("1a) got %s want %s", x, zero)
+ }
+
+ // 1b) a zero value may have a denominator != 0 and != 1
+ x.Num().Set(a)
+ qab := new(Rat).SetFrac(a, b)
+ if x.Cmp(qab) != 0 {
+ t.Errorf("1b) got %s want %s", x, qab)
+ }
+
+ // 2a) an integral value becomes a fraction depending on denominator
+ x.SetFrac64(10, 2)
+ x.Denom().SetInt64(3)
+ q53 := NewRat(5, 3)
+ if x.Cmp(q53) != 0 {
+ t.Errorf("2a) got %s want %s", x, q53)
+ }
+
+ // 2b) an integral value becomes a fraction depending on denominator
+ x = NewRat(10, 2)
+ x.Denom().SetInt64(3)
+ if x.Cmp(q53) != 0 {
+ t.Errorf("2b) got %s want %s", x, q53)
+ }
+
+ // 3) changing the numerator/denominator of a Rat changes the Rat
+ x.SetFrac(a, b)
+ a = x.Num()
+ b = x.Denom()
+ a.SetInt64(5)
+ b.SetInt64(3)
+ if x.Cmp(q53) != 0 {
+ t.Errorf("3) got %s want %s", x, q53)
+ }
+}
+
+// Test inputs to Rat.SetString. The prefix "long:" causes the test
+// to be skipped in --test.short mode. (The threshold is about 500us.)
+var float64inputs = []string{
+ //
+ // Constants plundered from strconv/testfp.txt.
+ //
+
+ // Table 1: Stress Inputs for Conversion to 53-bit Binary, < 1/2 ULP
+ "5e+125",
+ "69e+267",
+ "999e-026",
+ "7861e-034",
+ "75569e-254",
+ "928609e-261",
+ "9210917e+080",
+ "84863171e+114",
+ "653777767e+273",
+ "5232604057e-298",
+ "27235667517e-109",
+ "653532977297e-123",
+ "3142213164987e-294",
+ "46202199371337e-072",
+ "231010996856685e-073",
+ "9324754620109615e+212",
+ "78459735791271921e+049",
+ "272104041512242479e+200",
+ "6802601037806061975e+198",
+ "20505426358836677347e-221",
+ "836168422905420598437e-234",
+ "4891559871276714924261e+222",
+
+ // Table 2: Stress Inputs for Conversion to 53-bit Binary, > 1/2 ULP
+ "9e-265",
+ "85e-037",
+ "623e+100",
+ "3571e+263",
+ "81661e+153",
+ "920657e-023",
+ "4603285e-024",
+ "87575437e-309",
+ "245540327e+122",
+ "6138508175e+120",
+ "83356057653e+193",
+ "619534293513e+124",
+ "2335141086879e+218",
+ "36167929443327e-159",
+ "609610927149051e-255",
+ "3743626360493413e-165",
+ "94080055902682397e-242",
+ "899810892172646163e+283",
+ "7120190517612959703e+120",
+ "25188282901709339043e-252",
+ "308984926168550152811e-052",
+ "6372891218502368041059e+064",
+
+ // Table 14: Stress Inputs for Conversion to 24-bit Binary, <1/2 ULP
+ "5e-20",
+ "67e+14",
+ "985e+15",
+ "7693e-42",
+ "55895e-16",
+ "996622e-44",
+ "7038531e-32",
+ "60419369e-46",
+ "702990899e-20",
+ "6930161142e-48",
+ "25933168707e+13",
+ "596428896559e+20",
+
+ // Table 15: Stress Inputs for Conversion to 24-bit Binary, >1/2 ULP
+ "3e-23",
+ "57e+18",
+ "789e-35",
+ "2539e-18",
+ "76173e+28",
+ "887745e-11",
+ "5382571e-37",
+ "82381273e-35",
+ "750486563e-38",
+ "3752432815e-39",
+ "75224575729e-45",
+ "459926601011e+15",
+
+ //
+ // Constants plundered from strconv/atof_test.go.
+ //
+
+ "0",
+ "1",
+ "+1",
+ "1e23",
+ "1E23",
+ "100000000000000000000000",
+ "1e-100",
+ "123456700",
+ "99999999999999974834176",
+ "100000000000000000000001",
+ "100000000000000008388608",
+ "100000000000000016777215",
+ "100000000000000016777216",
+ "-1",
+ "-0.1",
+ "-0", // NB: exception made for this input
+ "1e-20",
+ "625e-3",
+
+ // largest float64
+ "1.7976931348623157e308",
+ "-1.7976931348623157e308",
+ // next float64 - too large
+ "1.7976931348623159e308",
+ "-1.7976931348623159e308",
+ // the border is ...158079
+ // borderline - okay
+ "1.7976931348623158e308",
+ "-1.7976931348623158e308",
+ // borderline - too large
+ "1.797693134862315808e308",
+ "-1.797693134862315808e308",
+
+ // a little too large
+ "1e308",
+ "2e308",
+ "1e309",
+
+ // way too large
+ "1e310",
+ "-1e310",
+ "1e400",
+ "-1e400",
+ "long:1e400000",
+ "long:-1e400000",
+
+ // denormalized
+ "1e-305",
+ "1e-306",
+ "1e-307",
+ "1e-308",
+ "1e-309",
+ "1e-310",
+ "1e-322",
+ // smallest denormal
+ "5e-324",
+ "4e-324",
+ "3e-324",
+ // too small
+ "2e-324",
+ // way too small
+ "1e-350",
+ "long:1e-400000",
+ // way too small, negative
+ "-1e-350",
+ "long:-1e-400000",
+
+ // try to overflow exponent
+ // [Disabled: too slow and memory-hungry with rationals.]
+ // "1e-4294967296",
+ // "1e+4294967296",
+ // "1e-18446744073709551616",
+ // "1e+18446744073709551616",
+
+ // http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
+ "2.2250738585072012e-308",
+ // http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
+
+ "2.2250738585072011e-308",
+
+ // A very large number (initially wrongly parsed by the fast algorithm).
+ "4.630813248087435e+307",
+
+ // A different kind of very large number.
+ "22.222222222222222",
+ "long:2." + strings.Repeat("2", 4000) + "e+1",
+
+ // Exactly halfway between 1 and math.Nextafter(1, 2).
+ // Round to even (down).
+ "1.00000000000000011102230246251565404236316680908203125",
+ // Slightly lower; still round down.
+ "1.00000000000000011102230246251565404236316680908203124",
+ // Slightly higher; round up.
+ "1.00000000000000011102230246251565404236316680908203126",
+ // Slightly higher, but you have to read all the way to the end.
+ "long:1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1",
+
+ // Smallest denormal, 2^(-1022-52)
+ "4.940656458412465441765687928682213723651e-324",
+ // Half of smallest denormal, 2^(-1022-53)
+ "2.470328229206232720882843964341106861825e-324",
+ // A little more than the exact half of smallest denormal
+ // 2^-1075 + 2^-1100. (Rounds to 1p-1074.)
+ "2.470328302827751011111470718709768633275e-324",
+ // The exact halfway between smallest normal and largest denormal:
+ // 2^-1022 - 2^-1075. (Rounds to 2^-1022.)
+ "2.225073858507201136057409796709131975935e-308",
+
+ "1152921504606846975", // 1<<60 - 1
+ "-1152921504606846975", // -(1<<60 - 1)
+ "1152921504606846977", // 1<<60 + 1
+ "-1152921504606846977", // -(1<<60 + 1)
+
+ "1/3",
+}
+
+func TestFloat64SpecialCases(t *testing.T) {
+ for _, input := range float64inputs {
+ if strings.HasPrefix(input, "long:") {
+ if testing.Short() {
+ continue
+ }
+ input = input[len("long:"):]
+ }
+
+ r, ok := new(Rat).SetString(input)
+ if !ok {
+ t.Errorf("Rat.SetString(%q) failed", input)
+ continue
+ }
+ f, exact := r.Float64()
+
+ // 1. Check string -> Rat -> float64 conversions are
+ // consistent with strconv.ParseFloat.
+ // Skip this check if the input uses "a/b" rational syntax.
+ if !strings.Contains(input, "/") {
+ e, _ := strconv.ParseFloat(input, 64)
+
+ // Careful: negative Rats too small for
+ // float64 become -0, but Rat obviously cannot
+ // preserve the sign from SetString("-0").
+ switch {
+ case math.Float64bits(e) == math.Float64bits(f):
+ // Ok: bitwise equal.
+ case f == 0 && r.Num().BitLen() == 0:
+ // Ok: Rat(0) is equivalent to both +/- float64(0).
+ default:
+ t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta=%g", input, e, e, f, f, f-e)
+ }
+ }
+
+ if !isFinite(f) {
+ continue
+ }
+
+ // 2. Check f is best approximation to r.
+ if !checkIsBestApprox(t, f, r) {
+ // Append context information.
+ t.Errorf("(input was %q)", input)
+ }
+
+ // 3. Check f->R->f roundtrip is non-lossy.
+ checkNonLossyRoundtrip(t, f)
+
+ // 4. Check exactness using slow algorithm.
+ if wasExact := new(Rat).SetFloat64(f).Cmp(r) == 0; wasExact != exact {
+ t.Errorf("Rat.SetString(%q).Float64().exact = %t, want %t", input, exact, wasExact)
+ }
+ }
+}
+
+func TestFloat64Distribution(t *testing.T) {
+ // Generate a distribution of (sign, mantissa, exp) values
+ // broader than the float64 range, and check Rat.Float64()
+ // always picks the closest float64 approximation.
+ var add = []int64{
+ 0,
+ 1,
+ 3,
+ 5,
+ 7,
+ 9,
+ 11,
+ }
+ var winc, einc = uint64(1), int(1) // soak test (~75s on x86-64)
+ if testing.Short() {
+ winc, einc = 10, 500 // quick test (~12ms on x86-64)
+ }
+
+ for _, sign := range "+-" {
+ for _, a := range add {
+ for wid := uint64(0); wid < 60; wid += winc {
+ b := int64(1<<wid + a)
+ if sign == '-' {
+ b = -b
+ }
+ for exp := -1100; exp < 1100; exp += einc {
+ num, den := NewInt(b), NewInt(1)
+ if exp > 0 {
+ num.Lsh(num, uint(exp))
+ } else {
+ den.Lsh(den, uint(-exp))
+ }
+ r := new(Rat).SetFrac(num, den)
+ f, _ := r.Float64()
+
+ if !checkIsBestApprox(t, f, r) {
+ // Append context information.
+ t.Errorf("(input was mantissa %#x, exp %d; f=%g (%b); f~%g; r=%v)",
+ b, exp, f, f, math.Ldexp(float64(b), exp), r)
+ }
+
+ checkNonLossyRoundtrip(t, f)
+ }
+ }
+ }
+ }
+}
+
+// TestFloat64NonFinite checks that SetFloat64 of a non-finite value
+// returns nil.
+func TestSetFloat64NonFinite(t *testing.T) {
+ for _, f := range []float64{math.NaN(), math.Inf(+1), math.Inf(-1)} {
+ var r Rat
+ if r2 := r.SetFloat64(f); r2 != nil {
+ t.Errorf("SetFloat64(%g) was %v, want nil", f, r2)
+ }
+ }
+}
+
+// checkNonLossyRoundtrip checks that a float->Rat->float roundtrip is
+// non-lossy for finite f.
+func checkNonLossyRoundtrip(t *testing.T, f float64) {
+ if !isFinite(f) {
+ return
+ }
+ r := new(Rat).SetFloat64(f)
+ if r == nil {
+ t.Errorf("Rat.SetFloat64(%g (%b)) == nil", f, f)
+ return
+ }
+ f2, exact := r.Float64()
+ if f != f2 || !exact {
+ t.Errorf("Rat.SetFloat64(%g).Float64() = %g (%b), %v, want %g (%b), %v; delta=%b",
+ f, f2, f2, exact, f, f, true, f2-f)
+ }
+}
+
+// delta returns the absolute difference between r and f.
+func delta(r *Rat, f float64) *Rat {
+ d := new(Rat).Sub(r, new(Rat).SetFloat64(f))
+ return d.Abs(d)
+}
+
+// checkIsBestApprox checks that f is the best possible float64
+// approximation of r.
+// Returns true on success.
+func checkIsBestApprox(t *testing.T, f float64, r *Rat) bool {
+ if math.Abs(f) >= math.MaxFloat64 {
+ // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat64).
+ // But we have tests for these special cases.
+ return true
+ }
+
+ // r must be strictly between f0 and f1, the floats bracketing f.
+ f0 := math.Nextafter(f, math.Inf(-1))
+ f1 := math.Nextafter(f, math.Inf(+1))
+
+ // For f to be correct, r must be closer to f than to f0 or f1.
+ df := delta(r, f)
+ df0 := delta(r, f0)
+ df1 := delta(r, f1)
+ if df.Cmp(df0) > 0 {
+ t.Errorf("Rat(%v).Float64() = %g (%b), but previous float64 %g (%b) is closer", r, f, f, f0, f0)
+ return false
+ }
+ if df.Cmp(df1) > 0 {
+ t.Errorf("Rat(%v).Float64() = %g (%b), but next float64 %g (%b) is closer", r, f, f, f1, f1)
+ return false
+ }
+ if df.Cmp(df0) == 0 && !isEven(f) {
+ t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0)
+ return false
+ }
+ if df.Cmp(df1) == 0 && !isEven(f) {
+ t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1)
+ return false
+ }
+ return true
+}
+
+func isEven(f float64) bool { return math.Float64bits(f)&1 == 0 }
+
+func TestIsFinite(t *testing.T) {
+ finites := []float64{
+ 1.0 / 3,
+ 4891559871276714924261e+222,
+ math.MaxFloat64,
+ math.SmallestNonzeroFloat64,
+ -math.MaxFloat64,
+ -math.SmallestNonzeroFloat64,
+ }
+ for _, f := range finites {
+ if !isFinite(f) {
+ t.Errorf("!IsFinite(%g (%b))", f, f)
+ }
+ }
+ nonfinites := []float64{
+ math.NaN(),
+ math.Inf(-1),
+ math.Inf(+1),
+ }
+ for _, f := range nonfinites {
+ if isFinite(f) {
+ t.Errorf("IsFinite(%g, (%b))", f, f)
+ }
+ }
+}
diff --git a/src/pkg/math/bits.go b/src/pkg/math/bits.go
index 1cf60ce7d..0df0b1cc9 100644
--- a/src/pkg/math/bits.go
+++ b/src/pkg/math/bits.go
@@ -5,7 +5,7 @@
package math
const (
- uvnan = 0x7FF0000000000001
+ uvnan = 0x7FF8000000000001
uvinf = 0x7FF0000000000000
uvneginf = 0xFFF0000000000000
mask = 0x7FF
diff --git a/src/pkg/math/cbrt.go b/src/pkg/math/cbrt.go
index 8c43f0afb..272e30923 100644
--- a/src/pkg/math/cbrt.go
+++ b/src/pkg/math/cbrt.go
@@ -12,7 +12,7 @@ package math
(http://www.jstor.org/stable/2006387?seq=9, accessed 11-Feb-2010)
*/
-// Cbrt returns the cube root of its argument.
+// Cbrt returns the cube root of x.
//
// Special cases are:
// Cbrt(±0) = ±0
diff --git a/src/pkg/math/copysign.go b/src/pkg/math/copysign.go
index ee65456a1..719c64b9e 100644
--- a/src/pkg/math/copysign.go
+++ b/src/pkg/math/copysign.go
@@ -4,7 +4,7 @@
package math
-// Copysign(x, y) returns a value with the magnitude
+// Copysign returns a value with the magnitude
// of x and the sign of y.
func Copysign(x, y float64) float64 {
const sign = 1 << 63
diff --git a/src/pkg/math/dim_amd64.s b/src/pkg/math/dim_amd64.s
index c867db553..a1505ce44 100644
--- a/src/pkg/math/dim_amd64.s
+++ b/src/pkg/math/dim_amd64.s
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
#define PosInf 0x7FF0000000000000
-#define NaN 0x7FF0000000000001
+#define NaN 0x7FF8000000000001
#define NegInf 0xFFF0000000000000
// func Dim(x, y float64) float64
diff --git a/src/pkg/math/erf.go b/src/pkg/math/erf.go
index c6f32bdbe..4cd80f80c 100644
--- a/src/pkg/math/erf.go
+++ b/src/pkg/math/erf.go
@@ -179,7 +179,7 @@ const (
sb7 = -2.24409524465858183362e+01 // 0xC03670E242712D62
)
-// Erf(x) returns the error function of x.
+// Erf returns the error function of x.
//
// Special cases are:
// Erf(+Inf) = 1
@@ -256,7 +256,7 @@ func Erf(x float64) float64 {
return 1 - r/x
}
-// Erfc(x) returns the complementary error function of x.
+// Erfc returns the complementary error function of x.
//
// Special cases are:
// Erfc(+Inf) = 0
diff --git a/src/pkg/math/floor_amd64.s b/src/pkg/math/floor_amd64.s
index 9fc49a56f..e72cc3cf9 100644
--- a/src/pkg/math/floor_amd64.s
+++ b/src/pkg/math/floor_amd64.s
@@ -1,12 +1,74 @@
-// Copyright 2011 The Go Authors. All rights reserved.
+// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+#define Big 0x4330000000000000 // 2**52
+
+// func Floor(x float64) float64
TEXT ·Floor(SB),7,$0
- JMP ·floor(SB)
+ MOVQ x+0(FP), AX
+ MOVQ $~(1<<63), DX // sign bit mask
+ ANDQ AX,DX // DX = |x|
+ SUBQ $1,DX
+ MOVQ $(Big - 1), CX // if |x| >= 2**52-1 or IsNaN(x) or |x| == 0, return x
+ CMPQ DX,CX
+ JAE isBig_floor
+ MOVQ AX, X0 // X0 = x
+ CVTTSD2SQ X0, AX
+ CVTSQ2SD AX, X1 // X1 = float(int(x))
+ CMPSD X1, X0, 1 // compare LT; X0 = 0xffffffffffffffff or 0
+ MOVSD $(-1.0), X2
+ ANDPD X2, X0 // if x < float(int(x)) {X0 = -1} else {X0 = 0}
+ ADDSD X1, X0
+ MOVSD X0, r+8(FP)
+ RET
+isBig_floor:
+ MOVQ AX, r+8(FP) // return x
+ RET
+// func Ceil(x float64) float64
TEXT ·Ceil(SB),7,$0
- JMP ·ceil(SB)
+ MOVQ x+0(FP), AX
+ MOVQ $~(1<<63), DX // sign bit mask
+ MOVQ AX, BX // BX = copy of x
+ ANDQ DX, BX // BX = |x|
+ MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x
+ CMPQ BX, CX
+ JAE isBig_ceil
+ MOVQ AX, X0 // X0 = x
+ MOVQ DX, X2 // X2 = sign bit mask
+ CVTTSD2SQ X0, AX
+ ANDNPD X0, X2 // X2 = sign
+ CVTSQ2SD AX, X1 // X1 = float(int(x))
+ CMPSD X1, X0, 2 // compare LE; X0 = 0xffffffffffffffff or 0
+ ORPD X2, X1 // if X1 = 0.0, incorporate sign
+ MOVSD $1.0, X3
+ ANDNPD X3, X0
+ ORPD X2, X0 // if float(int(x)) <= x {X0 = 1} else {X0 = -0}
+ ADDSD X1, X0
+ MOVSD X0, r+8(FP)
+ RET
+isBig_ceil:
+ MOVQ AX, r+8(FP)
+ RET
+// func Trunc(x float64) float64
TEXT ·Trunc(SB),7,$0
- JMP ·trunc(SB)
+ MOVQ x+0(FP), AX
+ MOVQ $~(1<<63), DX // sign bit mask
+ MOVQ AX, BX // BX = copy of x
+ ANDQ DX, BX // BX = |x|
+ MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x
+ CMPQ BX, CX
+ JAE isBig_trunc
+ MOVQ AX, X0
+ MOVQ DX, X2 // X2 = sign bit mask
+ CVTTSD2SQ X0, AX
+ ANDNPD X0, X2 // X2 = sign
+ CVTSQ2SD AX, X0 // X0 = float(int(x))
+ ORPD X2, X0 // if X0 = 0.0, incorporate sign
+ MOVSD X0, r+8(FP)
+ RET
+isBig_trunc:
+ MOVQ AX, r+8(FP) // return x
+ RET
diff --git a/src/pkg/math/frexp_386.s b/src/pkg/math/frexp_386.s
index 177c4b97b..95e50de02 100644
--- a/src/pkg/math/frexp_386.s
+++ b/src/pkg/math/frexp_386.s
@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Frexp(x float64) (f float64, e int)
+// func Frexp(f float64) (frac float64, exp int)
TEXT ·Frexp(SB),7,$0
- FMOVD x+0(FP), F0 // F0=x
+ FMOVD f+0(FP), F0 // F0=f
FXAM
FSTSW AX
SAHF
@@ -12,12 +12,12 @@ TEXT ·Frexp(SB),7,$0
JCS nan_zero_inf
FXTRACT // F0=f (0<=f<1), F1=e
FMULD $(0.5), F0 // F0=f (0.5<=f<1), F1=e
- FMOVDP F0, f+8(FP) // F0=e
+ FMOVDP F0, frac+8(FP) // F0=e
FLD1 // F0=1, F1=e
FADDDP F0, F1 // F0=e+1
- FMOVLP F0, e+16(FP) // (int=int32)
+ FMOVLP F0, exp+16(FP) // (int=int32)
RET
nan_zero_inf:
- FMOVDP F0, f+8(FP) // F0=e
- MOVL $0, e+16(FP) // e=0
+ FMOVDP F0, frac+8(FP) // F0=e
+ MOVL $0, exp+16(FP) // exp=0
RET
diff --git a/src/pkg/math/gamma.go b/src/pkg/math/gamma.go
index 7c6f421ba..164f54f33 100644
--- a/src/pkg/math/gamma.go
+++ b/src/pkg/math/gamma.go
@@ -110,19 +110,26 @@ func stirling(x float64) float64 {
return y
}
-// Gamma(x) returns the Gamma function of x.
+// Gamma returns the Gamma function of x.
//
// Special cases are:
-// Gamma(±Inf) = ±Inf
+// Gamma(+Inf) = +Inf
+// Gamma(+0) = +Inf
+// Gamma(-0) = -Inf
+// Gamma(x) = NaN for integer x < 0
+// Gamma(-Inf) = NaN
// Gamma(NaN) = NaN
-// Large values overflow to +Inf.
-// Zero and negative integer arguments return ±Inf.
func Gamma(x float64) float64 {
const Euler = 0.57721566490153286060651209008240243104215933593992 // A001620
// special cases
switch {
- case IsInf(x, -1) || IsNaN(x):
- return x
+ case isNegInt(x) || IsInf(x, -1) || IsNaN(x):
+ return NaN()
+ case x == 0:
+ if Signbit(x) {
+ return Inf(-1)
+ }
+ return Inf(1)
case x < -170.5674972726612 || x > 171.61447887182298:
return Inf(1)
}
@@ -185,3 +192,11 @@ small:
}
return z / ((1 + Euler*x) * x)
}
+
+func isNegInt(x float64) bool {
+ if x < 0 {
+ _, xf := Modf(x)
+ return xf == 0
+ }
+ return false
+}
diff --git a/src/pkg/math/hypot.go b/src/pkg/math/hypot.go
index df4d3eb70..3846e6d87 100644
--- a/src/pkg/math/hypot.go
+++ b/src/pkg/math/hypot.go
@@ -8,7 +8,7 @@ package math
Hypot -- sqrt(p*p + q*q), but overflows only if the result does.
*/
-// Hypot computes Sqrt(p*p + q*q), taking care to avoid
+// Hypot returns Sqrt(p*p + q*q), taking care to avoid
// unnecessary overflow and underflow.
//
// Special cases are:
diff --git a/src/pkg/math/hypot_386.s b/src/pkg/math/hypot_386.s
index 70ff19a17..51cd90419 100644
--- a/src/pkg/math/hypot_386.s
+++ b/src/pkg/math/hypot_386.s
@@ -2,35 +2,35 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Hypot(x, y float64) float64
+// func Hypot(p, q float64) float64
TEXT ·Hypot(SB),7,$0
// test bits for not-finite
- MOVL xh+4(FP), AX // high word x
+ MOVL p+4(FP), AX // high word p
ANDL $0x7ff00000, AX
CMPL AX, $0x7ff00000
JEQ not_finite
- MOVL yh+12(FP), AX // high word y
+ MOVL q+12(FP), AX // high word q
ANDL $0x7ff00000, AX
CMPL AX, $0x7ff00000
JEQ not_finite
- FMOVD x+0(FP), F0 // F0=x
- FABS // F0=|x|
- FMOVD y+8(FP), F0 // F0=y, F1=|x|
- FABS // F0=|y|, F1=|x|
+ FMOVD p+0(FP), F0 // F0=p
+ FABS // F0=|p|
+ FMOVD q+8(FP), F0 // F0=q, F1=|p|
+ FABS // F0=|q|, F1=|p|
FUCOMI F0, F1 // compare F0 to F1
JCC 2(PC) // jump if F0 >= F1
- FXCHD F0, F1 // F0=|x| (larger), F1=|y| (smaller)
+ FXCHD F0, F1 // F0=|p| (larger), F1=|q| (smaller)
FTST // compare F0 to 0
FSTSW AX
ANDW $0x4000, AX
JNE 10(PC) // jump if F0 = 0
- FXCHD F0, F1 // F0=y (smaller), F1=x (larger)
- FDIVD F1, F0 // F0=y(=y/x), F1=x
- FMULD F0, F0 // F0=y*y, F1=x
- FLD1 // F0=1, F1=y*y, F2=x
- FADDDP F0, F1 // F0=1+y*y, F1=x
- FSQRT // F0=sqrt(1+y*y), F1=x
- FMULDP F0, F1 // F0=x*sqrt(1+y*y)
+ FXCHD F0, F1 // F0=q (smaller), F1=p (larger)
+ FDIVD F1, F0 // F0=q(=q/p), F1=p
+ FMULD F0, F0 // F0=q*q, F1=p
+ FLD1 // F0=1, F1=q*q, F2=p
+ FADDDP F0, F1 // F0=1+q*q, F1=p
+ FSQRT // F0=sqrt(1+q*q), F1=p
+ FMULDP F0, F1 // F0=p*sqrt(1+q*q)
FMOVDP F0, r+16(FP)
RET
FMOVDP F0, F1 // F0=0
@@ -38,20 +38,20 @@ TEXT ·Hypot(SB),7,$0
RET
not_finite:
// test bits for -Inf or +Inf
- MOVL xh+4(FP), AX // high word x
- ORL xl+0(FP), AX // low word x
+ MOVL p+4(FP), AX // high word p
+ ORL p+0(FP), AX // low word p
ANDL $0x7fffffff, AX
CMPL AX, $0x7ff00000
JEQ is_inf
- MOVL yh+12(FP), AX // high word y
- ORL yl+8(FP), AX // low word y
+ MOVL q+12(FP), AX // high word q
+ ORL q+8(FP), AX // low word q
ANDL $0x7fffffff, AX
CMPL AX, $0x7ff00000
JEQ is_inf
- MOVL $0x7ff00000, rh+20(FP) // return NaN = 0x7FF0000000000001
- MOVL $0x00000001, rl+16(FP)
+ MOVL $0x7ff80000, r+20(FP) // return NaN = 0x7FF8000000000001
+ MOVL $0x00000001, r+16(FP)
RET
is_inf:
- MOVL AX, rh+20(FP) // return +Inf = 0x7FF0000000000000
- MOVL $0x00000000, rl+16(FP)
+ MOVL AX, r+20(FP) // return +Inf = 0x7FF0000000000000
+ MOVL $0x00000000, r+16(FP)
RET
diff --git a/src/pkg/math/hypot_amd64.s b/src/pkg/math/hypot_amd64.s
index 1f691e70e..02fff5b92 100644
--- a/src/pkg/math/hypot_amd64.s
+++ b/src/pkg/math/hypot_amd64.s
@@ -2,17 +2,17 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#define PosInf 0x7ff0000000000000
-#define NaN 0x7FF0000000000001
+#define PosInf 0x7FF0000000000000
+#define NaN 0x7FF8000000000001
-// func Hypot(x, y float64) float64
+// func Hypot(p, q float64) float64
TEXT ·Hypot(SB),7,$0
// test bits for special cases
- MOVQ x+0(FP), BX
+ MOVQ p+0(FP), BX
MOVQ $~(1<<63), AX
- ANDQ AX, BX // x = |x|
- MOVQ y+8(FP), CX
- ANDQ AX, CX // y = |y|
+ ANDQ AX, BX // p = |p|
+ MOVQ q+8(FP), CX
+ ANDQ AX, CX // q = |q|
MOVQ $PosInf, AX
CMPQ AX, BX
JLE isInfOrNaN
diff --git a/src/pkg/math/ldexp_386.s b/src/pkg/math/ldexp_386.s
index ed91ffcd3..3a65629d2 100644
--- a/src/pkg/math/ldexp_386.s
+++ b/src/pkg/math/ldexp_386.s
@@ -2,10 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Ldexp(f float64, e int) float64
+// func Ldexp(frac float64, exp int) float64
TEXT ·Ldexp(SB),7,$0
- FMOVL e+8(FP), F0 // F0=e
- FMOVD x+0(FP), F0 // F0=x, F1=e
+ FMOVL exp+8(FP), F0 // F0=exp
+ FMOVD frac+0(FP), F0 // F0=frac, F1=e
FSCALE // F0=x*2**e, F1=e
FMOVDP F0, F1 // F0=x*2**e
FMOVDP F0, r+12(FP)
diff --git a/src/pkg/math/log10.go b/src/pkg/math/log10.go
index 67c163a49..95cfbf47c 100644
--- a/src/pkg/math/log10.go
+++ b/src/pkg/math/log10.go
@@ -17,5 +17,6 @@ func log10(x float64) float64 {
func Log2(x float64) float64
func log2(x float64) float64 {
- return Log(x) * (1 / Ln2)
+ frac, exp := Frexp(x)
+ return Log(frac)*(1/Ln2) + float64(exp)
}
diff --git a/src/pkg/math/log_amd64.s b/src/pkg/math/log_amd64.s
index 79e35907c..75bc55764 100644
--- a/src/pkg/math/log_amd64.s
+++ b/src/pkg/math/log_amd64.s
@@ -12,7 +12,7 @@
#define L5 1.818357216161805012e-01 // 0x3FC7466496CB03DE
#define L6 1.531383769920937332e-01 // 0x3FC39A09D078C69F
#define L7 1.479819860511658591e-01 // 0x3FC2F112DF3E5244
-#define NaN 0x7FF0000000000001
+#define NaN 0x7FF8000000000001
#define NegInf 0xFFF0000000000000
#define PosInf 0x7FF0000000000000
@@ -54,13 +54,13 @@ TEXT ·Log(SB),7,$0
// s := f / (2 + f)
MOVSD $2.0, X0
ADDSD X2, X0
- MOVSD X2, X3
+ MOVAPD X2, X3
DIVSD X0, X3 // x1=k, x2= f, x3= s
// s2 := s * s
- MOVSD X3, X4 // x1= k, x2= f, x3= s
+ MOVAPD X3, X4 // x1= k, x2= f, x3= s
MULSD X4, X4 // x1= k, x2= f, x3= s, x4= s2
// s4 := s2 * s2
- MOVSD X4, X5 // x1= k, x2= f, x3= s, x4= s2
+ MOVAPD X4, X5 // x1= k, x2= f, x3= s, x4= s2
MULSD X5, X5 // x1= k, x2= f, x3= s, x4= s2, x5= s4
// t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7)))
MOVSD $L7, X6
diff --git a/src/pkg/math/logb.go b/src/pkg/math/logb.go
index d32f9f100..f2769d4fd 100644
--- a/src/pkg/math/logb.go
+++ b/src/pkg/math/logb.go
@@ -4,7 +4,7 @@
package math
-// Logb(x) returns the binary exponent of x.
+// Logb returns the binary exponent of x.
//
// Special cases are:
// Logb(±Inf) = +Inf
@@ -23,7 +23,7 @@ func Logb(x float64) float64 {
return float64(ilogb(x))
}
-// Ilogb(x) returns the binary exponent of x as an integer.
+// Ilogb returns the binary exponent of x as an integer.
//
// Special cases are:
// Ilogb(±Inf) = MaxInt32
diff --git a/src/pkg/math/modf_386.s b/src/pkg/math/modf_386.s
index 5ccab9812..f5dc415c3 100644
--- a/src/pkg/math/modf_386.s
+++ b/src/pkg/math/modf_386.s
@@ -2,18 +2,18 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// func Modf(x float64) (int float64, frac float64)
+// func Modf(f float64) (int float64, frac float64)
TEXT ·Modf(SB),7,$0
- FMOVD x+0(FP), F0 // F0=x
- FMOVD F0, F1 // F0=x, F1=x
+ FMOVD f+0(FP), F0 // F0=f
+ FMOVD F0, F1 // F0=f, F1=f
FSTCW -2(SP) // save old Control Word
MOVW -2(SP), AX
ORW $0x0c00, AX // Rounding Control set to truncate
MOVW AX, -4(SP) // store new Control Word
FLDCW -4(SP) // load new Control Word
- FRNDINT // F0=trunc(x), F1=x
+ FRNDINT // F0=trunc(f), F1=f
FLDCW -2(SP) // load old Control Word
- FSUBD F0, F1 // F0=trunc(x), F1=x-trunc(x)
- FMOVDP F0, i+8(FP) // F0=x-trunc(x)
- FMOVDP F0, f+16(FP)
+ FSUBD F0, F1 // F0=trunc(f), F1=f-trunc(f)
+ FMOVDP F0, int+8(FP) // F0=f-trunc(f)
+ FMOVDP F0, frac+16(FP)
RET
diff --git a/src/pkg/math/rand/example_test.go b/src/pkg/math/rand/example_test.go
new file mode 100644
index 000000000..4fe207d85
--- /dev/null
+++ b/src/pkg/math/rand/example_test.go
@@ -0,0 +1,69 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rand_test
+
+import (
+ "fmt"
+ "math/rand"
+ "os"
+ "text/tabwriter"
+)
+
+// This test serves as an example but also makes sure we don't change
+// the output of the random number generator when given a fixed seed.
+
+// This example shows the use of each of the methods on a *Rand.
+// The use of the global functions is the same, without the receiver.
+func Example() {
+ // Create and seed the generator.
+ // Typically a non-fixed seed should be used, such as time.Now().UnixNano().
+ // Using a fixed seed will produce the same output on every run.
+ r := rand.New(rand.NewSource(99))
+
+ // The tabwriter here helps us generate aligned output.
+ w := tabwriter.NewWriter(os.Stdout, 1, 1, 1, ' ', 0)
+ defer w.Flush()
+ show := func(name string, v1, v2, v3 interface{}) {
+ fmt.Fprintf(w, "%s\t%v\t%v\t%v\n", name, v1, v2, v3)
+ }
+
+ // Float32 and Float64 values are in [0, 1).
+ show("Float32", r.Float32(), r.Float32(), r.Float32())
+ show("Float64", r.Float64(), r.Float64(), r.Float64())
+
+ // ExpFloat64 values have an average of 1 but decay exponentially.
+ show("ExpFloat64", r.ExpFloat64(), r.ExpFloat64(), r.ExpFloat64())
+
+ // NormFloat64 values have an average of 0 and a standard deviation of 1.
+ show("NormFloat64", r.NormFloat64(), r.NormFloat64(), r.NormFloat64())
+
+ // Int31, Int63, and Uint32 generate values of the given width.
+ // The Int method (not shown) is like either Int31 or Int63
+ // depending on the size of 'int'.
+ show("Int31", r.Int31(), r.Int31(), r.Int31())
+ show("Int63", r.Int63(), r.Int63(), r.Int63())
+ show("Uint32", r.Int63(), r.Int63(), r.Int63())
+
+ // Intn, Int31n, and Int63n limit their output to be < n.
+ // They do so more carefully than using r.Int()%n.
+ show("Intn(10)", r.Intn(10), r.Intn(10), r.Intn(10))
+ show("Int31n(10)", r.Int31n(10), r.Int31n(10), r.Int31n(10))
+ show("Int63n(10)", r.Int63n(10), r.Int63n(10), r.Int63n(10))
+
+ // Perm generates a random permutation of the numbers [0, n).
+ show("Perm", r.Perm(5), r.Perm(5), r.Perm(5))
+ // Output:
+ // Float32 0.2635776 0.6358173 0.6718283
+ // Float64 0.628605430454327 0.4504798828572669 0.9562755949377957
+ // ExpFloat64 0.3362240648200941 1.4256072328483647 0.24354758816173044
+ // NormFloat64 0.17233959114940064 1.577014951434847 0.04259129641113857
+ // Int31 1501292890 1486668269 182840835
+ // Int63 3546343826724305832 5724354148158589552 5239846799706671610
+ // Uint32 5927547564735367388 637072299495207830 4128311955958246186
+ // Intn(10) 1 2 5
+ // Int31n(10) 4 7 8
+ // Int63n(10) 7 6 3
+ // Perm [1 4 2 3 0] [4 2 1 3 0] [1 2 4 0 3]
+}
diff --git a/src/pkg/math/rand/rand_test.go b/src/pkg/math/rand/rand_test.go
index bbd44e3f8..4d3abdb60 100644
--- a/src/pkg/math/rand/rand_test.go
+++ b/src/pkg/math/rand/rand_test.go
@@ -57,16 +57,13 @@ func (this *statsResults) checkSimilarDistribution(expected *statsResults) error
func getStatsResults(samples []float64) *statsResults {
res := new(statsResults)
- var sum float64
- for i := range samples {
- sum += samples[i]
+ var sum, squaresum float64
+ for _, s := range samples {
+ sum += s
+ squaresum += s * s
}
res.mean = sum / float64(len(samples))
- var devsum float64
- for i := range samples {
- devsum += math.Pow(samples[i]-res.mean, 2)
- }
- res.stddev = math.Sqrt(devsum / float64(len(samples)))
+ res.stddev = math.Sqrt(squaresum/float64(len(samples)) - res.mean*res.mean)
return res
}
diff --git a/src/pkg/math/remainder.go b/src/pkg/math/remainder.go
index 41efd7908..9a4e4154c 100644
--- a/src/pkg/math/remainder.go
+++ b/src/pkg/math/remainder.go
@@ -4,7 +4,7 @@
package math
-// The original C code and the the comment below are from
+// The original C code and the comment below are from
// FreeBSD's /usr/src/lib/msun/src/e_remainder.c and came
// with this notice. The go code is a simplified version of
// the original C.
diff --git a/src/pkg/math/sincos.go b/src/pkg/math/sincos.go
index 730042920..718030319 100644
--- a/src/pkg/math/sincos.go
+++ b/src/pkg/math/sincos.go
@@ -6,7 +6,7 @@ package math
// Coefficients _sin[] and _cos[] are found in pkg/math/sin.go.
-// Sincos(x) returns Sin(x), Cos(x).
+// Sincos returns Sin(x), Cos(x).
//
// Special cases are:
// Sincos(±0) = ±0, 1
diff --git a/src/pkg/math/sincos_386.s b/src/pkg/math/sincos_386.s
index 9dd37a3b7..8f5e0f8d1 100644
--- a/src/pkg/math/sincos_386.s
+++ b/src/pkg/math/sincos_386.s
@@ -9,8 +9,8 @@ TEXT ·Sincos(SB),7,$0
FSTSW AX // AX=status word
ANDW $0x0400, AX
JNE 4(PC) // jump if x outside range
- FMOVDP F0, c+16(FP) // F0=sin(x)
- FMOVDP F0, s+8(FP)
+ FMOVDP F0, cos+16(FP) // F0=sin(x)
+ FMOVDP F0, sin+8(FP)
RET
FLDPI // F0=Pi, F1=x
FADDD F0, F0 // F0=2*Pi, F1=x
@@ -21,6 +21,6 @@ TEXT ·Sincos(SB),7,$0
JNE -3(PC) // jump if reduction incomplete
FMOVDP F0, F1 // F0=reduced_x
FSINCOS // F0=cos(reduced_x), F1=sin(reduced_x)
- FMOVDP F0, c+16(FP) // F0=sin(reduced_x)
- FMOVDP F0, s+8(FP)
+ FMOVDP F0, cos+16(FP) // F0=sin(reduced_x)
+ FMOVDP F0, sin+8(FP)
RET
diff --git a/src/pkg/math/sincos_amd64.s b/src/pkg/math/sincos_amd64.s
index 18c824e51..c9dea0916 100644
--- a/src/pkg/math/sincos_amd64.s
+++ b/src/pkg/math/sincos_amd64.s
@@ -19,7 +19,7 @@
#define PosOne 0x3FF0000000000000
#define PosInf 0x7FF0000000000000
-#define NaN 0x7FF0000000000001
+#define NaN 0x7FF8000000000001
#define PI4A 0.7853981554508209228515625 // pi/4 split into three parts
#define PI4B 0.794662735614792836713604629039764404296875e-8
#define PI4C 0.306161699786838294306516483068750264552437361480769e-16
diff --git a/src/pkg/math/tanh.go b/src/pkg/math/tanh.go
index 03a641b4d..7305be66c 100644
--- a/src/pkg/math/tanh.go
+++ b/src/pkg/math/tanh.go
@@ -4,12 +4,66 @@
package math
-/*
- Floating-point hyperbolic tangent.
+// The original C code, the long comment, and the constants
+// below were from http://netlib.sandia.gov/cephes/cmath/sin.c,
+// available from http://www.netlib.org/cephes/cmath.tgz.
+// The go code is a simplified version of the original C.
+// tanh.c
+//
+// Hyperbolic tangent
+//
+// SYNOPSIS:
+//
+// double x, y, tanh();
+//
+// y = tanh( x );
+//
+// DESCRIPTION:
+//
+// Returns hyperbolic tangent of argument in the range MINLOG to MAXLOG.
+// MAXLOG = 8.8029691931113054295988e+01 = log(2**127)
+// MINLOG = -8.872283911167299960540e+01 = log(2**-128)
+//
+// A rational function is used for |x| < 0.625. The form
+// x + x**3 P(x)/Q(x) of Cody & Waite is employed.
+// Otherwise,
+// tanh(x) = sinh(x)/cosh(x) = 1 - 2/(exp(2x) + 1).
+//
+// ACCURACY:
+//
+// Relative error:
+// arithmetic domain # trials peak rms
+// IEEE -2,2 30000 2.5e-16 5.8e-17
+//
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
+//
+// The readme file at http://netlib.sandia.gov/cephes/ says:
+// Some software in this archive may be from the book _Methods and
+// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
+// International, 1989) or from the Cephes Mathematical Library, a
+// commercial product. In either event, it is copyrighted by the author.
+// What you see here may be used freely but it comes with no support or
+// guarantee.
+//
+// The two known misprints in the book are repaired here in the
+// source listings for the gamma function and the incomplete beta
+// integral.
+//
+// Stephen L. Moshier
+// moshier@na-net.ornl.gov
+//
- Sinh and Cosh are called except for large arguments, which
- would cause overflow improperly.
-*/
+var tanhP = [...]float64{
+ -9.64399179425052238628E-1,
+ -9.92877231001918586564E1,
+ -1.61468768441708447952E3,
+}
+var tanhQ = [...]float64{
+ 1.12811678491632931402E2,
+ 2.23548839060100448583E3,
+ 4.84406305325125486048E3,
+}
// Tanh computes the hyperbolic tangent of x.
//
@@ -18,15 +72,26 @@ package math
// Tanh(±Inf) = ±1
// Tanh(NaN) = NaN
func Tanh(x float64) float64 {
- if x < 0 {
- x = -x
- if x > 21 {
+ const MAXLOG = 8.8029691931113054295988e+01 // log(2**127)
+ z := Abs(x)
+ switch {
+ case z > 0.5*MAXLOG:
+ if x < 0 {
return -1
}
- return -Sinh(x) / Cosh(x)
- }
- if x > 21 {
return 1
+ case z >= 0.625:
+ s := Exp(2 * z)
+ z = 1 - 2/(s+1)
+ if x < 0 {
+ z = -z
+ }
+ default:
+ if x == 0 {
+ return x
+ }
+ s := x * x
+ z = x + x*s*((tanhP[0]*s+tanhP[1])*s+tanhP[2])/(((s+tanhQ[0])*s+tanhQ[1])*s+tanhQ[2])
}
- return Sinh(x) / Cosh(x)
+ return z
}