diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 12:50:40 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 12:50:40 -0700 |
commit | 2a0db60599fdd75b1bc3e297180fbe1282763759 (patch) | |
tree | 68d43c3e30d9ab961ddf6b7365201ca6b675b253 /src/math | |
parent | ef33cba3c8de6c431df56503df51fcd3a473c89e (diff) | |
parent | f154da9e12608589e8d5f0508f908a0c3e88a1bb (diff) | |
download | golang-2a0db60599fdd75b1bc3e297180fbe1282763759.tar.gz |
Merge tag 'upstream/1.4' into debian-experimental
* tag 'upstream/1.4':
Imported Upstream version 1.4
Diffstat (limited to 'src/math')
172 files changed, 22094 insertions, 0 deletions
diff --git a/src/math/abs.go b/src/math/abs.go new file mode 100644 index 000000000..bc41a6d6b --- /dev/null +++ b/src/math/abs.go @@ -0,0 +1,22 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Abs returns the absolute value of x. +// +// Special cases are: +// Abs(±Inf) = +Inf +// Abs(NaN) = NaN +func Abs(x float64) float64 + +func abs(x float64) float64 { + switch { + case x < 0: + return -x + case x == 0: + return 0 // return correctly abs(-0) + } + return x +} diff --git a/src/math/abs_386.s b/src/math/abs_386.s new file mode 100644 index 000000000..f30a439c2 --- /dev/null +++ b/src/math/abs_386.s @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Abs(x float64) float64 +TEXT ·Abs(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FABS // F0=|x| + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/abs_amd64.s b/src/math/abs_amd64.s new file mode 100644 index 000000000..0424eb5fa --- /dev/null +++ b/src/math/abs_amd64.s @@ -0,0 +1,14 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Abs(x float64) float64 +TEXT ·Abs(SB),NOSPLIT,$0 + MOVQ $(1<<63), BX + MOVQ BX, X0 // movsd $(-0.0), x0 + MOVSD x+0(FP), X1 + ANDNPD X1, X0 + MOVSD X0, ret+8(FP) + RET diff --git a/src/math/abs_amd64p32.s b/src/math/abs_amd64p32.s new file mode 100644 index 000000000..08c8c6b33 --- /dev/null +++ b/src/math/abs_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "abs_amd64.s" diff --git a/src/math/abs_arm.s b/src/math/abs_arm.s new file mode 100644 index 000000000..bfa77eb49 --- /dev/null +++ b/src/math/abs_arm.s @@ -0,0 +1,13 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Abs(SB),NOSPLIT,$0 + MOVW x_lo+0(FP), R0 + MOVW x_hi+4(FP), R1 + AND $((1<<31)-1), R1 + MOVW R0, ret_lo+8(FP) + MOVW R1, ret_hi+12(FP) + RET diff --git a/src/math/acosh.go b/src/math/acosh.go new file mode 100644 index 000000000..e394008b0 --- /dev/null +++ b/src/math/acosh.go @@ -0,0 +1,60 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_acosh.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// __ieee754_acosh(x) +// Method : +// Based on +// acosh(x) = log [ x + sqrt(x*x-1) ] +// we have +// acosh(x) := log(x)+ln2, if x is large; else +// acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else +// acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1. +// +// Special cases: +// acosh(x) is NaN with signal if x<1. +// acosh(NaN) is NaN without signal. +// + +// Acosh returns the inverse hyperbolic cosine of x. 
+// +// Special cases are: +// Acosh(+Inf) = +Inf +// Acosh(x) = NaN if x < 1 +// Acosh(NaN) = NaN +func Acosh(x float64) float64 { + const ( + Ln2 = 6.93147180559945286227e-01 // 0x3FE62E42FEFA39EF + Large = 1 << 28 // 2**28 + ) + // first case is special case + switch { + case x < 1 || IsNaN(x): + return NaN() + case x == 1: + return 0 + case x >= Large: + return Log(x) + Ln2 // x > 2**28 + case x > 2: + return Log(2*x - 1/(x+Sqrt(x*x-1))) // 2**28 > x > 2 + } + t := x - 1 + return Log1p(t + Sqrt(2*t+t*t)) // 2 >= x > 1 +} diff --git a/src/math/all_test.go b/src/math/all_test.go new file mode 100644 index 000000000..763efb2e6 --- /dev/null +++ b/src/math/all_test.go @@ -0,0 +1,2992 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math_test + +import ( + "fmt" + . "math" + "testing" +) + +var vf = []float64{ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + -8.6859247685756013e+00, +} + +// The expected results below were computed by the high precision calculators +// at http://keisan.casio.com/. More exact input values (array vf[], above) +// were obtained by printing them with "%.26f". The answers were calculated +// to 26 digits (by using the "Digit number" drop-down control of each +// calculator). 
+var acos = []float64{ + 1.0496193546107222142571536e+00, + 6.8584012813664425171660692e-01, + 1.5984878714577160325521819e+00, + 2.0956199361475859327461799e+00, + 2.7053008467824138592616927e-01, + 1.2738121680361776018155625e+00, + 1.0205369421140629186287407e+00, + 1.2945003481781246062157835e+00, + 1.3872364345374451433846657e+00, + 2.6231510803970463967294145e+00, +} +var acosh = []float64{ + 2.4743347004159012494457618e+00, + 2.8576385344292769649802701e+00, + 7.2796961502981066190593175e-01, + 2.4796794418831451156471977e+00, + 3.0552020742306061857212962e+00, + 2.044238592688586588942468e+00, + 2.5158701513104513595766636e+00, + 1.99050839282411638174299e+00, + 1.6988625798424034227205445e+00, + 2.9611454842470387925531875e+00, +} +var asin = []float64{ + 5.2117697218417440497416805e-01, + 8.8495619865825236751471477e-01, + -02.769154466281941332086016e-02, + -5.2482360935268931351485822e-01, + 1.3002662421166552333051524e+00, + 2.9698415875871901741575922e-01, + 5.5025938468083370060258102e-01, + 2.7629597861677201301553823e-01, + 1.83559892257451475846656e-01, + -1.0523547536021497774980928e+00, +} +var asinh = []float64{ + 2.3083139124923523427628243e+00, + 2.743551594301593620039021e+00, + -2.7345908534880091229413487e-01, + -2.3145157644718338650499085e+00, + 2.9613652154015058521951083e+00, + 1.7949041616585821933067568e+00, + 2.3564032905983506405561554e+00, + 1.7287118790768438878045346e+00, + 1.3626658083714826013073193e+00, + -2.8581483626513914445234004e+00, +} +var atan = []float64{ + 1.372590262129621651920085e+00, + 1.442290609645298083020664e+00, + -2.7011324359471758245192595e-01, + -1.3738077684543379452781531e+00, + 1.4673921193587666049154681e+00, + 1.2415173565870168649117764e+00, + 1.3818396865615168979966498e+00, + 1.2194305844639670701091426e+00, + 1.0696031952318783760193244e+00, + -1.4561721938838084990898679e+00, +} +var atanh = []float64{ + 5.4651163712251938116878204e-01, + 1.0299474112843111224914709e+00, + 
-2.7695084420740135145234906e-02, + -5.5072096119207195480202529e-01, + 1.9943940993171843235906642e+00, + 3.01448604578089708203017e-01, + 5.8033427206942188834370595e-01, + 2.7987997499441511013958297e-01, + 1.8459947964298794318714228e-01, + -1.3273186910532645867272502e+00, +} +var atan2 = []float64{ + 1.1088291730037004444527075e+00, + 9.1218183188715804018797795e-01, + 1.5984772603216203736068915e+00, + 2.0352918654092086637227327e+00, + 8.0391819139044720267356014e-01, + 1.2861075249894661588866752e+00, + 1.0889904479131695712182587e+00, + 1.3044821793397925293797357e+00, + 1.3902530903455392306872261e+00, + 2.2859857424479142655411058e+00, +} +var cbrt = []float64{ + 1.7075799841925094446722675e+00, + 1.9779982212970353936691498e+00, + -6.5177429017779910853339447e-01, + -1.7111838886544019873338113e+00, + 2.1279920909827937423960472e+00, + 1.4303536770460741452312367e+00, + 1.7357021059106154902341052e+00, + 1.3972633462554328350552916e+00, + 1.2221149580905388454977636e+00, + -2.0556003730500069110343596e+00, +} +var ceil = []float64{ + 5.0000000000000000e+00, + 8.0000000000000000e+00, + 0.0000000000000000e+00, + -5.0000000000000000e+00, + 1.0000000000000000e+01, + 3.0000000000000000e+00, + 6.0000000000000000e+00, + 3.0000000000000000e+00, + 2.0000000000000000e+00, + -8.0000000000000000e+00, +} +var copysign = []float64{ + -4.9790119248836735e+00, + -7.7388724745781045e+00, + -2.7688005719200159e-01, + -5.0106036182710749e+00, + -9.6362937071984173e+00, + -2.9263772392439646e+00, + -5.2290834314593066e+00, + -2.7279399104360102e+00, + -1.8253080916808550e+00, + -8.6859247685756013e+00, +} +var cos = []float64{ + 2.634752140995199110787593e-01, + 1.148551260848219865642039e-01, + 9.6191297325640768154550453e-01, + 2.938141150061714816890637e-01, + -9.777138189897924126294461e-01, + -9.7693041344303219127199518e-01, + 4.940088096948647263961162e-01, + -9.1565869021018925545016502e-01, + -2.517729313893103197176091e-01, + -7.39241351595676573201918e-01, +} + 
+// Results for 100000 * Pi + vf[i] +var cosLarge = []float64{ + 2.634752141185559426744e-01, + 1.14855126055543100712e-01, + 9.61912973266488928113e-01, + 2.9381411499556122552e-01, + -9.777138189880161924641e-01, + -9.76930413445147608049e-01, + 4.940088097314976789841e-01, + -9.15658690217517835002e-01, + -2.51772931436786954751e-01, + -7.3924135157173099849e-01, +} +var cosh = []float64{ + 7.2668796942212842775517446e+01, + 1.1479413465659254502011135e+03, + 1.0385767908766418550935495e+00, + 7.5000957789658051428857788e+01, + 7.655246669605357888468613e+03, + 9.3567491758321272072888257e+00, + 9.331351599270605471131735e+01, + 7.6833430994624643209296404e+00, + 3.1829371625150718153881164e+00, + 2.9595059261916188501640911e+03, +} +var erf = []float64{ + 5.1865354817738701906913566e-01, + 7.2623875834137295116929844e-01, + -3.123458688281309990629839e-02, + -5.2143121110253302920437013e-01, + 8.2704742671312902508629582e-01, + 3.2101767558376376743993945e-01, + 5.403990312223245516066252e-01, + 3.0034702916738588551174831e-01, + 2.0369924417882241241559589e-01, + -7.8069386968009226729944677e-01, +} +var erfc = []float64{ + 4.8134645182261298093086434e-01, + 2.7376124165862704883070156e-01, + 1.0312345868828130999062984e+00, + 1.5214312111025330292043701e+00, + 1.7295257328687097491370418e-01, + 6.7898232441623623256006055e-01, + 4.596009687776754483933748e-01, + 6.9965297083261411448825169e-01, + 7.9630075582117758758440411e-01, + 1.7806938696800922672994468e+00, +} +var exp = []float64{ + 1.4533071302642137507696589e+02, + 2.2958822575694449002537581e+03, + 7.5814542574851666582042306e-01, + 6.6668778421791005061482264e-03, + 1.5310493273896033740861206e+04, + 1.8659907517999328638667732e+01, + 1.8662167355098714543942057e+02, + 1.5301332413189378961665788e+01, + 6.2047063430646876349125085e+00, + 1.6894712385826521111610438e-04, +} +var expm1 = []float64{ + 5.105047796122957327384770212e-02, + 8.046199708567344080562675439e-02, + 
-2.764970978891639815187418703e-03, + -4.8871434888875355394330300273e-02, + 1.0115864277221467777117227494e-01, + 2.969616407795910726014621657e-02, + 5.368214487944892300914037972e-02, + 2.765488851131274068067445335e-02, + 1.842068661871398836913874273e-02, + -8.3193870863553801814961137573e-02, +} +var exp2 = []float64{ + 3.1537839463286288034313104e+01, + 2.1361549283756232296144849e+02, + 8.2537402562185562902577219e-01, + 3.1021158628740294833424229e-02, + 7.9581744110252191462569661e+02, + 7.6019905892596359262696423e+00, + 3.7506882048388096973183084e+01, + 6.6250893439173561733216375e+00, + 3.5438267900243941544605339e+00, + 2.4281533133513300984289196e-03, +} +var fabs = []float64{ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + 2.7688005719200159e-01, + 5.0106036182710749e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + 8.6859247685756013e+00, +} +var fdim = []float64{ + 4.9790119248836735e+00, + 7.7388724745781045e+00, + 0.0000000000000000e+00, + 0.0000000000000000e+00, + 9.6362937071984173e+00, + 2.9263772392439646e+00, + 5.2290834314593066e+00, + 2.7279399104360102e+00, + 1.8253080916808550e+00, + 0.0000000000000000e+00, +} +var floor = []float64{ + 4.0000000000000000e+00, + 7.0000000000000000e+00, + -1.0000000000000000e+00, + -6.0000000000000000e+00, + 9.0000000000000000e+00, + 2.0000000000000000e+00, + 5.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + -9.0000000000000000e+00, +} +var fmod = []float64{ + 4.197615023265299782906368e-02, + 2.261127525421895434476482e+00, + 3.231794108794261433104108e-02, + 4.989396381728925078391512e+00, + 3.637062928015826201999516e-01, + 1.220868282268106064236690e+00, + 4.770916568540693347699744e+00, + 1.816180268691969246219742e+00, + 8.734595415957246977711748e-01, + 1.314075231424398637614104e+00, +} + +type fi struct { + f float64 + i int +} + +var frexp = []fi{ + 
{6.2237649061045918750e-01, 3}, + {9.6735905932226306250e-01, 3}, + {-5.5376011438400318000e-01, -1}, + {-6.2632545228388436250e-01, 3}, + {6.02268356699901081250e-01, 4}, + {7.3159430981099115000e-01, 2}, + {6.5363542893241332500e-01, 3}, + {6.8198497760900255000e-01, 2}, + {9.1265404584042750000e-01, 1}, + {-5.4287029803597508250e-01, 4}, +} +var gamma = []float64{ + 2.3254348370739963835386613898e+01, + 2.991153837155317076427529816e+03, + -4.561154336726758060575129109e+00, + 7.719403468842639065959210984e-01, + 1.6111876618855418534325755566e+05, + 1.8706575145216421164173224946e+00, + 3.4082787447257502836734201635e+01, + 1.579733951448952054898583387e+00, + 9.3834586598354592860187267089e-01, + -2.093995902923148389186189429e-05, +} +var j0 = []float64{ + -1.8444682230601672018219338e-01, + 2.27353668906331975435892e-01, + 9.809259936157051116270273e-01, + -1.741170131426226587841181e-01, + -2.1389448451144143352039069e-01, + -2.340905848928038763337414e-01, + -1.0029099691890912094586326e-01, + -1.5466726714884328135358907e-01, + 3.252650187653420388714693e-01, + -8.72218484409407250005360235e-03, +} +var j1 = []float64{ + -3.251526395295203422162967e-01, + 1.893581711430515718062564e-01, + -1.3711761352467242914491514e-01, + 3.287486536269617297529617e-01, + 1.3133899188830978473849215e-01, + 3.660243417832986825301766e-01, + -3.4436769271848174665420672e-01, + 4.329481396640773768835036e-01, + 5.8181350531954794639333955e-01, + -2.7030574577733036112996607e-01, +} +var j2 = []float64{ + 5.3837518920137802565192769e-02, + -1.7841678003393207281244667e-01, + 9.521746934916464142495821e-03, + 4.28958355470987397983072e-02, + 2.4115371837854494725492872e-01, + 4.842458532394520316844449e-01, + -3.142145220618633390125946e-02, + 4.720849184745124761189957e-01, + 3.122312022520957042957497e-01, + 7.096213118930231185707277e-02, +} +var jM3 = []float64{ + -3.684042080996403091021151e-01, + 2.8157665936340887268092661e-01, + 4.401005480841948348343589e-04, + 
3.629926999056814081597135e-01, + 3.123672198825455192489266e-02, + -2.958805510589623607540455e-01, + -3.2033177696533233403289416e-01, + -2.592737332129663376736604e-01, + -1.0241334641061485092351251e-01, + -2.3762660886100206491674503e-01, +} +var lgamma = []fi{ + {3.146492141244545774319734e+00, 1}, + {8.003414490659126375852113e+00, 1}, + {1.517575735509779707488106e+00, -1}, + {-2.588480028182145853558748e-01, 1}, + {1.1989897050205555002007985e+01, 1}, + {6.262899811091257519386906e-01, 1}, + {3.5287924899091566764846037e+00, 1}, + {4.5725644770161182299423372e-01, 1}, + {-6.363667087767961257654854e-02, 1}, + {-1.077385130910300066425564e+01, -1}, +} +var log = []float64{ + 1.605231462693062999102599e+00, + 2.0462560018708770653153909e+00, + -1.2841708730962657801275038e+00, + 1.6115563905281545116286206e+00, + 2.2655365644872016636317461e+00, + 1.0737652208918379856272735e+00, + 1.6542360106073546632707956e+00, + 1.0035467127723465801264487e+00, + 6.0174879014578057187016475e-01, + 2.161703872847352815363655e+00, +} +var logb = []float64{ + 2.0000000000000000e+00, + 2.0000000000000000e+00, + -2.0000000000000000e+00, + 2.0000000000000000e+00, + 3.0000000000000000e+00, + 1.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + 0.0000000000000000e+00, + 3.0000000000000000e+00, +} +var log10 = []float64{ + 6.9714316642508290997617083e-01, + 8.886776901739320576279124e-01, + -5.5770832400658929815908236e-01, + 6.998900476822994346229723e-01, + 9.8391002850684232013281033e-01, + 4.6633031029295153334285302e-01, + 7.1842557117242328821552533e-01, + 4.3583479968917773161304553e-01, + 2.6133617905227038228626834e-01, + 9.3881606348649405716214241e-01, +} +var log1p = []float64{ + 4.8590257759797794104158205e-02, + 7.4540265965225865330849141e-02, + -2.7726407903942672823234024e-03, + -5.1404917651627649094953380e-02, + 9.1998280672258624681335010e-02, + 2.8843762576593352865894824e-02, + 5.0969534581863707268992645e-02, + 
2.6913947602193238458458594e-02, + 1.8088493239630770262045333e-02, + -9.0865245631588989681559268e-02, +} +var log2 = []float64{ + 2.3158594707062190618898251e+00, + 2.9521233862883917703341018e+00, + -1.8526669502700329984917062e+00, + 2.3249844127278861543568029e+00, + 3.268478366538305087466309e+00, + 1.5491157592596970278166492e+00, + 2.3865580889631732407886495e+00, + 1.447811865817085365540347e+00, + 8.6813999540425116282815557e-01, + 3.118679457227342224364709e+00, +} +var modf = [][2]float64{ + {4.0000000000000000e+00, 9.7901192488367350108546816e-01}, + {7.0000000000000000e+00, 7.3887247457810456552351752e-01}, + {0.0000000000000000e+00, -2.7688005719200159404635997e-01}, + {-5.0000000000000000e+00, -1.060361827107492160848778e-02}, + {9.0000000000000000e+00, 6.3629370719841737980004837e-01}, + {2.0000000000000000e+00, 9.2637723924396464525443662e-01}, + {5.0000000000000000e+00, 2.2908343145930665230025625e-01}, + {2.0000000000000000e+00, 7.2793991043601025126008608e-01}, + {1.0000000000000000e+00, 8.2530809168085506044576505e-01}, + {-8.0000000000000000e+00, -6.8592476857560136238589621e-01}, +} +var nextafter32 = []float32{ + 4.979012489318848e+00, + 7.738873004913330e+00, + -2.768800258636475e-01, + -5.010602951049805e+00, + 9.636294364929199e+00, + 2.926377534866333e+00, + 5.229084014892578e+00, + 2.727940082550049e+00, + 1.825308203697205e+00, + -8.685923576354980e+00, +} +var nextafter64 = []float64{ + 4.97901192488367438926388786e+00, + 7.73887247457810545370193722e+00, + -2.7688005719200153853520874e-01, + -5.01060361827107403343006808e+00, + 9.63629370719841915615688777e+00, + 2.92637723924396508934364647e+00, + 5.22908343145930754047867595e+00, + 2.72793991043601069534929593e+00, + 1.82530809168085528249036997e+00, + -8.68592476857559958602905681e+00, +} +var pow = []float64{ + 9.5282232631648411840742957e+04, + 5.4811599352999901232411871e+07, + 5.2859121715894396531132279e-01, + 9.7587991957286474464259698e-06, + 
4.328064329346044846740467e+09, + 8.4406761805034547437659092e+02, + 1.6946633276191194947742146e+05, + 5.3449040147551939075312879e+02, + 6.688182138451414936380374e+01, + 2.0609869004248742886827439e-09, +} +var remainder = []float64{ + 4.197615023265299782906368e-02, + 2.261127525421895434476482e+00, + 3.231794108794261433104108e-02, + -2.120723654214984321697556e-02, + 3.637062928015826201999516e-01, + 1.220868282268106064236690e+00, + -4.581668629186133046005125e-01, + -9.117596417440410050403443e-01, + 8.734595415957246977711748e-01, + 1.314075231424398637614104e+00, +} +var signbit = []bool{ + false, + false, + true, + true, + false, + false, + false, + false, + false, + true, +} +var sin = []float64{ + -9.6466616586009283766724726e-01, + 9.9338225271646545763467022e-01, + -2.7335587039794393342449301e-01, + 9.5586257685042792878173752e-01, + -2.099421066779969164496634e-01, + 2.135578780799860532750616e-01, + -8.694568971167362743327708e-01, + 4.019566681155577786649878e-01, + 9.6778633541687993721617774e-01, + -6.734405869050344734943028e-01, +} + +// Results for 100000 * Pi + vf[i] +var sinLarge = []float64{ + -9.646661658548936063912e-01, + 9.933822527198506903752e-01, + -2.7335587036246899796e-01, + 9.55862576853689321268e-01, + -2.099421066862688873691e-01, + 2.13557878070308981163e-01, + -8.694568970959221300497e-01, + 4.01956668098863248917e-01, + 9.67786335404528727927e-01, + -6.7344058693131973066e-01, +} +var sinh = []float64{ + 7.2661916084208532301448439e+01, + 1.1479409110035194500526446e+03, + -2.8043136512812518927312641e-01, + -7.499429091181587232835164e+01, + 7.6552466042906758523925934e+03, + 9.3031583421672014313789064e+00, + 9.330815755828109072810322e+01, + 7.6179893137269146407361477e+00, + 3.021769180549615819524392e+00, + -2.95950575724449499189888e+03, +} +var sqrt = []float64{ + 2.2313699659365484748756904e+00, + 2.7818829009464263511285458e+00, + 5.2619393496314796848143251e-01, + 2.2384377628763938724244104e+00, + 
3.1042380236055381099288487e+00, + 1.7106657298385224403917771e+00, + 2.286718922705479046148059e+00, + 1.6516476350711159636222979e+00, + 1.3510396336454586262419247e+00, + 2.9471892997524949215723329e+00, +} +var tan = []float64{ + -3.661316565040227801781974e+00, + 8.64900232648597589369854e+00, + -2.8417941955033612725238097e-01, + 3.253290185974728640827156e+00, + 2.147275640380293804770778e-01, + -2.18600910711067004921551e-01, + -1.760002817872367935518928e+00, + -4.389808914752818126249079e-01, + -3.843885560201130679995041e+00, + 9.10988793377685105753416e-01, +} + +// Results for 100000 * Pi + vf[i] +var tanLarge = []float64{ + -3.66131656475596512705e+00, + 8.6490023287202547927e+00, + -2.841794195104782406e-01, + 3.2532901861033120983e+00, + 2.14727564046880001365e-01, + -2.18600910700688062874e-01, + -1.760002817699722747043e+00, + -4.38980891453536115952e-01, + -3.84388555942723509071e+00, + 9.1098879344275101051e-01, +} +var tanh = []float64{ + 9.9990531206936338549262119e-01, + 9.9999962057085294197613294e-01, + -2.7001505097318677233756845e-01, + -9.9991110943061718603541401e-01, + 9.9999999146798465745022007e-01, + 9.9427249436125236705001048e-01, + 9.9994257600983138572705076e-01, + 9.9149409509772875982054701e-01, + 9.4936501296239685514466577e-01, + -9.9999994291374030946055701e-01, +} +var trunc = []float64{ + 4.0000000000000000e+00, + 7.0000000000000000e+00, + -0.0000000000000000e+00, + -5.0000000000000000e+00, + 9.0000000000000000e+00, + 2.0000000000000000e+00, + 5.0000000000000000e+00, + 2.0000000000000000e+00, + 1.0000000000000000e+00, + -8.0000000000000000e+00, +} +var y0 = []float64{ + -3.053399153780788357534855e-01, + 1.7437227649515231515503649e-01, + -8.6221781263678836910392572e-01, + -3.100664880987498407872839e-01, + 1.422200649300982280645377e-01, + 4.000004067997901144239363e-01, + -3.3340749753099352392332536e-01, + 4.5399790746668954555205502e-01, + 4.8290004112497761007536522e-01, + 2.7036697826604756229601611e-01, +} +var y1 
= []float64{ + 0.15494213737457922210218611, + -0.2165955142081145245075746, + -2.4644949631241895201032829, + 0.1442740489541836405154505, + 0.2215379960518984777080163, + 0.3038800915160754150565448, + 0.0691107642452362383808547, + 0.2380116417809914424860165, + -0.20849492979459761009678934, + 0.0242503179793232308250804, +} +var y2 = []float64{ + 0.3675780219390303613394936, + -0.23034826393250119879267257, + -16.939677983817727205631397, + 0.367653980523052152867791, + -0.0962401471767804440353136, + -0.1923169356184851105200523, + 0.35984072054267882391843766, + -0.2794987252299739821654982, + -0.7113490692587462579757954, + -0.2647831587821263302087457, +} +var yM3 = []float64{ + -0.14035984421094849100895341, + -0.097535139617792072703973, + 242.25775994555580176377379, + -0.1492267014802818619511046, + 0.26148702629155918694500469, + 0.56675383593895176530394248, + -0.206150264009006981070575, + 0.64784284687568332737963658, + 1.3503631555901938037008443, + 0.1461869756579956803341844, +} + +// arguments and expected results for special cases +var vfacosSC = []float64{ + -Pi, + 1, + Pi, + NaN(), +} +var acosSC = []float64{ + NaN(), + 0, + NaN(), + NaN(), +} + +var vfacoshSC = []float64{ + Inf(-1), + 0.5, + 1, + Inf(1), + NaN(), +} +var acoshSC = []float64{ + NaN(), + NaN(), + 0, + Inf(1), + NaN(), +} + +var vfasinSC = []float64{ + -Pi, + Copysign(0, -1), + 0, + Pi, + NaN(), +} +var asinSC = []float64{ + NaN(), + Copysign(0, -1), + 0, + NaN(), + NaN(), +} + +var vfasinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var asinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfatanSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var atanSC = []float64{ + -Pi / 2, + Copysign(0, -1), + 0, + Pi / 2, + NaN(), +} + +var vfatanhSC = []float64{ + Inf(-1), + -Pi, + -1, + Copysign(0, -1), + 0, + 1, + Pi, + Inf(1), + NaN(), +} +var atanhSC = []float64{ + NaN(), + NaN(), + Inf(-1), + 
Copysign(0, -1), + 0, + Inf(1), + NaN(), + NaN(), + NaN(), +} +var vfatan2SC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), -Pi}, + {Inf(-1), 0}, + {Inf(-1), +Pi}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {-Pi, Inf(-1)}, + {-Pi, 0}, + {-Pi, Inf(1)}, + {-Pi, NaN()}, + {Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), -Pi}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), +Pi}, + {Copysign(0, -1), Inf(1)}, + {Copysign(0, -1), NaN()}, + {0, Inf(-1)}, + {0, -Pi}, + {0, Copysign(0, -1)}, + {0, 0}, + {0, +Pi}, + {0, Inf(1)}, + {0, NaN()}, + {+Pi, Inf(-1)}, + {+Pi, 0}, + {+Pi, Inf(1)}, + {+Pi, NaN()}, + {Inf(1), Inf(-1)}, + {Inf(1), -Pi}, + {Inf(1), 0}, + {Inf(1), +Pi}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), NaN()}, +} +var atan2SC = []float64{ + -3 * Pi / 4, // atan2(-Inf, -Inf) + -Pi / 2, // atan2(-Inf, -Pi) + -Pi / 2, // atan2(-Inf, +0) + -Pi / 2, // atan2(-Inf, +Pi) + -Pi / 4, // atan2(-Inf, +Inf) + NaN(), // atan2(-Inf, NaN) + -Pi, // atan2(-Pi, -Inf) + -Pi / 2, // atan2(-Pi, +0) + Copysign(0, -1), // atan2(-Pi, Inf) + NaN(), // atan2(-Pi, NaN) + -Pi, // atan2(-0, -Inf) + -Pi, // atan2(-0, -Pi) + -Pi, // atan2(-0, -0) + Copysign(0, -1), // atan2(-0, +0) + Copysign(0, -1), // atan2(-0, +Pi) + Copysign(0, -1), // atan2(-0, +Inf) + NaN(), // atan2(-0, NaN) + Pi, // atan2(+0, -Inf) + Pi, // atan2(+0, -Pi) + Pi, // atan2(+0, -0) + 0, // atan2(+0, +0) + 0, // atan2(+0, +Pi) + 0, // atan2(+0, +Inf) + NaN(), // atan2(+0, NaN) + Pi, // atan2(+Pi, -Inf) + Pi / 2, // atan2(+Pi, +0) + 0, // atan2(+Pi, +Inf) + NaN(), // atan2(+Pi, NaN) + 3 * Pi / 4, // atan2(+Inf, -Inf) + Pi / 2, // atan2(+Inf, -Pi) + Pi / 2, // atan2(+Inf, +0) + Pi / 2, // atan2(+Inf, +Pi) + Pi / 4, // atan2(+Inf, +Inf) + NaN(), // atan2(+Inf, NaN) + NaN(), // atan2(NaN, NaN) +} + +var vfcbrtSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var cbrtSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfceilSC = 
[]float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var ceilSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfcopysignSC = []float64{ + Inf(-1), + Inf(1), + NaN(), +} +var copysignSC = []float64{ + Inf(-1), + Inf(-1), + NaN(), +} + +var vfcosSC = []float64{ + Inf(-1), + Inf(1), + NaN(), +} +var cosSC = []float64{ + NaN(), + NaN(), + NaN(), +} + +var vfcoshSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var coshSC = []float64{ + Inf(1), + 1, + 1, + Inf(1), + NaN(), +} + +var vferfSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var erfSC = []float64{ + -1, + Copysign(0, -1), + 0, + 1, + NaN(), +} + +var vferfcSC = []float64{ + Inf(-1), + Inf(1), + NaN(), +} +var erfcSC = []float64{ + 2, + 0, + NaN(), +} + +var vfexpSC = []float64{ + Inf(-1), + -2000, + 2000, + Inf(1), + NaN(), +} +var expSC = []float64{ + 0, + 0, + Inf(1), + Inf(1), + NaN(), +} + +var vfexpm1SC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var expm1SC = []float64{ + -1, + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vffabsSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var fabsSC = []float64{ + Inf(1), + 0, + 0, + Inf(1), + NaN(), +} + +var vffdimSC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {0, Copysign(0, -1)}, + {0, 0}, + {Inf(1), Inf(-1)}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), Inf(-1)}, + {NaN(), Copysign(0, -1)}, + {NaN(), 0}, + {NaN(), Inf(1)}, + {NaN(), NaN()}, +} +var fdimSC = []float64{ + NaN(), + 0, + NaN(), + 0, + 0, + 0, + 0, + Inf(1), + NaN(), + NaN(), + NaN(), + NaN(), + NaN(), + NaN(), + NaN(), +} +var fmaxSC = []float64{ + Inf(-1), + Inf(1), + NaN(), + Copysign(0, -1), + 0, + 0, + 0, + Inf(1), + Inf(1), + Inf(1), + NaN(), + NaN(), + NaN(), + Inf(1), + NaN(), +} +var fminSC = []float64{ + Inf(-1), + Inf(-1), + Inf(-1), + 
Copysign(0, -1), + Copysign(0, -1), + Copysign(0, -1), + 0, + Inf(-1), + Inf(1), + NaN(), + Inf(-1), + NaN(), + NaN(), + NaN(), + NaN(), +} + +var vffmodSC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), -Pi}, + {Inf(-1), 0}, + {Inf(-1), Pi}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {-Pi, Inf(-1)}, + {-Pi, 0}, + {-Pi, Inf(1)}, + {-Pi, NaN()}, + {Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), Inf(1)}, + {Copysign(0, -1), NaN()}, + {0, Inf(-1)}, + {0, 0}, + {0, Inf(1)}, + {0, NaN()}, + {Pi, Inf(-1)}, + {Pi, 0}, + {Pi, Inf(1)}, + {Pi, NaN()}, + {Inf(1), Inf(-1)}, + {Inf(1), -Pi}, + {Inf(1), 0}, + {Inf(1), Pi}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), Inf(-1)}, + {NaN(), -Pi}, + {NaN(), 0}, + {NaN(), Pi}, + {NaN(), Inf(1)}, + {NaN(), NaN()}, +} +var fmodSC = []float64{ + NaN(), // fmod(-Inf, -Inf) + NaN(), // fmod(-Inf, -Pi) + NaN(), // fmod(-Inf, 0) + NaN(), // fmod(-Inf, Pi) + NaN(), // fmod(-Inf, +Inf) + NaN(), // fmod(-Inf, NaN) + -Pi, // fmod(-Pi, -Inf) + NaN(), // fmod(-Pi, 0) + -Pi, // fmod(-Pi, +Inf) + NaN(), // fmod(-Pi, NaN) + Copysign(0, -1), // fmod(-0, -Inf) + NaN(), // fmod(-0, 0) + Copysign(0, -1), // fmod(-0, Inf) + NaN(), // fmod(-0, NaN) + 0, // fmod(0, -Inf) + NaN(), // fmod(0, 0) + 0, // fmod(0, +Inf) + NaN(), // fmod(0, NaN) + Pi, // fmod(Pi, -Inf) + NaN(), // fmod(Pi, 0) + Pi, // fmod(Pi, +Inf) + NaN(), // fmod(Pi, NaN) + NaN(), // fmod(+Inf, -Inf) + NaN(), // fmod(+Inf, -Pi) + NaN(), // fmod(+Inf, 0) + NaN(), // fmod(+Inf, Pi) + NaN(), // fmod(+Inf, +Inf) + NaN(), // fmod(+Inf, NaN) + NaN(), // fmod(NaN, -Inf) + NaN(), // fmod(NaN, -Pi) + NaN(), // fmod(NaN, 0) + NaN(), // fmod(NaN, Pi) + NaN(), // fmod(NaN, +Inf) + NaN(), // fmod(NaN, NaN) +} + +var vffrexpSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var frexpSC = []fi{ + {Inf(-1), 0}, + {Copysign(0, -1), 0}, + {0, 0}, + {Inf(1), 0}, + {NaN(), 0}, +} + +var vfgammaSC = []float64{ + Inf(-1), + -3, + Copysign(0, -1), + 0, + Inf(1), + 
NaN(), +} +var gammaSC = []float64{ + NaN(), + NaN(), + Inf(-1), + Inf(1), + Inf(1), + NaN(), +} + +var vfhypotSC = [][2]float64{ + {Inf(-1), Inf(-1)}, + {Inf(-1), 0}, + {Inf(-1), Inf(1)}, + {Inf(-1), NaN()}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), 0}, + {0, Copysign(0, -1)}, + {0, 0}, // +0, +0 + {0, Inf(-1)}, + {0, Inf(1)}, + {0, NaN()}, + {Inf(1), Inf(-1)}, + {Inf(1), 0}, + {Inf(1), Inf(1)}, + {Inf(1), NaN()}, + {NaN(), Inf(-1)}, + {NaN(), 0}, + {NaN(), Inf(1)}, + {NaN(), NaN()}, +} +var hypotSC = []float64{ + Inf(1), + Inf(1), + Inf(1), + Inf(1), + 0, + 0, + 0, + 0, + Inf(1), + Inf(1), + NaN(), + Inf(1), + Inf(1), + Inf(1), + Inf(1), + Inf(1), + NaN(), + Inf(1), + NaN(), +} + +var vfilogbSC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), +} +var ilogbSC = []int{ + MaxInt32, + MinInt32, + MaxInt32, + MaxInt32, +} + +var vfj0SC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), +} +var j0SC = []float64{ + 0, + 1, + 0, + NaN(), +} +var j1SC = []float64{ + 0, + 0, + 0, + NaN(), +} +var j2SC = []float64{ + 0, + 0, + 0, + NaN(), +} +var jM3SC = []float64{ + 0, + 0, + 0, + NaN(), +} + +var vfldexpSC = []fi{ + {0, 0}, + {0, -1075}, + {0, 1024}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), -1075}, + {Copysign(0, -1), 1024}, + {Inf(1), 0}, + {Inf(1), -1024}, + {Inf(-1), 0}, + {Inf(-1), -1024}, + {NaN(), -1024}, +} +var ldexpSC = []float64{ + 0, + 0, + 0, + Copysign(0, -1), + Copysign(0, -1), + Copysign(0, -1), + Inf(1), + Inf(1), + Inf(-1), + Inf(-1), + NaN(), +} + +var vflgammaSC = []float64{ + Inf(-1), + -3, + 0, + 1, + 2, + Inf(1), + NaN(), +} +var lgammaSC = []fi{ + {Inf(-1), 1}, + {Inf(1), 1}, + {Inf(1), 1}, + {0, 1}, + {0, 1}, + {Inf(1), 1}, + {NaN(), 1}, +} + +var vflogSC = []float64{ + Inf(-1), + -Pi, + Copysign(0, -1), + 0, + 1, + Inf(1), + NaN(), +} +var logSC = []float64{ + NaN(), + NaN(), + Inf(-1), + Inf(-1), + 0, + Inf(1), + NaN(), +} + +var vflogbSC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), +} +var logbSC = []float64{ + Inf(1), + 
Inf(-1), + Inf(1), + NaN(), +} + +var vflog1pSC = []float64{ + Inf(-1), + -Pi, + -1, + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var log1pSC = []float64{ + NaN(), + NaN(), + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfmodfSC = []float64{ + Inf(-1), + Inf(1), + NaN(), +} +var modfSC = [][2]float64{ + {Inf(-1), NaN()}, // [2]float64{Copysign(0, -1), Inf(-1)}, + {Inf(1), NaN()}, // [2]float64{0, Inf(1)}, + {NaN(), NaN()}, +} + +var vfnextafter32SC = [][2]float32{ + {0, 0}, + {0, float32(Copysign(0, -1))}, + {0, -1}, + {0, float32(NaN())}, + {float32(Copysign(0, -1)), 1}, + {float32(Copysign(0, -1)), 0}, + {float32(Copysign(0, -1)), float32(Copysign(0, -1))}, + {float32(Copysign(0, -1)), -1}, + {float32(NaN()), 0}, + {float32(NaN()), float32(NaN())}, +} +var nextafter32SC = []float32{ + 0, + 0, + -1.401298464e-45, // Float32frombits(0x80000001) + float32(NaN()), + 1.401298464e-45, // Float32frombits(0x00000001) + float32(Copysign(0, -1)), + float32(Copysign(0, -1)), + -1.401298464e-45, // Float32frombits(0x80000001) + float32(NaN()), + float32(NaN()), +} + +var vfnextafter64SC = [][2]float64{ + {0, 0}, + {0, Copysign(0, -1)}, + {0, -1}, + {0, NaN()}, + {Copysign(0, -1), 1}, + {Copysign(0, -1), 0}, + {Copysign(0, -1), Copysign(0, -1)}, + {Copysign(0, -1), -1}, + {NaN(), 0}, + {NaN(), NaN()}, +} +var nextafter64SC = []float64{ + 0, + 0, + -4.9406564584124654418e-324, // Float64frombits(0x8000000000000001) + NaN(), + 4.9406564584124654418e-324, // Float64frombits(0x0000000000000001) + Copysign(0, -1), + Copysign(0, -1), + -4.9406564584124654418e-324, // Float64frombits(0x8000000000000001) + NaN(), + NaN(), +} + +var vfpowSC = [][2]float64{ + {Inf(-1), -Pi}, + {Inf(-1), -3}, + {Inf(-1), Copysign(0, -1)}, + {Inf(-1), 0}, + {Inf(-1), 1}, + {Inf(-1), 3}, + {Inf(-1), Pi}, + {Inf(-1), NaN()}, + + {-Pi, Inf(-1)}, + {-Pi, -Pi}, + {-Pi, Copysign(0, -1)}, + {-Pi, 0}, + {-Pi, 1}, + {-Pi, Pi}, + {-Pi, Inf(1)}, + {-Pi, NaN()}, + + {-1, Inf(-1)}, + {-1, Inf(1)}, 
+ {-1, NaN()}, + {-0.5, Inf(-1)}, // written as -0.5: the untyped constant -1 / 2 is integer division and evaluates to 0 + {-0.5, Inf(1)}, + {Copysign(0, -1), Inf(-1)}, + {Copysign(0, -1), -Pi}, + {Copysign(0, -1), -3}, + {Copysign(0, -1), 3}, + {Copysign(0, -1), Pi}, + {Copysign(0, -1), Inf(1)}, + + {0, Inf(-1)}, + {0, -Pi}, + {0, -3}, + {0, Copysign(0, -1)}, + {0, 0}, + {0, 3}, + {0, Pi}, + {0, Inf(1)}, + {0, NaN()}, + + {0.5, Inf(-1)}, // written as 0.5: the untyped constant 1 / 2 is integer division and evaluates to 0 + {0.5, Inf(1)}, + {1, Inf(-1)}, + {1, Inf(1)}, + {1, NaN()}, + + {Pi, Inf(-1)}, + {Pi, Copysign(0, -1)}, + {Pi, 0}, + {Pi, 1}, + {Pi, Inf(1)}, + {Pi, NaN()}, + {Inf(1), -Pi}, + {Inf(1), Copysign(0, -1)}, + {Inf(1), 0}, + {Inf(1), 1}, + {Inf(1), Pi}, + {Inf(1), NaN()}, + {NaN(), -Pi}, + {NaN(), Copysign(0, -1)}, + {NaN(), 0}, + {NaN(), 1}, + {NaN(), Pi}, + {NaN(), NaN()}, +} +var powSC = []float64{ + 0, // pow(-Inf, -Pi) + Copysign(0, -1), // pow(-Inf, -3) + 1, // pow(-Inf, -0) + 1, // pow(-Inf, +0) + Inf(-1), // pow(-Inf, 1) + Inf(-1), // pow(-Inf, 3) + Inf(1), // pow(-Inf, Pi) + NaN(), // pow(-Inf, NaN) + 0, // pow(-Pi, -Inf) + NaN(), // pow(-Pi, -Pi) + 1, // pow(-Pi, -0) + 1, // pow(-Pi, +0) + -Pi, // pow(-Pi, 1) + NaN(), // pow(-Pi, Pi) + Inf(1), // pow(-Pi, +Inf) + NaN(), // pow(-Pi, NaN) + 1, // pow(-1, -Inf) IEEE 754-2008 + 1, // pow(-1, +Inf) IEEE 754-2008 + NaN(), // pow(-1, NaN) + Inf(1), // pow(-1/2, -Inf) + 0, // pow(-1/2, +Inf) + Inf(1), // pow(-0, -Inf) + Inf(1), // pow(-0, -Pi) + Inf(-1), // pow(-0, -3) IEEE 754-2008 + Copysign(0, -1), // pow(-0, 3) IEEE 754-2008 + 0, // pow(-0, +Pi) + 0, // pow(-0, +Inf) + Inf(1), // pow(+0, -Inf) + Inf(1), // pow(+0, -Pi) + Inf(1), // pow(+0, -3) + 1, // pow(+0, -0) + 1, // pow(+0, +0) + 0, // pow(+0, 3) + 0, // pow(+0, +Pi) + 0, // pow(+0, +Inf) + NaN(), // pow(+0, NaN) + Inf(1), // pow(1/2, -Inf) + 0, // pow(1/2, +Inf) + 1, // pow(1, -Inf) IEEE 754-2008 + 1, // pow(1, +Inf) IEEE 754-2008 + 1, // pow(1, NaN) IEEE 754-2008 + 0, // pow(+Pi, -Inf) + 1, // pow(+Pi, -0) + 1, // pow(+Pi, +0) + Pi, // pow(+Pi, 1) + Inf(1), // pow(+Pi, +Inf) + NaN(), // pow(+Pi,
NaN) + 0, // pow(+Inf, -Pi) + 1, // pow(+Inf, -0) + 1, // pow(+Inf, +0) + Inf(1), // pow(+Inf, 1) + Inf(1), // pow(+Inf, Pi) + NaN(), // pow(+Inf, NaN) + NaN(), // pow(NaN, -Pi) + 1, // pow(NaN, -0) + 1, // pow(NaN, +0) + NaN(), // pow(NaN, 1) + NaN(), // pow(NaN, +Pi) + NaN(), // pow(NaN, NaN) +} + +var vfpow10SC = []int{ + MinInt32, + MaxInt32, + -325, + 309, +} + +var pow10SC = []float64{ + 0, // pow10(MinInt32) + Inf(1), // pow10(MaxInt32) + 0, // pow10(-325) + Inf(1), // pow10(309) +} + +var vfsignbitSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var signbitSC = []bool{ + true, + true, + false, + false, + false, +} + +var vfsinSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var sinSC = []float64{ + NaN(), + Copysign(0, -1), + 0, + NaN(), + NaN(), +} + +var vfsinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var sinhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vfsqrtSC = []float64{ + Inf(-1), + -Pi, + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var sqrtSC = []float64{ + NaN(), + NaN(), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} + +var vftanhSC = []float64{ + Inf(-1), + Copysign(0, -1), + 0, + Inf(1), + NaN(), +} +var tanhSC = []float64{ + -1, + Copysign(0, -1), + 0, + 1, + NaN(), +} + +var vfy0SC = []float64{ + Inf(-1), + 0, + Inf(1), + NaN(), +} +var y0SC = []float64{ + NaN(), + Inf(-1), + 0, + NaN(), +} +var y1SC = []float64{ + NaN(), + Inf(-1), + 0, + NaN(), +} +var y2SC = []float64{ + NaN(), + Inf(-1), + 0, + NaN(), +} +var yM3SC = []float64{ + NaN(), + Inf(1), + 0, + NaN(), +} + +// arguments and expected results for boundary cases +const ( + SmallestNormalFloat64 = 2.2250738585072014e-308 // 2**-1022 + LargestSubnormalFloat64 = SmallestNormalFloat64 - SmallestNonzeroFloat64 +) + +var vffrexpBC = []float64{ + SmallestNormalFloat64, + LargestSubnormalFloat64, + SmallestNonzeroFloat64, + MaxFloat64, + -SmallestNormalFloat64, + 
-LargestSubnormalFloat64, + -SmallestNonzeroFloat64, + -MaxFloat64, +} +var frexpBC = []fi{ + {0.5, -1021}, + {0.99999999999999978, -1022}, + {0.5, -1073}, + {0.99999999999999989, 1024}, + {-0.5, -1021}, + {-0.99999999999999978, -1022}, + {-0.5, -1073}, + {-0.99999999999999989, 1024}, +} + +var vfldexpBC = []fi{ + {SmallestNormalFloat64, -52}, + {LargestSubnormalFloat64, -51}, + {SmallestNonzeroFloat64, 1074}, + {MaxFloat64, -(1023 + 1074)}, + {1, -1075}, + {-1, -1075}, + {1, 1024}, + {-1, 1024}, +} +var ldexpBC = []float64{ + SmallestNonzeroFloat64, + 1e-323, // 2**-1073 + 1, + 1e-323, // 2**-1073 + 0, + Copysign(0, -1), + Inf(1), + Inf(-1), +} + +var logbBC = []float64{ + -1022, + -1023, + -1074, + 1023, + -1022, + -1023, + -1074, + 1023, +} + +func tolerance(a, b, e float64) bool { + d := a - b + if d < 0 { + d = -d + } + + if a != 0 { + e = e * a + if e < 0 { + e = -e + } + } + return d < e +} +func kindaclose(a, b float64) bool { return tolerance(a, b, 1e-8) } +func close(a, b float64) bool { return tolerance(a, b, 1e-14) } +func veryclose(a, b float64) bool { return tolerance(a, b, 4e-16) } +func soclose(a, b, e float64) bool { return tolerance(a, b, e) } +func alike(a, b float64) bool { + switch { + case IsNaN(a) && IsNaN(b): + return true + case a == b: + return Signbit(a) == Signbit(b) + } + return false +} + +func TestNaN(t *testing.T) { + f64 := NaN() + if f64 == f64 { + t.Fatalf("NaN() returns %g, expected NaN", f64) + } + f32 := float32(f64) + if f32 == f32 { + t.Fatalf("float32(NaN()) is %g, expected NaN", f32) + } +} + +func TestAcos(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Acos(a); !close(acos[i], f) { + t.Errorf("Acos(%g) = %g, want %g", a, f, acos[i]) + } + } + for i := 0; i < len(vfacosSC); i++ { + if f := Acos(vfacosSC[i]); !alike(acosSC[i], f) { + t.Errorf("Acos(%g) = %g, want %g", vfacosSC[i], f, acosSC[i]) + } + } +} + +func TestAcosh(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := 1 + Abs(vf[i]) + 
if f := Acosh(a); !veryclose(acosh[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", a, f, acosh[i]) + } + } + for i := 0; i < len(vfacoshSC); i++ { + if f := Acosh(vfacoshSC[i]); !alike(acoshSC[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", vfacoshSC[i], f, acoshSC[i]) + } + } +} + +func TestAsin(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Asin(a); !veryclose(asin[i], f) { + t.Errorf("Asin(%g) = %g, want %g", a, f, asin[i]) + } + } + for i := 0; i < len(vfasinSC); i++ { + if f := Asin(vfasinSC[i]); !alike(asinSC[i], f) { + t.Errorf("Asin(%g) = %g, want %g", vfasinSC[i], f, asinSC[i]) + } + } +} + +func TestAsinh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Asinh(vf[i]); !veryclose(asinh[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vf[i], f, asinh[i]) + } + } + for i := 0; i < len(vfasinhSC); i++ { + if f := Asinh(vfasinhSC[i]); !alike(asinhSC[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vfasinhSC[i], f, asinhSC[i]) + } + } +} + +func TestAtan(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Atan(vf[i]); !veryclose(atan[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vf[i], f, atan[i]) + } + } + for i := 0; i < len(vfatanSC); i++ { + if f := Atan(vfatanSC[i]); !alike(atanSC[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vfatanSC[i], f, atanSC[i]) + } + } +} + +func TestAtanh(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Atanh(a); !veryclose(atanh[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", a, f, atanh[i]) + } + } + for i := 0; i < len(vfatanhSC); i++ { + if f := Atanh(vfatanhSC[i]); !alike(atanhSC[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", vfatanhSC[i], f, atanhSC[i]) + } + } +} + +func TestAtan2(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Atan2(10, vf[i]); !veryclose(atan2[i], f) { + t.Errorf("Atan2(10, %g) = %g, want %g", vf[i], f, atan2[i]) + } + } + for i := 0; i < len(vfatan2SC); i++ { + if f := Atan2(vfatan2SC[i][0], vfatan2SC[i][1]); !alike(atan2SC[i], 
f) { + t.Errorf("Atan2(%g, %g) = %g, want %g", vfatan2SC[i][0], vfatan2SC[i][1], f, atan2SC[i]) + } + } +} + +func TestCbrt(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Cbrt(vf[i]); !veryclose(cbrt[i], f) { + t.Errorf("Cbrt(%g) = %g, want %g", vf[i], f, cbrt[i]) + } + } + for i := 0; i < len(vfcbrtSC); i++ { + if f := Cbrt(vfcbrtSC[i]); !alike(cbrtSC[i], f) { + t.Errorf("Cbrt(%g) = %g, want %g", vfcbrtSC[i], f, cbrtSC[i]) + } + } +} + +func TestCeil(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Ceil(vf[i]); ceil[i] != f { + t.Errorf("Ceil(%g) = %g, want %g", vf[i], f, ceil[i]) + } + } + for i := 0; i < len(vfceilSC); i++ { + if f := Ceil(vfceilSC[i]); !alike(ceilSC[i], f) { + t.Errorf("Ceil(%g) = %g, want %g", vfceilSC[i], f, ceilSC[i]) + } + } +} + +func TestCopysign(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Copysign(vf[i], -1); copysign[i] != f { + t.Errorf("Copysign(%g, -1) = %g, want %g", vf[i], f, copysign[i]) + } + } + for i := 0; i < len(vf); i++ { + if f := Copysign(vf[i], 1); -copysign[i] != f { + t.Errorf("Copysign(%g, 1) = %g, want %g", vf[i], f, -copysign[i]) + } + } + for i := 0; i < len(vfcopysignSC); i++ { + if f := Copysign(vfcopysignSC[i], -1); !alike(copysignSC[i], f) { + t.Errorf("Copysign(%g, -1) = %g, want %g", vfcopysignSC[i], f, copysignSC[i]) + } + } +} + +func TestCos(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Cos(vf[i]); !veryclose(cos[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vf[i], f, cos[i]) + } + } + for i := 0; i < len(vfcosSC); i++ { + if f := Cos(vfcosSC[i]); !alike(cosSC[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vfcosSC[i], f, cosSC[i]) + } + } +} + +func TestCosh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Cosh(vf[i]); !close(cosh[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vf[i], f, cosh[i]) + } + } + for i := 0; i < len(vfcoshSC); i++ { + if f := Cosh(vfcoshSC[i]); !alike(coshSC[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vfcoshSC[i], f, 
coshSC[i]) + } + } +} + +func TestErf(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Erf(a); !veryclose(erf[i], f) { + t.Errorf("Erf(%g) = %g, want %g", a, f, erf[i]) + } + } + for i := 0; i < len(vferfSC); i++ { + if f := Erf(vferfSC[i]); !alike(erfSC[i], f) { + t.Errorf("Erf(%g) = %g, want %g", vferfSC[i], f, erfSC[i]) + } + } +} + +func TestErfc(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 10 + if f := Erfc(a); !veryclose(erfc[i], f) { + t.Errorf("Erfc(%g) = %g, want %g", a, f, erfc[i]) + } + } + for i := 0; i < len(vferfcSC); i++ { + if f := Erfc(vferfcSC[i]); !alike(erfcSC[i], f) { + t.Errorf("Erfc(%g) = %g, want %g", vferfcSC[i], f, erfcSC[i]) + } + } +} + +func TestExp(t *testing.T) { + testExp(t, Exp, "Exp") + testExp(t, ExpGo, "ExpGo") +} + +func testExp(t *testing.T, Exp func(float64) float64, name string) { + for i := 0; i < len(vf); i++ { + if f := Exp(vf[i]); !close(exp[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp[i]) + } + } + for i := 0; i < len(vfexpSC); i++ { + if f := Exp(vfexpSC[i]); !alike(expSC[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vfexpSC[i], f, expSC[i]) + } + } +} + +func TestExpm1(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 100 + if f := Expm1(a); !veryclose(expm1[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", a, f, expm1[i]) + } + } + for i := 0; i < len(vfexpm1SC); i++ { + if f := Expm1(vfexpm1SC[i]); !alike(expm1SC[i], f) { + t.Errorf("Expm1(%g) = %g, want %g", vfexpm1SC[i], f, expm1SC[i]) + } + } +} + +func TestExp2(t *testing.T) { + testExp2(t, Exp2, "Exp2") + testExp2(t, Exp2Go, "Exp2Go") +} + +func testExp2(t *testing.T, Exp2 func(float64) float64, name string) { + for i := 0; i < len(vf); i++ { + if f := Exp2(vf[i]); !close(exp2[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, vf[i], f, exp2[i]) + } + } + for i := 0; i < len(vfexpSC); i++ { + if f := Exp2(vfexpSC[i]); !alike(expSC[i], f) { + t.Errorf("%s(%g) = %g, want %g", name, 
vfexpSC[i], f, expSC[i]) + } + } + for n := -1074; n < 1024; n++ { + f := Exp2(float64(n)) + vf := Ldexp(1, n) + if f != vf { + t.Errorf("%s(%d) = %g, want %g", name, n, f, vf) + } + } +} + +func TestAbs(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Abs(vf[i]); fabs[i] != f { + t.Errorf("Abs(%g) = %g, want %g", vf[i], f, fabs[i]) + } + } + for i := 0; i < len(vffabsSC); i++ { + if f := Abs(vffabsSC[i]); !alike(fabsSC[i], f) { + t.Errorf("Abs(%g) = %g, want %g", vffabsSC[i], f, fabsSC[i]) + } + } +} + +func TestDim(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Dim(vf[i], 0); fdim[i] != f { + t.Errorf("Dim(%g, %g) = %g, want %g", vf[i], 0.0, f, fdim[i]) + } + } + for i := 0; i < len(vffdimSC); i++ { + if f := Dim(vffdimSC[i][0], vffdimSC[i][1]); !alike(fdimSC[i], f) { + t.Errorf("Dim(%g, %g) = %g, want %g", vffdimSC[i][0], vffdimSC[i][1], f, fdimSC[i]) + } + } +} + +func TestFloor(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Floor(vf[i]); floor[i] != f { + t.Errorf("Floor(%g) = %g, want %g", vf[i], f, floor[i]) + } + } + for i := 0; i < len(vfceilSC); i++ { + if f := Floor(vfceilSC[i]); !alike(ceilSC[i], f) { + t.Errorf("Floor(%g) = %g, want %g", vfceilSC[i], f, ceilSC[i]) + } + } +} + +func TestMax(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Max(vf[i], ceil[i]); ceil[i] != f { + t.Errorf("Max(%g, %g) = %g, want %g", vf[i], ceil[i], f, ceil[i]) + } + } + for i := 0; i < len(vffdimSC); i++ { + if f := Max(vffdimSC[i][0], vffdimSC[i][1]); !alike(fmaxSC[i], f) { + t.Errorf("Max(%g, %g) = %g, want %g", vffdimSC[i][0], vffdimSC[i][1], f, fmaxSC[i]) + } + } +} + +func TestMin(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Min(vf[i], floor[i]); floor[i] != f { + t.Errorf("Min(%g, %g) = %g, want %g", vf[i], floor[i], f, floor[i]) + } + } + for i := 0; i < len(vffdimSC); i++ { + if f := Min(vffdimSC[i][0], vffdimSC[i][1]); !alike(fminSC[i], f) { + t.Errorf("Min(%g, %g) = %g, want %g", vffdimSC[i][0], 
vffdimSC[i][1], f, fminSC[i]) + } + } +} + +func TestMod(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Mod(10, vf[i]); fmod[i] != f { + t.Errorf("Mod(10, %g) = %g, want %g", vf[i], f, fmod[i]) + } + } + for i := 0; i < len(vffmodSC); i++ { + if f := Mod(vffmodSC[i][0], vffmodSC[i][1]); !alike(fmodSC[i], f) { + t.Errorf("Mod(%g, %g) = %g, want %g", vffmodSC[i][0], vffmodSC[i][1], f, fmodSC[i]) + } + } +} + +func TestFrexp(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f, j := Frexp(vf[i]); !veryclose(frexp[i].f, f) || frexp[i].i != j { + t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vf[i], f, j, frexp[i].f, frexp[i].i) + } + } + for i := 0; i < len(vffrexpSC); i++ { + if f, j := Frexp(vffrexpSC[i]); !alike(frexpSC[i].f, f) || frexpSC[i].i != j { + t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpSC[i], f, j, frexpSC[i].f, frexpSC[i].i) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if f, j := Frexp(vffrexpBC[i]); !alike(frexpBC[i].f, f) || frexpBC[i].i != j { + t.Errorf("Frexp(%g) = %g, %d, want %g, %d", vffrexpBC[i], f, j, frexpBC[i].f, frexpBC[i].i) + } + } +} + +func TestGamma(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Gamma(vf[i]); !close(gamma[i], f) { + t.Errorf("Gamma(%g) = %g, want %g", vf[i], f, gamma[i]) + } + } + for i := 0; i < len(vfgammaSC); i++ { + if f := Gamma(vfgammaSC[i]); !alike(gammaSC[i], f) { + t.Errorf("Gamma(%g) = %g, want %g", vfgammaSC[i], f, gammaSC[i]) + } + } +} + +func TestHypot(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(1e200 * tanh[i] * Sqrt(2)) + if f := Hypot(1e200*tanh[i], 1e200*tanh[i]); !veryclose(a, f) { + t.Errorf("Hypot(%g, %g) = %g, want %g", 1e200*tanh[i], 1e200*tanh[i], f, a) + } + } + for i := 0; i < len(vfhypotSC); i++ { + if f := Hypot(vfhypotSC[i][0], vfhypotSC[i][1]); !alike(hypotSC[i], f) { + t.Errorf("Hypot(%g, %g) = %g, want %g", vfhypotSC[i][0], vfhypotSC[i][1], f, hypotSC[i]) + } + } +} + +func TestHypotGo(t *testing.T) { + for i := 0; i < len(vf); i++ { + a 
:= Abs(1e200 * tanh[i] * Sqrt(2)) + if f := HypotGo(1e200*tanh[i], 1e200*tanh[i]); !veryclose(a, f) { + t.Errorf("HypotGo(%g, %g) = %g, want %g", 1e200*tanh[i], 1e200*tanh[i], f, a) + } + } + for i := 0; i < len(vfhypotSC); i++ { + if f := HypotGo(vfhypotSC[i][0], vfhypotSC[i][1]); !alike(hypotSC[i], f) { + t.Errorf("HypotGo(%g, %g) = %g, want %g", vfhypotSC[i][0], vfhypotSC[i][1], f, hypotSC[i]) + } + } +} + +func TestIlogb(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := frexp[i].i - 1 // adjust because fr in the interval [½, 1) + if e := Ilogb(vf[i]); a != e { + t.Errorf("Ilogb(%g) = %d, want %d", vf[i], e, a) + } + } + // special cases: inputs come from vfilogbSC, the table declared alongside ilogbSC + for i := 0; i < len(vfilogbSC); i++ { + if e := Ilogb(vfilogbSC[i]); ilogbSC[i] != e { + t.Errorf("Ilogb(%g) = %d, want %d", vfilogbSC[i], e, ilogbSC[i]) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if e := Ilogb(vffrexpBC[i]); int(logbBC[i]) != e { + t.Errorf("Ilogb(%g) = %d, want %d", vffrexpBC[i], e, int(logbBC[i])) + } + } +} + +func TestJ0(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := J0(vf[i]); !soclose(j0[i], f, 4e-14) { + t.Errorf("J0(%g) = %g, want %g", vf[i], f, j0[i]) + } + } + for i := 0; i < len(vfj0SC); i++ { + if f := J0(vfj0SC[i]); !alike(j0SC[i], f) { + t.Errorf("J0(%g) = %g, want %g", vfj0SC[i], f, j0SC[i]) + } + } +} + +func TestJ1(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := J1(vf[i]); !close(j1[i], f) { + t.Errorf("J1(%g) = %g, want %g", vf[i], f, j1[i]) + } + } + for i := 0; i < len(vfj0SC); i++ { + if f := J1(vfj0SC[i]); !alike(j1SC[i], f) { + t.Errorf("J1(%g) = %g, want %g", vfj0SC[i], f, j1SC[i]) + } + } +} + +func TestJn(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Jn(2, vf[i]); !close(j2[i], f) { + t.Errorf("Jn(2, %g) = %g, want %g", vf[i], f, j2[i]) + } + if f := Jn(-3, vf[i]); !close(jM3[i], f) { + t.Errorf("Jn(-3, %g) = %g, want %g", vf[i], f, jM3[i]) + } + } + for i := 0; i < len(vfj0SC); i++ { + if f := Jn(2, vfj0SC[i]); !alike(j2SC[i], f) { + t.Errorf("Jn(2, %g) =
%g, want %g", vfj0SC[i], f, j2SC[i]) + } + if f := Jn(-3, vfj0SC[i]); !alike(jM3SC[i], f) { + t.Errorf("Jn(-3, %g) = %g, want %g", vfj0SC[i], f, jM3SC[i]) + } + } +} + +func TestLdexp(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Ldexp(frexp[i].f, frexp[i].i); !veryclose(vf[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", frexp[i].f, frexp[i].i, f, vf[i]) + } + } + for i := 0; i < len(vffrexpSC); i++ { + if f := Ldexp(frexpSC[i].f, frexpSC[i].i); !alike(vffrexpSC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpSC[i].f, frexpSC[i].i, f, vffrexpSC[i]) + } + } + for i := 0; i < len(vfldexpSC); i++ { + if f := Ldexp(vfldexpSC[i].f, vfldexpSC[i].i); !alike(ldexpSC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", vfldexpSC[i].f, vfldexpSC[i].i, f, ldexpSC[i]) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if f := Ldexp(frexpBC[i].f, frexpBC[i].i); !alike(vffrexpBC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", frexpBC[i].f, frexpBC[i].i, f, vffrexpBC[i]) + } + } + for i := 0; i < len(vfldexpBC); i++ { + if f := Ldexp(vfldexpBC[i].f, vfldexpBC[i].i); !alike(ldexpBC[i], f) { + t.Errorf("Ldexp(%g, %d) = %g, want %g", vfldexpBC[i].f, vfldexpBC[i].i, f, ldexpBC[i]) + } + } +} + +func TestLgamma(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f, s := Lgamma(vf[i]); !close(lgamma[i].f, f) || lgamma[i].i != s { + t.Errorf("Lgamma(%g) = %g, %d, want %g, %d", vf[i], f, s, lgamma[i].f, lgamma[i].i) + } + } + for i := 0; i < len(vflgammaSC); i++ { + if f, s := Lgamma(vflgammaSC[i]); !alike(lgammaSC[i].f, f) || lgammaSC[i].i != s { + t.Errorf("Lgamma(%g) = %g, %d, want %g, %d", vflgammaSC[i], f, s, lgammaSC[i].f, lgammaSC[i].i) + } + } +} + +func TestLog(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log(a); log[i] != f { + t.Errorf("Log(%g) = %g, want %g", a, f, log[i]) + } + } + if f := Log(10); f != Ln10 { + t.Errorf("Log(%g) = %g, want %g", 10.0, f, Ln10) + } + for i := 0; i < len(vflogSC); i++ { + if f := 
Log(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} + +func TestLogb(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Logb(vf[i]); logb[i] != f { + t.Errorf("Logb(%g) = %g, want %g", vf[i], f, logb[i]) + } + } + for i := 0; i < len(vflogbSC); i++ { + if f := Logb(vflogbSC[i]); !alike(logbSC[i], f) { + t.Errorf("Logb(%g) = %g, want %g", vflogbSC[i], f, logbSC[i]) + } + } + for i := 0; i < len(vffrexpBC); i++ { + if f := Logb(vffrexpBC[i]); !alike(logbBC[i], f) { + t.Errorf("Logb(%g) = %g, want %g", vffrexpBC[i], f, logbBC[i]) + } + } +} + +func TestLog10(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log10(a); !veryclose(log10[i], f) { + t.Errorf("Log10(%g) = %g, want %g", a, f, log10[i]) + } + } + if f := Log10(E); f != Log10E { + t.Errorf("Log10(%g) = %g, want %g", E, f, Log10E) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log10(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } +} + +func TestLog1p(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := vf[i] / 100 + if f := Log1p(a); !veryclose(log1p[i], f) { + t.Errorf("Log1p(%g) = %g, want %g", a, f, log1p[i]) + } + } + a := 9.0 + if f := Log1p(a); f != Ln10 { + t.Errorf("Log1p(%g) = %g, want %g", a, f, Ln10) + } + // special cases: bound the loop by vflog1pSC, the table actually indexed below + for i := 0; i < len(vflog1pSC); i++ { + if f := Log1p(vflog1pSC[i]); !alike(log1pSC[i], f) { + t.Errorf("Log1p(%g) = %g, want %g", vflog1pSC[i], f, log1pSC[i]) + } + } +} + +func TestLog2(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Log2(a); !veryclose(log2[i], f) { + t.Errorf("Log2(%g) = %g, want %g", a, f, log2[i]) + } + } + if f := Log2(E); f != Log2E { + t.Errorf("Log2(%g) = %g, want %g", E, f, Log2E) + } + for i := 0; i < len(vflogSC); i++ { + if f := Log2(vflogSC[i]); !alike(logSC[i], f) { + t.Errorf("Log2(%g) = %g, want %g", vflogSC[i], f, logSC[i]) + } + } + for i := -1074; i <= 1023; i++ { + f :=
Ldexp(1, i) + l := Log2(f) + if l != float64(i) { + t.Errorf("Log2(2**%d) = %g, want %d", i, l, i) + } + } +} + +func TestModf(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f, g := Modf(vf[i]); !veryclose(modf[i][0], f) || !veryclose(modf[i][1], g) { + t.Errorf("Modf(%g) = %g, %g, want %g, %g", vf[i], f, g, modf[i][0], modf[i][1]) + } + } + for i := 0; i < len(vfmodfSC); i++ { + if f, g := Modf(vfmodfSC[i]); !alike(modfSC[i][0], f) || !alike(modfSC[i][1], g) { + t.Errorf("Modf(%g) = %g, %g, want %g, %g", vfmodfSC[i], f, g, modfSC[i][0], modfSC[i][1]) + } + } +} + +func TestNextafter32(t *testing.T) { + for i := 0; i < len(vf); i++ { + vfi := float32(vf[i]) + if f := Nextafter32(vfi, 10); nextafter32[i] != f { + t.Errorf("Nextafter32(%g, %g) = %g want %g", vfi, 10.0, f, nextafter32[i]) + } + } + for i := 0; i < len(vfnextafter32SC); i++ { + if f := Nextafter32(vfnextafter32SC[i][0], vfnextafter32SC[i][1]); !alike(float64(nextafter32SC[i]), float64(f)) { + t.Errorf("Nextafter32(%g, %g) = %g want %g", vfnextafter32SC[i][0], vfnextafter32SC[i][1], f, nextafter32SC[i]) + } + } +} + +func TestNextafter64(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Nextafter(vf[i], 10); nextafter64[i] != f { + t.Errorf("Nextafter64(%g, %g) = %g want %g", vf[i], 10.0, f, nextafter64[i]) + } + } + for i := 0; i < len(vfnextafter64SC); i++ { + if f := Nextafter(vfnextafter64SC[i][0], vfnextafter64SC[i][1]); !alike(nextafter64SC[i], f) { + t.Errorf("Nextafter64(%g, %g) = %g want %g", vfnextafter64SC[i][0], vfnextafter64SC[i][1], f, nextafter64SC[i]) + } + } +} + +func TestPow(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Pow(10, vf[i]); !close(pow[i], f) { + t.Errorf("Pow(10, %g) = %g, want %g", vf[i], f, pow[i]) + } + } + for i := 0; i < len(vfpowSC); i++ { + if f := Pow(vfpowSC[i][0], vfpowSC[i][1]); !alike(powSC[i], f) { + t.Errorf("Pow(%g, %g) = %g, want %g", vfpowSC[i][0], vfpowSC[i][1], f, powSC[i]) + } + } +} + +func TestPow10(t *testing.T) { + for i 
:= 0; i < len(vfpow10SC); i++ { + if f := Pow10(vfpow10SC[i]); !alike(pow10SC[i], f) { + t.Errorf("Pow10(%d) = %g, want %g", vfpow10SC[i], f, pow10SC[i]) + } + } +} + +func TestRemainder(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Remainder(10, vf[i]); remainder[i] != f { + t.Errorf("Remainder(10, %g) = %g, want %g", vf[i], f, remainder[i]) + } + } + for i := 0; i < len(vffmodSC); i++ { + if f := Remainder(vffmodSC[i][0], vffmodSC[i][1]); !alike(fmodSC[i], f) { + t.Errorf("Remainder(%g, %g) = %g, want %g", vffmodSC[i][0], vffmodSC[i][1], f, fmodSC[i]) + } + } +} + +func TestSignbit(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Signbit(vf[i]); signbit[i] != f { + t.Errorf("Signbit(%g) = %t, want %t", vf[i], f, signbit[i]) + } + } + for i := 0; i < len(vfsignbitSC); i++ { + if f := Signbit(vfsignbitSC[i]); signbitSC[i] != f { + t.Errorf("Signbit(%g) = %t, want %t", vfsignbitSC[i], f, signbitSC[i]) + } + } +} +func TestSin(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Sin(vf[i]); !veryclose(sin[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vf[i], f, sin[i]) + } + } + for i := 0; i < len(vfsinSC); i++ { + if f := Sin(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Sin(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestSincos(t *testing.T) { + for i := 0; i < len(vf); i++ { + if s, c := Sincos(vf[i]); !veryclose(sin[i], s) || !veryclose(cos[i], c) { + t.Errorf("Sincos(%g) = %g, %g want %g, %g", vf[i], s, c, sin[i], cos[i]) + } + } +} + +func TestSinh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Sinh(vf[i]); !close(sinh[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vf[i], f, sinh[i]) + } + } + for i := 0; i < len(vfsinhSC); i++ { + if f := Sinh(vfsinhSC[i]); !alike(sinhSC[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vfsinhSC[i], f, sinhSC[i]) + } + } +} + +func TestSqrt(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := SqrtGo(a); sqrt[i] != f { + t.Errorf("SqrtGo(%g) = %g, 
want %g", a, f, sqrt[i]) + } + a = Abs(vf[i]) + if f := Sqrt(a); sqrt[i] != f { + t.Errorf("Sqrt(%g) = %g, want %g", a, f, sqrt[i]) + } + } + for i := 0; i < len(vfsqrtSC); i++ { + if f := SqrtGo(vfsqrtSC[i]); !alike(sqrtSC[i], f) { + t.Errorf("SqrtGo(%g) = %g, want %g", vfsqrtSC[i], f, sqrtSC[i]) + } + if f := Sqrt(vfsqrtSC[i]); !alike(sqrtSC[i], f) { + t.Errorf("Sqrt(%g) = %g, want %g", vfsqrtSC[i], f, sqrtSC[i]) + } + } +} + +func TestTan(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Tan(vf[i]); !veryclose(tan[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vf[i], f, tan[i]) + } + } + // same special cases as Sin + for i := 0; i < len(vfsinSC); i++ { + if f := Tan(vfsinSC[i]); !alike(sinSC[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vfsinSC[i], f, sinSC[i]) + } + } +} + +func TestTanh(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Tanh(vf[i]); !veryclose(tanh[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vf[i], f, tanh[i]) + } + } + for i := 0; i < len(vftanhSC); i++ { + if f := Tanh(vftanhSC[i]); !alike(tanhSC[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vftanhSC[i], f, tanhSC[i]) + } + } +} + +func TestTrunc(t *testing.T) { + for i := 0; i < len(vf); i++ { + if f := Trunc(vf[i]); trunc[i] != f { + t.Errorf("Trunc(%g) = %g, want %g", vf[i], f, trunc[i]) + } + } + for i := 0; i < len(vfceilSC); i++ { + if f := Trunc(vfceilSC[i]); !alike(ceilSC[i], f) { + t.Errorf("Trunc(%g) = %g, want %g", vfceilSC[i], f, ceilSC[i]) + } + } +} + +func TestY0(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Y0(a); !close(y0[i], f) { + t.Errorf("Y0(%g) = %g, want %g", a, f, y0[i]) + } + } + for i := 0; i < len(vfy0SC); i++ { + if f := Y0(vfy0SC[i]); !alike(y0SC[i], f) { + t.Errorf("Y0(%g) = %g, want %g", vfy0SC[i], f, y0SC[i]) + } + } +} + +func TestY1(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Y1(a); !soclose(y1[i], f, 2e-14) { + t.Errorf("Y1(%g) = %g, want %g", a, f, y1[i]) + } + } + for i := 
0; i < len(vfy0SC); i++ { + if f := Y1(vfy0SC[i]); !alike(y1SC[i], f) { + t.Errorf("Y1(%g) = %g, want %g", vfy0SC[i], f, y1SC[i]) + } + } +} + +func TestYn(t *testing.T) { + for i := 0; i < len(vf); i++ { + a := Abs(vf[i]) + if f := Yn(2, a); !close(y2[i], f) { + t.Errorf("Yn(2, %g) = %g, want %g", a, f, y2[i]) + } + if f := Yn(-3, a); !close(yM3[i], f) { + t.Errorf("Yn(-3, %g) = %g, want %g", a, f, yM3[i]) + } + } + for i := 0; i < len(vfy0SC); i++ { + if f := Yn(2, vfy0SC[i]); !alike(y2SC[i], f) { + t.Errorf("Yn(2, %g) = %g, want %g", vfy0SC[i], f, y2SC[i]) + } + if f := Yn(-3, vfy0SC[i]); !alike(yM3SC[i], f) { + t.Errorf("Yn(-3, %g) = %g, want %g", vfy0SC[i], f, yM3SC[i]) + } + } +} + +// Check that math functions of high angle values +// return accurate results. [Since (vf[i] + large) - large != vf[i], +// testing for Trig(vf[i] + large) == Trig(vf[i]), where large is +// a multiple of 2*Pi, is misleading.] +func TestLargeCos(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := cosLarge[i] + f2 := Cos(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Cos(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeSin(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := sinLarge[i] + f2 := Sin(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Sin(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +func TestLargeSincos(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1, g1 := sinLarge[i], cosLarge[i] + f2, g2 := Sincos(vf[i] + large) + if !close(f1, f2) || !close(g1, g2) { + t.Errorf("Sincos(%g) = %g, %g, want %g, %g", vf[i]+large, f2, g2, f1, g1) + } + } +} + +func TestLargeTan(t *testing.T) { + large := float64(100000 * Pi) + for i := 0; i < len(vf); i++ { + f1 := tanLarge[i] + f2 := Tan(vf[i] + large) + if !close(f1, f2) { + t.Errorf("Tan(%g) = %g, want %g", vf[i]+large, f2, f1) + } + } +} + +// Check that math constants are accepted by compiler 
+// and have right value (assumes strconv.ParseFloat works). +// http://code.google.com/p/go/issues/detail?id=201 + +type floatTest struct { + val interface{} + name string + str string +} + +var floatTests = []floatTest{ + {float64(MaxFloat64), "MaxFloat64", "1.7976931348623157e+308"}, + {float64(SmallestNonzeroFloat64), "SmallestNonzeroFloat64", "5e-324"}, + {float32(MaxFloat32), "MaxFloat32", "3.4028235e+38"}, + {float32(SmallestNonzeroFloat32), "SmallestNonzeroFloat32", "1e-45"}, +} + +func TestFloatMinMax(t *testing.T) { + for _, tt := range floatTests { + s := fmt.Sprint(tt.val) + if s != tt.str { + t.Errorf("Sprint(%v) = %s, want %s", tt.name, s, tt.str) + } + } +} + +// Benchmarks + +func BenchmarkAcos(b *testing.B) { + for i := 0; i < b.N; i++ { + Acos(.5) + } +} + +func BenchmarkAcosh(b *testing.B) { + for i := 0; i < b.N; i++ { + Acosh(1.5) + } +} + +func BenchmarkAsin(b *testing.B) { + for i := 0; i < b.N; i++ { + Asin(.5) + } +} + +func BenchmarkAsinh(b *testing.B) { + for i := 0; i < b.N; i++ { + Asinh(.5) + } +} + +func BenchmarkAtan(b *testing.B) { + for i := 0; i < b.N; i++ { + Atan(.5) + } +} + +func BenchmarkAtanh(b *testing.B) { + for i := 0; i < b.N; i++ { + Atanh(.5) + } +} + +func BenchmarkAtan2(b *testing.B) { + for i := 0; i < b.N; i++ { + Atan2(.5, 1) + } +} + +func BenchmarkCbrt(b *testing.B) { + for i := 0; i < b.N; i++ { + Cbrt(10) + } +} + +func BenchmarkCeil(b *testing.B) { + for i := 0; i < b.N; i++ { + Ceil(.5) + } +} + +func BenchmarkCopysign(b *testing.B) { + for i := 0; i < b.N; i++ { + Copysign(.5, -1) + } +} + +func BenchmarkCos(b *testing.B) { + for i := 0; i < b.N; i++ { + Cos(.5) + } +} + +func BenchmarkCosh(b *testing.B) { + for i := 0; i < b.N; i++ { + Cosh(2.5) + } +} + +func BenchmarkErf(b *testing.B) { + for i := 0; i < b.N; i++ { + Erf(.5) + } +} + +func BenchmarkErfc(b *testing.B) { + for i := 0; i < b.N; i++ { + Erfc(.5) + } +} + +func BenchmarkExp(b *testing.B) { + for i := 0; i < b.N; i++ { + Exp(.5) + } +} + +func 
BenchmarkExpGo(b *testing.B) { + for i := 0; i < b.N; i++ { + ExpGo(.5) + } +} + +func BenchmarkExpm1(b *testing.B) { + for i := 0; i < b.N; i++ { + Expm1(.5) + } +} + +func BenchmarkExp2(b *testing.B) { + for i := 0; i < b.N; i++ { + Exp2(.5) + } +} + +func BenchmarkExp2Go(b *testing.B) { + for i := 0; i < b.N; i++ { + Exp2Go(.5) + } +} + +func BenchmarkAbs(b *testing.B) { + for i := 0; i < b.N; i++ { + Abs(.5) + } +} + +func BenchmarkDim(b *testing.B) { + for i := 0; i < b.N; i++ { + Dim(10, 3) + } +} + +func BenchmarkFloor(b *testing.B) { + for i := 0; i < b.N; i++ { + Floor(.5) + } +} + +func BenchmarkMax(b *testing.B) { + for i := 0; i < b.N; i++ { + Max(10, 3) + } +} + +func BenchmarkMin(b *testing.B) { + for i := 0; i < b.N; i++ { + Min(10, 3) + } +} + +func BenchmarkMod(b *testing.B) { + for i := 0; i < b.N; i++ { + Mod(10, 3) + } +} + +func BenchmarkFrexp(b *testing.B) { + for i := 0; i < b.N; i++ { + Frexp(8) + } +} + +func BenchmarkGamma(b *testing.B) { + for i := 0; i < b.N; i++ { + Gamma(2.5) + } +} + +func BenchmarkHypot(b *testing.B) { + for i := 0; i < b.N; i++ { + Hypot(3, 4) + } +} + +func BenchmarkHypotGo(b *testing.B) { + for i := 0; i < b.N; i++ { + HypotGo(3, 4) + } +} + +func BenchmarkIlogb(b *testing.B) { + for i := 0; i < b.N; i++ { + Ilogb(.5) + } +} + +func BenchmarkJ0(b *testing.B) { + for i := 0; i < b.N; i++ { + J0(2.5) + } +} + +func BenchmarkJ1(b *testing.B) { + for i := 0; i < b.N; i++ { + J1(2.5) + } +} + +func BenchmarkJn(b *testing.B) { + for i := 0; i < b.N; i++ { + Jn(2, 2.5) + } +} + +func BenchmarkLdexp(b *testing.B) { + for i := 0; i < b.N; i++ { + Ldexp(.5, 2) + } +} + +func BenchmarkLgamma(b *testing.B) { + for i := 0; i < b.N; i++ { + Lgamma(2.5) + } +} + +func BenchmarkLog(b *testing.B) { + for i := 0; i < b.N; i++ { + Log(.5) + } +} + +func BenchmarkLogb(b *testing.B) { + for i := 0; i < b.N; i++ { + Logb(.5) + } +} + +func BenchmarkLog1p(b *testing.B) { + for i := 0; i < b.N; i++ { + Log1p(.5) + } +} + +func 
BenchmarkLog10(b *testing.B) { + for i := 0; i < b.N; i++ { + Log10(.5) + } +} + +func BenchmarkLog2(b *testing.B) { + for i := 0; i < b.N; i++ { + Log2(.5) + } +} + +func BenchmarkModf(b *testing.B) { + for i := 0; i < b.N; i++ { + Modf(1.5) + } +} + +func BenchmarkNextafter32(b *testing.B) { + for i := 0; i < b.N; i++ { + Nextafter32(.5, 1) + } +} + +func BenchmarkNextafter64(b *testing.B) { + for i := 0; i < b.N; i++ { + Nextafter(.5, 1) + } +} + +func BenchmarkPowInt(b *testing.B) { + for i := 0; i < b.N; i++ { + Pow(2, 2) + } +} + +func BenchmarkPowFrac(b *testing.B) { + for i := 0; i < b.N; i++ { + Pow(2.5, 1.5) + } +} + +func BenchmarkPow10Pos(b *testing.B) { + for i := 0; i < b.N; i++ { + Pow10(300) + } +} + +func BenchmarkPow10Neg(b *testing.B) { + for i := 0; i < b.N; i++ { + Pow10(-300) + } +} + +func BenchmarkRemainder(b *testing.B) { + for i := 0; i < b.N; i++ { + Remainder(10, 3) + } +} + +func BenchmarkSignbit(b *testing.B) { + for i := 0; i < b.N; i++ { + Signbit(2.5) + } +} + +func BenchmarkSin(b *testing.B) { + for i := 0; i < b.N; i++ { + Sin(.5) + } +} + +func BenchmarkSincos(b *testing.B) { + for i := 0; i < b.N; i++ { + Sincos(.5) + } +} + +func BenchmarkSinh(b *testing.B) { + for i := 0; i < b.N; i++ { + Sinh(2.5) + } +} + +func BenchmarkSqrt(b *testing.B) { + for i := 0; i < b.N; i++ { + Sqrt(10) + } +} + +func BenchmarkSqrtGo(b *testing.B) { + for i := 0; i < b.N; i++ { + SqrtGo(10) + } +} + +func BenchmarkTan(b *testing.B) { + for i := 0; i < b.N; i++ { + Tan(.5) + } +} + +func BenchmarkTanh(b *testing.B) { + for i := 0; i < b.N; i++ { + Tanh(2.5) + } +} +func BenchmarkTrunc(b *testing.B) { + for i := 0; i < b.N; i++ { + Trunc(.5) + } +} + +func BenchmarkY0(b *testing.B) { + for i := 0; i < b.N; i++ { + Y0(2.5) + } +} + +func BenchmarkY1(b *testing.B) { + for i := 0; i < b.N; i++ { + Y1(2.5) + } +} + +func BenchmarkYn(b *testing.B) { + for i := 0; i < b.N; i++ { + Yn(2, 2.5) + } +} diff --git a/src/math/asin.go b/src/math/asin.go new file 
mode 100644 index 000000000..88b851e55 --- /dev/null +++ b/src/math/asin.go @@ -0,0 +1,55 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point arcsine and arccosine. + + They are implemented by computing the arctangent + after appropriate range reduction. +*/ + +// Asin returns the arcsine, in radians, of x. +// +// Special cases are: +// Asin(±0) = ±0 +// Asin(x) = NaN if x < -1 or x > 1 +func Asin(x float64) float64 + +func asin(x float64) float64 { + if x == 0 { + return x // special case + } + sign := false + if x < 0 { + x = -x + sign = true + } + if x > 1 { + return NaN() // special case + } + + temp := Sqrt(1 - x*x) + if x > 0.7 { + temp = Pi/2 - satan(temp/x) + } else { + temp = satan(x / temp) + } + + if sign { + temp = -temp + } + return temp +} + +// Acos returns the arccosine, in radians, of x. +// +// Special case is: +// Acos(x) = NaN if x < -1 or x > 1 +func Acos(x float64) float64 + +func acos(x float64) float64 { + return Pi/2 - Asin(x) +} diff --git a/src/math/asin_386.s b/src/math/asin_386.s new file mode 100644 index 000000000..4f34e123e --- /dev/null +++ b/src/math/asin_386.s @@ -0,0 +1,30 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func Asin(x float64) float64 +TEXT ·Asin(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=sin(x) + FMOVD F0, F1 // F0=sin(x), F1=sin(x) + FMULD F0, F0 // F0=sin(x)*sin(x), F1=sin(x) + FLD1 // F0=1, F1=sin(x)*sin(x), F2=sin(x) + FSUBRDP F0, F1 // F0=1-sin(x)*sin(x) (=cos(x)*cos(x)), F1=sin(x) + FSQRT // F0=cos(x), F1=sin(x) + FPATAN // F0=arcsin(sin(x))=x + FMOVDP F0, ret+8(FP) + RET + +// func Acos(x float64) float64 +TEXT ·Acos(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=cos(x) + FMOVD F0, F1 // F0=cos(x), F1=cos(x) + FMULD F0, F0 // F0=cos(x)*cos(x), F1=cos(x) + FLD1 // F0=1, F1=cos(x)*cos(x), F2=cos(x) + FSUBRDP F0, F1 // F0=1-cos(x)*cos(x) (=sin(x)*sin(x)), F1=cos(x) + FSQRT // F0=sin(x), F1=cos(x) + FXCHD F0, F1 // F0=cos(x), F1=sin(x) + FPATAN // F0=arccos(cos(x))=x + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/asin_amd64.s b/src/math/asin_amd64.s new file mode 100644 index 000000000..1a43d489b --- /dev/null +++ b/src/math/asin_amd64.s @@ -0,0 +1,11 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Asin(SB),NOSPLIT,$0 + JMP ·asin(SB) + +TEXT ·Acos(SB),NOSPLIT,$0 + JMP ·acos(SB) diff --git a/src/math/asin_amd64p32.s b/src/math/asin_amd64p32.s new file mode 100644 index 000000000..2751c475f --- /dev/null +++ b/src/math/asin_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "asin_amd64.s" diff --git a/src/math/asin_arm.s b/src/math/asin_arm.s new file mode 100644 index 000000000..8fe03b61d --- /dev/null +++ b/src/math/asin_arm.s @@ -0,0 +1,11 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·Asin(SB),NOSPLIT,$0 + B ·asin(SB) + +TEXT ·Acos(SB),NOSPLIT,$0 + B ·acos(SB) diff --git a/src/math/asinh.go b/src/math/asinh.go new file mode 100644 index 000000000..ff2de0215 --- /dev/null +++ b/src/math/asinh.go @@ -0,0 +1,69 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/s_asinh.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// asinh(x) +// Method : +// Based on +// asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] +// we have +// asinh(x) := x if 1+x*x=1, +// := sign(x)*(log(x)+ln2)) for large |x|, else +// := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else +// := sign(x)*log1p(|x| + x**2/(1 + sqrt(1+x**2))) +// + +// Asinh returns the inverse hyperbolic sine of x. 
+// +// Special cases are: +// Asinh(±0) = ±0 +// Asinh(±Inf) = ±Inf +// Asinh(NaN) = NaN +func Asinh(x float64) float64 { + const ( + Ln2 = 6.93147180559945286227e-01 // 0x3FE62E42FEFA39EF + NearZero = 1.0 / (1 << 28) // 2**-28 + Large = 1 << 28 // 2**28 + ) + // special cases + if IsNaN(x) || IsInf(x, 0) { + return x + } + sign := false + if x < 0 { + x = -x + sign = true + } + var temp float64 + switch { + case x > Large: + temp = Log(x) + Ln2 // |x| > 2**28 + case x > 2: + temp = Log(2*x + 1/(Sqrt(x*x+1)+x)) // 2**28 > |x| > 2.0 + case x < NearZero: + temp = x // |x| < 2**-28 + default: + temp = Log1p(x + x*x/(1+Sqrt(1+x*x))) // 2.0 > |x| > 2**-28 + } + if sign { + temp = -temp + } + return temp +} diff --git a/src/math/atan.go b/src/math/atan.go new file mode 100644 index 000000000..7fcc90b8b --- /dev/null +++ b/src/math/atan.go @@ -0,0 +1,105 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point arctangent. +*/ + +// The original C code, the long comment, and the constants below were +// from http://netlib.sandia.gov/cephes/cmath/atan.c, available from +// http://www.netlib.org/cephes/cmath.tgz. +// The go code is a version of the original C. +// +// atan.c +// Inverse circular tangent (arctangent) +// +// SYNOPSIS: +// double x, y, atan(); +// y = atan( x ); +// +// DESCRIPTION: +// Returns radian angle between -pi/2 and +pi/2 whose tangent is x. +// +// Range reduction is from three intervals into the interval from zero to 0.66. +// The approximant uses a rational function of degree 4/5 of the form +// x + x**3 P(x)/Q(x). +// +// ACCURACY: +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10, 10 50000 2.4e-17 8.3e-18 +// IEEE -10, 10 10^6 1.8e-16 5.0e-17 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. 
Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// xatan evaluates a series valid in the range [0, 0.66]. +func xatan(x float64) float64 { + const ( + P0 = -8.750608600031904122785e-01 + P1 = -1.615753718733365076637e+01 + P2 = -7.500855792314704667340e+01 + P3 = -1.228866684490136173410e+02 + P4 = -6.485021904942025371773e+01 + Q0 = +2.485846490142306297962e+01 + Q1 = +1.650270098316988542046e+02 + Q2 = +4.328810604912902668951e+02 + Q3 = +4.853903996359136964868e+02 + Q4 = +1.945506571482613964425e+02 + ) + z := x * x + z = z * ((((P0*z+P1)*z+P2)*z+P3)*z + P4) / (((((z+Q0)*z+Q1)*z+Q2)*z+Q3)*z + Q4) + z = x*z + x + return z +} + +// satan reduces its argument (known to be positive) +// to the range [0, 0.66] and calls xatan. +func satan(x float64) float64 { + const ( + Morebits = 6.123233995736765886130e-17 // pi/2 = PIO2 + Morebits + Tan3pio8 = 2.41421356237309504880 // tan(3*pi/8) + ) + if x <= 0.66 { + return xatan(x) + } + if x > Tan3pio8 { + return Pi/2 - xatan(1/x) + Morebits + } + return Pi/4 + xatan((x-1)/(x+1)) + 0.5*Morebits +} + +// Atan returns the arctangent, in radians, of x. 
+// +// Special cases are: +// Atan(±0) = ±0 +// Atan(±Inf) = ±Pi/2 +func Atan(x float64) float64 + +func atan(x float64) float64 { + if x == 0 { + return x + } + if x > 0 { + return satan(x) + } + return -satan(-x) +} diff --git a/src/math/atan2.go b/src/math/atan2.go new file mode 100644 index 000000000..d84b332c9 --- /dev/null +++ b/src/math/atan2.go @@ -0,0 +1,71 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Atan2 returns the arc tangent of y/x, using +// the signs of the two to determine the quadrant +// of the return value. +// +// Special cases are (in order): +// Atan2(y, NaN) = NaN +// Atan2(NaN, x) = NaN +// Atan2(+0, x>=0) = +0 +// Atan2(-0, x>=0) = -0 +// Atan2(+0, x<=-0) = +Pi +// Atan2(-0, x<=-0) = -Pi +// Atan2(y>0, 0) = +Pi/2 +// Atan2(y<0, 0) = -Pi/2 +// Atan2(+Inf, +Inf) = +Pi/4 +// Atan2(-Inf, +Inf) = -Pi/4 +// Atan2(+Inf, -Inf) = 3Pi/4 +// Atan2(-Inf, -Inf) = -3Pi/4 +// Atan2(y, +Inf) = 0 +// Atan2(y>0, -Inf) = +Pi +// Atan2(y<0, -Inf) = -Pi +// Atan2(+Inf, x) = +Pi/2 +// Atan2(-Inf, x) = -Pi/2 +func Atan2(y, x float64) float64 + +func atan2(y, x float64) float64 { + // special cases + switch { + case IsNaN(y) || IsNaN(x): + return NaN() + case y == 0: + if x >= 0 && !Signbit(x) { + return Copysign(0, y) + } + return Copysign(Pi, y) + case x == 0: + return Copysign(Pi/2, y) + case IsInf(x, 0): + if IsInf(x, 1) { + switch { + case IsInf(y, 0): + return Copysign(Pi/4, y) + default: + return Copysign(0, y) + } + } + switch { + case IsInf(y, 0): + return Copysign(3*Pi/4, y) + default: + return Copysign(Pi, y) + } + case IsInf(y, 0): + return Copysign(Pi/2, y) + } + + // Call atan and determine the quadrant. 
+ q := Atan(y / x) + if x < 0 { + if q <= 0 { + return q + Pi + } + return q - Pi + } + return q +} diff --git a/src/math/atan2_386.s b/src/math/atan2_386.s new file mode 100644 index 000000000..31a74e726 --- /dev/null +++ b/src/math/atan2_386.s @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Atan2(y, x float64) float64 // =atan(y/x) +TEXT ·Atan2(SB),NOSPLIT,$0 + FMOVD y+0(FP), F0 // F0=y + FMOVD x+8(FP), F0 // F0=x, F1=y + FPATAN // F0=atan(F1/F0) + FMOVDP F0, ret+16(FP) + RET diff --git a/src/math/atan2_amd64.s b/src/math/atan2_amd64.s new file mode 100644 index 000000000..fc471f76c --- /dev/null +++ b/src/math/atan2_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Atan2(SB),NOSPLIT,$0 + JMP ·atan2(SB) diff --git a/src/math/atan2_amd64p32.s b/src/math/atan2_amd64p32.s new file mode 100644 index 000000000..3fdc03ca8 --- /dev/null +++ b/src/math/atan2_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "atan2_amd64.s" diff --git a/src/math/atan2_arm.s b/src/math/atan2_arm.s new file mode 100644 index 000000000..06c12ecbc --- /dev/null +++ b/src/math/atan2_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·Atan2(SB),NOSPLIT,$0 + B ·atan2(SB) diff --git a/src/math/atan_386.s b/src/math/atan_386.s new file mode 100644 index 000000000..f3976b1d3 --- /dev/null +++ b/src/math/atan_386.s @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Atan(x float64) float64 +TEXT ·Atan(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FLD1 // F0=1, F1=x + FPATAN // F0=atan(F1/F0) + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/atan_amd64.s b/src/math/atan_amd64.s new file mode 100644 index 000000000..b801ae99d --- /dev/null +++ b/src/math/atan_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Atan(SB),NOSPLIT,$0 + JMP ·atan(SB) diff --git a/src/math/atan_amd64p32.s b/src/math/atan_amd64p32.s new file mode 100644 index 000000000..1c1f6ceda --- /dev/null +++ b/src/math/atan_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "atan_amd64.s" diff --git a/src/math/atan_arm.s b/src/math/atan_arm.s new file mode 100644 index 000000000..d190a8bb0 --- /dev/null +++ b/src/math/atan_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Atan(SB),NOSPLIT,$0 + B ·atan(SB) diff --git a/src/math/atanh.go b/src/math/atanh.go new file mode 100644 index 000000000..113d5c103 --- /dev/null +++ b/src/math/atanh.go @@ -0,0 +1,77 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_atanh.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// __ieee754_atanh(x) +// Method : +// 1. Reduce x to positive by atanh(-x) = -atanh(x) +// 2. For x>=0.5 +// 1 2x x +// atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------) +// 2 1 - x 1 - x +// +// For x<0.5 +// atanh(x) = 0.5*log1p(2x+2x*x/(1-x)) +// +// Special cases: +// atanh(x) is NaN if |x| > 1 with signal; +// atanh(NaN) is that NaN with no signal; +// atanh(+-1) is +-INF with signal. +// + +// Atanh returns the inverse hyperbolic tangent of x. 
+// +// Special cases are: +// Atanh(1) = +Inf +// Atanh(±0) = ±0 +// Atanh(-1) = -Inf +// Atanh(x) = NaN if x < -1 or x > 1 +// Atanh(NaN) = NaN +func Atanh(x float64) float64 { + const NearZero = 1.0 / (1 << 28) // 2**-28 + // special cases + switch { + case x < -1 || x > 1 || IsNaN(x): + return NaN() + case x == 1: + return Inf(1) + case x == -1: + return Inf(-1) + } + sign := false + if x < 0 { + x = -x + sign = true + } + var temp float64 + switch { + case x < NearZero: + temp = x + case x < 0.5: + temp = x + x + temp = 0.5 * Log1p(temp+temp*x/(1-x)) + default: + temp = 0.5 * Log1p((x+x)/(1-x)) + } + if sign { + temp = -temp + } + return temp +} diff --git a/src/math/big/arith.go b/src/math/big/arith.go new file mode 100644 index 000000000..3d5a8682d --- /dev/null +++ b/src/math/big/arith.go @@ -0,0 +1,240 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file provides Go implementations of elementary multi-precision +// arithmetic operations on word vectors. Needed for platforms without +// assembly implementations of these routines. + +package big + +// A Word represents a single digit of a multi-precision unsigned integer. +type Word uintptr + +const ( + // Compute the size _S of a Word in bytes. + _m = ^Word(0) + _logS = _m>>8&1 + _m>>16&1 + _m>>32&1 + _S = 1 << _logS + + _W = _S << 3 // word size in bits + _B = 1 << _W // digit base + _M = _B - 1 // digit mask + + _W2 = _W / 2 // half word size in bits + _B2 = 1 << _W2 // half digit base + _M2 = _B2 - 1 // half digit mask +) + +// ---------------------------------------------------------------------------- +// Elementary operations on words +// +// These operations are used by the vector operations below. 
+ +// z1<<_W + z0 = x+y+c, with c == 0 or 1 +func addWW_g(x, y, c Word) (z1, z0 Word) { + yc := y + c + z0 = x + yc + if z0 < x || yc < y { + z1 = 1 + } + return +} + +// z1<<_W + z0 = x-y-c, with c == 0 or 1 +func subWW_g(x, y, c Word) (z1, z0 Word) { + yc := y + c + z0 = x - yc + if z0 > x || yc < y { + z1 = 1 + } + return +} + +// z1<<_W + z0 = x*y +// Adapted from Warren, Hacker's Delight, p. 132. +func mulWW_g(x, y Word) (z1, z0 Word) { + x0 := x & _M2 + x1 := x >> _W2 + y0 := y & _M2 + y1 := y >> _W2 + w0 := x0 * y0 + t := x1*y0 + w0>>_W2 + w1 := t & _M2 + w2 := t >> _W2 + w1 += x0 * y1 + z1 = x1*y1 + w2 + w1>>_W2 + z0 = x * y + return +} + +// z1<<_W + z0 = x*y + c +func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { + z1, zz0 := mulWW(x, y) + if z0 = zz0 + c; z0 < zz0 { + z1++ + } + return +} + +// Length of x in bits. +func bitLen_g(x Word) (n int) { + for ; x >= 0x8000; x >>= 16 { + n += 16 + } + if x >= 0x80 { + x >>= 8 + n += 8 + } + if x >= 0x8 { + x >>= 4 + n += 4 + } + if x >= 0x2 { + x >>= 2 + n += 2 + } + if x >= 0x1 { + n++ + } + return +} + +// log2 computes the integer binary logarithm of x. +// The result is the integer n for which 2^n <= x < 2^(n+1). +// If x == 0, the result is -1. +func log2(x Word) int { + return bitLen(x) - 1 +} + +// Number of leading zeros in x. +func leadingZeros(x Word) uint { + return uint(_W - bitLen(x)) +} + +// q = (u1<<_W + u0 - r)/y +// Adapted from Warren, Hacker's Delight, p. 152. 
+func divWW_g(u1, u0, v Word) (q, r Word) { + if u1 >= v { + return 1<<_W - 1, 1<<_W - 1 + } + + s := leadingZeros(v) + v <<= s + + vn1 := v >> _W2 + vn0 := v & _M2 + un32 := u1<<s | u0>>(_W-s) + un10 := u0 << s + un1 := un10 >> _W2 + un0 := un10 & _M2 + q1 := un32 / vn1 + rhat := un32 - q1*vn1 + + for q1 >= _B2 || q1*vn0 > _B2*rhat+un1 { + q1-- + rhat += vn1 + if rhat >= _B2 { + break + } + } + + un21 := un32*_B2 + un1 - q1*v + q0 := un21 / vn1 + rhat = un21 - q0*vn1 + + for q0 >= _B2 || q0*vn0 > _B2*rhat+un0 { + q0-- + rhat += vn1 + if rhat >= _B2 { + break + } + } + + return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s +} + +func addVV_g(z, x, y []Word) (c Word) { + for i := range z { + c, z[i] = addWW_g(x[i], y[i], c) + } + return +} + +func subVV_g(z, x, y []Word) (c Word) { + for i := range z { + c, z[i] = subWW_g(x[i], y[i], c) + } + return +} + +func addVW_g(z, x []Word, y Word) (c Word) { + c = y + for i := range z { + c, z[i] = addWW_g(x[i], c, 0) + } + return +} + +func subVW_g(z, x []Word, y Word) (c Word) { + c = y + for i := range z { + c, z[i] = subWW_g(x[i], c, 0) + } + return +} + +func shlVU_g(z, x []Word, s uint) (c Word) { + if n := len(z); n > 0 { + ŝ := _W - s + w1 := x[n-1] + c = w1 >> ŝ + for i := n - 1; i > 0; i-- { + w := w1 + w1 = x[i-1] + z[i] = w<<s | w1>>ŝ + } + z[0] = w1 << s + } + return +} + +func shrVU_g(z, x []Word, s uint) (c Word) { + if n := len(z); n > 0 { + ŝ := _W - s + w1 := x[0] + c = w1 << ŝ + for i := 0; i < n-1; i++ { + w := w1 + w1 = x[i+1] + z[i] = w>>s | w1<<ŝ + } + z[n-1] = w1 >> s + } + return +} + +func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { + c = r + for i := range z { + c, z[i] = mulAddWWW_g(x[i], y, c) + } + return +} + +func addMulVVW_g(z, x []Word, y Word) (c Word) { + for i := range z { + z1, z0 := mulAddWWW_g(x[i], y, z[i]) + c, z[i] = addWW_g(z0, c, 0) + c += z1 + } + return +} + +func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) { + r = xn + for i := len(z) - 1; i >= 0; i-- { + z[i], r = 
divWW_g(r, x[i], y) + } + return +} diff --git a/src/math/big/arith_386.s b/src/math/big/arith_386.s new file mode 100644 index 000000000..1b47c898f --- /dev/null +++ b/src/math/big/arith_386.s @@ -0,0 +1,278 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVL x+0(FP), AX + MULL y+4(FP) + MOVL DX, z1+8(FP) + MOVL AX, z0+12(FP) + RET + + +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB),NOSPLIT,$0 + MOVL x1+0(FP), DX + MOVL x0+4(FP), AX + DIVL y+8(FP) + MOVL AX, q+12(FP) + MOVL DX, r+16(FP) + RET + + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), CX + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 + JMP E1 + +L1: MOVL (SI)(BX*4), AX + RCRL $1, DX + ADCL (CX)(BX*4), AX + RCLL $1, DX + MOVL AX, (DI)(BX*4) + ADDL $1, BX // i++ + +E1: CMPL BX, BP // i < n + JL L1 + + MOVL DX, c+36(FP) + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBL instead of ADCL and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), CX + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + MOVL $0, DX // c = 0 + JMP E2 + +L2: MOVL (SI)(BX*4), AX + RCRL $1, DX + SBBL (CX)(BX*4), AX + RCLL $1, DX + MOVL AX, (DI)(BX*4) + ADDL $1, BX // i++ + +E2: CMPL BX, BP // i < n + JL L2 + + MOVL DX, c+36(FP) + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), AX // c = y + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + JMP E3 + +L3: ADDL (SI)(BX*4), AX + MOVL AX, (DI)(BX*4) + RCLL $1, AX + ANDL $1, AX + ADDL $1, BX 
// i++ + +E3: CMPL BX, BP // i < n + JL L3 + + MOVL AX, c+28(FP) + RET + + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), AX // c = y + MOVL z_len+4(FP), BP + MOVL $0, BX // i = 0 + JMP E4 + +L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL? + SUBL AX, DX + MOVL DX, (DI)(BX*4) + RCLL $1, AX + ANDL $1, AX + ADDL $1, BX // i++ + +E4: CMPL BX, BP // i < n + JL L4 + + MOVL AX, c+28(FP) + RET + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVL z_len+4(FP), BX // i = z + SUBL $1, BX // i-- + JL X8b // i < 0 (n <= 0) + + // n > 0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL s+24(FP), CX + MOVL (SI)(BX*4), AX // w1 = x[n-1] + MOVL $0, DX + SHLL CX, DX:AX // w1>>ŝ + MOVL DX, c+28(FP) + + CMPL BX, $0 + JLE X8a // i <= 0 + + // i > 0 +L8: MOVL AX, DX // w = w1 + MOVL -4(SI)(BX*4), AX // w1 = x[i-1] + SHLL CX, DX:AX // w<<s | w1>>ŝ + MOVL DX, (DI)(BX*4) // z[i] = w<<s | w1>>ŝ + SUBL $1, BX // i-- + JG L8 // i > 0 + + // i <= 0 +X8a: SHLL CX, AX // w1<<s + MOVL AX, (DI) // z[0] = w1<<s + RET + +X8b: MOVL $0, c+28(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVL z_len+4(FP), BP + SUBL $1, BP // n-- + JL X9b // n < 0 (n <= 0) + + // n > 0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL s+24(FP), CX + MOVL (SI), AX // w1 = x[0] + MOVL $0, DX + SHRL CX, DX:AX // w1<<ŝ + MOVL DX, c+28(FP) + + MOVL $0, BX // i = 0 + JMP E9 + + // i < n-1 +L9: MOVL AX, DX // w = w1 + MOVL 4(SI)(BX*4), AX // w1 = x[i+1] + SHRL CX, DX:AX // w>>s | w1<<ŝ + MOVL DX, (DI)(BX*4) // z[i] = w>>s | w1<<ŝ + ADDL $1, BX // i++ + +E9: CMPL BX, BP + JL L9 // i < n-1 + + // i >= n-1 +X9a: SHRL CX, AX // w1>>s + MOVL AX, (DI)(BP*4) // z[n-1] = w1>>s + RET + +X9b: MOVL $0, c+28(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), BP + MOVL r+28(FP), 
CX // c = r + MOVL z_len+4(FP), BX + LEAL (DI)(BX*4), DI + LEAL (SI)(BX*4), SI + NEGL BX // i = -n + JMP E5 + +L5: MOVL (SI)(BX*4), AX + MULL BP + ADDL CX, AX + ADCL $0, DX + MOVL AX, (DI)(BX*4) + MOVL DX, CX + ADDL $1, BX // i++ + +E5: CMPL BX, $0 // i < 0 + JL L5 + + MOVL CX, c+32(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL x+12(FP), SI + MOVL y+24(FP), BP + MOVL z_len+4(FP), BX + LEAL (DI)(BX*4), DI + LEAL (SI)(BX*4), SI + NEGL BX // i = -n + MOVL $0, CX // c = 0 + JMP E6 + +L6: MOVL (SI)(BX*4), AX + MULL BP + ADDL CX, AX + ADCL $0, DX + ADDL AX, (DI)(BX*4) + ADCL $0, DX + MOVL DX, CX + ADDL $1, BX // i++ + +E6: CMPL BX, $0 // i < 0 + JL L6 + + MOVL CX, c+28(FP) + RET + + +// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) +TEXT ·divWVW(SB),NOSPLIT,$0 + MOVL z+0(FP), DI + MOVL xn+12(FP), DX // r = xn + MOVL x+16(FP), SI + MOVL y+28(FP), CX + MOVL z_len+4(FP), BX // i = z + JMP E7 + +L7: MOVL (SI)(BX*4), AX + DIVL CX + MOVL AX, (DI)(BX*4) + +E7: SUBL $1, BX // i-- + JGE L7 // i >= 0 + + MOVL DX, r+32(FP) + RET + +// func bitLen(x Word) (n int) +TEXT ·bitLen(SB),NOSPLIT,$0 + BSRL x+0(FP), AX + JZ Z1 + INCL AX + MOVL AX, n+4(FP) + RET + +Z1: MOVL $0, n+4(FP) + RET diff --git a/src/math/big/arith_amd64.s b/src/math/big/arith_amd64.s new file mode 100644 index 000000000..56c4cb050 --- /dev/null +++ b/src/math/big/arith_amd64.s @@ -0,0 +1,401 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// Literal instruction for MOVQ $0, CX. +// (MOVQ $0, reg is translated to XORQ reg, reg and clears CF.) 
+#define ZERO_CX BYTE $0x48; \ + BYTE $0xc7; \ + BYTE $0xc1; \ + BYTE $0x00; \ + BYTE $0x00; \ + BYTE $0x00; \ + BYTE $0x00 + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MULQ y+8(FP) + MOVQ DX, z1+16(FP) + MOVQ AX, z0+24(FP) + RET + + +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB),NOSPLIT,$0 + MOVQ x1+0(FP), DX + MOVQ x0+8(FP), AX + DIVQ y+16(FP) + MOVQ AX, q+24(FP) + MOVQ DX, r+32(FP) + RET + + +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z+0(FP), R10 + + MOVQ $0, CX // c = 0 + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V1 // if n < 0 goto V1 + +U1: // n >= 0 + // regular loop body unrolled 4x + RCRQ $1, CX // CF = c + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + ADCQ 0(R9)(SI*8), R11 + ADCQ 8(R9)(SI*8), R12 + ADCQ 16(R9)(SI*8), R13 + ADCQ 24(R9)(SI*8), R14 + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + RCLQ $1, CX // c = CF + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U1 // if n >= 0 goto U1 + +V1: ADDQ $4, DI // n += 4 + JLE E1 // if n <= 0 goto E1 + +L1: // n > 0 + RCRQ $1, CX // CF = c + MOVQ 0(R8)(SI*8), R11 + ADCQ 0(R9)(SI*8), R11 + MOVQ R11, 0(R10)(SI*8) + RCLQ $1, CX // c = CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L1 // if n > 0 goto L1 + +E1: MOVQ CX, c+72(FP) // return c + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBBQ instead of ADCQ and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z+0(FP), R10 + + MOVQ $0, CX // c = 0 + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V2 // if n < 0 goto V2 + +U2: // n >= 0 + // regular loop body unrolled 4x + RCRQ $1, CX // CF = c 
+ MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + SBBQ 0(R9)(SI*8), R11 + SBBQ 8(R9)(SI*8), R12 + SBBQ 16(R9)(SI*8), R13 + SBBQ 24(R9)(SI*8), R14 + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + RCLQ $1, CX // c = CF + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U2 // if n >= 0 goto U2 + +V2: ADDQ $4, DI // n += 4 + JLE E2 // if n <= 0 goto E2 + +L2: // n > 0 + RCRQ $1, CX // CF = c + MOVQ 0(R8)(SI*8), R11 + SBBQ 0(R9)(SI*8), R11 + MOVQ R11, 0(R10)(SI*8) + RCLQ $1, CX // c = CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L2 // if n > 0 goto L2 + +E2: MOVQ CX, c+72(FP) // return c + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), CX // c = y + MOVQ z+0(FP), R10 + + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V3 // if n < 4 goto V3 + +U3: // n >= 0 + // regular loop body unrolled 4x + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + ADDQ CX, R11 + ZERO_CX + ADCQ $0, R12 + ADCQ $0, R13 + ADCQ $0, R14 + SETCS CX // c = CF + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U3 // if n >= 0 goto U3 + +V3: ADDQ $4, DI // n += 4 + JLE E3 // if n <= 0 goto E3 + +L3: // n > 0 + ADDQ 0(R8)(SI*8), CX + MOVQ CX, 0(R10)(SI*8) + ZERO_CX + RCLQ $1, CX // c = CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L3 // if n > 0 goto L3 + +E3: MOVQ CX, c+56(FP) // return c + RET + + +// func subVW(z, x []Word, y Word) (c Word) +// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), DI + MOVQ x+24(FP), R8 + MOVQ y+48(FP), CX // c = y + MOVQ z+0(FP), R10 + + MOVQ $0, SI // i = 0 + + // s/JL/JMP/ below 
to disable the unrolled loop + SUBQ $4, DI // n -= 4 + JL V4 // if n < 4 goto V4 + +U4: // n >= 0 + // regular loop body unrolled 4x + MOVQ 0(R8)(SI*8), R11 + MOVQ 8(R8)(SI*8), R12 + MOVQ 16(R8)(SI*8), R13 + MOVQ 24(R8)(SI*8), R14 + SUBQ CX, R11 + ZERO_CX + SBBQ $0, R12 + SBBQ $0, R13 + SBBQ $0, R14 + SETCS CX // c = CF + MOVQ R11, 0(R10)(SI*8) + MOVQ R12, 8(R10)(SI*8) + MOVQ R13, 16(R10)(SI*8) + MOVQ R14, 24(R10)(SI*8) + + ADDQ $4, SI // i += 4 + SUBQ $4, DI // n -= 4 + JGE U4 // if n >= 0 goto U4 + +V4: ADDQ $4, DI // n += 4 + JLE E4 // if n <= 0 goto E4 + +L4: // n > 0 + MOVQ 0(R8)(SI*8), R11 + SUBQ CX, R11 + MOVQ R11, 0(R10)(SI*8) + ZERO_CX + RCLQ $1, CX // c = CF + + ADDQ $1, SI // i++ + SUBQ $1, DI // n-- + JG L4 // if n > 0 goto L4 + +E4: MOVQ CX, c+56(FP) // return c + RET + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), BX // i = z + SUBQ $1, BX // i-- + JL X8b // i < 0 (n <= 0) + + // n > 0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ s+48(FP), CX + MOVQ (R8)(BX*8), AX // w1 = x[n-1] + MOVQ $0, DX + SHLQ CX, DX:AX // w1>>ŝ + MOVQ DX, c+56(FP) + + CMPQ BX, $0 + JLE X8a // i <= 0 + + // i > 0 +L8: MOVQ AX, DX // w = w1 + MOVQ -8(R8)(BX*8), AX // w1 = x[i-1] + SHLQ CX, DX:AX // w<<s | w1>>ŝ + MOVQ DX, (R10)(BX*8) // z[i] = w<<s | w1>>ŝ + SUBQ $1, BX // i-- + JG L8 // i > 0 + + // i <= 0 +X8a: SHLQ CX, AX // w1<<s + MOVQ AX, (R10) // z[0] = w1<<s + RET + +X8b: MOVQ $0, c+56(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVQ z_len+8(FP), R11 + SUBQ $1, R11 // n-- + JL X9b // n < 0 (n <= 0) + + // n > 0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ s+48(FP), CX + MOVQ (R8), AX // w1 = x[0] + MOVQ $0, DX + SHRQ CX, DX:AX // w1<<ŝ + MOVQ DX, c+56(FP) + + MOVQ $0, BX // i = 0 + JMP E9 + + // i < n-1 +L9: MOVQ AX, DX // w = w1 + MOVQ 8(R8)(BX*8), AX // w1 = x[i+1] + SHRQ CX, DX:AX // w>>s | w1<<ŝ + MOVQ DX, (R10)(BX*8) // z[i] = w>>s | w1<<ŝ + ADDQ $1, BX // i++ + +E9: 
CMPQ BX, R11 + JL L9 // i < n-1 + + // i >= n-1 +X9a: SHRQ CX, AX // w1>>s + MOVQ AX, (R10)(R11*8) // z[n-1] = w1>>s + RET + +X9b: MOVQ $0, c+56(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ r+56(FP), CX // c = r + MOVQ z_len+8(FP), R11 + MOVQ $0, BX // i = 0 + JMP E5 + +L5: MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + MOVQ AX, (R10)(BX*8) + MOVQ DX, CX + ADDQ $1, BX // i++ + +E5: CMPQ BX, R11 // i < n + JL L5 + + MOVQ CX, c+64(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + MOVQ z+0(FP), R10 + MOVQ x+24(FP), R8 + MOVQ y+48(FP), R9 + MOVQ z_len+8(FP), R11 + MOVQ $0, BX // i = 0 + MOVQ $0, CX // c = 0 + JMP E6 + +L6: MOVQ (R8)(BX*8), AX + MULQ R9 + ADDQ CX, AX + ADCQ $0, DX + ADDQ AX, (R10)(BX*8) + ADCQ $0, DX + MOVQ DX, CX + ADDQ $1, BX // i++ + +E6: CMPQ BX, R11 // i < n + JL L6 + + MOVQ CX, c+56(FP) + RET + + +// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) +TEXT ·divWVW(SB),NOSPLIT,$0 + MOVQ z+0(FP), R10 + MOVQ xn+24(FP), DX // r = xn + MOVQ x+32(FP), R8 + MOVQ y+56(FP), R9 + MOVQ z_len+8(FP), BX // i = z + JMP E7 + +L7: MOVQ (R8)(BX*8), AX + DIVQ R9 + MOVQ AX, (R10)(BX*8) + +E7: SUBQ $1, BX // i-- + JGE L7 // i >= 0 + + MOVQ DX, r+64(FP) + RET + +// func bitLen(x Word) (n int) +TEXT ·bitLen(SB),NOSPLIT,$0 + BSRQ x+0(FP), AX + JZ Z1 + ADDQ $1, AX + MOVQ AX, n+8(FP) + RET + +Z1: MOVQ $0, n+8(FP) + RET diff --git a/src/math/big/arith_amd64p32.s b/src/math/big/arith_amd64p32.s new file mode 100644 index 000000000..908dbbdc5 --- /dev/null +++ b/src/math/big/arith_amd64p32.s @@ -0,0 +1,41 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·mulWW(SB),NOSPLIT,$0 + JMP ·mulWW_g(SB) + +TEXT ·divWW(SB),NOSPLIT,$0 + JMP ·divWW_g(SB) + +TEXT ·addVV(SB),NOSPLIT,$0 + JMP ·addVV_g(SB) + +TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) + +TEXT ·addVW(SB),NOSPLIT,$0 + JMP ·addVW_g(SB) + +TEXT ·subVW(SB),NOSPLIT,$0 + JMP ·subVW_g(SB) + +TEXT ·shlVU(SB),NOSPLIT,$0 + JMP ·shlVU_g(SB) + +TEXT ·shrVU(SB),NOSPLIT,$0 + JMP ·shrVU_g(SB) + +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP ·mulAddVWW_g(SB) + +TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP ·addMulVVW_g(SB) + +TEXT ·divWVW(SB),NOSPLIT,$0 + JMP ·divWVW_g(SB) + +TEXT ·bitLen(SB),NOSPLIT,$0 + JMP ·bitLen_g(SB) diff --git a/src/math/big/arith_arm.s b/src/math/big/arith_arm.s new file mode 100644 index 000000000..a4c51c212 --- /dev/null +++ b/src/math/big/arith_arm.s @@ -0,0 +1,300 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. 
+ +// func addVV(z, x, y []Word) (c Word) +TEXT ·addVV(SB),NOSPLIT,$0 + ADD.S $0, R0 // clear carry flag + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + B E1 +L1: + MOVW.P 4(R2), R5 + MOVW.P 4(R3), R6 + ADC.S R6, R5 + MOVW.P R5, 4(R1) +E1: + TEQ R1, R4 + BNE L1 + + MOVW $0, R0 + MOVW.CS $1, R0 + MOVW R0, c+36(FP) + RET + + +// func subVV(z, x, y []Word) (c Word) +// (same as addVV except for SBC instead of ADC and label names) +TEXT ·subVV(SB),NOSPLIT,$0 + SUB.S $0, R0 // clear borrow flag + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + B E2 +L2: + MOVW.P 4(R2), R5 + MOVW.P 4(R3), R6 + SBC.S R6, R5 + MOVW.P R5, 4(R1) +E2: + TEQ R1, R4 + BNE L2 + + MOVW $0, R0 + MOVW.CC $1, R0 + MOVW R0, c+36(FP) + RET + + +// func addVW(z, x []Word, y Word) (c Word) +TEXT ·addVW(SB),NOSPLIT,$0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + TEQ R1, R4 + BNE L3a + MOVW R3, c+28(FP) + RET +L3a: + MOVW.P 4(R2), R5 + ADD.S R3, R5 + MOVW.P R5, 4(R1) + B E3 +L3: + MOVW.P 4(R2), R5 + ADC.S $0, R5 + MOVW.P R5, 4(R1) +E3: + TEQ R1, R4 + BNE L3 + + MOVW $0, R0 + MOVW.CS $1, R0 + MOVW R0, c+28(FP) + RET + + +// func subVW(z, x []Word, y Word) (c Word) +TEXT ·subVW(SB),NOSPLIT,$0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R4 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R4<<2, R1, R4 + TEQ R1, R4 + BNE L4a + MOVW R3, c+28(FP) + RET +L4a: + MOVW.P 4(R2), R5 + SUB.S R3, R5 + MOVW.P R5, 4(R1) + B E4 +L4: + MOVW.P 4(R2), R5 + SBC.S $0, R5 + MOVW.P R5, 4(R1) +E4: + TEQ R1, R4 + BNE L4 + + MOVW $0, R0 + MOVW.CC $1, R0 + MOVW R0, c+28(FP) + RET + + +// func shlVU(z, x []Word, s uint) (c Word) +TEXT ·shlVU(SB),NOSPLIT,$0 + MOVW z_len+4(FP), R5 + TEQ $0, R5 + BEQ X7 + + MOVW z+0(FP), R1 + MOVW x+12(FP), R2 + ADD R5<<2, R2, R2 + ADD R5<<2, R1, R5 + MOVW s+24(FP), R3 + TEQ $0, R3 // shift 0 is special + BEQ Y7 + ADD $4, R1 // stop one word 
early + MOVW $32, R4 + SUB R3, R4 + MOVW $0, R7 + + MOVW.W -4(R2), R6 + MOVW R6<<R3, R7 + MOVW R6>>R4, R6 + MOVW R6, c+28(FP) + B E7 + +L7: + MOVW.W -4(R2), R6 + ORR R6>>R4, R7 + MOVW.W R7, -4(R5) + MOVW R6<<R3, R7 +E7: + TEQ R1, R5 + BNE L7 + + MOVW R7, -4(R5) + RET + +Y7: // copy loop, because shift 0 == shift 32 + MOVW.W -4(R2), R6 + MOVW.W R6, -4(R5) + TEQ R1, R5 + BNE Y7 + +X7: + MOVW $0, R1 + MOVW R1, c+28(FP) + RET + + +// func shrVU(z, x []Word, s uint) (c Word) +TEXT ·shrVU(SB),NOSPLIT,$0 + MOVW z_len+4(FP), R5 + TEQ $0, R5 + BEQ X6 + + MOVW z+0(FP), R1 + MOVW x+12(FP), R2 + ADD R5<<2, R1, R5 + MOVW s+24(FP), R3 + TEQ $0, R3 // shift 0 is special + BEQ Y6 + SUB $4, R5 // stop one word early + MOVW $32, R4 + SUB R3, R4 + MOVW $0, R7 + + // first word + MOVW.P 4(R2), R6 + MOVW R6>>R3, R7 + MOVW R6<<R4, R6 + MOVW R6, c+28(FP) + B E6 + + // word loop +L6: + MOVW.P 4(R2), R6 + ORR R6<<R4, R7 + MOVW.P R7, 4(R1) + MOVW R6>>R3, R7 +E6: + TEQ R1, R5 + BNE L6 + + MOVW R7, 0(R1) + RET + +Y6: // copy loop, because shift 0 == shift 32 + MOVW.P 4(R2), R6 + MOVW.P R6, 4(R1) + TEQ R1, R5 + BNE Y6 + +X6: + MOVW $0, R1 + MOVW R1, c+28(FP) + RET + + +// func mulAddVWW(z, x []Word, y, r Word) (c Word) +TEXT ·mulAddVWW(SB),NOSPLIT,$0 + MOVW $0, R0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R5 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + MOVW r+28(FP), R4 + ADD R5<<2, R1, R5 + B E8 + + // word loop +L8: + MOVW.P 4(R2), R6 + MULLU R6, R3, (R7, R6) + ADD.S R4, R6 + ADC R0, R7 + MOVW.P R6, 4(R1) + MOVW R7, R4 +E8: + TEQ R1, R5 + BNE L8 + + MOVW R4, c+32(FP) + RET + + +// func addMulVVW(z, x []Word, y Word) (c Word) +TEXT ·addMulVVW(SB),NOSPLIT,$0 + MOVW $0, R0 + MOVW z+0(FP), R1 + MOVW z_len+4(FP), R5 + MOVW x+12(FP), R2 + MOVW y+24(FP), R3 + ADD R5<<2, R1, R5 + MOVW $0, R4 + B E9 + + // word loop +L9: + MOVW.P 4(R2), R6 + MULLU R6, R3, (R7, R6) + ADD.S R4, R6 + ADC R0, R7 + MOVW 0(R1), R4 + ADD.S R4, R6 + ADC R0, R7 + MOVW.P R6, 4(R1) + MOVW R7, R4 +E9: + TEQ R1, R5 + BNE L9 + + MOVW 
R4, c+28(FP) + RET + + +// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) +TEXT ·divWVW(SB),NOSPLIT,$0 + // ARM has no multiword division, so use portable code. + B ·divWVW_g(SB) + + +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB),NOSPLIT,$0 + // ARM has no multiword division, so use portable code. + B ·divWW_g(SB) + + +// func mulWW(x, y Word) (z1, z0 Word) +TEXT ·mulWW(SB),NOSPLIT,$0 + MOVW x+0(FP), R1 + MOVW y+4(FP), R2 + MULLU R1, R2, (R4, R3) + MOVW R4, z1+8(FP) + MOVW R3, z0+12(FP) + RET + +// func bitLen(x Word) (n int) +TEXT ·bitLen(SB),NOSPLIT,$0 + MOVW x+0(FP), R0 + CLZ R0, R0 + RSB $32, R0 + MOVW R0, n+4(FP) + RET diff --git a/src/math/big/arith_decl.go b/src/math/big/arith_decl.go new file mode 100644 index 000000000..068cc8d93 --- /dev/null +++ b/src/math/big/arith_decl.go @@ -0,0 +1,19 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +// implemented in arith_$GOARCH.s +func mulWW(x, y Word) (z1, z0 Word) +func divWW(x1, x0, y Word) (q, r Word) +func addVV(z, x, y []Word) (c Word) +func subVV(z, x, y []Word) (c Word) +func addVW(z, x []Word, y Word) (c Word) +func subVW(z, x []Word, y Word) (c Word) +func shlVU(z, x []Word, s uint) (c Word) +func shrVU(z, x []Word, s uint) (c Word) +func mulAddVWW(z, x []Word, y, r Word) (c Word) +func addMulVVW(z, x []Word, y Word) (c Word) +func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) +func bitLen(x Word) (n int) diff --git a/src/math/big/arith_test.go b/src/math/big/arith_test.go new file mode 100644 index 000000000..3615a659c --- /dev/null +++ b/src/math/big/arith_test.go @@ -0,0 +1,456 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "math/rand" + "testing" +) + +type funWW func(x, y, c Word) (z1, z0 Word) +type argWW struct { + x, y, c, z1, z0 Word +} + +var sumWW = []argWW{ + {0, 0, 0, 0, 0}, + {0, 1, 0, 0, 1}, + {0, 0, 1, 0, 1}, + {0, 1, 1, 0, 2}, + {12345, 67890, 0, 0, 80235}, + {12345, 67890, 1, 0, 80236}, + {_M, 1, 0, 1, 0}, + {_M, 0, 1, 1, 0}, + {_M, 1, 1, 1, 1}, + {_M, _M, 0, 1, _M - 1}, + {_M, _M, 1, 1, _M}, +} + +func testFunWW(t *testing.T, msg string, f funWW, a argWW) { + z1, z0 := f(a.x, a.y, a.c) + if z1 != a.z1 || z0 != a.z0 { + t.Errorf("%s%+v\n\tgot z1:z0 = %#x:%#x; want %#x:%#x", msg, a, z1, z0, a.z1, a.z0) + } +} + +func TestFunWW(t *testing.T) { + for _, a := range sumWW { + arg := a + testFunWW(t, "addWW_g", addWW_g, arg) + + arg = argWW{a.y, a.x, a.c, a.z1, a.z0} + testFunWW(t, "addWW_g symmetric", addWW_g, arg) + + arg = argWW{a.z0, a.x, a.c, a.z1, a.y} + testFunWW(t, "subWW_g", subWW_g, arg) + + arg = argWW{a.z0, a.y, a.c, a.z1, a.x} + testFunWW(t, "subWW_g symmetric", subWW_g, arg) + } +} + +type funVV func(z, x, y []Word) (c Word) +type argVV struct { + z, x, y nat + c Word +} + +var sumVV = []argVV{ + {}, + {nat{0}, nat{0}, nat{0}, 0}, + {nat{1}, nat{1}, nat{0}, 0}, + {nat{0}, nat{_M}, nat{1}, 1}, + {nat{80235}, nat{12345}, nat{67890}, 0}, + {nat{_M - 1}, nat{_M}, nat{_M}, 1}, + {nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, nat{1, 0, 0, 0}, 1}, + {nat{0, 0, 0, _M}, nat{_M, _M, _M, _M - 1}, nat{1, 0, 0, 0}, 0}, + {nat{0, 0, 0, 0}, nat{_M, 0, _M, 0}, nat{1, _M, 0, _M}, 1}, +} + +func testFunVV(t *testing.T, msg string, f funVV, a argVV) { + z := make(nat, len(a.z)) + c := f(z, a.x, a.y) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if c != a.c { + t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, c, a.c) + } +} + +func TestFunVV(t *testing.T) { + for _, a := range sumVV { + arg := a + testFunVV(t, "addVV_g", addVV_g, arg) + testFunVV(t, "addVV", addVV, arg) + 
+ arg = argVV{a.z, a.y, a.x, a.c} + testFunVV(t, "addVV_g symmetric", addVV_g, arg) + testFunVV(t, "addVV symmetric", addVV, arg) + + arg = argVV{a.x, a.z, a.y, a.c} + testFunVV(t, "subVV_g", subVV_g, arg) + testFunVV(t, "subVV", subVV, arg) + + arg = argVV{a.y, a.z, a.x, a.c} + testFunVV(t, "subVV_g symmetric", subVV_g, arg) + testFunVV(t, "subVV symmetric", subVV, arg) + } +} + +// Always the same seed for reproducible results. +var rnd = rand.New(rand.NewSource(0)) + +func rndW() Word { + return Word(rnd.Int63()<<1 | rnd.Int63n(2)) +} + +func rndV(n int) []Word { + v := make([]Word, n) + for i := range v { + v[i] = rndW() + } + return v +} + +func benchmarkFunVV(b *testing.B, f funVV, n int) { + x := rndV(n) + y := rndV(n) + z := make([]Word, n) + b.SetBytes(int64(n * _W)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + f(z, x, y) + } +} + +func BenchmarkAddVV_1(b *testing.B) { benchmarkFunVV(b, addVV, 1) } +func BenchmarkAddVV_2(b *testing.B) { benchmarkFunVV(b, addVV, 2) } +func BenchmarkAddVV_3(b *testing.B) { benchmarkFunVV(b, addVV, 3) } +func BenchmarkAddVV_4(b *testing.B) { benchmarkFunVV(b, addVV, 4) } +func BenchmarkAddVV_5(b *testing.B) { benchmarkFunVV(b, addVV, 5) } +func BenchmarkAddVV_1e1(b *testing.B) { benchmarkFunVV(b, addVV, 1e1) } +func BenchmarkAddVV_1e2(b *testing.B) { benchmarkFunVV(b, addVV, 1e2) } +func BenchmarkAddVV_1e3(b *testing.B) { benchmarkFunVV(b, addVV, 1e3) } +func BenchmarkAddVV_1e4(b *testing.B) { benchmarkFunVV(b, addVV, 1e4) } +func BenchmarkAddVV_1e5(b *testing.B) { benchmarkFunVV(b, addVV, 1e5) } + +type funVW func(z, x []Word, y Word) (c Word) +type argVW struct { + z, x nat + y Word + c Word +} + +var sumVW = []argVW{ + {}, + {nil, nil, 2, 2}, + {nat{0}, nat{0}, 0, 0}, + {nat{1}, nat{0}, 1, 0}, + {nat{1}, nat{1}, 0, 0}, + {nat{0}, nat{_M}, 1, 1}, + {nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, 1, 1}, +} + +var prodVW = []argVW{ + {}, + {nat{0}, nat{0}, 0, 0}, + {nat{0}, nat{_M}, 0, 0}, + {nat{0}, nat{0}, _M, 0}, + {nat{1}, 
nat{1}, 1, 0}, + {nat{22793}, nat{991}, 23, 0}, + {nat{0, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 0}, + {nat{0, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 0}, + {nat{0, 0, 0, 0}, nat{0, 0, 0, 0}, 894375984, 0}, + {nat{_M << 1 & _M}, nat{_M}, 1 << 1, _M >> (_W - 1)}, + {nat{_M << 7 & _M}, nat{_M}, 1 << 7, _M >> (_W - 7)}, + {nat{_M << 7 & _M, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, _M >> (_W - 7)}, +} + +var lshVW = []argVW{ + {}, + {nat{0}, nat{0}, 0, 0}, + {nat{0}, nat{0}, 1, 0}, + {nat{0}, nat{0}, 20, 0}, + + {nat{_M}, nat{_M}, 0, 0}, + {nat{_M << 1 & _M}, nat{_M}, 1, 1}, + {nat{_M << 20 & _M}, nat{_M}, 20, _M >> (_W - 20)}, + + {nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0}, + {nat{_M << 1 & _M, _M, _M}, nat{_M, _M, _M}, 1, 1}, + {nat{_M << 20 & _M, _M, _M}, nat{_M, _M, _M}, 20, _M >> (_W - 20)}, +} + +var rshVW = []argVW{ + {}, + {nat{0}, nat{0}, 0, 0}, + {nat{0}, nat{0}, 1, 0}, + {nat{0}, nat{0}, 20, 0}, + + {nat{_M}, nat{_M}, 0, 0}, + {nat{_M >> 1}, nat{_M}, 1, _M << (_W - 1) & _M}, + {nat{_M >> 20}, nat{_M}, 20, _M << (_W - 20) & _M}, + + {nat{_M, _M, _M}, nat{_M, _M, _M}, 0, 0}, + {nat{_M, _M, _M >> 1}, nat{_M, _M, _M}, 1, _M << (_W - 1) & _M}, + {nat{_M, _M, _M >> 20}, nat{_M, _M, _M}, 20, _M << (_W - 20) & _M}, +} + +func testFunVW(t *testing.T, msg string, f funVW, a argVW) { + z := make(nat, len(a.z)) + c := f(z, a.x, a.y) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if c != a.c { + t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, c, a.c) + } +} + +func makeFunVW(f func(z, x []Word, s uint) (c Word)) funVW { + return func(z, x []Word, s Word) (c Word) { + return f(z, x, uint(s)) + } +} + +func TestFunVW(t *testing.T) { + for _, a := range sumVW { + arg := a + testFunVW(t, "addVW_g", addVW_g, arg) + testFunVW(t, "addVW", addVW, arg) + + arg = argVW{a.x, a.z, a.y, a.c} + testFunVW(t, "subVW_g", subVW_g, arg) + testFunVW(t, "subVW", subVW, arg) + } + + shlVW_g 
:= makeFunVW(shlVU_g) + shlVW := makeFunVW(shlVU) + for _, a := range lshVW { + arg := a + testFunVW(t, "shlVU_g", shlVW_g, arg) + testFunVW(t, "shlVU", shlVW, arg) + } + + shrVW_g := makeFunVW(shrVU_g) + shrVW := makeFunVW(shrVU) + for _, a := range rshVW { + arg := a + testFunVW(t, "shrVU_g", shrVW_g, arg) + testFunVW(t, "shrVU", shrVW, arg) + } +} + +func benchmarkFunVW(b *testing.B, f funVW, n int) { + x := rndV(n) + y := rndW() + z := make([]Word, n) + b.SetBytes(int64(n * _W)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + f(z, x, y) + } +} + +func BenchmarkAddVW_1(b *testing.B) { benchmarkFunVW(b, addVW, 1) } +func BenchmarkAddVW_2(b *testing.B) { benchmarkFunVW(b, addVW, 2) } +func BenchmarkAddVW_3(b *testing.B) { benchmarkFunVW(b, addVW, 3) } +func BenchmarkAddVW_4(b *testing.B) { benchmarkFunVW(b, addVW, 4) } +func BenchmarkAddVW_5(b *testing.B) { benchmarkFunVW(b, addVW, 5) } +func BenchmarkAddVW_1e1(b *testing.B) { benchmarkFunVW(b, addVW, 1e1) } +func BenchmarkAddVW_1e2(b *testing.B) { benchmarkFunVW(b, addVW, 1e2) } +func BenchmarkAddVW_1e3(b *testing.B) { benchmarkFunVW(b, addVW, 1e3) } +func BenchmarkAddVW_1e4(b *testing.B) { benchmarkFunVW(b, addVW, 1e4) } +func BenchmarkAddVW_1e5(b *testing.B) { benchmarkFunVW(b, addVW, 1e5) } + +type funVWW func(z, x []Word, y, r Word) (c Word) +type argVWW struct { + z, x nat + y, r Word + c Word +} + +var prodVWW = []argVWW{ + {}, + {nat{0}, nat{0}, 0, 0, 0}, + {nat{991}, nat{0}, 0, 991, 0}, + {nat{0}, nat{_M}, 0, 0, 0}, + {nat{991}, nat{_M}, 0, 991, 0}, + {nat{0}, nat{0}, _M, 0, 0}, + {nat{991}, nat{0}, _M, 991, 0}, + {nat{1}, nat{1}, 1, 0, 0}, + {nat{992}, nat{1}, 1, 991, 0}, + {nat{22793}, nat{991}, 23, 0, 0}, + {nat{22800}, nat{991}, 23, 7, 0}, + {nat{0, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 0, 0}, + {nat{7, 0, 0, 22793}, nat{0, 0, 0, 991}, 23, 7, 0}, + {nat{0, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 0, 0}, + {nat{991, 0, 0, 0}, nat{7893475, 7395495, 798547395, 68943}, 0, 991, 0}, + {nat{0, 0, 
0, 0}, nat{0, 0, 0, 0}, 894375984, 0, 0}, + {nat{991, 0, 0, 0}, nat{0, 0, 0, 0}, 894375984, 991, 0}, + {nat{_M << 1 & _M}, nat{_M}, 1 << 1, 0, _M >> (_W - 1)}, + {nat{_M<<1&_M + 1}, nat{_M}, 1 << 1, 1, _M >> (_W - 1)}, + {nat{_M << 7 & _M}, nat{_M}, 1 << 7, 0, _M >> (_W - 7)}, + {nat{_M<<7&_M + 1<<6}, nat{_M}, 1 << 7, 1 << 6, _M >> (_W - 7)}, + {nat{_M << 7 & _M, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, 0, _M >> (_W - 7)}, + {nat{_M<<7&_M + 1<<6, _M, _M, _M}, nat{_M, _M, _M, _M}, 1 << 7, 1 << 6, _M >> (_W - 7)}, +} + +func testFunVWW(t *testing.T, msg string, f funVWW, a argVWW) { + z := make(nat, len(a.z)) + c := f(z, a.x, a.y, a.r) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if c != a.c { + t.Errorf("%s%+v\n\tgot c = %#x; want %#x", msg, a, c, a.c) + } +} + +// TODO(gri) mulAddVWW and divWVW are symmetric operations but +// their signature is not symmetric. Try to unify. + +type funWVW func(z []Word, xn Word, x []Word, y Word) (r Word) +type argWVW struct { + z nat + xn Word + x nat + y Word + r Word +} + +func testFunWVW(t *testing.T, msg string, f funWVW, a argWVW) { + z := make(nat, len(a.z)) + r := f(z, a.xn, a.x, a.y) + for i, zi := range z { + if zi != a.z[i] { + t.Errorf("%s%+v\n\tgot z[%d] = %#x; want %#x", msg, a, i, zi, a.z[i]) + break + } + } + if r != a.r { + t.Errorf("%s%+v\n\tgot r = %#x; want %#x", msg, a, r, a.r) + } +} + +func TestFunVWW(t *testing.T) { + for _, a := range prodVWW { + arg := a + testFunVWW(t, "mulAddVWW_g", mulAddVWW_g, arg) + testFunVWW(t, "mulAddVWW", mulAddVWW, arg) + + if a.y != 0 && a.r < a.y { + arg := argWVW{a.x, a.c, a.z, a.y, a.r} + testFunWVW(t, "divWVW_g", divWVW_g, arg) + testFunWVW(t, "divWVW", divWVW, arg) + } + } +} + +var mulWWTests = []struct { + x, y Word + q, r Word +}{ + {_M, _M, _M - 1, 1}, + // 32 bit only: {0xc47dfa8c, 50911, 0x98a4, 0x998587f4}, +} + +func TestMulWW(t *testing.T) { + for i, test := range mulWWTests 
{ + q, r := mulWW_g(test.x, test.y) + if q != test.q || r != test.r { + t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, test.q, test.r) + } + } +} + +var mulAddWWWTests = []struct { + x, y, c Word + q, r Word +}{ + // TODO(agl): These will only work on 64-bit platforms. + // {15064310297182388543, 0xe7df04d2d35d5d80, 13537600649892366549, 13644450054494335067, 10832252001440893781}, + // {15064310297182388543, 0xdab2f18048baa68d, 13644450054494335067, 12869334219691522700, 14233854684711418382}, + {_M, _M, 0, _M - 1, 1}, + {_M, _M, _M, _M, 0}, +} + +func TestMulAddWWW(t *testing.T) { + for i, test := range mulAddWWWTests { + q, r := mulAddWWW_g(test.x, test.y, test.c) + if q != test.q || r != test.r { + t.Errorf("#%d got (%x, %x) want (%x, %x)", i, q, r, test.q, test.r) + } + } +} + +func benchmarkAddMulVVW(b *testing.B, n int) { + x := rndV(n) + y := rndW() + z := make([]Word, n) + b.SetBytes(int64(n * _W)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + addMulVVW(z, x, y) + } +} + +func BenchmarkAddMulVVW_1(b *testing.B) { benchmarkAddMulVVW(b, 1) } +func BenchmarkAddMulVVW_2(b *testing.B) { benchmarkAddMulVVW(b, 2) } +func BenchmarkAddMulVVW_3(b *testing.B) { benchmarkAddMulVVW(b, 3) } +func BenchmarkAddMulVVW_4(b *testing.B) { benchmarkAddMulVVW(b, 4) } +func BenchmarkAddMulVVW_5(b *testing.B) { benchmarkAddMulVVW(b, 5) } +func BenchmarkAddMulVVW_1e1(b *testing.B) { benchmarkAddMulVVW(b, 1e1) } +func BenchmarkAddMulVVW_1e2(b *testing.B) { benchmarkAddMulVVW(b, 1e2) } +func BenchmarkAddMulVVW_1e3(b *testing.B) { benchmarkAddMulVVW(b, 1e3) } +func BenchmarkAddMulVVW_1e4(b *testing.B) { benchmarkAddMulVVW(b, 1e4) } +func BenchmarkAddMulVVW_1e5(b *testing.B) { benchmarkAddMulVVW(b, 1e5) } + +func testWordBitLen(t *testing.T, fname string, f func(Word) int) { + for i := 0; i <= _W; i++ { + x := Word(1) << uint(i-1) // i == 0 => x == 0 + n := f(x) + if n != i { + t.Errorf("got %d; want %d for %s(%#x)", n, i, fname, x) + } + } +} + +func TestWordBitLen(t 
*testing.T) { + testWordBitLen(t, "bitLen", bitLen) + testWordBitLen(t, "bitLen_g", bitLen_g) +} + +// runs b.N iterations of bitLen called on a Word containing (1 << nbits)-1. +func benchmarkBitLenN(b *testing.B, nbits uint) { + testword := Word((uint64(1) << nbits) - 1) + for i := 0; i < b.N; i++ { + bitLen(testword) + } +} + +// Individual bitLen tests. Numbers chosen to examine both sides +// of powers-of-two boundaries. +func BenchmarkBitLen0(b *testing.B) { benchmarkBitLenN(b, 0) } +func BenchmarkBitLen1(b *testing.B) { benchmarkBitLenN(b, 1) } +func BenchmarkBitLen2(b *testing.B) { benchmarkBitLenN(b, 2) } +func BenchmarkBitLen3(b *testing.B) { benchmarkBitLenN(b, 3) } +func BenchmarkBitLen4(b *testing.B) { benchmarkBitLenN(b, 4) } +func BenchmarkBitLen5(b *testing.B) { benchmarkBitLenN(b, 5) } +func BenchmarkBitLen8(b *testing.B) { benchmarkBitLenN(b, 8) } +func BenchmarkBitLen9(b *testing.B) { benchmarkBitLenN(b, 9) } +func BenchmarkBitLen16(b *testing.B) { benchmarkBitLenN(b, 16) } +func BenchmarkBitLen17(b *testing.B) { benchmarkBitLenN(b, 17) } +func BenchmarkBitLen31(b *testing.B) { benchmarkBitLenN(b, 31) } diff --git a/src/math/big/calibrate_test.go b/src/math/big/calibrate_test.go new file mode 100644 index 000000000..f69ffbf5c --- /dev/null +++ b/src/math/big/calibrate_test.go @@ -0,0 +1,88 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file prints execution times for the Mul benchmark +// given different Karatsuba thresholds. The result may be +// used to manually fine-tune the threshold constant. The +// results are somewhat fragile; use repeated runs to get +// a clear picture. 
+ +// Usage: go test -run=TestCalibrate -calibrate + +package big + +import ( + "flag" + "fmt" + "testing" + "time" +) + +var calibrate = flag.Bool("calibrate", false, "run calibration test") + +func karatsubaLoad(b *testing.B) { + BenchmarkMul(b) +} + +// measureKaratsuba returns the time to run a Karatsuba-relevant benchmark +// given Karatsuba threshold th. +func measureKaratsuba(th int) time.Duration { + th, karatsubaThreshold = karatsubaThreshold, th + res := testing.Benchmark(karatsubaLoad) + karatsubaThreshold = th + return time.Duration(res.NsPerOp()) +} + +func computeThresholds() { + fmt.Printf("Multiplication times for varying Karatsuba thresholds\n") + fmt.Printf("(run repeatedly for good results)\n") + + // determine Tk, the work load execution time using basic multiplication + Tb := measureKaratsuba(1e9) // th == 1e9 => Karatsuba multiplication disabled + fmt.Printf("Tb = %10s\n", Tb) + + // thresholds + th := 4 + th1 := -1 + th2 := -1 + + var deltaOld time.Duration + for count := -1; count != 0 && th < 128; count-- { + // determine Tk, the work load execution time using Karatsuba multiplication + Tk := measureKaratsuba(th) + + // improvement over Tb + delta := (Tb - Tk) * 100 / Tb + + fmt.Printf("th = %3d Tk = %10s %4d%%", th, Tk, delta) + + // determine break-even point + if Tk < Tb && th1 < 0 { + th1 = th + fmt.Print(" break-even point") + } + + // determine diminishing return + if 0 < delta && delta < deltaOld && th2 < 0 { + th2 = th + fmt.Print(" diminishing return") + } + deltaOld = delta + + fmt.Println() + + // trigger counter + if th1 >= 0 && th2 >= 0 && count < 0 { + count = 10 // this many extra measurements after we got both thresholds + } + + th++ + } +} + +func TestCalibrate(t *testing.T) { + if *calibrate { + computeThresholds() + } +} diff --git a/src/math/big/example_test.go b/src/math/big/example_test.go new file mode 100644 index 000000000..078be47f9 --- /dev/null +++ b/src/math/big/example_test.go @@ -0,0 +1,51 @@ +// Copyright 
2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big_test + +import ( + "fmt" + "log" + "math/big" +) + +func ExampleRat_SetString() { + r := new(big.Rat) + r.SetString("355/113") + fmt.Println(r.FloatString(3)) + // Output: 3.142 +} + +func ExampleInt_SetString() { + i := new(big.Int) + i.SetString("644", 8) // octal + fmt.Println(i) + // Output: 420 +} + +func ExampleRat_Scan() { + // The Scan function is rarely used directly; + // the fmt package recognizes it as an implementation of fmt.Scanner. + r := new(big.Rat) + _, err := fmt.Sscan("1.5000", r) + if err != nil { + log.Println("error scanning value:", err) + } else { + fmt.Println(r) + } + // Output: 3/2 +} + +func ExampleInt_Scan() { + // The Scan function is rarely used directly; + // the fmt package recognizes it as an implementation of fmt.Scanner. + i := new(big.Int) + _, err := fmt.Sscan("18446744073709551617", i) + if err != nil { + log.Println("error scanning value:", err) + } else { + fmt.Println(i) + } + // Output: 18446744073709551617 +} diff --git a/src/math/big/gcd_test.go b/src/math/big/gcd_test.go new file mode 100644 index 000000000..c0b9f5830 --- /dev/null +++ b/src/math/big/gcd_test.go @@ -0,0 +1,47 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a GCD benchmark. 
+// Usage: go test math/big -test.bench GCD + +package big + +import ( + "math/rand" + "testing" +) + +// randInt returns a pseudo-random Int in the range [1<<(size-1), (1<<size) - 1] +func randInt(r *rand.Rand, size uint) *Int { + n := new(Int).Lsh(intOne, size-1) + x := new(Int).Rand(r, n) + return x.Add(x, n) // make sure result > 1<<(size-1) +} + +func runGCD(b *testing.B, aSize, bSize uint) { + b.StopTimer() + var r = rand.New(rand.NewSource(1234)) + aa := randInt(r, aSize) + bb := randInt(r, bSize) + b.StartTimer() + for i := 0; i < b.N; i++ { + new(Int).GCD(nil, nil, aa, bb) + } +} + +func BenchmarkGCD10x10(b *testing.B) { runGCD(b, 10, 10) } +func BenchmarkGCD10x100(b *testing.B) { runGCD(b, 10, 100) } +func BenchmarkGCD10x1000(b *testing.B) { runGCD(b, 10, 1000) } +func BenchmarkGCD10x10000(b *testing.B) { runGCD(b, 10, 10000) } +func BenchmarkGCD10x100000(b *testing.B) { runGCD(b, 10, 100000) } +func BenchmarkGCD100x100(b *testing.B) { runGCD(b, 100, 100) } +func BenchmarkGCD100x1000(b *testing.B) { runGCD(b, 100, 1000) } +func BenchmarkGCD100x10000(b *testing.B) { runGCD(b, 100, 10000) } +func BenchmarkGCD100x100000(b *testing.B) { runGCD(b, 100, 100000) } +func BenchmarkGCD1000x1000(b *testing.B) { runGCD(b, 1000, 1000) } +func BenchmarkGCD1000x10000(b *testing.B) { runGCD(b, 1000, 10000) } +func BenchmarkGCD1000x100000(b *testing.B) { runGCD(b, 1000, 100000) } +func BenchmarkGCD10000x10000(b *testing.B) { runGCD(b, 10000, 10000) } +func BenchmarkGCD10000x100000(b *testing.B) { runGCD(b, 10000, 100000) } +func BenchmarkGCD100000x100000(b *testing.B) { runGCD(b, 100000, 100000) } diff --git a/src/math/big/hilbert_test.go b/src/math/big/hilbert_test.go new file mode 100644 index 000000000..1a84341b3 --- /dev/null +++ b/src/math/big/hilbert_test.go @@ -0,0 +1,160 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// A little test program and benchmark for rational arithmetics. +// Computes a Hilbert matrix, its inverse, multiplies them +// and verifies that the product is the identity matrix. + +package big + +import ( + "fmt" + "testing" +) + +type matrix struct { + n, m int + a []*Rat +} + +func (a *matrix) at(i, j int) *Rat { + if !(0 <= i && i < a.n && 0 <= j && j < a.m) { + panic("index out of range") + } + return a.a[i*a.m+j] +} + +func (a *matrix) set(i, j int, x *Rat) { + if !(0 <= i && i < a.n && 0 <= j && j < a.m) { + panic("index out of range") + } + a.a[i*a.m+j] = x +} + +func newMatrix(n, m int) *matrix { + if !(0 <= n && 0 <= m) { + panic("illegal matrix") + } + a := new(matrix) + a.n = n + a.m = m + a.a = make([]*Rat, n*m) + return a +} + +func newUnit(n int) *matrix { + a := newMatrix(n, n) + for i := 0; i < n; i++ { + for j := 0; j < n; j++ { + x := NewRat(0, 1) + if i == j { + x.SetInt64(1) + } + a.set(i, j, x) + } + } + return a +} + +func newHilbert(n int) *matrix { + a := newMatrix(n, n) + for i := 0; i < n; i++ { + for j := 0; j < n; j++ { + a.set(i, j, NewRat(1, int64(i+j+1))) + } + } + return a +} + +func newInverseHilbert(n int) *matrix { + a := newMatrix(n, n) + for i := 0; i < n; i++ { + for j := 0; j < n; j++ { + x1 := new(Rat).SetInt64(int64(i + j + 1)) + x2 := new(Rat).SetInt(new(Int).Binomial(int64(n+i), int64(n-j-1))) + x3 := new(Rat).SetInt(new(Int).Binomial(int64(n+j), int64(n-i-1))) + x4 := new(Rat).SetInt(new(Int).Binomial(int64(i+j), int64(i))) + + x1.Mul(x1, x2) + x1.Mul(x1, x3) + x1.Mul(x1, x4) + x1.Mul(x1, x4) + + if (i+j)&1 != 0 { + x1.Neg(x1) + } + + a.set(i, j, x1) + } + } + return a +} + +func (a *matrix) mul(b *matrix) *matrix { + if a.m != b.n { + panic("illegal matrix multiply") + } + c := newMatrix(a.n, b.m) + for i := 0; i < c.n; i++ { + for j := 0; j < c.m; j++ { + x := NewRat(0, 1) + for k := 0; k < a.m; k++ { + x.Add(x, new(Rat).Mul(a.at(i, k), b.at(k, j))) + } + c.set(i, j, x) + } + } + return c +} + +func (a *matrix) 
eql(b *matrix) bool { + if a.n != b.n || a.m != b.m { + return false + } + for i := 0; i < a.n; i++ { + for j := 0; j < a.m; j++ { + if a.at(i, j).Cmp(b.at(i, j)) != 0 { + return false + } + } + } + return true +} + +func (a *matrix) String() string { + s := "" + for i := 0; i < a.n; i++ { + for j := 0; j < a.m; j++ { + s += fmt.Sprintf("\t%s", a.at(i, j)) + } + s += "\n" + } + return s +} + +func doHilbert(t *testing.T, n int) { + a := newHilbert(n) + b := newInverseHilbert(n) + I := newUnit(n) + ab := a.mul(b) + if !ab.eql(I) { + if t == nil { + panic("Hilbert failed") + } + t.Errorf("a = %s\n", a) + t.Errorf("b = %s\n", b) + t.Errorf("a*b = %s\n", ab) + t.Errorf("I = %s\n", I) + } +} + +func TestHilbert(t *testing.T) { + doHilbert(t, 10) +} + +func BenchmarkHilbert(b *testing.B) { + for i := 0; i < b.N; i++ { + doHilbert(nil, 10) + } +} diff --git a/src/math/big/int.go b/src/math/big/int.go new file mode 100644 index 000000000..d22e39e7c --- /dev/null +++ b/src/math/big/int.go @@ -0,0 +1,1031 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements signed multi-precision integers. + +package big + +import ( + "errors" + "fmt" + "io" + "math/rand" + "strings" +) + +// An Int represents a signed multi-precision integer. +// The zero value for an Int represents the value 0. +type Int struct { + neg bool // sign + abs nat // absolute value of the integer +} + +var intOne = &Int{false, natOne} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x == 0 +// +1 if x > 0 +// +func (x *Int) Sign() int { + if len(x.abs) == 0 { + return 0 + } + if x.neg { + return -1 + } + return 1 +} + +// SetInt64 sets z to x and returns z. +func (z *Int) SetInt64(x int64) *Int { + neg := false + if x < 0 { + neg = true + x = -x + } + z.abs = z.abs.setUint64(uint64(x)) + z.neg = neg + return z +} + +// SetUint64 sets z to x and returns z. 
+func (z *Int) SetUint64(x uint64) *Int { + z.abs = z.abs.setUint64(x) + z.neg = false + return z +} + +// NewInt allocates and returns a new Int set to x. +func NewInt(x int64) *Int { + return new(Int).SetInt64(x) +} + +// Set sets z to x and returns z. +func (z *Int) Set(x *Int) *Int { + if z != x { + z.abs = z.abs.set(x.abs) + z.neg = x.neg + } + return z +} + +// Bits provides raw (unchecked but fast) access to x by returning its +// absolute value as a little-endian Word slice. The result and x share +// the same underlying array. +// Bits is intended to support implementation of missing low-level Int +// functionality outside this package; it should be avoided otherwise. +func (x *Int) Bits() []Word { + return x.abs +} + +// SetBits provides raw (unchecked but fast) access to z by setting its +// value to abs, interpreted as a little-endian Word slice, and returning +// z. The result and abs share the same underlying array. +// SetBits is intended to support implementation of missing low-level Int +// functionality outside this package; it should be avoided otherwise. +func (z *Int) SetBits(abs []Word) *Int { + z.abs = nat(abs).norm() + z.neg = false + return z +} + +// Abs sets z to |x| (the absolute value of x) and returns z. +func (z *Int) Abs(x *Int) *Int { + z.Set(x) + z.neg = false + return z +} + +// Neg sets z to -x and returns z. +func (z *Int) Neg(x *Int) *Int { + z.Set(x) + z.neg = len(z.abs) > 0 && !z.neg // 0 has no sign + return z +} + +// Add sets z to the sum x+y and returns z. 
+func (z *Int) Add(x, y *Int) *Int { + neg := x.neg + if x.neg == y.neg { + // x + y == x + y + // (-x) + (-y) == -(x + y) + z.abs = z.abs.add(x.abs, y.abs) + } else { + // x + (-y) == x - y == -(y - x) + // (-x) + y == y - x == -(x - y) + if x.abs.cmp(y.abs) >= 0 { + z.abs = z.abs.sub(x.abs, y.abs) + } else { + neg = !neg + z.abs = z.abs.sub(y.abs, x.abs) + } + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + return z +} + +// Sub sets z to the difference x-y and returns z. +func (z *Int) Sub(x, y *Int) *Int { + neg := x.neg + if x.neg != y.neg { + // x - (-y) == x + y + // (-x) - y == -(x + y) + z.abs = z.abs.add(x.abs, y.abs) + } else { + // x - y == x - y == -(y - x) + // (-x) - (-y) == y - x == -(x - y) + if x.abs.cmp(y.abs) >= 0 { + z.abs = z.abs.sub(x.abs, y.abs) + } else { + neg = !neg + z.abs = z.abs.sub(y.abs, x.abs) + } + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + return z +} + +// Mul sets z to the product x*y and returns z. +func (z *Int) Mul(x, y *Int) *Int { + // x * y == x * y + // x * (-y) == -(x * y) + // (-x) * y == -(x * y) + // (-x) * (-y) == x * y + z.abs = z.abs.mul(x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg != y.neg // 0 has no sign + return z +} + +// MulRange sets z to the product of all integers +// in the range [a, b] inclusively and returns z. +// If a > b (empty range), the result is 1. +func (z *Int) MulRange(a, b int64) *Int { + switch { + case a > b: + return z.SetInt64(1) // empty range + case a <= 0 && b >= 0: + return z.SetInt64(0) // range includes 0 + } + // a <= b && (b < 0 || a > 0) + + neg := false + if a < 0 { + neg = (b-a)&1 == 0 + a, b = -b, -a + } + + z.abs = z.abs.mulRange(uint64(a), uint64(b)) + z.neg = neg + return z +} + +// Binomial sets z to the binomial coefficient of (n, k) and returns z. +func (z *Int) Binomial(n, k int64) *Int { + var a, b Int + a.MulRange(n-k+1, n) + b.MulRange(1, k) + return z.Quo(&a, &b) +} + +// Quo sets z to the quotient x/y for y != 0 and returns z. 
+// If y == 0, a division-by-zero run-time panic occurs. +// Quo implements truncated division (like Go); see QuoRem for more details. +func (z *Int) Quo(x, y *Int) *Int { + z.abs, _ = z.abs.div(nil, x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg != y.neg // 0 has no sign + return z +} + +// Rem sets z to the remainder x%y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Rem implements truncated modulus (like Go); see QuoRem for more details. +func (z *Int) Rem(x, y *Int) *Int { + _, z.abs = nat(nil).div(z.abs, x.abs, y.abs) + z.neg = len(z.abs) > 0 && x.neg // 0 has no sign + return z +} + +// QuoRem sets z to the quotient x/y and r to the remainder x%y +// and returns the pair (z, r) for y != 0. +// If y == 0, a division-by-zero run-time panic occurs. +// +// QuoRem implements T-division and modulus (like Go): +// +// q = x/y with the result truncated to zero +// r = x - y*q +// +// (See Daan Leijen, ``Division and Modulus for Computer Scientists''.) +// See DivMod for Euclidean division and modulus (unlike Go). +// +func (z *Int) QuoRem(x, y, r *Int) (*Int, *Int) { + z.abs, r.abs = z.abs.div(r.abs, x.abs, y.abs) + z.neg, r.neg = len(z.abs) > 0 && x.neg != y.neg, len(r.abs) > 0 && x.neg // 0 has no sign + return z, r +} + +// Div sets z to the quotient x/y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Div implements Euclidean division (unlike Go); see DivMod for more details. +func (z *Int) Div(x, y *Int) *Int { + y_neg := y.neg // z may be an alias for y + var r Int + z.QuoRem(x, y, &r) + if r.neg { + if y_neg { + z.Add(z, intOne) + } else { + z.Sub(z, intOne) + } + } + return z +} + +// Mod sets z to the modulus x%y for y != 0 and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +// Mod implements Euclidean modulus (unlike Go); see DivMod for more details. 
+func (z *Int) Mod(x, y *Int) *Int { + y0 := y // save y + if z == y || alias(z.abs, y.abs) { + y0 = new(Int).Set(y) + } + var q Int + q.QuoRem(x, y, z) + if z.neg { + if y0.neg { + z.Sub(z, y0) + } else { + z.Add(z, y0) + } + } + return z +} + +// DivMod sets z to the quotient x div y and m to the modulus x mod y +// and returns the pair (z, m) for y != 0. +// If y == 0, a division-by-zero run-time panic occurs. +// +// DivMod implements Euclidean division and modulus (unlike Go): +// +// q = x div y such that +// m = x - y*q with 0 <= m < |q| +// +// (See Raymond T. Boute, ``The Euclidean definition of the functions +// div and mod''. ACM Transactions on Programming Languages and +// Systems (TOPLAS), 14(2):127-144, New York, NY, USA, 4/1992. +// ACM press.) +// See QuoRem for T-division and modulus (like Go). +// +func (z *Int) DivMod(x, y, m *Int) (*Int, *Int) { + y0 := y // save y + if z == y || alias(z.abs, y.abs) { + y0 = new(Int).Set(y) + } + z.QuoRem(x, y, m) + if m.neg { + if y0.neg { + z.Add(z, intOne) + m.Sub(m, y0) + } else { + z.Sub(z, intOne) + m.Add(m, y0) + } + } + return z, m +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func (x *Int) Cmp(y *Int) (r int) { + // x cmp y == x cmp y + // x cmp (-y) == x + // (-x) cmp y == y + // (-x) cmp (-y) == -(x cmp y) + switch { + case x.neg == y.neg: + r = x.abs.cmp(y.abs) + if x.neg { + r = -r + } + case x.neg: + r = -1 + default: + r = 1 + } + return +} + +func (x *Int) String() string { + switch { + case x == nil: + return "<nil>" + case x.neg: + return "-" + x.abs.decimalString() + } + return x.abs.decimalString() +} + +func charset(ch rune) string { + switch ch { + case 'b': + return lowercaseDigits[0:2] + case 'o': + return lowercaseDigits[0:8] + case 'd', 's', 'v': + return lowercaseDigits[0:10] + case 'x': + return lowercaseDigits[0:16] + case 'X': + return uppercaseDigits[0:16] + } + return "" // unknown format +} + +// write count copies of text to 
s +func writeMultiple(s fmt.State, text string, count int) { + if len(text) > 0 { + b := []byte(text) + for ; count > 0; count-- { + s.Write(b) + } + } +} + +// Format is a support routine for fmt.Formatter. It accepts +// the formats 'b' (binary), 'o' (octal), 'd' (decimal), 'x' +// (lowercase hexadecimal), and 'X' (uppercase hexadecimal). +// Also supported are the full suite of package fmt's format +// verbs for integral types, including '+', '-', and ' ' +// for sign control, '#' for leading zero in octal and for +// hexadecimal, a leading "0x" or "0X" for "%#x" and "%#X" +// respectively, specification of minimum digits precision, +// output field width, space or zero padding, and left or +// right justification. +// +func (x *Int) Format(s fmt.State, ch rune) { + cs := charset(ch) + + // special cases + switch { + case cs == "": + // unknown format + fmt.Fprintf(s, "%%!%c(big.Int=%s)", ch, x.String()) + return + case x == nil: + fmt.Fprint(s, "<nil>") + return + } + + // determine sign character + sign := "" + switch { + case x.neg: + sign = "-" + case s.Flag('+'): // supersedes ' ' when both specified + sign = "+" + case s.Flag(' '): + sign = " " + } + + // determine prefix characters for indicating output base + prefix := "" + if s.Flag('#') { + switch ch { + case 'o': // octal + prefix = "0" + case 'x': // hexadecimal + prefix = "0x" + case 'X': + prefix = "0X" + } + } + + // determine digits with base set by len(cs) and digit characters from cs + digits := x.abs.string(cs) + + // number of characters for the three classes of number padding + var left int // space characters to left of digits for right justification ("%8d") + var zeroes int // zero characters (actually cs[0]) as left-most digits ("%.8d") + var right int // space characters to right of digits for left justification ("%-8d") + + // determine number padding from precision: the least number of digits to output + precision, precisionSet := s.Precision() + if precisionSet { + switch { + case 
len(digits) < precision: + zeroes = precision - len(digits) // count of zero padding + case digits == "0" && precision == 0: + return // print nothing if zero value (x == 0) and zero precision ("." or ".0") + } + } + + // determine field pad from width: the least number of characters to output + length := len(sign) + len(prefix) + zeroes + len(digits) + if width, widthSet := s.Width(); widthSet && length < width { // pad as specified + switch d := width - length; { + case s.Flag('-'): + // pad on the right with spaces; supersedes '0' when both specified + right = d + case s.Flag('0') && !precisionSet: + // pad with zeroes unless precision also specified + zeroes = d + default: + // pad on the left with spaces + left = d + } + } + + // print number as [left pad][sign][prefix][zero pad][digits][right pad] + writeMultiple(s, " ", left) + writeMultiple(s, sign, 1) + writeMultiple(s, prefix, 1) + writeMultiple(s, "0", zeroes) + writeMultiple(s, digits, 1) + writeMultiple(s, " ", right) +} + +// scan sets z to the integer value corresponding to the longest possible prefix +// read from r representing a signed integer number in a given conversion base. +// It returns z, the actual conversion base used, and an error, if any. In the +// error case, the value of z is undefined but the returned value is nil. The +// syntax follows the syntax of integer literals in Go. +// +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a +// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. 
+// +func (z *Int) scan(r io.RuneScanner, base int) (*Int, int, error) { + // determine sign + ch, _, err := r.ReadRune() + if err != nil { + return nil, 0, err + } + neg := false + switch ch { + case '-': + neg = true + case '+': // nothing to do + default: + r.UnreadRune() + } + + // determine mantissa + z.abs, base, err = z.abs.scan(r, base) + if err != nil { + return nil, base, err + } + z.neg = len(z.abs) > 0 && neg // 0 has no sign + + return z, base, nil +} + +// Scan is a support routine for fmt.Scanner; it sets z to the value of +// the scanned number. It accepts the formats 'b' (binary), 'o' (octal), +// 'd' (decimal), 'x' (lowercase hexadecimal), and 'X' (uppercase hexadecimal). +func (z *Int) Scan(s fmt.ScanState, ch rune) error { + s.SkipSpace() // skip leading space characters + base := 0 + switch ch { + case 'b': + base = 2 + case 'o': + base = 8 + case 'd': + base = 10 + case 'x', 'X': + base = 16 + case 's', 'v': + // let scan determine the base + default: + return errors.New("Int.Scan: invalid verb") + } + _, _, err := z.scan(s, base) + return err +} + +// low32 returns the least significant 32 bits of z. +func low32(z nat) uint32 { + if len(z) == 0 { + return 0 + } + return uint32(z[0]) +} + +// low64 returns the least significant 64 bits of z. +func low64(z nat) uint64 { + if len(z) == 0 { + return 0 + } + v := uint64(z[0]) + if _W == 32 && len(z) > 1 { + v |= uint64(z[1]) << 32 + } + return v +} + +// Int64 returns the int64 representation of x. +// If x cannot be represented in an int64, the result is undefined. +func (x *Int) Int64() int64 { + v := int64(low64(x.abs)) + if x.neg { + v = -v + } + return v +} + +// Uint64 returns the uint64 representation of x. +// If x cannot be represented in a uint64, the result is undefined. +func (x *Int) Uint64() uint64 { + return low64(x.abs) +} + +// SetString sets z to the value of s, interpreted in the given base, +// and returns z and a boolean indicating success. 
If SetString fails, +// the value of z is undefined but the returned value is nil. +// +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a +// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. +// +func (z *Int) SetString(s string, base int) (*Int, bool) { + r := strings.NewReader(s) + _, _, err := z.scan(r, base) + if err != nil { + return nil, false + } + _, _, err = r.ReadRune() + if err != io.EOF { + return nil, false + } + return z, true // err == io.EOF => scan consumed all of s +} + +// SetBytes interprets buf as the bytes of a big-endian unsigned +// integer, sets z to that value, and returns z. +func (z *Int) SetBytes(buf []byte) *Int { + z.abs = z.abs.setBytes(buf) + z.neg = false + return z +} + +// Bytes returns the absolute value of x as a big-endian byte slice. +func (x *Int) Bytes() []byte { + buf := make([]byte, len(x.abs)*_S) + return buf[x.abs.bytes(buf):] +} + +// BitLen returns the length of the absolute value of x in bits. +// The bit length of 0 is 0. +func (x *Int) BitLen() int { + return x.abs.bitLen() +} + +// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z. +// If y <= 0, the result is 1 mod |m|; if m == nil or m == 0, z = x**y. +// See Knuth, volume 2, section 4.6.3. 
+func (z *Int) Exp(x, y, m *Int) *Int { + var yWords nat + if !y.neg { + yWords = y.abs + } + // y >= 0 + + var mWords nat + if m != nil { + mWords = m.abs // m.abs may be nil for m == 0 + } + + z.abs = z.abs.expNN(x.abs, yWords, mWords) + z.neg = len(z.abs) > 0 && x.neg && len(yWords) > 0 && yWords[0]&1 == 1 // 0 has no sign + if z.neg && len(mWords) > 0 { + // make modulus result positive + z.abs = z.abs.sub(mWords, z.abs) // z == x**y mod |m| && 0 <= z < |m| + z.neg = false + } + + return z +} + +// GCD sets z to the greatest common divisor of a and b, which both must +// be > 0, and returns z. +// If x and y are not nil, GCD sets x and y such that z = a*x + b*y. +// If either a or b is <= 0, GCD sets z = x = y = 0. +func (z *Int) GCD(x, y, a, b *Int) *Int { + if a.Sign() <= 0 || b.Sign() <= 0 { + z.SetInt64(0) + if x != nil { + x.SetInt64(0) + } + if y != nil { + y.SetInt64(0) + } + return z + } + if x == nil && y == nil { + return z.binaryGCD(a, b) + } + + A := new(Int).Set(a) + B := new(Int).Set(b) + + X := new(Int) + Y := new(Int).SetInt64(1) + + lastX := new(Int).SetInt64(1) + lastY := new(Int) + + q := new(Int) + temp := new(Int) + + for len(B.abs) > 0 { + r := new(Int) + q, r = q.QuoRem(A, B, r) + + A, B = B, r + + temp.Set(X) + X.Mul(X, q) + X.neg = !X.neg + X.Add(X, lastX) + lastX.Set(temp) + + temp.Set(Y) + Y.Mul(Y, q) + Y.neg = !Y.neg + Y.Add(Y, lastY) + lastY.Set(temp) + } + + if x != nil { + *x = *lastX + } + + if y != nil { + *y = *lastY + } + + *z = *A + return z +} + +// binaryGCD sets z to the greatest common divisor of a and b, which both must +// be > 0, and returns z. +// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm B. +func (z *Int) binaryGCD(a, b *Int) *Int { + u := z + v := new(Int) + + // use one Euclidean iteration to ensure that u and v are approx. 
the same size + switch { + case len(a.abs) > len(b.abs): + u.Set(b) + v.Rem(a, b) + case len(a.abs) < len(b.abs): + u.Set(a) + v.Rem(b, a) + default: + u.Set(a) + v.Set(b) + } + + // v might be 0 now + if len(v.abs) == 0 { + return u + } + // u > 0 && v > 0 + + // determine largest k such that u = u' << k, v = v' << k + k := u.abs.trailingZeroBits() + if vk := v.abs.trailingZeroBits(); vk < k { + k = vk + } + u.Rsh(u, k) + v.Rsh(v, k) + + // determine t (we know that u > 0) + t := new(Int) + if u.abs[0]&1 != 0 { + // u is odd + t.Neg(v) + } else { + t.Set(u) + } + + for len(t.abs) > 0 { + // reduce t + t.Rsh(t, t.abs.trailingZeroBits()) + if t.neg { + v, t = t, v + v.neg = len(v.abs) > 0 && !v.neg // 0 has no sign + } else { + u, t = t, u + } + t.Sub(u, v) + } + + return z.Lsh(u, k) +} + +// ProbablyPrime performs n Miller-Rabin tests to check whether x is prime. +// If it returns true, x is prime with probability 1 - 1/4^n. +// If it returns false, x is not prime. +func (x *Int) ProbablyPrime(n int) bool { + return !x.neg && x.abs.probablyPrime(n) +} + +// Rand sets z to a pseudo-random number in [0, n) and returns z. +func (z *Int) Rand(rnd *rand.Rand, n *Int) *Int { + z.neg = false + if n.neg == true || len(n.abs) == 0 { + z.abs = nil + return z + } + z.abs = z.abs.random(rnd, n.abs, n.abs.bitLen()) + return z +} + +// ModInverse sets z to the multiplicative inverse of g in the ring ℤ/nℤ +// and returns z. If g and n are not relatively prime, the result is undefined. +func (z *Int) ModInverse(g, n *Int) *Int { + var d Int + d.GCD(z, nil, g, n) + // x and y are such that g*x + n*y = d. Since g and n are + // relatively prime, d = 1. Taking that modulo n results in + // g*x = 1, therefore x is the inverse element. + if z.neg { + z.Add(z, n) + } + return z +} + +// Lsh sets z = x << n and returns z. +func (z *Int) Lsh(x *Int, n uint) *Int { + z.abs = z.abs.shl(x.abs, n) + z.neg = x.neg + return z +} + +// Rsh sets z = x >> n and returns z. 
+func (z *Int) Rsh(x *Int, n uint) *Int { + if x.neg { + // (-x) >> s == ^(x-1) >> s == ^((x-1) >> s) == -(((x-1) >> s) + 1) + t := z.abs.sub(x.abs, natOne) // no underflow because |x| > 0 + t = t.shr(t, n) + z.abs = t.add(t, natOne) + z.neg = true // z cannot be zero if x is negative + return z + } + + z.abs = z.abs.shr(x.abs, n) + z.neg = false + return z +} + +// Bit returns the value of the i'th bit of x. That is, it +// returns (x>>i)&1. The bit index i must be >= 0. +func (x *Int) Bit(i int) uint { + if i == 0 { + // optimization for common case: odd/even test of x + if len(x.abs) > 0 { + return uint(x.abs[0] & 1) // bit 0 is same for -x + } + return 0 + } + if i < 0 { + panic("negative bit index") + } + if x.neg { + t := nat(nil).sub(x.abs, natOne) + return t.bit(uint(i)) ^ 1 + } + + return x.abs.bit(uint(i)) +} + +// SetBit sets z to x, with x's i'th bit set to b (0 or 1). +// That is, if b is 1 SetBit sets z = x | (1 << i); +// if b is 0 SetBit sets z = x &^ (1 << i). If b is not 0 or 1, +// SetBit will panic. +func (z *Int) SetBit(x *Int, i int, b uint) *Int { + if i < 0 { + panic("negative bit index") + } + if x.neg { + t := z.abs.sub(x.abs, natOne) + t = t.setBit(t, uint(i), b^1) + z.abs = t.add(t, natOne) + z.neg = len(z.abs) > 0 + return z + } + z.abs = z.abs.setBit(x.abs, uint(i), b) + z.neg = false + return z +} + +// And sets z = x & y and returns z. 
+func (z *Int) And(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) & (-y) == ^(x-1) & ^(y-1) == ^((x-1) | (y-1)) == -(((x-1) | (y-1)) + 1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.or(x1, y1), natOne) + z.neg = true // z cannot be zero if x and y are negative + return z + } + + // x & y == x & y + z.abs = z.abs.and(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // & is symmetric + } + + // x & (-y) == x & ^(y-1) == x &^ (y-1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.andNot(x.abs, y1) + z.neg = false + return z +} + +// AndNot sets z = x &^ y and returns z. +func (z *Int) AndNot(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) &^ (-y) == ^(x-1) &^ ^(y-1) == ^(x-1) & (y-1) == (y-1) &^ (x-1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.andNot(y1, x1) + z.neg = false + return z + } + + // x &^ y == x &^ y + z.abs = z.abs.andNot(x.abs, y.abs) + z.neg = false + return z + } + + if x.neg { + // (-x) &^ y == ^(x-1) &^ y == ^(x-1) & ^y == ^((x-1) | y) == -(((x-1) | y) + 1) + x1 := nat(nil).sub(x.abs, natOne) + z.abs = z.abs.add(z.abs.or(x1, y.abs), natOne) + z.neg = true // z cannot be zero if x is negative and y is positive + return z + } + + // x &^ (-y) == x &^ ^(y-1) == x & (y-1) + y1 := nat(nil).add(y.abs, natOne) + z.abs = z.abs.and(x.abs, y1) + z.neg = false + return z +} + +// Or sets z = x | y and returns z. 
+func (z *Int) Or(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) | (-y) == ^(x-1) | ^(y-1) == ^((x-1) & (y-1)) == -(((x-1) & (y-1)) + 1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.and(x1, y1), natOne) + z.neg = true // z cannot be zero if x and y are negative + return z + } + + // x | y == x | y + z.abs = z.abs.or(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // | is symmetric + } + + // x | (-y) == x | ^(y-1) == ^((y-1) &^ x) == -(^((y-1) &^ x) + 1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.andNot(y1, x.abs), natOne) + z.neg = true // z cannot be zero if one of x or y is negative + return z +} + +// Xor sets z = x ^ y and returns z. +func (z *Int) Xor(x, y *Int) *Int { + if x.neg == y.neg { + if x.neg { + // (-x) ^ (-y) == ^(x-1) ^ ^(y-1) == (x-1) ^ (y-1) + x1 := nat(nil).sub(x.abs, natOne) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.xor(x1, y1) + z.neg = false + return z + } + + // x ^ y == x ^ y + z.abs = z.abs.xor(x.abs, y.abs) + z.neg = false + return z + } + + // x.neg != y.neg + if x.neg { + x, y = y, x // ^ is symmetric + } + + // x ^ (-y) == x ^ ^(y-1) == ^(x ^ (y-1)) == -((x ^ (y-1)) + 1) + y1 := nat(nil).sub(y.abs, natOne) + z.abs = z.abs.add(z.abs.xor(x.abs, y1), natOne) + z.neg = true // z cannot be zero if only one of x or y is negative + return z +} + +// Not sets z = ^x and returns z. +func (z *Int) Not(x *Int) *Int { + if x.neg { + // ^(-x) == ^(^(x-1)) == x-1 + z.abs = z.abs.sub(x.abs, natOne) + z.neg = false + return z + } + + // ^x == -x-1 == -(x+1) + z.abs = z.abs.add(x.abs, natOne) + z.neg = true // z cannot be zero if x is positive + return z +} + +// Gob codec version. Permits backward-compatible changes to the encoding. +const intGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. 
+func (x *Int) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + buf := make([]byte, 1+len(x.abs)*_S) // extra byte for version and sign bit + i := x.abs.bytes(buf) - 1 // i >= 0 + b := intGobVersion << 1 // make space for sign bit + if x.neg { + b |= 1 + } + buf[i] = b + return buf[i:], nil +} + +// GobDecode implements the gob.GobDecoder interface. +func (z *Int) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. + *z = Int{} + return nil + } + b := buf[0] + if b>>1 != intGobVersion { + return errors.New(fmt.Sprintf("Int.GobDecode: encoding version %d not supported", b>>1)) + } + z.neg = b&1 != 0 + z.abs = z.abs.setBytes(buf[1:]) + return nil +} + +// MarshalJSON implements the json.Marshaler interface. +func (z *Int) MarshalJSON() ([]byte, error) { + // TODO(gri): get rid of the []byte/string conversions + return []byte(z.String()), nil +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (z *Int) UnmarshalJSON(text []byte) error { + // TODO(gri): get rid of the []byte/string conversions + if _, ok := z.SetString(string(text), 0); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text) + } + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +func (z *Int) MarshalText() (text []byte, err error) { + return []byte(z.String()), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. +func (z *Int) UnmarshalText(text []byte) error { + if _, ok := z.SetString(string(text), 0); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Int", text) + } + return nil +} diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go new file mode 100644 index 000000000..6070cf325 --- /dev/null +++ b/src/math/big/int_test.go @@ -0,0 +1,1625 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "bytes" + "encoding/gob" + "encoding/hex" + "encoding/json" + "encoding/xml" + "fmt" + "math/rand" + "testing" + "testing/quick" +) + +func isNormalized(x *Int) bool { + if len(x.abs) == 0 { + return !x.neg + } + // len(x.abs) > 0 + return x.abs[len(x.abs)-1] != 0 +} + +type funZZ func(z, x, y *Int) *Int +type argZZ struct { + z, x, y *Int +} + +var sumZZ = []argZZ{ + {NewInt(0), NewInt(0), NewInt(0)}, + {NewInt(1), NewInt(1), NewInt(0)}, + {NewInt(1111111110), NewInt(123456789), NewInt(987654321)}, + {NewInt(-1), NewInt(-1), NewInt(0)}, + {NewInt(864197532), NewInt(-123456789), NewInt(987654321)}, + {NewInt(-1111111110), NewInt(-123456789), NewInt(-987654321)}, +} + +var prodZZ = []argZZ{ + {NewInt(0), NewInt(0), NewInt(0)}, + {NewInt(0), NewInt(1), NewInt(0)}, + {NewInt(1), NewInt(1), NewInt(1)}, + {NewInt(-991 * 991), NewInt(991), NewInt(-991)}, + // TODO(gri) add larger products +} + +func TestSignZ(t *testing.T) { + var zero Int + for _, a := range sumZZ { + s := a.z.Sign() + e := a.z.Cmp(&zero) + if s != e { + t.Errorf("got %d; want %d for z = %v", s, e, a.z) + } + } +} + +func TestSetZ(t *testing.T) { + for _, a := range sumZZ { + var z Int + z.Set(a.z) + if !isNormalized(&z) { + t.Errorf("%v is not normalized", z) + } + if (&z).Cmp(a.z) != 0 { + t.Errorf("got z = %v; want %v", z, a.z) + } + } +} + +func TestAbsZ(t *testing.T) { + var zero Int + for _, a := range sumZZ { + var z Int + z.Abs(a.z) + var e Int + e.Set(a.z) + if e.Cmp(&zero) < 0 { + e.Sub(&zero, &e) + } + if z.Cmp(&e) != 0 { + t.Errorf("got z = %v; want %v", z, e) + } + } +} + +func testFunZZ(t *testing.T, msg string, f funZZ, a argZZ) { + var z Int + f(&z, a.x, a.y) + if !isNormalized(&z) { + t.Errorf("%s%v is not normalized", msg, z) + } + if (&z).Cmp(a.z) != 0 { + t.Errorf("%s%+v\n\tgot z = %v; want %v", msg, a, &z, a.z) + } +} + +func TestSumZZ(t *testing.T) { + AddZZ := func(z, x, y *Int) *Int { return z.Add(x, y) } + SubZZ := func(z, x, y *Int) *Int { return 
z.Sub(x, y) } + for _, a := range sumZZ { + arg := a + testFunZZ(t, "AddZZ", AddZZ, arg) + + arg = argZZ{a.z, a.y, a.x} + testFunZZ(t, "AddZZ symmetric", AddZZ, arg) + + arg = argZZ{a.x, a.z, a.y} + testFunZZ(t, "SubZZ", SubZZ, arg) + + arg = argZZ{a.y, a.z, a.x} + testFunZZ(t, "SubZZ symmetric", SubZZ, arg) + } +} + +func TestProdZZ(t *testing.T) { + MulZZ := func(z, x, y *Int) *Int { return z.Mul(x, y) } + for _, a := range prodZZ { + arg := a + testFunZZ(t, "MulZZ", MulZZ, arg) + + arg = argZZ{a.z, a.y, a.x} + testFunZZ(t, "MulZZ symmetric", MulZZ, arg) + } +} + +// mulBytes returns x*y via grade school multiplication. Both inputs +// and the result are assumed to be in big-endian representation (to +// match the semantics of Int.Bytes and Int.SetBytes). +func mulBytes(x, y []byte) []byte { + z := make([]byte, len(x)+len(y)) + + // multiply + k0 := len(z) - 1 + for j := len(y) - 1; j >= 0; j-- { + d := int(y[j]) + if d != 0 { + k := k0 + carry := 0 + for i := len(x) - 1; i >= 0; i-- { + t := int(z[k]) + int(x[i])*d + carry + z[k], carry = byte(t), t>>8 + k-- + } + z[k] = byte(carry) + } + k0-- + } + + // normalize (remove leading 0's) + i := 0 + for i < len(z) && z[i] == 0 { + i++ + } + + return z[i:] +} + +func checkMul(a, b []byte) bool { + var x, y, z1 Int + x.SetBytes(a) + y.SetBytes(b) + z1.Mul(&x, &y) + + var z2 Int + z2.SetBytes(mulBytes(a, b)) + + return z1.Cmp(&z2) == 0 +} + +func TestMul(t *testing.T) { + if err := quick.Check(checkMul, nil); err != nil { + t.Error(err) + } +} + +var mulRangesZ = []struct { + a, b int64 + prod string +}{ + // entirely positive ranges are covered by mulRangesN + {-1, 1, "0"}, + {-2, -1, "2"}, + {-3, -2, "6"}, + {-3, -1, "-6"}, + {1, 3, "6"}, + {-10, -10, "-10"}, + {0, -1, "1"}, // empty range + {-1, -100, "1"}, // empty range + {-1, 1, "0"}, // range includes 0 + {-1e9, 0, "0"}, // range includes 0 + {-1e9, 1e9, "0"}, // range includes 0 + {-10, -1, "3628800"}, // 10! + {-20, -2, "-2432902008176640000"}, // -20! 
+ {-99, -1, + "-933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "511852109168640000000000000000000000", // -99! + }, +} + +func TestMulRangeZ(t *testing.T) { + var tmp Int + // test entirely positive ranges + for i, r := range mulRangesN { + prod := tmp.MulRange(int64(r.a), int64(r.b)).String() + if prod != r.prod { + t.Errorf("#%da: got %s; want %s", i, prod, r.prod) + } + } + // test other ranges + for i, r := range mulRangesZ { + prod := tmp.MulRange(r.a, r.b).String() + if prod != r.prod { + t.Errorf("#%db: got %s; want %s", i, prod, r.prod) + } + } +} + +var stringTests = []struct { + in string + out string + base int + val int64 + ok bool +}{ + {in: "", ok: false}, + {in: "a", ok: false}, + {in: "z", ok: false}, + {in: "+", ok: false}, + {in: "-", ok: false}, + {in: "0b", ok: false}, + {in: "0x", ok: false}, + {in: "2", base: 2, ok: false}, + {in: "0b2", base: 0, ok: false}, + {in: "08", ok: false}, + {in: "8", base: 8, ok: false}, + {in: "0xg", base: 0, ok: false}, + {in: "g", base: 16, ok: false}, + {"0", "0", 0, 0, true}, + {"0", "0", 10, 0, true}, + {"0", "0", 16, 0, true}, + {"+0", "0", 0, 0, true}, + {"-0", "0", 0, 0, true}, + {"10", "10", 0, 10, true}, + {"10", "10", 10, 10, true}, + {"10", "10", 16, 16, true}, + {"-10", "-10", 16, -16, true}, + {"+10", "10", 16, 16, true}, + {"0x10", "16", 0, 16, true}, + {in: "0x10", base: 16, ok: false}, + {"-0x10", "-16", 0, -16, true}, + {"+0x10", "16", 0, 16, true}, + {"00", "0", 0, 0, true}, + {"0", "0", 8, 0, true}, + {"07", "7", 0, 7, true}, + {"7", "7", 8, 7, true}, + {"023", "19", 0, 19, true}, + {"23", "23", 8, 19, true}, + {"cafebabe", "cafebabe", 16, 0xcafebabe, true}, + {"0b0", "0", 0, 0, true}, + {"-111", "-111", 2, -7, true}, + {"-0b111", "-7", 0, -7, true}, + {"0b1001010111", "599", 0, 0x257, true}, + {"1001010111", "1001010111", 2, 0x257, true}, +} + +func format(base int) string { + switch base { + case 2: + return 
"%b" + case 8: + return "%o" + case 16: + return "%x" + } + return "%d" +} + +func TestGetString(t *testing.T) { + z := new(Int) + for i, test := range stringTests { + if !test.ok { + continue + } + z.SetInt64(test.val) + + if test.base == 10 { + s := z.String() + if s != test.out { + t.Errorf("#%da got %s; want %s", i, s, test.out) + } + } + + s := fmt.Sprintf(format(test.base), z) + if s != test.out { + t.Errorf("#%db got %s; want %s", i, s, test.out) + } + } +} + +func TestSetString(t *testing.T) { + tmp := new(Int) + for i, test := range stringTests { + // initialize to a non-zero value so that issues with parsing + // 0 are detected + tmp.SetInt64(1234567890) + n1, ok1 := new(Int).SetString(test.in, test.base) + n2, ok2 := tmp.SetString(test.in, test.base) + expected := NewInt(test.val) + if ok1 != test.ok || ok2 != test.ok { + t.Errorf("#%d (input '%s') ok incorrect (should be %t)", i, test.in, test.ok) + continue + } + if !ok1 { + if n1 != nil { + t.Errorf("#%d (input '%s') n1 != nil", i, test.in) + } + continue + } + if !ok2 { + if n2 != nil { + t.Errorf("#%d (input '%s') n2 != nil", i, test.in) + } + continue + } + + if ok1 && !isNormalized(n1) { + t.Errorf("#%d (input '%s'): %v is not normalized", i, test.in, *n1) + } + if ok2 && !isNormalized(n2) { + t.Errorf("#%d (input '%s'): %v is not normalized", i, test.in, *n2) + } + + if n1.Cmp(expected) != 0 { + t.Errorf("#%d (input '%s') got: %s want: %d", i, test.in, n1, test.val) + } + if n2.Cmp(expected) != 0 { + t.Errorf("#%d (input '%s') got: %s want: %d", i, test.in, n2, test.val) + } + } +} + +var formatTests = []struct { + input string + format string + output string +}{ + {"<nil>", "%x", "<nil>"}, + {"<nil>", "%#x", "<nil>"}, + {"<nil>", "%#y", "%!y(big.Int=<nil>)"}, + + {"10", "%b", "1010"}, + {"10", "%o", "12"}, + {"10", "%d", "10"}, + {"10", "%v", "10"}, + {"10", "%x", "a"}, + {"10", "%X", "A"}, + {"-10", "%X", "-A"}, + {"10", "%y", "%!y(big.Int=10)"}, + {"-10", "%y", "%!y(big.Int=-10)"}, + + {"10", 
"%#b", "1010"}, + {"10", "%#o", "012"}, + {"10", "%#d", "10"}, + {"10", "%#v", "10"}, + {"10", "%#x", "0xa"}, + {"10", "%#X", "0XA"}, + {"-10", "%#X", "-0XA"}, + {"10", "%#y", "%!y(big.Int=10)"}, + {"-10", "%#y", "%!y(big.Int=-10)"}, + + {"1234", "%d", "1234"}, + {"1234", "%3d", "1234"}, + {"1234", "%4d", "1234"}, + {"-1234", "%d", "-1234"}, + {"1234", "% 5d", " 1234"}, + {"1234", "%+5d", "+1234"}, + {"1234", "%-5d", "1234 "}, + {"1234", "%x", "4d2"}, + {"1234", "%X", "4D2"}, + {"-1234", "%3x", "-4d2"}, + {"-1234", "%4x", "-4d2"}, + {"-1234", "%5x", " -4d2"}, + {"-1234", "%-5x", "-4d2 "}, + {"1234", "%03d", "1234"}, + {"1234", "%04d", "1234"}, + {"1234", "%05d", "01234"}, + {"1234", "%06d", "001234"}, + {"-1234", "%06d", "-01234"}, + {"1234", "%+06d", "+01234"}, + {"1234", "% 06d", " 01234"}, + {"1234", "%-6d", "1234 "}, + {"1234", "%-06d", "1234 "}, + {"-1234", "%-06d", "-1234 "}, + + {"1234", "%.3d", "1234"}, + {"1234", "%.4d", "1234"}, + {"1234", "%.5d", "01234"}, + {"1234", "%.6d", "001234"}, + {"-1234", "%.3d", "-1234"}, + {"-1234", "%.4d", "-1234"}, + {"-1234", "%.5d", "-01234"}, + {"-1234", "%.6d", "-001234"}, + + {"1234", "%8.3d", " 1234"}, + {"1234", "%8.4d", " 1234"}, + {"1234", "%8.5d", " 01234"}, + {"1234", "%8.6d", " 001234"}, + {"-1234", "%8.3d", " -1234"}, + {"-1234", "%8.4d", " -1234"}, + {"-1234", "%8.5d", " -01234"}, + {"-1234", "%8.6d", " -001234"}, + + {"1234", "%+8.3d", " +1234"}, + {"1234", "%+8.4d", " +1234"}, + {"1234", "%+8.5d", " +01234"}, + {"1234", "%+8.6d", " +001234"}, + {"-1234", "%+8.3d", " -1234"}, + {"-1234", "%+8.4d", " -1234"}, + {"-1234", "%+8.5d", " -01234"}, + {"-1234", "%+8.6d", " -001234"}, + + {"1234", "% 8.3d", " 1234"}, + {"1234", "% 8.4d", " 1234"}, + {"1234", "% 8.5d", " 01234"}, + {"1234", "% 8.6d", " 001234"}, + {"-1234", "% 8.3d", " -1234"}, + {"-1234", "% 8.4d", " -1234"}, + {"-1234", "% 8.5d", " -01234"}, + {"-1234", "% 8.6d", " -001234"}, + + {"1234", "%.3x", "4d2"}, + {"1234", "%.4x", "04d2"}, + {"1234", "%.5x", 
"004d2"}, + {"1234", "%.6x", "0004d2"}, + {"-1234", "%.3x", "-4d2"}, + {"-1234", "%.4x", "-04d2"}, + {"-1234", "%.5x", "-004d2"}, + {"-1234", "%.6x", "-0004d2"}, + + {"1234", "%8.3x", " 4d2"}, + {"1234", "%8.4x", " 04d2"}, + {"1234", "%8.5x", " 004d2"}, + {"1234", "%8.6x", " 0004d2"}, + {"-1234", "%8.3x", " -4d2"}, + {"-1234", "%8.4x", " -04d2"}, + {"-1234", "%8.5x", " -004d2"}, + {"-1234", "%8.6x", " -0004d2"}, + + {"1234", "%+8.3x", " +4d2"}, + {"1234", "%+8.4x", " +04d2"}, + {"1234", "%+8.5x", " +004d2"}, + {"1234", "%+8.6x", " +0004d2"}, + {"-1234", "%+8.3x", " -4d2"}, + {"-1234", "%+8.4x", " -04d2"}, + {"-1234", "%+8.5x", " -004d2"}, + {"-1234", "%+8.6x", " -0004d2"}, + + {"1234", "% 8.3x", " 4d2"}, + {"1234", "% 8.4x", " 04d2"}, + {"1234", "% 8.5x", " 004d2"}, + {"1234", "% 8.6x", " 0004d2"}, + {"1234", "% 8.7x", " 00004d2"}, + {"1234", "% 8.8x", " 000004d2"}, + {"-1234", "% 8.3x", " -4d2"}, + {"-1234", "% 8.4x", " -04d2"}, + {"-1234", "% 8.5x", " -004d2"}, + {"-1234", "% 8.6x", " -0004d2"}, + {"-1234", "% 8.7x", "-00004d2"}, + {"-1234", "% 8.8x", "-000004d2"}, + + {"1234", "%-8.3d", "1234 "}, + {"1234", "%-8.4d", "1234 "}, + {"1234", "%-8.5d", "01234 "}, + {"1234", "%-8.6d", "001234 "}, + {"1234", "%-8.7d", "0001234 "}, + {"1234", "%-8.8d", "00001234"}, + {"-1234", "%-8.3d", "-1234 "}, + {"-1234", "%-8.4d", "-1234 "}, + {"-1234", "%-8.5d", "-01234 "}, + {"-1234", "%-8.6d", "-001234 "}, + {"-1234", "%-8.7d", "-0001234"}, + {"-1234", "%-8.8d", "-00001234"}, + + {"16777215", "%b", "111111111111111111111111"}, // 2**24 - 1 + + {"0", "%.d", ""}, + {"0", "%.0d", ""}, + {"0", "%3.d", ""}, +} + +func TestFormat(t *testing.T) { + for i, test := range formatTests { + var x *Int + if test.input != "<nil>" { + var ok bool + x, ok = new(Int).SetString(test.input, 0) + if !ok { + t.Errorf("#%d failed reading input %s", i, test.input) + } + } + output := fmt.Sprintf(test.format, x) + if output != test.output { + t.Errorf("#%d got %q; want %q, {%q, %q, %q}", i, output, 
test.output, test.input, test.format, test.output) + } + } +} + +var scanTests = []struct { + input string + format string + output string + remaining int +}{ + {"1010", "%b", "10", 0}, + {"0b1010", "%v", "10", 0}, + {"12", "%o", "10", 0}, + {"012", "%v", "10", 0}, + {"10", "%d", "10", 0}, + {"10", "%v", "10", 0}, + {"a", "%x", "10", 0}, + {"0xa", "%v", "10", 0}, + {"A", "%X", "10", 0}, + {"-A", "%X", "-10", 0}, + {"+0b1011001", "%v", "89", 0}, + {"0xA", "%v", "10", 0}, + {"0 ", "%v", "0", 1}, + {"2+3", "%v", "2", 2}, + {"0XABC 12", "%v", "2748", 3}, +} + +func TestScan(t *testing.T) { + var buf bytes.Buffer + for i, test := range scanTests { + x := new(Int) + buf.Reset() + buf.WriteString(test.input) + if _, err := fmt.Fscanf(&buf, test.format, x); err != nil { + t.Errorf("#%d error: %s", i, err) + } + if x.String() != test.output { + t.Errorf("#%d got %s; want %s", i, x.String(), test.output) + } + if buf.Len() != test.remaining { + t.Errorf("#%d got %d bytes remaining; want %d", i, buf.Len(), test.remaining) + } + } +} + +// Examples from the Go Language Spec, section "Arithmetic operators" +var divisionSignsTests = []struct { + x, y int64 + q, r int64 // T-division + d, m int64 // Euclidian division +}{ + {5, 3, 1, 2, 1, 2}, + {-5, 3, -1, -2, -2, 1}, + {5, -3, -1, 2, -1, 2}, + {-5, -3, 1, -2, 2, 1}, + {1, 2, 0, 1, 0, 1}, + {8, 4, 2, 0, 2, 0}, +} + +func TestDivisionSigns(t *testing.T) { + for i, test := range divisionSignsTests { + x := NewInt(test.x) + y := NewInt(test.y) + q := NewInt(test.q) + r := NewInt(test.r) + d := NewInt(test.d) + m := NewInt(test.m) + + q1 := new(Int).Quo(x, y) + r1 := new(Int).Rem(x, y) + if !isNormalized(q1) { + t.Errorf("#%d Quo: %v is not normalized", i, *q1) + } + if !isNormalized(r1) { + t.Errorf("#%d Rem: %v is not normalized", i, *r1) + } + if q1.Cmp(q) != 0 || r1.Cmp(r) != 0 { + t.Errorf("#%d QuoRem: got (%s, %s), want (%s, %s)", i, q1, r1, q, r) + } + + q2, r2 := new(Int).QuoRem(x, y, new(Int)) + if !isNormalized(q2) { + 
t.Errorf("#%d Quo: %v is not normalized", i, *q2) + } + if !isNormalized(r2) { + t.Errorf("#%d Rem: %v is not normalized", i, *r2) + } + if q2.Cmp(q) != 0 || r2.Cmp(r) != 0 { + t.Errorf("#%d QuoRem: got (%s, %s), want (%s, %s)", i, q2, r2, q, r) + } + + d1 := new(Int).Div(x, y) + m1 := new(Int).Mod(x, y) + if !isNormalized(d1) { + t.Errorf("#%d Div: %v is not normalized", i, *d1) + } + if !isNormalized(m1) { + t.Errorf("#%d Mod: %v is not normalized", i, *m1) + } + if d1.Cmp(d) != 0 || m1.Cmp(m) != 0 { + t.Errorf("#%d DivMod: got (%s, %s), want (%s, %s)", i, d1, m1, d, m) + } + + d2, m2 := new(Int).DivMod(x, y, new(Int)) + if !isNormalized(d2) { + t.Errorf("#%d Div: %v is not normalized", i, *d2) + } + if !isNormalized(m2) { + t.Errorf("#%d Mod: %v is not normalized", i, *m2) + } + if d2.Cmp(d) != 0 || m2.Cmp(m) != 0 { + t.Errorf("#%d DivMod: got (%s, %s), want (%s, %s)", i, d2, m2, d, m) + } + } +} + +func checkSetBytes(b []byte) bool { + hex1 := hex.EncodeToString(new(Int).SetBytes(b).Bytes()) + hex2 := hex.EncodeToString(b) + + for len(hex1) < len(hex2) { + hex1 = "0" + hex1 + } + + for len(hex1) > len(hex2) { + hex2 = "0" + hex2 + } + + return hex1 == hex2 +} + +func TestSetBytes(t *testing.T) { + if err := quick.Check(checkSetBytes, nil); err != nil { + t.Error(err) + } +} + +func checkBytes(b []byte) bool { + b2 := new(Int).SetBytes(b).Bytes() + return bytes.Equal(b, b2) +} + +func TestBytes(t *testing.T) { + if err := quick.Check(checkSetBytes, nil); err != nil { + t.Error(err) + } +} + +func checkQuo(x, y []byte) bool { + u := new(Int).SetBytes(x) + v := new(Int).SetBytes(y) + + if len(v.abs) == 0 { + return true + } + + r := new(Int) + q, r := new(Int).QuoRem(u, v, r) + + if r.Cmp(v) >= 0 { + return false + } + + uprime := new(Int).Set(q) + uprime.Mul(uprime, v) + uprime.Add(uprime, r) + + return uprime.Cmp(u) == 0 +} + +var quoTests = []struct { + x, y string + q, r string +}{ + { + 
"476217953993950760840509444250624797097991362735329973741718102894495832294430498335824897858659711275234906400899559094370964723884706254265559534144986498357", + "9353930466774385905609975137998169297361893554149986716853295022578535724979483772383667534691121982974895531435241089241440253066816724367338287092081996", + "50911", + "1", + }, + { + "11510768301994997771168", + "1328165573307167369775", + "8", + "885443715537658812968", + }, +} + +func TestQuo(t *testing.T) { + if err := quick.Check(checkQuo, nil); err != nil { + t.Error(err) + } + + for i, test := range quoTests { + x, _ := new(Int).SetString(test.x, 10) + y, _ := new(Int).SetString(test.y, 10) + expectedQ, _ := new(Int).SetString(test.q, 10) + expectedR, _ := new(Int).SetString(test.r, 10) + + r := new(Int) + q, r := new(Int).QuoRem(x, y, r) + + if q.Cmp(expectedQ) != 0 || r.Cmp(expectedR) != 0 { + t.Errorf("#%d got (%s, %s) want (%s, %s)", i, q, r, expectedQ, expectedR) + } + } +} + +func TestQuoStepD6(t *testing.T) { + // See Knuth, Volume 2, section 4.3.1, exercise 21. This code exercises + // a code path which only triggers 1 in 10^{-19} cases. 
+ + u := &Int{false, nat{0, 0, 1 + 1<<(_W-1), _M ^ (1 << (_W - 1))}} + v := &Int{false, nat{5, 2 + 1<<(_W-1), 1 << (_W - 1)}} + + r := new(Int) + q, r := new(Int).QuoRem(u, v, r) + const expectedQ64 = "18446744073709551613" + const expectedR64 = "3138550867693340382088035895064302439801311770021610913807" + const expectedQ32 = "4294967293" + const expectedR32 = "39614081266355540837921718287" + if q.String() != expectedQ64 && q.String() != expectedQ32 || + r.String() != expectedR64 && r.String() != expectedR32 { + t.Errorf("got (%s, %s) want (%s, %s) or (%s, %s)", q, r, expectedQ64, expectedR64, expectedQ32, expectedR32) + } +} + +var bitLenTests = []struct { + in string + out int +}{ + {"-1", 1}, + {"0", 0}, + {"1", 1}, + {"2", 2}, + {"4", 3}, + {"0xabc", 12}, + {"0x8000", 16}, + {"0x80000000", 32}, + {"0x800000000000", 48}, + {"0x8000000000000000", 64}, + {"0x80000000000000000000", 80}, + {"-0x4000000000000000000000", 87}, +} + +func TestBitLen(t *testing.T) { + for i, test := range bitLenTests { + x, ok := new(Int).SetString(test.in, 0) + if !ok { + t.Errorf("#%d test input invalid: %s", i, test.in) + continue + } + + if n := x.BitLen(); n != test.out { + t.Errorf("#%d got %d want %d", i, n, test.out) + } + } +} + +var expTests = []struct { + x, y, m string + out string +}{ + // y <= 0 + {"0", "0", "", "1"}, + {"1", "0", "", "1"}, + {"-10", "0", "", "1"}, + {"1234", "-1", "", "1"}, + + // m == 1 + {"0", "0", "1", "0"}, + {"1", "0", "1", "0"}, + {"-10", "0", "1", "0"}, + {"1234", "-1", "1", "0"}, + + // misc + {"5", "-7", "", "1"}, + {"-5", "-7", "", "1"}, + {"5", "0", "", "1"}, + {"-5", "0", "", "1"}, + {"5", "1", "", "5"}, + {"-5", "1", "", "-5"}, + {"-5", "1", "7", "2"}, + {"-2", "3", "2", "0"}, + {"5", "2", "", "25"}, + {"1", "65537", "2", "1"}, + {"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"}, + {"0x8000000000000000", "2", "6719", "4944"}, + {"0x8000000000000000", "3", "6719", "5447"}, + {"0x8000000000000000", "1000", "6719", "1603"}, 
+ {"0x8000000000000000", "1000000", "6719", "3199"}, + {"0x8000000000000000", "-1000000", "6719", "1"}, + { + "2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347", + "298472983472983471903246121093472394872319615612417471234712061", + "29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464", + "23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291", + }, + // test case for issue 8822 + { + "-0x1BCE04427D8032319A89E5C4136456671AC620883F2C4139E57F91307C485AD2D6204F4F87A58262652DB5DBBAC72B0613E51B835E7153BEC6068F5C8D696B74DBD18FEC316AEF73985CF0475663208EB46B4F17DD9DA55367B03323E5491A70997B90C059FB34809E6EE55BCFBD5F2F52233BFE62E6AA9E4E26A1D4C2439883D14F2633D55D8AA66A1ACD5595E778AC3A280517F1157989E70C1A437B849F1877B779CC3CDDEDE2DAA6594A6C66D181A00A5F777EE60596D8773998F6E988DEAE4CCA60E4DDCF9590543C89F74F603259FCAD71660D30294FBBE6490300F78A9D63FA660DC9417B8B9DDA28BEB3977B621B988E23D4D954F322C3540541BC649ABD504C50FADFD9F0987D58A2BF689313A285E773FF02899A6EF887D1D4A0D2", + "0xB08FFB20760FFED58FADA86DFEF71AD72AA0FA763219618FE022C197E54708BB1191C66470250FCE8879487507CEE41381CA4D932F81C2B3F1AB20B539D50DCD", + "0xAC6BDB41324A9A9BF166DE5E1389582FAF72B6651987EE07FC3192943DB56050A37329CBB4A099ED8193E0757767A13DD52312AB4B03310DCD7F48A9DA04FD50E8083969EDB767B0CF6095179A163AB3661A05FBD5FAAAE82918A9962F0B93B855F97993EC975EEAA80D740ADBF4FF747359D041D5C33EA71D281E446B14773BCA97B43A23FB801676BD207A436C6481F1D2B9078717461A5B9D32E688F87748544523B524B0D57D5EA77A2775D2ECFA032CFBDBF52FB3786160279004E57AE6AF874E7303CE53299CCC041C7BC308D82A5698F3A8D0C38271AE35F8E9DBFBB694B5C803D89F7AE435DE236D525F54759B65E372FCD68EF20FA7111F9E4AFF73", + 
"21484252197776302499639938883777710321993113097987201050501182909581359357618579566746556372589385361683610524730509041328855066514963385522570894839035884713051640171474186548713546686476761306436434146475140156284389181808675016576845833340494848283681088886584219750554408060556769486628029028720727393293111678826356480455433909233520504112074401376133077150471237549474149190242010469539006449596611576612573955754349042329130631128234637924786466585703488460540228477440853493392086251021228087076124706778899179648655221663765993962724699135217212118535057766739392069738618682722216712319320435674779146070442", + }, +} + +func TestExp(t *testing.T) { + for i, test := range expTests { + x, ok1 := new(Int).SetString(test.x, 0) + y, ok2 := new(Int).SetString(test.y, 0) + out, ok3 := new(Int).SetString(test.out, 0) + + var ok4 bool + var m *Int + + if len(test.m) == 0 { + m, ok4 = nil, true + } else { + m, ok4 = new(Int).SetString(test.m, 0) + } + + if !ok1 || !ok2 || !ok3 || !ok4 { + t.Errorf("#%d: error in input", i) + continue + } + + z1 := new(Int).Exp(x, y, m) + if !isNormalized(z1) { + t.Errorf("#%d: %v is not normalized", i, *z1) + } + if z1.Cmp(out) != 0 { + t.Errorf("#%d: got %s want %s", i, z1, out) + } + + if m == nil { + // The result should be the same as for m == 0; + // specifically, there should be no div-zero panic. 
+ m = &Int{abs: nat{}} // m != nil && len(m.abs) == 0 + z2 := new(Int).Exp(x, y, m) + if z2.Cmp(z1) != 0 { + t.Errorf("#%d: got %s want %s", i, z2, z1) + } + } + } +} + +func checkGcd(aBytes, bBytes []byte) bool { + x := new(Int) + y := new(Int) + a := new(Int).SetBytes(aBytes) + b := new(Int).SetBytes(bBytes) + + d := new(Int).GCD(x, y, a, b) + x.Mul(x, a) + y.Mul(y, b) + x.Add(x, y) + + return x.Cmp(d) == 0 +} + +var gcdTests = []struct { + d, x, y, a, b string +}{ + // a <= 0 || b <= 0 + {"0", "0", "0", "0", "0"}, + {"0", "0", "0", "0", "7"}, + {"0", "0", "0", "11", "0"}, + {"0", "0", "0", "-77", "35"}, + {"0", "0", "0", "64515", "-24310"}, + {"0", "0", "0", "-64515", "-24310"}, + + {"1", "-9", "47", "120", "23"}, + {"7", "1", "-2", "77", "35"}, + {"935", "-3", "8", "64515", "24310"}, + {"935000000000000000", "-3", "8", "64515000000000000000", "24310000000000000000"}, + {"1", "-221", "22059940471369027483332068679400581064239780177629666810348940098015901108344", "98920366548084643601728869055592650835572950932266967461790948584315647051443", "991"}, + + // test early exit (after one Euclidean iteration) in binaryGCD + {"1", "", "", "1", "98920366548084643601728869055592650835572950932266967461790948584315647051443"}, +} + +func testGcd(t *testing.T, d, x, y, a, b *Int) { + var X *Int + if x != nil { + X = new(Int) + } + var Y *Int + if y != nil { + Y = new(Int) + } + + D := new(Int).GCD(X, Y, a, b) + if D.Cmp(d) != 0 { + t.Errorf("GCD(%s, %s): got d = %s, want %s", a, b, D, d) + } + if x != nil && X.Cmp(x) != 0 { + t.Errorf("GCD(%s, %s): got x = %s, want %s", a, b, X, x) + } + if y != nil && Y.Cmp(y) != 0 { + t.Errorf("GCD(%s, %s): got y = %s, want %s", a, b, Y, y) + } + + // binaryGCD requires a > 0 && b > 0 + if a.Sign() <= 0 || b.Sign() <= 0 { + return + } + + D.binaryGCD(a, b) + if D.Cmp(d) != 0 { + t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, D, d) + } +} + +func TestGcd(t *testing.T) { + for _, test := range gcdTests { + d, _ := 
new(Int).SetString(test.d, 0) + x, _ := new(Int).SetString(test.x, 0) + y, _ := new(Int).SetString(test.y, 0) + a, _ := new(Int).SetString(test.a, 0) + b, _ := new(Int).SetString(test.b, 0) + + testGcd(t, d, nil, nil, a, b) + testGcd(t, d, x, nil, a, b) + testGcd(t, d, nil, y, a, b) + testGcd(t, d, x, y, a, b) + } + + quick.Check(checkGcd, nil) +} + +var primes = []string{ + "2", + "3", + "5", + "7", + "11", + + "13756265695458089029", + "13496181268022124907", + "10953742525620032441", + "17908251027575790097", + + // http://code.google.com/p/go/issues/detail?id=638 + "18699199384836356663", + + "98920366548084643601728869055592650835572950932266967461790948584315647051443", + "94560208308847015747498523884063394671606671904944666360068158221458669711639", + + // http://primes.utm.edu/lists/small/small3.html + "449417999055441493994709297093108513015373787049558499205492347871729927573118262811508386655998299074566974373711472560655026288668094291699357843464363003144674940345912431129144354948751003607115263071543163", + "230975859993204150666423538988557839555560243929065415434980904258310530753006723857139742334640122533598517597674807096648905501653461687601339782814316124971547968912893214002992086353183070342498989426570593", + "5521712099665906221540423207019333379125265462121169655563495403888449493493629943498064604536961775110765377745550377067893607246020694972959780839151452457728855382113555867743022746090187341871655890805971735385789993", + "203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", +} + +var composites = []string{ + "21284175091214687912771199898307297748211672914763848041968395774954376176754", + "6084766654921918907427900243509372380954290099172559290432744450051395395951", + 
"84594350493221918389213352992032324280367711247940675652888030554255915464401", + "82793403787388584738507275144194252681", +} + +func TestProbablyPrime(t *testing.T) { + nreps := 20 + if testing.Short() { + nreps = 1 + } + for i, s := range primes { + p, _ := new(Int).SetString(s, 10) + if !p.ProbablyPrime(nreps) { + t.Errorf("#%d prime found to be non-prime (%s)", i, s) + } + } + + for i, s := range composites { + c, _ := new(Int).SetString(s, 10) + if c.ProbablyPrime(nreps) { + t.Errorf("#%d composite found to be prime (%s)", i, s) + } + if testing.Short() { + break + } + } +} + +type intShiftTest struct { + in string + shift uint + out string +} + +var rshTests = []intShiftTest{ + {"0", 0, "0"}, + {"-0", 0, "0"}, + {"0", 1, "0"}, + {"0", 2, "0"}, + {"1", 0, "1"}, + {"1", 1, "0"}, + {"1", 2, "0"}, + {"2", 0, "2"}, + {"2", 1, "1"}, + {"-1", 0, "-1"}, + {"-1", 1, "-1"}, + {"-1", 10, "-1"}, + {"-100", 2, "-25"}, + {"-100", 3, "-13"}, + {"-100", 100, "-1"}, + {"4294967296", 0, "4294967296"}, + {"4294967296", 1, "2147483648"}, + {"4294967296", 2, "1073741824"}, + {"18446744073709551616", 0, "18446744073709551616"}, + {"18446744073709551616", 1, "9223372036854775808"}, + {"18446744073709551616", 2, "4611686018427387904"}, + {"18446744073709551616", 64, "1"}, + {"340282366920938463463374607431768211456", 64, "18446744073709551616"}, + {"340282366920938463463374607431768211456", 128, "1"}, +} + +func TestRsh(t *testing.T) { + for i, test := range rshTests { + in, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + out := new(Int).Rsh(in, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if out.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, out, expected) + } + } +} + +func TestRshSelf(t *testing.T) { + for i, test := range rshTests { + z, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + z.Rsh(z, test.shift) + + if !isNormalized(z) { + 
t.Errorf("#%d: %v is not normalized", i, *z) + } + if z.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, z, expected) + } + } +} + +var lshTests = []intShiftTest{ + {"0", 0, "0"}, + {"0", 1, "0"}, + {"0", 2, "0"}, + {"1", 0, "1"}, + {"1", 1, "2"}, + {"1", 2, "4"}, + {"2", 0, "2"}, + {"2", 1, "4"}, + {"2", 2, "8"}, + {"-87", 1, "-174"}, + {"4294967296", 0, "4294967296"}, + {"4294967296", 1, "8589934592"}, + {"4294967296", 2, "17179869184"}, + {"18446744073709551616", 0, "18446744073709551616"}, + {"9223372036854775808", 1, "18446744073709551616"}, + {"4611686018427387904", 2, "18446744073709551616"}, + {"1", 64, "18446744073709551616"}, + {"18446744073709551616", 64, "340282366920938463463374607431768211456"}, + {"1", 128, "340282366920938463463374607431768211456"}, +} + +func TestLsh(t *testing.T) { + for i, test := range lshTests { + in, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + out := new(Int).Lsh(in, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if out.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, out, expected) + } + } +} + +func TestLshSelf(t *testing.T) { + for i, test := range lshTests { + z, _ := new(Int).SetString(test.in, 10) + expected, _ := new(Int).SetString(test.out, 10) + z.Lsh(z, test.shift) + + if !isNormalized(z) { + t.Errorf("#%d: %v is not normalized", i, *z) + } + if z.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, z, expected) + } + } +} + +func TestLshRsh(t *testing.T) { + for i, test := range rshTests { + in, _ := new(Int).SetString(test.in, 10) + out := new(Int).Lsh(in, test.shift) + out = out.Rsh(out, test.shift) + + if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if in.Cmp(out) != 0 { + t.Errorf("#%d: got %s want %s", i, out, in) + } + } + for i, test := range lshTests { + in, _ := new(Int).SetString(test.in, 10) + out := new(Int).Lsh(in, test.shift) + out.Rsh(out, test.shift) + + 
if !isNormalized(out) { + t.Errorf("#%d: %v is not normalized", i, *out) + } + if in.Cmp(out) != 0 { + t.Errorf("#%d: got %s want %s", i, out, in) + } + } +} + +var int64Tests = []int64{ + 0, + 1, + -1, + 4294967295, + -4294967295, + 4294967296, + -4294967296, + 9223372036854775807, + -9223372036854775807, + -9223372036854775808, +} + +func TestInt64(t *testing.T) { + for i, testVal := range int64Tests { + in := NewInt(testVal) + out := in.Int64() + + if out != testVal { + t.Errorf("#%d got %d want %d", i, out, testVal) + } + } +} + +var uint64Tests = []uint64{ + 0, + 1, + 4294967295, + 4294967296, + 8589934591, + 8589934592, + 9223372036854775807, + 9223372036854775808, + 18446744073709551615, // 1<<64 - 1 +} + +func TestUint64(t *testing.T) { + in := new(Int) + for i, testVal := range uint64Tests { + in.SetUint64(testVal) + out := in.Uint64() + + if out != testVal { + t.Errorf("#%d got %d want %d", i, out, testVal) + } + + str := fmt.Sprint(testVal) + strOut := in.String() + if strOut != str { + t.Errorf("#%d.String got %s want %s", i, strOut, str) + } + } +} + +var bitwiseTests = []struct { + x, y string + and, or, xor, andNot string +}{ + {"0x00", "0x00", "0x00", "0x00", "0x00", "0x00"}, + {"0x00", "0x01", "0x00", "0x01", "0x01", "0x00"}, + {"0x01", "0x00", "0x00", "0x01", "0x01", "0x01"}, + {"-0x01", "0x00", "0x00", "-0x01", "-0x01", "-0x01"}, + {"-0xaf", "-0x50", "-0xf0", "-0x0f", "0xe1", "0x41"}, + {"0x00", "-0x01", "0x00", "-0x01", "-0x01", "0x00"}, + {"0x01", "0x01", "0x01", "0x01", "0x00", "0x00"}, + {"-0x01", "-0x01", "-0x01", "-0x01", "0x00", "0x00"}, + {"0x07", "0x08", "0x00", "0x0f", "0x0f", "0x07"}, + {"0x05", "0x0f", "0x05", "0x0f", "0x0a", "0x00"}, + {"0x013ff6", "0x9a4e", "0x1a46", "0x01bffe", "0x01a5b8", "0x0125b0"}, + {"-0x013ff6", "0x9a4e", "0x800a", "-0x0125b2", "-0x01a5bc", "-0x01c000"}, + {"-0x013ff6", "-0x9a4e", "-0x01bffe", "-0x1a46", "0x01a5b8", "0x8008"}, + { + "0x1000009dc6e3d9822cba04129bcbe3401", + 
"0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd", + "0x1000001186210100001000009048c2001", + "0xb9bd7d543685789d57cb918e8bfeff7fddb2ebe87dfbbdfe35fd", + "0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fc", + "0x8c40c2d8822caa04120b8321400", + }, + { + "0x1000009dc6e3d9822cba04129bcbe3401", + "-0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd", + "0x8c40c2d8822caa04120b8321401", + "-0xb9bd7d543685789d57ca918e82229142459020483cd2014001fd", + "-0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fe", + "0x1000001186210100001000009048c2000", + }, + { + "-0x1000009dc6e3d9822cba04129bcbe3401", + "-0xb9bd7d543685789d57cb918e833af352559021483cdb05cc21fd", + "-0xb9bd7d543685789d57cb918e8bfeff7fddb2ebe87dfbbdfe35fd", + "-0x1000001186210100001000009048c2001", + "0xb9bd7d543685789d57ca918e8ae69d6fcdb2eae87df2b97215fc", + "0xb9bd7d543685789d57ca918e82229142459020483cd2014001fc", + }, +} + +type bitFun func(z, x, y *Int) *Int + +func testBitFun(t *testing.T, msg string, f bitFun, x, y *Int, exp string) { + expected := new(Int) + expected.SetString(exp, 0) + + out := f(new(Int), x, y) + if out.Cmp(expected) != 0 { + t.Errorf("%s: got %s want %s", msg, out, expected) + } +} + +func testBitFunSelf(t *testing.T, msg string, f bitFun, x, y *Int, exp string) { + self := new(Int) + self.Set(x) + expected := new(Int) + expected.SetString(exp, 0) + + self = f(self, self, y) + if self.Cmp(expected) != 0 { + t.Errorf("%s: got %s want %s", msg, self, expected) + } +} + +func altBit(x *Int, i int) uint { + z := new(Int).Rsh(x, uint(i)) + z = z.And(z, NewInt(1)) + if z.Cmp(new(Int)) != 0 { + return 1 + } + return 0 +} + +func altSetBit(z *Int, x *Int, i int, b uint) *Int { + one := NewInt(1) + m := one.Lsh(one, uint(i)) + switch b { + case 1: + return z.Or(x, m) + case 0: + return z.AndNot(x, m) + } + panic("set bit is not 0 or 1") +} + +func testBitset(t *testing.T, x *Int) { + n := x.BitLen() + z := new(Int).Set(x) + z1 := new(Int).Set(x) + for i := 0; i < n+10; i++ { + 
old := z.Bit(i) + old1 := altBit(z1, i) + if old != old1 { + t.Errorf("bitset: inconsistent value for Bit(%s, %d), got %v want %v", z1, i, old, old1) + } + z := new(Int).SetBit(z, i, 1) + z1 := altSetBit(new(Int), z1, i, 1) + if z.Bit(i) == 0 { + t.Errorf("bitset: bit %d of %s got 0 want 1", i, x) + } + if z.Cmp(z1) != 0 { + t.Errorf("bitset: inconsistent value after SetBit 1, got %s want %s", z, z1) + } + z.SetBit(z, i, 0) + altSetBit(z1, z1, i, 0) + if z.Bit(i) != 0 { + t.Errorf("bitset: bit %d of %s got 1 want 0", i, x) + } + if z.Cmp(z1) != 0 { + t.Errorf("bitset: inconsistent value after SetBit 0, got %s want %s", z, z1) + } + altSetBit(z1, z1, i, old) + z.SetBit(z, i, old) + if z.Cmp(z1) != 0 { + t.Errorf("bitset: inconsistent value after SetBit old, got %s want %s", z, z1) + } + } + if z.Cmp(x) != 0 { + t.Errorf("bitset: got %s want %s", z, x) + } +} + +var bitsetTests = []struct { + x string + i int + b uint +}{ + {"0", 0, 0}, + {"0", 200, 0}, + {"1", 0, 1}, + {"1", 1, 0}, + {"-1", 0, 1}, + {"-1", 200, 1}, + {"0x2000000000000000000000000000", 108, 0}, + {"0x2000000000000000000000000000", 109, 1}, + {"0x2000000000000000000000000000", 110, 0}, + {"-0x2000000000000000000000000001", 108, 1}, + {"-0x2000000000000000000000000001", 109, 0}, + {"-0x2000000000000000000000000001", 110, 1}, +} + +func TestBitSet(t *testing.T) { + for _, test := range bitwiseTests { + x := new(Int) + x.SetString(test.x, 0) + testBitset(t, x) + x = new(Int) + x.SetString(test.y, 0) + testBitset(t, x) + } + for i, test := range bitsetTests { + x := new(Int) + x.SetString(test.x, 0) + b := x.Bit(test.i) + if b != test.b { + t.Errorf("#%d got %v want %v", i, b, test.b) + } + } + z := NewInt(1) + z.SetBit(NewInt(0), 2, 1) + if z.Cmp(NewInt(4)) != 0 { + t.Errorf("destination leaked into result; got %s want 4", z) + } +} + +func BenchmarkBitset(b *testing.B) { + z := new(Int) + z.SetBit(z, 512, 1) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + z.SetBit(z, i&512, 1) + } 
+} + +func BenchmarkBitsetNeg(b *testing.B) { + z := NewInt(-1) + z.SetBit(z, 512, 0) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + z.SetBit(z, i&512, 0) + } +} + +func BenchmarkBitsetOrig(b *testing.B) { + z := new(Int) + altSetBit(z, z, 512, 1) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + altSetBit(z, z, i&512, 1) + } +} + +func BenchmarkBitsetNegOrig(b *testing.B) { + z := NewInt(-1) + altSetBit(z, z, 512, 0) + b.ResetTimer() + b.StartTimer() + for i := b.N - 1; i >= 0; i-- { + altSetBit(z, z, i&512, 0) + } +} + +func TestBitwise(t *testing.T) { + x := new(Int) + y := new(Int) + for _, test := range bitwiseTests { + x.SetString(test.x, 0) + y.SetString(test.y, 0) + + testBitFun(t, "and", (*Int).And, x, y, test.and) + testBitFunSelf(t, "and", (*Int).And, x, y, test.and) + testBitFun(t, "andNot", (*Int).AndNot, x, y, test.andNot) + testBitFunSelf(t, "andNot", (*Int).AndNot, x, y, test.andNot) + testBitFun(t, "or", (*Int).Or, x, y, test.or) + testBitFunSelf(t, "or", (*Int).Or, x, y, test.or) + testBitFun(t, "xor", (*Int).Xor, x, y, test.xor) + testBitFunSelf(t, "xor", (*Int).Xor, x, y, test.xor) + } +} + +var notTests = []struct { + in string + out string +}{ + {"0", "-1"}, + {"1", "-2"}, + {"7", "-8"}, + {"0", "-1"}, + {"-81910", "81909"}, + { + "298472983472983471903246121093472394872319615612417471234712061", + "-298472983472983471903246121093472394872319615612417471234712062", + }, +} + +func TestNot(t *testing.T) { + in := new(Int) + out := new(Int) + expected := new(Int) + for i, test := range notTests { + in.SetString(test.in, 10) + expected.SetString(test.out, 10) + out = out.Not(in) + if out.Cmp(expected) != 0 { + t.Errorf("#%d: got %s want %s", i, out, expected) + } + out = out.Not(out) + if out.Cmp(in) != 0 { + t.Errorf("#%d: got %s want %s", i, out, in) + } + } +} + +var modInverseTests = []struct { + element string + modulus string +}{ + {"1234567", "458948883992"}, + {"239487239847", 
"2410312426921032588552076022197566074856950548502459942654116941958108831682612228890093858261341614673227141477904012196503648957050582631942730706805009223062734745341073406696246014589361659774041027169249453200378729434170325843778659198143763193776859869524088940195577346119843545301547043747207749969763750084308926339295559968882457872412993810129130294592999947926365264059284647209730384947211681434464714438488520940127459844288859336526896320919633919"}, +} + +func TestModInverse(t *testing.T) { + var element, modulus, gcd, inverse Int + one := NewInt(1) + for i, test := range modInverseTests { + (&element).SetString(test.element, 10) + (&modulus).SetString(test.modulus, 10) + (&inverse).ModInverse(&element, &modulus) + (&inverse).Mul(&inverse, &element) + (&inverse).Mod(&inverse, &modulus) + if (&inverse).Cmp(one) != 0 { + t.Errorf("#%d: failed (e·e^(-1)=%s)", i, &inverse) + } + } + // exhaustive test for small values + for n := 2; n < 100; n++ { + (&modulus).SetInt64(int64(n)) + for x := 1; x < n; x++ { + (&element).SetInt64(int64(x)) + (&gcd).GCD(nil, nil, &element, &modulus) + if (&gcd).Cmp(one) != 0 { + continue + } + (&inverse).ModInverse(&element, &modulus) + (&inverse).Mul(&inverse, &element) + (&inverse).Mod(&inverse, &modulus) + if (&inverse).Cmp(one) != 0 { + t.Errorf("ModInverse(%d,%d)*%d%%%d=%d, not 1", &element, &modulus, &element, &modulus, &inverse) + } + } + } +} + +var encodingTests = []string{ + "-539345864568634858364538753846587364875430589374589", + "-678645873", + "-100", + "-2", + "-1", + "0", + "1", + "2", + "10", + "42", + "1234567890", + "298472983472983471903246121093472394872319615612417471234712061", +} + +func TestIntGobEncoding(t *testing.T) { + var medium bytes.Buffer + enc := gob.NewEncoder(&medium) + dec := gob.NewDecoder(&medium) + for _, test := range encodingTests { + medium.Reset() // empty buffer for each test case (in case of failures) + var tx Int + tx.SetString(test, 10) + if err := enc.Encode(&tx); err != nil { + 
t.Errorf("encoding of %s failed: %s", &tx, err) + } + var rx Int + if err := dec.Decode(&rx); err != nil { + t.Errorf("decoding of %s failed: %s", &tx, err) + } + if rx.Cmp(&tx) != 0 { + t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx) + } + } +} + +// Sending a nil Int pointer (inside a slice) on a round trip through gob should yield a zero. +// TODO: top-level nils. +func TestGobEncodingNilIntInSlice(t *testing.T) { + buf := new(bytes.Buffer) + enc := gob.NewEncoder(buf) + dec := gob.NewDecoder(buf) + + var in = make([]*Int, 1) + err := enc.Encode(&in) + if err != nil { + t.Errorf("gob encode failed: %q", err) + } + var out []*Int + err = dec.Decode(&out) + if err != nil { + t.Fatalf("gob decode failed: %q", err) + } + if len(out) != 1 { + t.Fatalf("wrong len; want 1 got %d", len(out)) + } + var zero Int + if out[0].Cmp(&zero) != 0 { + t.Errorf("transmission of (*Int)(nill) failed: got %s want 0", out) + } +} + +func TestIntJSONEncoding(t *testing.T) { + for _, test := range encodingTests { + var tx Int + tx.SetString(test, 10) + b, err := json.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + } + var rx Int + if err := json.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + } + if rx.Cmp(&tx) != 0 { + t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } +} + +var intVals = []string{ + "-141592653589793238462643383279502884197169399375105820974944592307816406286", + "-1415926535897932384626433832795028841971", + "-141592653589793", + "-1", + "0", + "1", + "141592653589793", + "1415926535897932384626433832795028841971", + "141592653589793238462643383279502884197169399375105820974944592307816406286", +} + +func TestIntJSONEncodingTextMarshaller(t *testing.T) { + for _, num := range intVals { + var tx Int + tx.SetString(num, 0) + b, err := json.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx 
Int + if err := json.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } +} + +func TestIntXMLEncodingTextMarshaller(t *testing.T) { + for _, num := range intVals { + var tx Int + tx.SetString(num, 0) + b, err := xml.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Int + if err := xml.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } +} + +func TestIssue2607(t *testing.T) { + // This code sequence used to hang. + n := NewInt(10) + n.Rand(rand.New(rand.NewSource(9)), n) +} diff --git a/src/math/big/nat.go b/src/math/big/nat.go new file mode 100644 index 000000000..16a87f5c5 --- /dev/null +++ b/src/math/big/nat.go @@ -0,0 +1,1508 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package big implements multi-precision arithmetic (big numbers). +// The following numeric types are supported: +// +// - Int signed integers +// - Rat rational numbers +// +// Methods are typically of the form: +// +// func (z *Int) Op(x, y *Int) *Int (similar for *Rat) +// +// and implement operations z = x Op y with the result as receiver; if it +// is one of the operands it may be overwritten (and its memory reused). +// To enable chaining of operations, the result is also returned. Methods +// returning a result other than *Int or *Rat take one of the operands as +// the receiver. +// +package big + +// This file contains operations on unsigned multi-precision integers. +// These are the building blocks for the operations on signed integers +// and rationals. 
+ +import ( + "errors" + "io" + "math" + "math/rand" + "sync" +) + +// An unsigned integer x of the form +// +// x = x[n-1]*_B^(n-1) + x[n-2]*_B^(n-2) + ... + x[1]*_B + x[0] +// +// with 0 <= x[i] < _B and 0 <= i < n is stored in a slice of length n, +// with the digits x[i] as the slice elements. +// +// A number is normalized if the slice contains no leading 0 digits. +// During arithmetic operations, denormalized values may occur but are +// always normalized before returning the final result. The normalized +// representation of 0 is the empty or nil slice (length = 0). +// +type nat []Word + +var ( + natOne = nat{1} + natTwo = nat{2} + natTen = nat{10} +) + +func (z nat) clear() { + for i := range z { + z[i] = 0 + } +} + +func (z nat) norm() nat { + i := len(z) + for i > 0 && z[i-1] == 0 { + i-- + } + return z[0:i] +} + +func (z nat) make(n int) nat { + if n <= cap(z) { + return z[0:n] // reuse z + } + // Choosing a good value for e has significant performance impact + // because it increases the chance that a value can be reused. 
+ const e = 4 // extra capacity + return make(nat, n, n+e) +} + +func (z nat) setWord(x Word) nat { + if x == 0 { + return z.make(0) + } + z = z.make(1) + z[0] = x + return z +} + +func (z nat) setUint64(x uint64) nat { + // single-digit values + if w := Word(x); uint64(w) == x { + return z.setWord(w) + } + + // compute number of words n required to represent x + n := 0 + for t := x; t > 0; t >>= _W { + n++ + } + + // split x into n words + z = z.make(n) + for i := range z { + z[i] = Word(x & _M) + x >>= _W + } + + return z +} + +func (z nat) set(x nat) nat { + z = z.make(len(x)) + copy(z, x) + return z +} + +func (z nat) add(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + return z.add(y, x) + case m == 0: + // n == 0 because m >= n; result is 0 + return z.make(0) + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m + 1) + c := addVV(z[0:n], x, y) + if m > n { + c = addVW(z[n:m], x[n:], c) + } + z[m] = c + + return z.norm() +} + +func (z nat) sub(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + panic("underflow") + case m == 0: + // n == 0 because m >= n; result is 0 + return z.make(0) + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m) + c := subVV(z[0:n], x, y) + if m > n { + c = subVW(z[n:], x[n:], c) + } + if c != 0 { + panic("underflow") + } + + return z.norm() +} + +func (x nat) cmp(y nat) (r int) { + m := len(x) + n := len(y) + if m != n || m == 0 { + switch { + case m < n: + r = -1 + case m > n: + r = 1 + } + return + } + + i := m - 1 + for i > 0 && x[i] == y[i] { + i-- + } + + switch { + case x[i] < y[i]: + r = -1 + case x[i] > y[i]: + r = 1 + } + return +} + +func (z nat) mulAddWW(x nat, y, r Word) nat { + m := len(x) + if m == 0 || y == 0 { + return z.setWord(r) // result is r + } + // m > 0 + + z = z.make(m + 1) + z[m] = mulAddVWW(z[0:m], x, y, r) + + return z.norm() +} + +// basicMul multiplies x and y and leaves the result in z. 
+// The (non-normalized) result is placed in z[0 : len(x) + len(y)]. +func basicMul(z, x, y nat) { + z[0 : len(x)+len(y)].clear() // initialize z + for i, d := range y { + if d != 0 { + z[len(x)+i] = addMulVVW(z[i:i+len(x)], x, d) + } + } +} + +// Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks. +// Factored out for readability - do not use outside karatsuba. +func karatsubaAdd(z, x nat, n int) { + if c := addVV(z[0:n], z, x); c != 0 { + addVW(z[n:n+n>>1], z[n:], c) + } +} + +// Like karatsubaAdd, but does subtract. +func karatsubaSub(z, x nat, n int) { + if c := subVV(z[0:n], z, x); c != 0 { + subVW(z[n:n+n>>1], z[n:], c) + } +} + +// Operands that are shorter than karatsubaThreshold are multiplied using +// "grade school" multiplication; for longer operands the Karatsuba algorithm +// is used. +var karatsubaThreshold int = 40 // computed by calibrate.go + +// karatsuba multiplies x and y and leaves the result in z. +// Both x and y must have the same length n and n must be a +// power of 2. The result vector z must have len(z) >= 6*n. +// The (non-normalized) result is placed in z[0 : 2*n]. +func karatsuba(z, x, y nat) { + n := len(y) + + // Switch to basic multiplication if numbers are odd or small. 
+ // (n is always even if karatsubaThreshold is even, but be + // conservative) + if n&1 != 0 || n < karatsubaThreshold || n < 2 { + basicMul(z, x, y) + return + } + // n&1 == 0 && n >= karatsubaThreshold && n >= 2 + + // Karatsuba multiplication is based on the observation that + // for two numbers x and y with: + // + // x = x1*b + x0 + // y = y1*b + y0 + // + // the product x*y can be obtained with 3 products z2, z1, z0 + // instead of 4: + // + // x*y = x1*y1*b*b + (x1*y0 + x0*y1)*b + x0*y0 + // = z2*b*b + z1*b + z0 + // + // with: + // + // xd = x1 - x0 + // yd = y0 - y1 + // + // z1 = xd*yd + z2 + z0 + // = (x1-x0)*(y0 - y1) + z2 + z0 + // = x1*y0 - x1*y1 - x0*y0 + x0*y1 + z2 + z0 + // = x1*y0 - z2 - z0 + x0*y1 + z2 + z0 + // = x1*y0 + x0*y1 + + // split x, y into "digits" + n2 := n >> 1 // n2 >= 1 + x1, x0 := x[n2:], x[0:n2] // x = x1*b + y0 + y1, y0 := y[n2:], y[0:n2] // y = y1*b + y0 + + // z is used for the result and temporary storage: + // + // 6*n 5*n 4*n 3*n 2*n 1*n 0*n + // z = [z2 copy|z0 copy| xd*yd | yd:xd | x1*y1 | x0*y0 ] + // + // For each recursive call of karatsuba, an unused slice of + // z is passed in that has (at least) half the length of the + // caller's z. 
+ + // compute z0 and z2 with the result "in place" in z + karatsuba(z, x0, y0) // z0 = x0*y0 + karatsuba(z[n:], x1, y1) // z2 = x1*y1 + + // compute xd (or the negative value if underflow occurs) + s := 1 // sign of product xd*yd + xd := z[2*n : 2*n+n2] + if subVV(xd, x1, x0) != 0 { // x1-x0 + s = -s + subVV(xd, x0, x1) // x0-x1 + } + + // compute yd (or the negative value if underflow occurs) + yd := z[2*n+n2 : 3*n] + if subVV(yd, y0, y1) != 0 { // y0-y1 + s = -s + subVV(yd, y1, y0) // y1-y0 + } + + // p = (x1-x0)*(y0-y1) == x1*y0 - x1*y1 - x0*y0 + x0*y1 for s > 0 + // p = (x0-x1)*(y0-y1) == x0*y0 - x0*y1 - x1*y0 + x1*y1 for s < 0 + p := z[n*3:] + karatsuba(p, xd, yd) + + // save original z2:z0 + // (ok to use upper half of z since we're done recursing) + r := z[n*4:] + copy(r, z[:n*2]) + + // add up all partial products + // + // 2*n n 0 + // z = [ z2 | z0 ] + // + [ z0 ] + // + [ z2 ] + // + [ p ] + // + karatsubaAdd(z[n2:], r, n) + karatsubaAdd(z[n2:], r[n:], n) + if s > 0 { + karatsubaAdd(z[n2:], p, n) + } else { + karatsubaSub(z[n2:], p, n) + } +} + +// alias returns true if x and y share the same base array. +func alias(x, y nat) bool { + return cap(x) > 0 && cap(y) > 0 && &x[0:cap(x)][cap(x)-1] == &y[0:cap(y)][cap(y)-1] +} + +// addAt implements z += x<<(_W*i); z must be long enough. +// (we don't use nat.add because we need z to stay the same +// slice, and we don't need to normalize z after each addition) +func addAt(z, x nat, i int) { + if n := len(x); n > 0 { + if c := addVV(z[i:i+n], z[i:], x); c != 0 { + j := i + n + if j < len(z) { + addVW(z[j:], z[j:], c) + } + } + } +} + +func max(x, y int) int { + if x > y { + return x + } + return y +} + +// karatsubaLen computes an approximation to the maximum k <= n such that +// k = p<<i for a number p <= karatsubaThreshold and an i >= 0. Thus, the +// result is the largest number that can be divided repeatedly by 2 before +// becoming about the value of karatsubaThreshold. 
+func karatsubaLen(n int) int { + i := uint(0) + for n > karatsubaThreshold { + n >>= 1 + i++ + } + return n << i +} + +func (z nat) mul(x, y nat) nat { + m := len(x) + n := len(y) + + switch { + case m < n: + return z.mul(y, x) + case m == 0 || n == 0: + return z.make(0) + case n == 1: + return z.mulAddWW(x, y[0], 0) + } + // m >= n > 1 + + // determine if z can be reused + if alias(z, x) || alias(z, y) { + z = nil // z is an alias for x or y - cannot reuse + } + + // use basic multiplication if the numbers are small + if n < karatsubaThreshold { + z = z.make(m + n) + basicMul(z, x, y) + return z.norm() + } + // m >= n && n >= karatsubaThreshold && n >= 2 + + // determine Karatsuba length k such that + // + // x = xh*b + x0 (0 <= x0 < b) + // y = yh*b + y0 (0 <= y0 < b) + // b = 1<<(_W*k) ("base" of digits xi, yi) + // + k := karatsubaLen(n) + // k <= n + + // multiply x0 and y0 via Karatsuba + x0 := x[0:k] // x0 is not normalized + y0 := y[0:k] // y0 is not normalized + z = z.make(max(6*k, m+n)) // enough space for karatsuba of x0*y0 and full result of x*y + karatsuba(z, x0, y0) + z = z[0 : m+n] // z has final length but may be incomplete + z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) + + // If xh != 0 or yh != 0, add the missing terms to z. For + // + // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) + // yh = y1*b (0 <= y1 < b) + // + // the missing terms are + // + // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 + // + // since all the yi for i > 1 are 0 by choice of k: If any of them + // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would + // be a larger valid threshold contradicting the assumption about k. 
+ // + if k < n || m != n { + var t nat + + // add x0*y1*b + x0 := x0.norm() + y1 := y[k:] // y1 is normalized because y is + t = t.mul(x0, y1) // update t so we don't lose t's underlying array + addAt(z, t, k) + + // add xi*y0<<i, xi*y1*b<<(i+k) + y0 := y0.norm() + for i := k; i < len(x); i += k { + xi := x[i:] + if len(xi) > k { + xi = xi[:k] + } + xi = xi.norm() + t = t.mul(xi, y0) + addAt(z, t, i) + t = t.mul(xi, y1) + addAt(z, t, i+k) + } + } + + return z.norm() +} + +// mulRange computes the product of all the unsigned integers in the +// range [a, b] inclusively. If a > b (empty range), the result is 1. +func (z nat) mulRange(a, b uint64) nat { + switch { + case a == 0: + // cut long ranges short (optimization) + return z.setUint64(0) + case a > b: + return z.setUint64(1) + case a == b: + return z.setUint64(a) + case a+1 == b: + return z.mul(nat(nil).setUint64(a), nat(nil).setUint64(b)) + } + m := (a + b) / 2 + return z.mul(nat(nil).mulRange(a, m), nat(nil).mulRange(m+1, b)) +} + +// q = (x-r)/y, with 0 <= r < y +func (z nat) divW(x nat, y Word) (q nat, r Word) { + m := len(x) + switch { + case y == 0: + panic("division by zero") + case y == 1: + q = z.set(x) // result is x + return + case m == 0: + q = z.make(0) // result is 0 + return + } + // m > 0 + z = z.make(m) + r = divWVW(z, 0, x, y) + q = z.norm() + return +} + +func (z nat) div(z2, u, v nat) (q, r nat) { + if len(v) == 0 { + panic("division by zero") + } + + if u.cmp(v) < 0 { + q = z.make(0) + r = z2.set(u) + return + } + + if len(v) == 1 { + var r2 Word + q, r2 = z.divW(u, v[0]) + r = z2.setWord(r2) + return + } + + q, r = z.divLarge(z2, u, v) + return +} + +// q = (uIn-r)/v, with 0 <= r < y +// Uses z as storage for q, and u as storage for r if possible. +// See Knuth, Volume 2, section 4.3.1, Algorithm D. 
// Preconditions:
//	len(v) >= 2
//	len(uIn) >= len(v)
//
// The quotient has at most len(uIn)-len(v)+1 words. The working copy of the
// dividend is one word longer than uIn to hold the bits shifted out during
// normalization. v is never modified; a shifted copy is made if needed.
func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
	n := len(v)
	m := len(uIn) - n

	// determine if z can be reused
	// TODO(gri) should find a better solution - this if statement
	//           is very costly (see e.g. time pidigits -s -n 10000)
	if alias(z, uIn) || alias(z, v) {
		z = nil // z is an alias for uIn or v - cannot reuse
	}
	q = z.make(m + 1)

	// qhatv holds the (n+1)-word product qhat*v computed in step D4
	qhatv := make(nat, n+1)
	if alias(u, uIn) || alias(u, v) {
		u = nil // u is an alias for uIn or v - cannot reuse
	}
	u = u.make(len(uIn) + 1)
	u.clear()

	// D1.
	// Normalize: shift v and uIn left by the same amount, taken from
	// leadingZeros of v's most significant word. The bits shifted out of
	// uIn end up in the extra top word of u.
	shift := leadingZeros(v[n-1])
	if shift > 0 {
		// do not modify v, it may be used by another goroutine simultaneously
		v1 := make(nat, n)
		shlVU(v1, v, shift)
		v = v1
	}
	u[len(uIn)] = shlVU(u[0:len(uIn)], uIn, shift)

	// D2.
	// Produce one quotient word per iteration, most significant first.
	for j := m; j >= 0; j-- {
		// D3.
		// Estimate the quotient word qhat. It starts at the largest word
		// value and is only computed by division when the top word of the
		// current dividend window differs from the top word of v.
		qhat := Word(_M)
		if u[j+n] != v[n-1] {
			var rhat Word
			qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])

			// x1 | x2 = q̂v_{n-2}
			x1, x2 := mulWW(qhat, v[n-2])
			// test if q̂v_{n-2} > br̂ + u_{j+n-2}
			for greaterThan(x1, x2, rhat, u[j+n-2]) {
				qhat--
				prevRhat := rhat
				rhat += v[n-1]
				// v[n-1] >= 0, so this tests for overflow.
				if rhat < prevRhat {
					break
				}
				x1, x2 = mulWW(qhat, v[n-2])
			}
		}

		// D4.
		// Multiply and subtract: u[j:j+n+1] -= qhat*v.
		qhatv[n] = mulAddVWW(qhatv[0:n], v, qhat, 0)

		c := subVV(u[j:j+len(qhatv)], u[j:], qhatv)
		if c != 0 {
			// the subtraction borrowed, so qhat was one too large:
			// add v back once and decrement qhat
			c := addVV(u[j:j+n], u[j:], v)
			u[j+n] += c
			qhat--
		}

		q[j] = qhat
	}

	q = q.norm()
	// undo the normalization shift; what is left in u is the remainder
	shrVU(u, u, shift)
	r = u.norm()

	return q, r
}
+const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1 + +func hexValue(ch rune) Word { + d := int(MaxBase + 1) // illegal base + switch { + case '0' <= ch && ch <= '9': + d = int(ch - '0') + case 'a' <= ch && ch <= 'z': + d = int(ch - 'a' + 10) + case 'A' <= ch && ch <= 'Z': + d = int(ch - 'A' + 10) + } + return Word(d) +} + +// scan sets z to the natural number corresponding to the longest possible prefix +// read from r representing an unsigned integer in a given conversion base. +// It returns z, the actual conversion base used, and an error, if any. In the +// error case, the value of z is undefined. The syntax follows the syntax of +// unsigned integer literals in Go. +// +// The base argument must be 0 or a value from 2 through MaxBase. If the base +// is 0, the string prefix determines the actual conversion base. A prefix of +// ``0x'' or ``0X'' selects base 16; the ``0'' prefix selects base 8, and a +// ``0b'' or ``0B'' prefix selects base 2. Otherwise the selected base is 10. 
//
// Digits are consumed until the first rune that is not a digit in the
// selected base; that rune is pushed back with UnreadRune. Reaching io.EOF
// after at least one rune has been read ends the number without an error.
func (z nat) scan(r io.RuneScanner, base int) (nat, int, error) {
	// reject illegal bases
	if base < 0 || base == 1 || MaxBase < base {
		return z, 0, errors.New("illegal number base")
	}

	// one char look-ahead
	ch, _, err := r.ReadRune()
	if err != nil {
		return z, 0, err
	}

	// determine base if necessary
	b := Word(base)
	if base == 0 {
		b = 10
		if ch == '0' {
			// a leading 0 selects octal unless followed by x/X or b/B
			switch ch, _, err = r.ReadRune(); err {
			case nil:
				b = 8
				switch ch {
				case 'x', 'X':
					b = 16
				case 'b', 'B':
					b = 2
				}
				if b == 2 || b == 16 {
					// skip the prefix letter; a digit must follow
					if ch, _, err = r.ReadRune(); err != nil {
						return z, 0, err
					}
				}
			case io.EOF:
				// the input was just "0"
				return z.make(0), 10, nil
			default:
				return z, 10, err
			}
		}
	}

	// convert string
	// - group as many digits d as possible together into a "super-digit" dd with "super-base" bb
	// - only when bb does not fit into a word anymore, do a full number mulAddWW using bb and dd
	z = z.make(0)
	bb := Word(1)
	dd := Word(0)
	// max is the largest bb for which bb*b still fits into a Word
	for max := _M / b; ; {
		d := hexValue(ch)
		if d >= b {
			r.UnreadRune() // ch does not belong to number anymore
			break
		}

		if bb <= max {
			bb *= b
			dd = dd*b + d
		} else {
			// bb * b would overflow
			z = z.mulAddWW(z, bb, dd)
			bb = b
			dd = d
		}

		if ch, _, err = r.ReadRune(); err != nil {
			if err != io.EOF {
				return z, int(b), err
			}
			break
		}
	}

	// bb > 1 exactly when at least one digit was accumulated above
	switch {
	case bb > 1:
		// there was at least one mantissa digit
		z = z.mulAddWW(z, bb, dd)
	case base == 0 && b == 8:
		// there was only the octal prefix 0 (possibly followed by digits > 7);
		// return base 10, not 8
		return z, 10, nil
	case base != 0 || b != 8:
		// there was neither a mantissa digit nor the octal prefix 0
		return z, int(b), errors.New("syntax error scanning number")
	}

	return z.norm(), int(b), nil
}
+const ( + lowercaseDigits = "0123456789abcdefghijklmnopqrstuvwxyz" + uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" +) + +// decimalString returns a decimal representation of x. +// It calls x.string with the charset "0123456789". +func (x nat) decimalString() string { + return x.string(lowercaseDigits[0:10]) +} + +// string converts x to a string using digits from a charset; a digit with +// value d is represented by charset[d]. The conversion base is determined +// by len(charset), which must be >= 2 and <= 256. +func (x nat) string(charset string) string { + b := Word(len(charset)) + + // special cases + switch { + case b < 2 || MaxBase > 256: + panic("illegal base") + case len(x) == 0: + return string(charset[0]) + } + + // allocate buffer for conversion + i := int(float64(x.bitLen())/math.Log2(float64(b))) + 1 // off by one at most + s := make([]byte, i) + + // convert power of two and non power of two bases separately + if b == b&-b { + // shift is base-b digit size in bits + shift := trailingZeroBits(b) // shift > 0 because b >= 2 + mask := Word(1)<<shift - 1 + w := x[0] + nbits := uint(_W) // number of unprocessed bits in w + + // convert less-significant words + for k := 1; k < len(x); k++ { + // convert full digits + for nbits >= shift { + i-- + s[i] = charset[w&mask] + w >>= shift + nbits -= shift + } + + // convert any partial leading digit and advance to next word + if nbits == 0 { + // no partial digit remaining, just advance + w = x[k] + nbits = _W + } else { + // partial digit in current (k-1) and next (k) word + w |= x[k] << nbits + i-- + s[i] = charset[w&mask] + + // advance + w = x[k] >> (shift - nbits) + nbits = _W - (shift - nbits) + } + } + + // convert digits of most-significant word (omit leading zeros) + for nbits >= 0 && w != 0 { + i-- + s[i] = charset[w&mask] + w >>= shift + nbits -= shift + } + + } else { + // determine "big base"; i.e., the largest possible value bb + // that is a power of base b and still fits into a Word + 
// (as in 10^19 for 19 decimal digits in a 64bit Word) + bb := b // big base is b**ndigits + ndigits := 1 // number of base b digits + for max := Word(_M / b); bb <= max; bb *= b { + ndigits++ // maximize ndigits where bb = b**ndigits, bb <= _M + } + + // construct table of successive squares of bb*leafSize to use in subdivisions + // result (table != nil) <=> (len(x) > leafSize > 0) + table := divisors(len(x), b, ndigits, bb) + + // preserve x, create local copy for use by convertWords + q := nat(nil).set(x) + + // convert q to string s in base b + q.convertWords(s, charset, b, ndigits, bb, table) + + // strip leading zeros + // (x != 0; thus s must contain at least one non-zero digit + // and the loop will terminate) + i = 0 + for zero := charset[0]; s[i] == zero; { + i++ + } + } + + return string(s[i:]) +} + +// Convert words of q to base b digits in s. If q is large, it is recursively "split in half" +// by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using +// repeated nat/Word division. +// +// The iterative method processes n Words by n divW() calls, each of which visits every Word in the +// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. +// Recursive conversion divides q by its approximate square root, yielding two parts, each half +// the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s +// plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and +// is made better by splitting the subblocks recursively. Best is to split blocks until one more +// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the +// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the +// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and +// ~30x for 20000 digits. 
Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for +// specific hardware. +// +func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) { + // split larger blocks recursively + if table != nil { + // len(q) > leafSize > 0 + var r nat + index := len(table) - 1 + for len(q) > leafSize { + // find divisor close to sqrt(q) if possible, but in any case < q + maxLength := q.bitLen() // ~= log2 q, or at of least largest possible q of this bit length + minLength := maxLength >> 1 // ~= log2 sqrt(q) + for index > 0 && table[index-1].nbits > minLength { + index-- // desired + } + if table[index].nbits >= maxLength && table[index].bbb.cmp(q) >= 0 { + index-- + if index < 0 { + panic("internal inconsistency") + } + } + + // split q into the two digit number (q'*bbb + r) to form independent subblocks + q, r = q.div(r, q, table[index].bbb) + + // convert subblocks and collect results in s[:h] and s[h:] + h := len(s) - table[index].ndigits + r.convertWords(s[h:], charset, b, ndigits, bb, table[0:index]) + s = s[:h] // == q.convertWords(s, charset, b, ndigits, bb, table[0:index+1]) + } + } + + // having split any large blocks now process the remaining (small) block iteratively + i := len(s) + var r Word + if b == 10 { + // hard-coding for 10 here speeds this up by 1.25x (allows for / and % by constants) + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) + for j := 0; j < ndigits && i > 0; j++ { + i-- + // avoid % computation since r%10 == r - int(r/10)*10; + // this appears to be faster for BenchmarkString10000Base10 + // and smaller strings (but a bit slower for larger ones) + t := r / 10 + s[i] = charset[r-t<<3-t-t] // TODO(gri) replace w/ t*10 once compiler produces better code + r = t + } + } + } else { + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) + for j := 0; j < ndigits && i > 0; j++ { + i-- + s[i] = charset[r%b] + r /= b + } + } + } 
+ + // prepend high-order zeroes + zero := charset[0] + for i > 0 { // while need more leading zeroes + i-- + s[i] = zero + } +} + +// Split blocks greater than leafSize Words (or set to 0 to disable recursive conversion) +// Benchmark and configure leafSize using: go test -bench="Leaf" +// 8 and 16 effective on 3.0 GHz Xeon "Clovertown" CPU (128 byte cache lines) +// 8 and 16 effective on 2.66 GHz Core 2 Duo "Penryn" CPU +var leafSize int = 8 // number of Word-size binary values treat as a monolithic block + +type divisor struct { + bbb nat // divisor + nbits int // bit length of divisor (discounting leading zeroes) ~= log2(bbb) + ndigits int // digit length of divisor in terms of output base digits +} + +var cacheBase10 struct { + sync.Mutex + table [64]divisor // cached divisors for base 10 +} + +// expWW computes x**y +func (z nat) expWW(x, y Word) nat { + return z.expNN(nat(nil).setWord(x), nat(nil).setWord(y), nil) +} + +// construct table of powers of bb*leafSize to use in subdivisions +func divisors(m int, b Word, ndigits int, bb Word) []divisor { + // only compute table when recursive conversion is enabled and x is large + if leafSize == 0 || m <= leafSize { + return nil + } + + // determine k where (bb**leafSize)**(2**k) >= sqrt(x) + k := 1 + for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 { + k++ + } + + // reuse and extend existing table of divisors or create new table as appropriate + var table []divisor // for b == 10, table overlaps with cacheBase10.table + if b == 10 { + cacheBase10.Lock() + table = cacheBase10.table[0:k] // reuse old table for this conversion + } else { + table = make([]divisor, k) // create new table for this conversion + } + + // extend table + if table[k-1].ndigits == 0 { + // add new entries as needed + var larger nat + for i := 0; i < k; i++ { + if table[i].ndigits == 0 { + if i == 0 { + table[0].bbb = nat(nil).expWW(bb, Word(leafSize)) + table[0].ndigits = ndigits * leafSize + } else { + 
table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb) + table[i].ndigits = 2 * table[i-1].ndigits + } + + // optimization: exploit aggregated extra bits in macro blocks + larger = nat(nil).set(table[i].bbb) + for mulAddVWW(larger, larger, b, 0) == 0 { + table[i].bbb = table[i].bbb.set(larger) + table[i].ndigits++ + } + + table[i].nbits = table[i].bbb.bitLen() + } + } + } + + if b == 10 { + cacheBase10.Unlock() + } + + return table +} + +const deBruijn32 = 0x077CB531 + +var deBruijn32Lookup = []byte{ + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, +} + +const deBruijn64 = 0x03f79d71b4ca8b09 + +var deBruijn64Lookup = []byte{ + 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, + 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, + 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, + 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, +} + +// trailingZeroBits returns the number of consecutive least significant zero +// bits of x. +func trailingZeroBits(x Word) uint { + // x & -x leaves only the right-most bit set in the word. Let k be the + // index of that bit. Since only a single bit is set, the value is two + // to the power of k. Multiplying by a power of two is equivalent to + // left shifting, in this case by k bits. The de Bruijn constant is + // such that all six bit, consecutive substrings are distinct. + // Therefore, if we have a left shifted version of this constant we can + // find by how many bits it was shifted by looking at which six bit + // substring ended up at the top of the word. + // (Knuth, volume 4, section 7.3.1) + switch _W { + case 32: + return uint(deBruijn32Lookup[((x&-x)*deBruijn32)>>27]) + case 64: + return uint(deBruijn64Lookup[((x&-x)*(deBruijn64&_M))>>58]) + default: + panic("unknown word size") + } +} + +// trailingZeroBits returns the number of consecutive least significant zero +// bits of x. 
+func (x nat) trailingZeroBits() uint { + if len(x) == 0 { + return 0 + } + var i uint + for x[i] == 0 { + i++ + } + // x[i] != 0 + return i*_W + trailingZeroBits(x[i]) +} + +// z = x << s +func (z nat) shl(x nat, s uint) nat { + m := len(x) + if m == 0 { + return z.make(0) + } + // m > 0 + + n := m + int(s/_W) + z = z.make(n + 1) + z[n] = shlVU(z[n-m:n], x, s%_W) + z[0 : n-m].clear() + + return z.norm() +} + +// z = x >> s +func (z nat) shr(x nat, s uint) nat { + m := len(x) + n := m - int(s/_W) + if n <= 0 { + return z.make(0) + } + // n > 0 + + z = z.make(n) + shrVU(z, x[m-n:], s%_W) + + return z.norm() +} + +func (z nat) setBit(x nat, i uint, b uint) nat { + j := int(i / _W) + m := Word(1) << (i % _W) + n := len(x) + switch b { + case 0: + z = z.make(n) + copy(z, x) + if j >= n { + // no need to grow + return z + } + z[j] &^= m + return z.norm() + case 1: + if j >= n { + z = z.make(j + 1) + z[n:].clear() + } else { + z = z.make(n) + } + copy(z, x) + z[j] |= m + // no need to normalize + return z + } + panic("set bit is not 0 or 1") +} + +func (z nat) bit(i uint) uint { + j := int(i / _W) + if j >= len(z) { + return 0 + } + return uint(z[j] >> (i % _W) & 1) +} + +func (z nat) and(x, y nat) nat { + m := len(x) + n := len(y) + if m > n { + m = n + } + // m <= n + + z = z.make(m) + for i := 0; i < m; i++ { + z[i] = x[i] & y[i] + } + + return z.norm() +} + +func (z nat) andNot(x, y nat) nat { + m := len(x) + n := len(y) + if n > m { + n = m + } + // m >= n + + z = z.make(m) + for i := 0; i < n; i++ { + z[i] = x[i] &^ y[i] + } + copy(z[n:m], x[n:m]) + + return z.norm() +} + +func (z nat) or(x, y nat) nat { + m := len(x) + n := len(y) + s := x + if m < n { + n, m = m, n + s = y + } + // m >= n + + z = z.make(m) + for i := 0; i < n; i++ { + z[i] = x[i] | y[i] + } + copy(z[n:m], s[n:m]) + + return z.norm() +} + +func (z nat) xor(x, y nat) nat { + m := len(x) + n := len(y) + s := x + if m < n { + n, m = m, n + s = y + } + // m >= n + + z = z.make(m) + for i := 0; i < n; 
i++ { + z[i] = x[i] ^ y[i] + } + copy(z[n:m], s[n:m]) + + return z.norm() +} + +// greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2) +func greaterThan(x1, x2, y1, y2 Word) bool { + return x1 > y1 || x1 == y1 && x2 > y2 +} + +// modW returns x % d. +func (x nat) modW(d Word) (r Word) { + // TODO(agl): we don't actually need to store the q value. + var q nat + q = q.make(len(x)) + return divWVW(q, 0, x, d) +} + +// random creates a random integer in [0..limit), using the space in z if +// possible. n is the bit length of limit. +func (z nat) random(rand *rand.Rand, limit nat, n int) nat { + if alias(z, limit) { + z = nil // z is an alias for limit - cannot reuse + } + z = z.make(len(limit)) + + bitLengthOfMSW := uint(n % _W) + if bitLengthOfMSW == 0 { + bitLengthOfMSW = _W + } + mask := Word((1 << bitLengthOfMSW) - 1) + + for { + switch _W { + case 32: + for i := range z { + z[i] = Word(rand.Uint32()) + } + case 64: + for i := range z { + z[i] = Word(rand.Uint32()) | Word(rand.Uint32())<<32 + } + default: + panic("unknown word size") + } + z[len(limit)-1] &= mask + if z.cmp(limit) < 0 { + break + } + } + + return z.norm() +} + +// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; +// otherwise it sets z to x**y. The result is the value of z. +func (z nat) expNN(x, y, m nat) nat { + if alias(z, x) || alias(z, y) { + // We cannot allow in-place modification of x or y. + z = nil + } + + // x**y mod 1 == 0 + if len(m) == 1 && m[0] == 1 { + return z.setWord(0) + } + // m == 0 || m > 1 + + // x**0 == 1 + if len(y) == 0 { + return z.setWord(1) + } + // y > 0 + + if len(m) != 0 { + // We likely end up being as long as the modulus. + z = z.make(len(m)) + } + z = z.set(x) + + // If the base is non-trivial and the exponent is large, we use + // 4-bit, windowed exponentiation. This involves precomputing 14 values + // (x^2...x^15) but then reduces the number of multiply-reduces by a + // third. 
Even for a 32-bit exponent, this reduces the number of + // operations. + if len(x) > 1 && len(y) > 1 && len(m) > 0 { + return z.expNNWindowed(x, y, m) + } + + v := y[len(y)-1] // v > 0 because y is normalized and y > 0 + shift := leadingZeros(v) + 1 + v <<= shift + var q nat + + const mask = 1 << (_W - 1) + + // We walk through the bits of the exponent one by one. Each time we + // see a bit, we square, thus doubling the power. If the bit is a one, + // we also multiply by x, thus adding one to the power. + + w := _W - int(shift) + // zz and r are used to avoid allocating in mul and div as + // otherwise the arguments would alias. + var zz, r nat + for j := 0; j < w; j++ { + zz = zz.mul(z, z) + zz, z = z, zz + + if v&mask != 0 { + zz = zz.mul(z, x) + zz, z = z, zz + } + + if len(m) != 0 { + zz, r = zz.div(r, z, m) + zz, r, q, z = q, z, zz, r + } + + v <<= 1 + } + + for i := len(y) - 2; i >= 0; i-- { + v = y[i] + + for j := 0; j < _W; j++ { + zz = zz.mul(z, z) + zz, z = z, zz + + if v&mask != 0 { + zz = zz.mul(z, x) + zz, z = z, zz + } + + if len(m) != 0 { + zz, r = zz.div(r, z, m) + zz, r, q, z = q, z, zz, r + } + + v <<= 1 + } + } + + return z.norm() +} + +// expNNWindowed calculates x**y mod m using a fixed, 4-bit window. +func (z nat) expNNWindowed(x, y, m nat) nat { + // zz and r are used to avoid allocating in mul and div as otherwise + // the arguments would alias. + var zz, r nat + + const n = 4 + // powers[i] contains x^i. + var powers [1 << n]nat + powers[0] = natOne + powers[1] = x + for i := 2; i < 1<<n; i += 2 { + p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1] + *p = p.mul(*p2, *p2) + zz, r = zz.div(r, *p, m) + *p, r = r, *p + *p1 = p1.mul(*p, x) + zz, r = zz.div(r, *p1, m) + *p1, r = r, *p1 + } + + z = z.setWord(1) + + for i := len(y) - 1; i >= 0; i-- { + yi := y[i] + for j := 0; j < _W; j += n { + if i != len(y)-1 || j != 0 { + // Unrolled loop for significant performance + // gain. 
Use go test -bench=".*" in crypto/rsa + // to check performance before making changes. + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + } + + zz = zz.mul(z, powers[yi>>(_W-n)]) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + yi <<= n + } + } + + return z.norm() +} + +// probablyPrime performs reps Miller-Rabin tests to check whether n is prime. +// If it returns true, n is prime with probability 1 - 1/4^reps. +// If it returns false, n is not prime. +func (n nat) probablyPrime(reps int) bool { + if len(n) == 0 { + return false + } + + if len(n) == 1 { + if n[0] < 2 { + return false + } + + if n[0]%2 == 0 { + return n[0] == 2 + } + + // We have to exclude these cases because we reject all + // multiples of these numbers below. 
+ switch n[0] { + case 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53: + return true + } + } + + const primesProduct32 = 0xC0CFD797 // Π {p ∈ primes, 2 < p <= 29} + const primesProduct64 = 0xE221F97C30E94E1D // Π {p ∈ primes, 2 < p <= 53} + + var r Word + switch _W { + case 32: + r = n.modW(primesProduct32) + case 64: + r = n.modW(primesProduct64 & _M) + default: + panic("Unknown word size") + } + + if r%3 == 0 || r%5 == 0 || r%7 == 0 || r%11 == 0 || + r%13 == 0 || r%17 == 0 || r%19 == 0 || r%23 == 0 || r%29 == 0 { + return false + } + + if _W == 64 && (r%31 == 0 || r%37 == 0 || r%41 == 0 || + r%43 == 0 || r%47 == 0 || r%53 == 0) { + return false + } + + nm1 := nat(nil).sub(n, natOne) + // determine q, k such that nm1 = q << k + k := nm1.trailingZeroBits() + q := nat(nil).shr(nm1, k) + + nm3 := nat(nil).sub(nm1, natTwo) + rand := rand.New(rand.NewSource(int64(n[0]))) + + var x, y, quotient nat + nm3Len := nm3.bitLen() + +NextRandom: + for i := 0; i < reps; i++ { + x = x.random(rand, nm3, nm3Len) + x = x.add(x, natTwo) + y = y.expNN(x, q, n) + if y.cmp(natOne) == 0 || y.cmp(nm1) == 0 { + continue + } + for j := uint(1); j < k; j++ { + y = y.mul(y, y) + quotient, y = quotient.div(y, y, n) + if y.cmp(nm1) == 0 { + continue NextRandom + } + if y.cmp(natOne) == 0 { + return false + } + } + return false + } + + return true +} + +// bytes writes the value of z into buf using big-endian encoding. +// len(buf) must be >= len(z)*_S. The value of z is encoded in the +// slice buf[i:]. The number i of unused bytes at the beginning of +// buf is returned as result. +func (z nat) bytes(buf []byte) (i int) { + i = len(buf) + for _, d := range z { + for j := 0; j < _S; j++ { + i-- + buf[i] = byte(d) + d >>= 8 + } + } + + for i < len(buf) && buf[i] == 0 { + i++ + } + + return +} + +// setBytes interprets buf as the bytes of a big-endian unsigned +// integer, sets z to that value, and returns z. 
+func (z nat) setBytes(buf []byte) nat { + z = z.make((len(buf) + _S - 1) / _S) + + k := 0 + s := uint(0) + var d Word + for i := len(buf); i > 0; i-- { + d |= Word(buf[i-1]) << s + if s += 8; s == _S*8 { + z[k] = d + k++ + s = 0 + d = 0 + } + } + if k < len(z) { + z[k] = d + } + + return z.norm() +} diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go new file mode 100644 index 000000000..a2ae53385 --- /dev/null +++ b/src/math/big/nat_test.go @@ -0,0 +1,771 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package big + +import ( + "io" + "runtime" + "strings" + "testing" +) + +var cmpTests = []struct { + x, y nat + r int +}{ + {nil, nil, 0}, + {nil, nat(nil), 0}, + {nat(nil), nil, 0}, + {nat(nil), nat(nil), 0}, + {nat{0}, nat{0}, 0}, + {nat{0}, nat{1}, -1}, + {nat{1}, nat{0}, 1}, + {nat{1}, nat{1}, 0}, + {nat{0, _M}, nat{1}, 1}, + {nat{1}, nat{0, _M}, -1}, + {nat{1, _M}, nat{0, _M}, 1}, + {nat{0, _M}, nat{1, _M}, -1}, + {nat{16, 571956, 8794, 68}, nat{837, 9146, 1, 754489}, -1}, + {nat{34986, 41, 105, 1957}, nat{56, 7458, 104, 1957}, 1}, +} + +func TestCmp(t *testing.T) { + for i, a := range cmpTests { + r := a.x.cmp(a.y) + if r != a.r { + t.Errorf("#%d got r = %v; want %v", i, r, a.r) + } + } +} + +type funNN func(z, x, y nat) nat +type argNN struct { + z, x, y nat +} + +var sumNN = []argNN{ + {}, + {nat{1}, nil, nat{1}}, + {nat{1111111110}, nat{123456789}, nat{987654321}}, + {nat{0, 0, 0, 1}, nil, nat{0, 0, 0, 1}}, + {nat{0, 0, 0, 1111111110}, nat{0, 0, 0, 123456789}, nat{0, 0, 0, 987654321}}, + {nat{0, 0, 0, 1}, nat{0, 0, _M}, nat{0, 0, 1}}, +} + +var prodNN = []argNN{ + {}, + {nil, nil, nil}, + {nil, nat{991}, nil}, + {nat{991}, nat{991}, nat{1}}, + {nat{991 * 991}, nat{991}, nat{991}}, + {nat{0, 0, 991 * 991}, nat{0, 991}, nat{0, 991}}, + {nat{1 * 991, 2 * 991, 3 * 991, 4 * 991}, nat{1, 2, 3, 4}, nat{991}}, + {nat{4, 11, 20, 30, 
20, 11, 4}, nat{1, 2, 3, 4}, nat{4, 3, 2, 1}}, + // 3^100 * 3^28 = 3^128 + { + natFromString("11790184577738583171520872861412518665678211592275841109096961"), + natFromString("515377520732011331036461129765621272702107522001"), + natFromString("22876792454961"), + }, + // z = 111....1 (70000 digits) + // x = 10^(99*700) + ... + 10^1400 + 10^700 + 1 + // y = 111....1 (700 digits, larger than Karatsuba threshold on 32-bit and 64-bit) + { + natFromString(strings.Repeat("1", 70000)), + natFromString("1" + strings.Repeat(strings.Repeat("0", 699)+"1", 99)), + natFromString(strings.Repeat("1", 700)), + }, + // z = 111....1 (20000 digits) + // x = 10^10000 + 1 + // y = 111....1 (10000 digits) + { + natFromString(strings.Repeat("1", 20000)), + natFromString("1" + strings.Repeat("0", 9999) + "1"), + natFromString(strings.Repeat("1", 10000)), + }, +} + +func natFromString(s string) nat { + x, _, err := nat(nil).scan(strings.NewReader(s), 0) + if err != nil { + panic(err) + } + return x +} + +func TestSet(t *testing.T) { + for _, a := range sumNN { + z := nat(nil).set(a.z) + if z.cmp(a.z) != 0 { + t.Errorf("got z = %v; want %v", z, a.z) + } + } +} + +func testFunNN(t *testing.T, msg string, f funNN, a argNN) { + z := f(nil, a.x, a.y) + if z.cmp(a.z) != 0 { + t.Errorf("%s%+v\n\tgot z = %v; want %v", msg, a, z, a.z) + } +} + +func TestFunNN(t *testing.T) { + for _, a := range sumNN { + arg := a + testFunNN(t, "add", nat.add, arg) + + arg = argNN{a.z, a.y, a.x} + testFunNN(t, "add symmetric", nat.add, arg) + + arg = argNN{a.x, a.z, a.y} + testFunNN(t, "sub", nat.sub, arg) + + arg = argNN{a.y, a.z, a.x} + testFunNN(t, "sub symmetric", nat.sub, arg) + } + + for _, a := range prodNN { + arg := a + testFunNN(t, "mul", nat.mul, arg) + + arg = argNN{a.z, a.y, a.x} + testFunNN(t, "mul symmetric", nat.mul, arg) + } +} + +var mulRangesN = []struct { + a, b uint64 + prod string +}{ + {0, 0, "0"}, + {1, 1, "1"}, + {1, 2, "2"}, + {1, 3, "6"}, + {10, 10, "10"}, + {0, 100, "0"}, + {0, 1e9, 
"0"}, + {1, 0, "1"}, // empty range + {100, 1, "1"}, // empty range + {1, 10, "3628800"}, // 10! + {1, 20, "2432902008176640000"}, // 20! + {1, 100, + "933262154439441526816992388562667004907159682643816214685929" + + "638952175999932299156089414639761565182862536979208272237582" + + "51185210916864000000000000000000000000", // 100! + }, +} + +func TestMulRangeN(t *testing.T) { + for i, r := range mulRangesN { + prod := nat(nil).mulRange(r.a, r.b).decimalString() + if prod != r.prod { + t.Errorf("#%d: got %s; want %s", i, prod, r.prod) + } + } +} + +// allocBytes returns the number of bytes allocated by invoking f. +func allocBytes(f func()) uint64 { + var stats runtime.MemStats + runtime.ReadMemStats(&stats) + t := stats.TotalAlloc + f() + runtime.ReadMemStats(&stats) + return stats.TotalAlloc - t +} + +// TestMulUnbalanced tests that multiplying numbers of different lengths +// does not cause deep recursion and in turn allocate too much memory. +// Test case for issue 3807. +func TestMulUnbalanced(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + x := rndNat(50000) + y := rndNat(40) + allocSize := allocBytes(func() { + nat(nil).mul(x, y) + }) + inputSize := uint64(len(x)+len(y)) * _S + if ratio := allocSize / uint64(inputSize); ratio > 10 { + t.Errorf("multiplication uses too much memory (%d > %d times the size of inputs)", allocSize, ratio) + } +} + +func rndNat(n int) nat { + return nat(rndV(n)).norm() +} + +func BenchmarkMul(b *testing.B) { + mulx := rndNat(1e4) + muly := rndNat(1e4) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var z nat + z.mul(mulx, muly) + } +} + +func toString(x nat, charset string) string { + base := len(charset) + + // special cases + switch { + case base < 2: + panic("illegal base") + case len(x) == 0: + return string(charset[0]) + } + + // allocate buffer for conversion + i := x.bitLen()/log2(Word(base)) + 1 // +1: round up + s := make([]byte, i) + + // don't destroy x + q := nat(nil).set(x) + + // convert + for 
len(q) > 0 { + i-- + var r Word + q, r = q.divW(q, Word(base)) + s[i] = charset[r] + } + + return string(s[i:]) +} + +var strTests = []struct { + x nat // nat value to be converted + c string // conversion charset + s string // expected result +}{ + {nil, "01", "0"}, + {nat{1}, "01", "1"}, + {nat{0xc5}, "01", "11000101"}, + {nat{03271}, lowercaseDigits[0:8], "3271"}, + {nat{10}, lowercaseDigits[0:10], "10"}, + {nat{1234567890}, uppercaseDigits[0:10], "1234567890"}, + {nat{0xdeadbeef}, lowercaseDigits[0:16], "deadbeef"}, + {nat{0xdeadbeef}, uppercaseDigits[0:16], "DEADBEEF"}, + {nat{0x229be7}, lowercaseDigits[0:17], "1a2b3c"}, + {nat{0x309663e6}, uppercaseDigits[0:32], "O9COV6"}, +} + +func TestString(t *testing.T) { + for _, a := range strTests { + s := a.x.string(a.c) + if s != a.s { + t.Errorf("string%+v\n\tgot s = %s; want %s", a, s, a.s) + } + + x, b, err := nat(nil).scan(strings.NewReader(a.s), len(a.c)) + if x.cmp(a.x) != 0 { + t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != len(a.c) { + t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, len(a.c)) + } + if err != nil { + t.Errorf("scan%+v\n\tgot error = %s", a, err) + } + } +} + +var natScanTests = []struct { + s string // string to be scanned + base int // input base + x nat // expected nat + b int // expected base + ok bool // expected success + next rune // next character (or 0, if at EOF) +}{ + // error: illegal base + {base: -1}, + {base: 1}, + {base: 37}, + + // error: no mantissa + {}, + {s: "?"}, + {base: 10}, + {base: 36}, + {s: "?", base: 10}, + {s: "0x"}, + {s: "345", base: 2}, + + // no errors + {"0", 0, nil, 10, true, 0}, + {"0", 10, nil, 10, true, 0}, + {"0", 36, nil, 36, true, 0}, + {"1", 0, nat{1}, 10, true, 0}, + {"1", 10, nat{1}, 10, true, 0}, + {"0 ", 0, nil, 10, true, ' '}, + {"08", 0, nil, 10, true, '8'}, + {"018", 0, nat{1}, 8, true, '8'}, + {"0b1", 0, nat{1}, 2, true, 0}, + {"0b11000101", 0, nat{0xc5}, 2, true, 0}, + {"03271", 0, nat{03271}, 8, true, 0}, + {"10ab", 0, 
nat{10}, 10, true, 'a'}, + {"1234567890", 0, nat{1234567890}, 10, true, 0}, + {"xyz", 36, nat{(33*36+34)*36 + 35}, 36, true, 0}, + {"xyz?", 36, nat{(33*36+34)*36 + 35}, 36, true, '?'}, + {"0x", 16, nil, 16, true, 'x'}, + {"0xdeadbeef", 0, nat{0xdeadbeef}, 16, true, 0}, + {"0XDEADBEEF", 0, nat{0xdeadbeef}, 16, true, 0}, +} + +func TestScanBase(t *testing.T) { + for _, a := range natScanTests { + r := strings.NewReader(a.s) + x, b, err := nat(nil).scan(r, a.base) + if err == nil && !a.ok { + t.Errorf("scan%+v\n\texpected error", a) + } + if err != nil { + if a.ok { + t.Errorf("scan%+v\n\tgot error = %s", a, err) + } + continue + } + if x.cmp(a.x) != 0 { + t.Errorf("scan%+v\n\tgot z = %v; want %v", a, x, a.x) + } + if b != a.b { + t.Errorf("scan%+v\n\tgot b = %d; want %d", a, b, a.base) + } + next, _, err := r.ReadRune() + if err == io.EOF { + next = 0 + err = nil + } + if err == nil && next != a.next { + t.Errorf("scan%+v\n\tgot next = %q; want %q", a, next, a.next) + } + } +} + +var pi = "3" + + "14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651" + + "32823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461" + + "28475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920" + + "96282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179" + + "31051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798" + + "60943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901" + + "22495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837" + + "29780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083" + + 
"81420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909" + + "21642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151" + + "55748572424541506959508295331168617278558890750983817546374649393192550604009277016711390098488240128583616035" + + "63707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104" + + "75216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992" + + "45863150302861829745557067498385054945885869269956909272107975093029553211653449872027559602364806654991198818" + + "34797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548" + + "16136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179" + + "04946016534668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886" + + "26945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645" + + "99581339047802759009946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745" + + "53050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382" + + "68683868942774155991855925245953959431049972524680845987273644695848653836736222626099124608051243884390451244" + + "13654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767" + + "88952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288" + + "79710893145669136867228748940560101503308617928680920874760917824938589009714909675985261365549781893129784821" + + "68299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610" + + 
"21359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435" + + "06430218453191048481005370614680674919278191197939952061419663428754440643745123718192179998391015919561814675" + + "14269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672" + + "21825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539" + + "05796268561005508106658796998163574736384052571459102897064140110971206280439039759515677157700420337869936007" + + "23055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816" + + "90915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398" + + "31501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064" + + "20467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325" + + "97463667305836041428138830320382490375898524374417029132765618093773444030707469211201913020330380197621101100" + + "44929321516084244485963766983895228684783123552658213144957685726243344189303968642624341077322697802807318915" + + "44110104468232527162010526522721116603966655730925471105578537634668206531098965269186205647693125705863566201" + + "85581007293606598764861179104533488503461136576867532494416680396265797877185560845529654126654085306143444318" + + "58676975145661406800700237877659134401712749470420562230538994561314071127000407854733269939081454664645880797" + + "27082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923" + + "09907965473761255176567513575178296664547791745011299614890304639947132962107340437518957359614589019389713111" + + "79042978285647503203198691514028708085990480109412147221317947647772622414254854540332157185306142288137585043" + + 
"06332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120" + + "91807638327166416274888800786925602902284721040317211860820419000422966171196377921337575114959501566049631862" + + "94726547364252308177036751590673502350728354056704038674351362222477158915049530984448933309634087807693259939" + + "78054193414473774418426312986080998886874132604721569516239658645730216315981931951673538129741677294786724229" + + "24654366800980676928238280689964004824354037014163149658979409243237896907069779422362508221688957383798623001" + + "59377647165122893578601588161755782973523344604281512627203734314653197777416031990665541876397929334419521541" + + "34189948544473456738316249934191318148092777710386387734317720754565453220777092120190516609628049092636019759" + + "88281613323166636528619326686336062735676303544776280350450777235547105859548702790814356240145171806246436267" + + "94561275318134078330336254232783944975382437205835311477119926063813346776879695970309833913077109870408591337" + +// Test case for BenchmarkScanPi. 
+func TestScanPi(t *testing.T) { + var x nat + z, _, err := x.scan(strings.NewReader(pi), 10) + if err != nil { + t.Errorf("scanning pi: %s", err) + } + if s := z.decimalString(); s != pi { + t.Errorf("scanning pi: got %s", s) + } +} + +func TestScanPiParallel(t *testing.T) { + const n = 2 + c := make(chan int) + for i := 0; i < n; i++ { + go func() { + TestScanPi(t) + c <- 0 + }() + } + for i := 0; i < n; i++ { + <-c + } +} + +func BenchmarkScanPi(b *testing.B) { + for i := 0; i < b.N; i++ { + var x nat + x.scan(strings.NewReader(pi), 10) + } +} + +func BenchmarkStringPiParallel(b *testing.B) { + var x nat + x, _, _ = x.scan(strings.NewReader(pi), 0) + if x.decimalString() != pi { + panic("benchmark incorrect: conversion failed") + } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + x.decimalString() + } + }) +} + +func BenchmarkScan10Base2(b *testing.B) { ScanHelper(b, 2, 10, 10) } +func BenchmarkScan100Base2(b *testing.B) { ScanHelper(b, 2, 10, 100) } +func BenchmarkScan1000Base2(b *testing.B) { ScanHelper(b, 2, 10, 1000) } +func BenchmarkScan10000Base2(b *testing.B) { ScanHelper(b, 2, 10, 10000) } +func BenchmarkScan100000Base2(b *testing.B) { ScanHelper(b, 2, 10, 100000) } + +func BenchmarkScan10Base8(b *testing.B) { ScanHelper(b, 8, 10, 10) } +func BenchmarkScan100Base8(b *testing.B) { ScanHelper(b, 8, 10, 100) } +func BenchmarkScan1000Base8(b *testing.B) { ScanHelper(b, 8, 10, 1000) } +func BenchmarkScan10000Base8(b *testing.B) { ScanHelper(b, 8, 10, 10000) } +func BenchmarkScan100000Base8(b *testing.B) { ScanHelper(b, 8, 10, 100000) } + +func BenchmarkScan10Base10(b *testing.B) { ScanHelper(b, 10, 10, 10) } +func BenchmarkScan100Base10(b *testing.B) { ScanHelper(b, 10, 10, 100) } +func BenchmarkScan1000Base10(b *testing.B) { ScanHelper(b, 10, 10, 1000) } +func BenchmarkScan10000Base10(b *testing.B) { ScanHelper(b, 10, 10, 10000) } +func BenchmarkScan100000Base10(b *testing.B) { ScanHelper(b, 10, 10, 100000) } + +func BenchmarkScan10Base16(b 
*testing.B) { ScanHelper(b, 16, 10, 10) } +func BenchmarkScan100Base16(b *testing.B) { ScanHelper(b, 16, 10, 100) } +func BenchmarkScan1000Base16(b *testing.B) { ScanHelper(b, 16, 10, 1000) } +func BenchmarkScan10000Base16(b *testing.B) { ScanHelper(b, 16, 10, 10000) } +func BenchmarkScan100000Base16(b *testing.B) { ScanHelper(b, 16, 10, 100000) } + +func ScanHelper(b *testing.B, base int, x, y Word) { + b.StopTimer() + var z nat + z = z.expWW(x, y) + + var s string + s = z.string(lowercaseDigits[0:base]) + if t := toString(z, lowercaseDigits[0:base]); t != s { + b.Fatalf("scanning: got %s; want %s", s, t) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + z.scan(strings.NewReader(s), base) + } +} + +func BenchmarkString10Base2(b *testing.B) { StringHelper(b, 2, 10, 10) } +func BenchmarkString100Base2(b *testing.B) { StringHelper(b, 2, 10, 100) } +func BenchmarkString1000Base2(b *testing.B) { StringHelper(b, 2, 10, 1000) } +func BenchmarkString10000Base2(b *testing.B) { StringHelper(b, 2, 10, 10000) } +func BenchmarkString100000Base2(b *testing.B) { StringHelper(b, 2, 10, 100000) } + +func BenchmarkString10Base8(b *testing.B) { StringHelper(b, 8, 10, 10) } +func BenchmarkString100Base8(b *testing.B) { StringHelper(b, 8, 10, 100) } +func BenchmarkString1000Base8(b *testing.B) { StringHelper(b, 8, 10, 1000) } +func BenchmarkString10000Base8(b *testing.B) { StringHelper(b, 8, 10, 10000) } +func BenchmarkString100000Base8(b *testing.B) { StringHelper(b, 8, 10, 100000) } + +func BenchmarkString10Base10(b *testing.B) { StringHelper(b, 10, 10, 10) } +func BenchmarkString100Base10(b *testing.B) { StringHelper(b, 10, 10, 100) } +func BenchmarkString1000Base10(b *testing.B) { StringHelper(b, 10, 10, 1000) } +func BenchmarkString10000Base10(b *testing.B) { StringHelper(b, 10, 10, 10000) } +func BenchmarkString100000Base10(b *testing.B) { StringHelper(b, 10, 10, 100000) } + +func BenchmarkString10Base16(b *testing.B) { StringHelper(b, 16, 10, 10) } +func 
BenchmarkString100Base16(b *testing.B) { StringHelper(b, 16, 10, 100) } +func BenchmarkString1000Base16(b *testing.B) { StringHelper(b, 16, 10, 1000) } +func BenchmarkString10000Base16(b *testing.B) { StringHelper(b, 16, 10, 10000) } +func BenchmarkString100000Base16(b *testing.B) { StringHelper(b, 16, 10, 100000) } + +func StringHelper(b *testing.B, base int, x, y Word) { + b.StopTimer() + var z nat + z = z.expWW(x, y) + z.string(lowercaseDigits[0:base]) // warm divisor cache + b.StartTimer() + + for i := 0; i < b.N; i++ { + _ = z.string(lowercaseDigits[0:base]) + } +} + +func BenchmarkLeafSize0(b *testing.B) { LeafSizeHelper(b, 10, 0) } // test without splitting +func BenchmarkLeafSize1(b *testing.B) { LeafSizeHelper(b, 10, 1) } +func BenchmarkLeafSize2(b *testing.B) { LeafSizeHelper(b, 10, 2) } +func BenchmarkLeafSize3(b *testing.B) { LeafSizeHelper(b, 10, 3) } +func BenchmarkLeafSize4(b *testing.B) { LeafSizeHelper(b, 10, 4) } +func BenchmarkLeafSize5(b *testing.B) { LeafSizeHelper(b, 10, 5) } +func BenchmarkLeafSize6(b *testing.B) { LeafSizeHelper(b, 10, 6) } +func BenchmarkLeafSize7(b *testing.B) { LeafSizeHelper(b, 10, 7) } +func BenchmarkLeafSize8(b *testing.B) { LeafSizeHelper(b, 10, 8) } +func BenchmarkLeafSize9(b *testing.B) { LeafSizeHelper(b, 10, 9) } +func BenchmarkLeafSize10(b *testing.B) { LeafSizeHelper(b, 10, 10) } +func BenchmarkLeafSize11(b *testing.B) { LeafSizeHelper(b, 10, 11) } +func BenchmarkLeafSize12(b *testing.B) { LeafSizeHelper(b, 10, 12) } +func BenchmarkLeafSize13(b *testing.B) { LeafSizeHelper(b, 10, 13) } +func BenchmarkLeafSize14(b *testing.B) { LeafSizeHelper(b, 10, 14) } +func BenchmarkLeafSize15(b *testing.B) { LeafSizeHelper(b, 10, 15) } +func BenchmarkLeafSize16(b *testing.B) { LeafSizeHelper(b, 10, 16) } +func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths +func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) } + +func LeafSizeHelper(b *testing.B, base Word, size int) 
{ + b.StopTimer() + originalLeafSize := leafSize + resetTable(cacheBase10.table[:]) + leafSize = size + b.StartTimer() + + for d := 1; d <= 10000; d *= 10 { + b.StopTimer() + var z nat + z = z.expWW(base, Word(d)) // build target number + _ = z.string(lowercaseDigits[0:base]) // warm divisor cache + b.StartTimer() + + for i := 0; i < b.N; i++ { + _ = z.string(lowercaseDigits[0:base]) + } + } + + b.StopTimer() + resetTable(cacheBase10.table[:]) + leafSize = originalLeafSize + b.StartTimer() +} + +func resetTable(table []divisor) { + if table != nil && table[0].bbb != nil { + for i := 0; i < len(table); i++ { + table[i].bbb = nil + table[i].nbits = 0 + table[i].ndigits = 0 + } + } +} + +func TestStringPowers(t *testing.T) { + var b, p Word + for b = 2; b <= 16; b++ { + for p = 0; p <= 512; p++ { + x := nat(nil).expWW(b, p) + xs := x.string(lowercaseDigits[0:b]) + xs2 := toString(x, lowercaseDigits[0:b]) + if xs != xs2 { + t.Errorf("failed at %d ** %d in base %d: %s != %s", b, p, b, xs, xs2) + } + } + if b >= 3 && testing.Short() { + break + } + } +} + +func TestLeadingZeros(t *testing.T) { + var x Word = _B >> 1 + for i := 0; i <= _W; i++ { + if int(leadingZeros(x)) != i { + t.Errorf("failed at %x: got %d want %d", x, leadingZeros(x), i) + } + x >>= 1 + } +} + +type shiftTest struct { + in nat + shift uint + out nat +} + +var leftShiftTests = []shiftTest{ + {nil, 0, nil}, + {nil, 1, nil}, + {natOne, 0, natOne}, + {natOne, 1, natTwo}, + {nat{1 << (_W - 1)}, 1, nat{0}}, + {nat{1 << (_W - 1), 0}, 1, nat{0, 1}}, +} + +func TestShiftLeft(t *testing.T) { + for i, test := range leftShiftTests { + var z nat + z = z.shl(test.in, test.shift) + for j, d := range test.out { + if j >= len(z) || z[j] != d { + t.Errorf("#%d: got: %v want: %v", i, z, test.out) + break + } + } + } +} + +var rightShiftTests = []shiftTest{ + {nil, 0, nil}, + {nil, 1, nil}, + {natOne, 0, natOne}, + {natOne, 1, nil}, + {natTwo, 1, natOne}, + {nat{0, 1}, 1, nat{1 << (_W - 1)}}, + {nat{2, 1, 1}, 1, 
nat{1<<(_W-1) + 1, 1 << (_W - 1)}}, +} + +func TestShiftRight(t *testing.T) { + for i, test := range rightShiftTests { + var z nat + z = z.shr(test.in, test.shift) + for j, d := range test.out { + if j >= len(z) || z[j] != d { + t.Errorf("#%d: got: %v want: %v", i, z, test.out) + break + } + } + } +} + +type modWTest struct { + in string + dividend string + out string +} + +var modWTests32 = []modWTest{ + {"23492635982634928349238759823742", "252341", "220170"}, +} + +var modWTests64 = []modWTest{ + {"6527895462947293856291561095690465243862946", "524326975699234", "375066989628668"}, +} + +func runModWTests(t *testing.T, tests []modWTest) { + for i, test := range tests { + in, _ := new(Int).SetString(test.in, 10) + d, _ := new(Int).SetString(test.dividend, 10) + out, _ := new(Int).SetString(test.out, 10) + + r := in.abs.modW(d.abs[0]) + if r != out.abs[0] { + t.Errorf("#%d failed: got %d want %s", i, r, out) + } + } +} + +func TestModW(t *testing.T) { + if _W >= 32 { + runModWTests(t, modWTests32) + } + if _W >= 64 { + runModWTests(t, modWTests64) + } +} + +func TestTrailingZeroBits(t *testing.T) { + x := Word(1) + for i := uint(0); i <= _W; i++ { + n := trailingZeroBits(x) + if n != i%_W { + t.Errorf("got trailingZeroBits(%#x) = %d; want %d", x, n, i%_W) + } + x <<= 1 + } + + y := nat(nil).set(natOne) + for i := uint(0); i <= 3*_W; i++ { + n := y.trailingZeroBits() + if n != i { + t.Errorf("got 0x%s.trailingZeroBits() = %d; want %d", y.string(lowercaseDigits[0:16]), n, i) + } + y = y.shl(y, 1) + } +} + +var expNNTests = []struct { + x, y, m string + out string +}{ + {"0", "0", "0", "1"}, + {"0", "0", "1", "0"}, + {"1", "1", "1", "0"}, + {"2", "1", "1", "0"}, + {"2", "2", "1", "0"}, + {"10", "100000000000", "1", "0"}, + {"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"}, + {"0x8000000000000000", "2", "6719", "4944"}, + {"0x8000000000000000", "3", "6719", "5447"}, + {"0x8000000000000000", "1000", "6719", "1603"}, + {"0x8000000000000000", 
"1000000", "6719", "3199"}, + { + "2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347", + "298472983472983471903246121093472394872319615612417471234712061", + "29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464", + "23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291", + }, +} + +func TestExpNN(t *testing.T) { + for i, test := range expNNTests { + x, _, _ := nat(nil).scan(strings.NewReader(test.x), 0) + y, _, _ := nat(nil).scan(strings.NewReader(test.y), 0) + out, _, _ := nat(nil).scan(strings.NewReader(test.out), 0) + + var m nat + + if len(test.m) > 0 { + m, _, _ = nat(nil).scan(strings.NewReader(test.m), 0) + } + + z := nat(nil).expNN(x, y, m) + if z.cmp(out) != 0 { + t.Errorf("#%d got %s want %s", i, z.decimalString(), out.decimalString()) + } + } +} + +func ExpHelper(b *testing.B, x, y Word) { + var z nat + for i := 0; i < b.N; i++ { + z.expWW(x, y) + } +} + +func BenchmarkExp3Power0x10(b *testing.B) { ExpHelper(b, 3, 0x10) } +func BenchmarkExp3Power0x40(b *testing.B) { ExpHelper(b, 3, 0x40) } +func BenchmarkExp3Power0x100(b *testing.B) { ExpHelper(b, 3, 0x100) } +func BenchmarkExp3Power0x400(b *testing.B) { ExpHelper(b, 3, 0x400) } +func BenchmarkExp3Power0x1000(b *testing.B) { ExpHelper(b, 3, 0x1000) } +func BenchmarkExp3Power0x4000(b *testing.B) { ExpHelper(b, 3, 0x4000) } +func BenchmarkExp3Power0x10000(b *testing.B) { ExpHelper(b, 3, 0x10000) } +func BenchmarkExp3Power0x40000(b *testing.B) { ExpHelper(b, 3, 0x40000) } +func BenchmarkExp3Power0x100000(b *testing.B) { ExpHelper(b, 3, 0x100000) } +func BenchmarkExp3Power0x400000(b *testing.B) { ExpHelper(b, 3, 0x400000) } diff --git a/src/math/big/rat.go b/src/math/big/rat.go new file mode 100644 index 000000000..c5339fe44 --- /dev/null +++ b/src/math/big/rat.go @@ -0,0 +1,716 @@ +// Copyright 2010 
The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements multi-precision rational numbers. + +package big + +import ( + "encoding/binary" + "errors" + "fmt" + "math" + "strings" +) + +// A Rat represents a quotient a/b of arbitrary precision. +// The zero value for a Rat represents the value 0. +type Rat struct { + // To make zero values for Rat work w/o initialization, + // a zero value of b (len(b) == 0) acts like b == 1. + // a.neg determines the sign of the Rat, b.neg is ignored. + a, b Int +} + +// NewRat creates a new Rat with numerator a and denominator b. +func NewRat(a, b int64) *Rat { + return new(Rat).SetFrac64(a, b) +} + +// SetFloat64 sets z to exactly f and returns z. +// If f is not finite, SetFloat returns nil. +func (z *Rat) SetFloat64(f float64) *Rat { + const expMask = 1<<11 - 1 + bits := math.Float64bits(f) + mantissa := bits & (1<<52 - 1) + exp := int((bits >> 52) & expMask) + switch exp { + case expMask: // non-finite + return nil + case 0: // denormal + exp -= 1022 + default: // normal + mantissa |= 1 << 52 + exp -= 1023 + } + + shift := 52 - exp + + // Optimization (?): partially pre-normalise. + for mantissa&1 == 0 && shift > 0 { + mantissa >>= 1 + shift-- + } + + z.a.SetUint64(mantissa) + z.a.neg = f < 0 + z.b.Set(intOne) + if shift > 0 { + z.b.Lsh(&z.b, uint(shift)) + } else { + z.a.Lsh(&z.a, uint(-shift)) + } + return z.norm() +} + +// quotToFloat32 returns the non-negative float32 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat32(a, b nat) (f float32, exact bool) { + const ( + // float size in bits + Fsize = 32 + + // mantissa + Msize = 23 + Msize1 = Msize + 1 // incl. 
implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1) + // (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B). + // This is 2 or 3 more than the float32 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low32(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) + } + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Esize1) + lostbits := mantissa & (1<<shift - 1) + haveRem = haveRem || lostbits != 0 + mantissa >>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even. 
+ exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<Msize2 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1. + + f = float32(math.Ldexp(float64(mantissa), exp-Msize1)) + if math.IsInf(float64(f), 0) { + exact = false + } + return +} + +// quotToFloat64 returns the non-negative float64 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat64(a, b nat) (f float64, exact bool) { + const ( + // float size in bits + Fsize = 64 + + // mantissa + Msize = 52 + Msize1 = Msize + 1 // incl. implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1) + // (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B). + // This is 2 or 3 more than the float64 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. 
+ var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low64(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) + } + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Esize1) + lostbits := mantissa & (1<<shift - 1) + haveRem = haveRem || lostbits != 0 + mantissa >>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even. + exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<Msize2 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1. + + f = math.Ldexp(float64(mantissa), exp-Msize1) + if math.IsInf(f, 0) { + exact = false + } + return +} + +// Float32 returns the nearest float32 value for x and a bool indicating +// whether f represents x exactly. If the magnitude of x is too large to +// be represented by a float32, f is an infinity and exact is false. +// The sign of f always matches the sign of x, even if f == 0. +func (x *Rat) Float32() (f float32, exact bool) { + b := x.b.abs + if len(b) == 0 { + b = b.set(natOne) // materialize denominator + } + f, exact = quotToFloat32(x.a.abs, b) + if x.a.neg { + f = -f + } + return +} + +// Float64 returns the nearest float64 value for x and a bool indicating +// whether f represents x exactly. If the magnitude of x is too large to +// be represented by a float64, f is an infinity and exact is false. 
+// The sign of f always matches the sign of x, even if f == 0. +func (x *Rat) Float64() (f float64, exact bool) { + b := x.b.abs + if len(b) == 0 { + b = b.set(natOne) // materialize denominator + } + f, exact = quotToFloat64(x.a.abs, b) + if x.a.neg { + f = -f + } + return +} + +// SetFrac sets z to a/b and returns z. +func (z *Rat) SetFrac(a, b *Int) *Rat { + z.a.neg = a.neg != b.neg + babs := b.abs + if len(babs) == 0 { + panic("division by zero") + } + if &z.a == b || alias(z.a.abs, babs) { + babs = nat(nil).set(babs) // make a copy + } + z.a.abs = z.a.abs.set(a.abs) + z.b.abs = z.b.abs.set(babs) + return z.norm() +} + +// SetFrac64 sets z to a/b and returns z. +func (z *Rat) SetFrac64(a, b int64) *Rat { + z.a.SetInt64(a) + if b == 0 { + panic("division by zero") + } + if b < 0 { + b = -b + z.a.neg = !z.a.neg + } + z.b.abs = z.b.abs.setUint64(uint64(b)) + return z.norm() +} + +// SetInt sets z to x (by making a copy of x) and returns z. +func (z *Rat) SetInt(x *Int) *Rat { + z.a.Set(x) + z.b.abs = z.b.abs.make(0) + return z +} + +// SetInt64 sets z to x and returns z. +func (z *Rat) SetInt64(x int64) *Rat { + z.a.SetInt64(x) + z.b.abs = z.b.abs.make(0) + return z +} + +// Set sets z to x (by making a copy of x) and returns z. +func (z *Rat) Set(x *Rat) *Rat { + if z != x { + z.a.Set(&x.a) + z.b.Set(&x.b) + } + return z +} + +// Abs sets z to |x| (the absolute value of x) and returns z. +func (z *Rat) Abs(x *Rat) *Rat { + z.Set(x) + z.a.neg = false + return z +} + +// Neg sets z to -x and returns z. +func (z *Rat) Neg(x *Rat) *Rat { + z.Set(x) + z.a.neg = len(z.a.abs) > 0 && !z.a.neg // 0 has no sign + return z +} + +// Inv sets z to 1/x and returns z. 
+func (z *Rat) Inv(x *Rat) *Rat { + if len(x.a.abs) == 0 { + panic("division by zero") + } + z.Set(x) + a := z.b.abs + if len(a) == 0 { + a = a.set(natOne) // materialize numerator + } + b := z.a.abs + if b.cmp(natOne) == 0 { + b = b.make(0) // normalize denominator + } + z.a.abs, z.b.abs = a, b // sign doesn't change + return z +} + +// Sign returns: +// +// -1 if x < 0 +// 0 if x == 0 +// +1 if x > 0 +// +func (x *Rat) Sign() int { + return x.a.Sign() +} + +// IsInt returns true if the denominator of x is 1. +func (x *Rat) IsInt() bool { + return len(x.b.abs) == 0 || x.b.abs.cmp(natOne) == 0 +} + +// Num returns the numerator of x; it may be <= 0. +// The result is a reference to x's numerator; it +// may change if a new value is assigned to x, and vice versa. +// The sign of the numerator corresponds to the sign of x. +func (x *Rat) Num() *Int { + return &x.a +} + +// Denom returns the denominator of x; it is always > 0. +// The result is a reference to x's denominator; it +// may change if a new value is assigned to x, and vice versa. 
+func (x *Rat) Denom() *Int { + x.b.neg = false // the result is always >= 0 + if len(x.b.abs) == 0 { + x.b.abs = x.b.abs.set(natOne) // materialize denominator + } + return &x.b +} + +func (z *Rat) norm() *Rat { + switch { + case len(z.a.abs) == 0: + // z == 0 - normalize sign and denominator + z.a.neg = false + z.b.abs = z.b.abs.make(0) + case len(z.b.abs) == 0: + // z is normalized int - nothing to do + case z.b.abs.cmp(natOne) == 0: + // z is int - normalize denominator + z.b.abs = z.b.abs.make(0) + default: + neg := z.a.neg + z.a.neg = false + z.b.neg = false + if f := NewInt(0).binaryGCD(&z.a, &z.b); f.Cmp(intOne) != 0 { + z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs) + z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs) + if z.b.abs.cmp(natOne) == 0 { + // z is int - normalize denominator + z.b.abs = z.b.abs.make(0) + } + } + z.a.neg = neg + } + return z +} + +// mulDenom sets z to the denominator product x*y (by taking into +// account that 0 values for x or y must be interpreted as 1) and +// returns z. +func mulDenom(z, x, y nat) nat { + switch { + case len(x) == 0: + return z.set(y) + case len(y) == 0: + return z.set(x) + } + return z.mul(x, y) +} + +// scaleDenom computes x*f. +// If f == 0 (zero value of denominator), the result is (a copy of) x. +func scaleDenom(x *Int, f nat) *Int { + var z Int + if len(f) == 0 { + return z.Set(x) + } + z.abs = z.abs.mul(x.abs, f) + z.neg = x.neg + return &z +} + +// Cmp compares x and y and returns: +// +// -1 if x < y +// 0 if x == y +// +1 if x > y +// +func (x *Rat) Cmp(y *Rat) int { + return scaleDenom(&x.a, y.b.abs).Cmp(scaleDenom(&y.a, x.b.abs)) +} + +// Add sets z to the sum x+y and returns z. +func (z *Rat) Add(x, y *Rat) *Rat { + a1 := scaleDenom(&x.a, y.b.abs) + a2 := scaleDenom(&y.a, x.b.abs) + z.a.Add(a1, a2) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Sub sets z to the difference x-y and returns z. 
+func (z *Rat) Sub(x, y *Rat) *Rat { + a1 := scaleDenom(&x.a, y.b.abs) + a2 := scaleDenom(&y.a, x.b.abs) + z.a.Sub(a1, a2) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Mul sets z to the product x*y and returns z. +func (z *Rat) Mul(x, y *Rat) *Rat { + z.a.Mul(&x.a, &y.a) + z.b.abs = mulDenom(z.b.abs, x.b.abs, y.b.abs) + return z.norm() +} + +// Quo sets z to the quotient x/y and returns z. +// If y == 0, a division-by-zero run-time panic occurs. +func (z *Rat) Quo(x, y *Rat) *Rat { + if len(y.a.abs) == 0 { + panic("division by zero") + } + a := scaleDenom(&x.a, y.b.abs) + b := scaleDenom(&y.a, x.b.abs) + z.a.abs = a.abs + z.b.abs = b.abs + z.a.neg = a.neg != b.neg + return z.norm() +} + +func ratTok(ch rune) bool { + return strings.IndexRune("+-/0123456789.eE", ch) >= 0 +} + +// Scan is a support routine for fmt.Scanner. It accepts the formats +// 'e', 'E', 'f', 'F', 'g', 'G', and 'v'. All formats are equivalent. +func (z *Rat) Scan(s fmt.ScanState, ch rune) error { + tok, err := s.Token(true, ratTok) + if err != nil { + return err + } + if strings.IndexRune("efgEFGv", ch) < 0 { + return errors.New("Rat.Scan: invalid verb") + } + if _, ok := z.SetString(string(tok)); !ok { + return errors.New("Rat.Scan: invalid syntax") + } + return nil +} + +// SetString sets z to the value of s and returns z and a boolean indicating +// success. s can be given as a fraction "a/b" or as a floating-point number +// optionally followed by an exponent. If the operation failed, the value of +// z is undefined but the returned value is nil. 
+func (z *Rat) SetString(s string) (*Rat, bool) { + if len(s) == 0 { + return nil, false + } + + // check for a quotient + sep := strings.Index(s, "/") + if sep >= 0 { + if _, ok := z.a.SetString(s[0:sep], 10); !ok { + return nil, false + } + s = s[sep+1:] + var err error + if z.b.abs, _, err = z.b.abs.scan(strings.NewReader(s), 10); err != nil { + return nil, false + } + if len(z.b.abs) == 0 { + return nil, false + } + return z.norm(), true + } + + // check for a decimal point + sep = strings.Index(s, ".") + // check for an exponent + e := strings.IndexAny(s, "eE") + var exp Int + if e >= 0 { + if e < sep { + // The E must come after the decimal point. + return nil, false + } + if _, ok := exp.SetString(s[e+1:], 10); !ok { + return nil, false + } + s = s[0:e] + } + if sep >= 0 { + s = s[0:sep] + s[sep+1:] + exp.Sub(&exp, NewInt(int64(len(s)-sep))) + } + + if _, ok := z.a.SetString(s, 10); !ok { + return nil, false + } + powTen := nat(nil).expNN(natTen, exp.abs, nil) + if exp.neg { + z.b.abs = powTen + z.norm() + } else { + z.a.abs = z.a.abs.mul(z.a.abs, powTen) + z.b.abs = z.b.abs.make(0) + } + + return z, true +} + +// String returns a string representation of x in the form "a/b" (even if b == 1). +func (x *Rat) String() string { + s := "/1" + if len(x.b.abs) != 0 { + s = "/" + x.b.abs.decimalString() + } + return x.a.String() + s +} + +// RatString returns a string representation of x in the form "a/b" if b != 1, +// and in the form "a" if b == 1. +func (x *Rat) RatString() string { + if x.IsInt() { + return x.a.String() + } + return x.String() +} + +// FloatString returns a string representation of x in decimal form with prec +// digits of precision after the decimal point and the last digit rounded. +func (x *Rat) FloatString(prec int) string { + if x.IsInt() { + s := x.a.String() + if prec > 0 { + s += "." 
+ strings.Repeat("0", prec) + } + return s + } + // x.b.abs != 0 + + q, r := nat(nil).div(nat(nil), x.a.abs, x.b.abs) + + p := natOne + if prec > 0 { + p = nat(nil).expNN(natTen, nat(nil).setUint64(uint64(prec)), nil) + } + + r = r.mul(r, p) + r, r2 := r.div(nat(nil), r, x.b.abs) + + // see if we need to round up + r2 = r2.add(r2, r2) + if x.b.abs.cmp(r2) <= 0 { + r = r.add(r, natOne) + if r.cmp(p) >= 0 { + q = nat(nil).add(q, natOne) + r = nat(nil).sub(r, p) + } + } + + s := q.decimalString() + if x.a.neg { + s = "-" + s + } + + if prec > 0 { + rs := r.decimalString() + leadingZeros := prec - len(rs) + s += "." + strings.Repeat("0", leadingZeros) + rs + } + + return s +} + +// Gob codec version. Permits backward-compatible changes to the encoding. +const ratGobVersion byte = 1 + +// GobEncode implements the gob.GobEncoder interface. +func (x *Rat) GobEncode() ([]byte, error) { + if x == nil { + return nil, nil + } + buf := make([]byte, 1+4+(len(x.a.abs)+len(x.b.abs))*_S) // extra bytes for version and sign bit (1), and numerator length (4) + i := x.b.abs.bytes(buf) + j := x.a.abs.bytes(buf[0:i]) + n := i - j + if int(uint32(n)) != n { + // this should never happen + return nil, errors.New("Rat.GobEncode: numerator too large") + } + binary.BigEndian.PutUint32(buf[j-4:j], uint32(n)) + j -= 1 + 4 + b := ratGobVersion << 1 // make space for sign bit + if x.a.neg { + b |= 1 + } + buf[j] = b + return buf[j:], nil +} + +// GobDecode implements the gob.GobDecoder interface. +func (z *Rat) GobDecode(buf []byte) error { + if len(buf) == 0 { + // Other side sent a nil or default value. 
+ *z = Rat{} + return nil + } + b := buf[0] + if b>>1 != ratGobVersion { + return errors.New(fmt.Sprintf("Rat.GobDecode: encoding version %d not supported", b>>1)) + } + const j = 1 + 4 + i := j + binary.BigEndian.Uint32(buf[j-4:j]) + z.a.neg = b&1 != 0 + z.a.abs = z.a.abs.setBytes(buf[j:i]) + z.b.abs = z.b.abs.setBytes(buf[i:]) + return nil +} + +// MarshalText implements the encoding.TextMarshaler interface. +func (r *Rat) MarshalText() (text []byte, err error) { + return []byte(r.RatString()), nil +} + +// UnmarshalText implements the encoding.TextUnmarshaler interface. +func (r *Rat) UnmarshalText(text []byte) error { + if _, ok := r.SetString(string(text)); !ok { + return fmt.Errorf("math/big: cannot unmarshal %q into a *big.Rat", text) + } + return nil +} diff --git a/src/math/big/rat_test.go b/src/math/big/rat_test.go new file mode 100644 index 000000000..5dbbb3510 --- /dev/null +++ b/src/math/big/rat_test.go @@ -0,0 +1,1160 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package big + +import ( + "bytes" + "encoding/gob" + "encoding/json" + "encoding/xml" + "fmt" + "math" + "strconv" + "strings" + "testing" +) + +func TestZeroRat(t *testing.T) { + var x, y, z Rat + y.SetFrac64(0, 42) + + if x.Cmp(&y) != 0 { + t.Errorf("x and y should be both equal and zero") + } + + if s := x.String(); s != "0/1" { + t.Errorf("got x = %s, want 0/1", s) + } + + if s := x.RatString(); s != "0" { + t.Errorf("got x = %s, want 0", s) + } + + z.Add(&x, &y) + if s := z.RatString(); s != "0" { + t.Errorf("got x+y = %s, want 0", s) + } + + z.Sub(&x, &y) + if s := z.RatString(); s != "0" { + t.Errorf("got x-y = %s, want 0", s) + } + + z.Mul(&x, &y) + if s := z.RatString(); s != "0" { + t.Errorf("got x*y = %s, want 0", s) + } + + // check for division by zero + defer func() { + if s := recover(); s == nil || s.(string) != "division by zero" { + panic(s) + } + }() + z.Quo(&x, &y) +} + +var setStringTests = []struct { + in, out string + ok bool +}{ + {"0", "0", true}, + {"-0", "0", true}, + {"1", "1", true}, + {"-1", "-1", true}, + {"1.", "1", true}, + {"1e0", "1", true}, + {"1.e1", "10", true}, + {in: "1e", ok: false}, + {in: "1.e", ok: false}, + {in: "1e+14e-5", ok: false}, + {in: "1e4.5", ok: false}, + {in: "r", ok: false}, + {in: "a/b", ok: false}, + {in: "a.b", ok: false}, + {"-0.1", "-1/10", true}, + {"-.1", "-1/10", true}, + {"2/4", "1/2", true}, + {".25", "1/4", true}, + {"-1/5", "-1/5", true}, + {"8129567.7690E14", "812956776900000000000", true}, + {"78189e+4", "781890000", true}, + {"553019.8935e+8", "55301989350000", true}, + {"98765432109876543210987654321e-10", "98765432109876543210987654321/10000000000", true}, + {"9877861857500000E-7", "3951144743/4", true}, + {"2169378.417e-3", "2169378417/1000000", true}, + {"884243222337379604041632732738665534", "884243222337379604041632732738665534", true}, + {"53/70893980658822810696", "53/70893980658822810696", true}, + {"106/141787961317645621392", "53/70893980658822810696", true}, + 
{"204211327800791583.81095", "4084226556015831676219/20000", true}, + {in: "1/0", ok: false}, +} + +func TestRatSetString(t *testing.T) { + for i, test := range setStringTests { + x, ok := new(Rat).SetString(test.in) + + if ok { + if !test.ok { + t.Errorf("#%d SetString(%q) expected failure", i, test.in) + } else if x.RatString() != test.out { + t.Errorf("#%d SetString(%q) got %s want %s", i, test.in, x.RatString(), test.out) + } + } else if x != nil { + t.Errorf("#%d SetString(%q) got %p want nil", i, test.in, x) + } + } +} + +func TestRatScan(t *testing.T) { + var buf bytes.Buffer + for i, test := range setStringTests { + x := new(Rat) + buf.Reset() + buf.WriteString(test.in) + + _, err := fmt.Fscanf(&buf, "%v", x) + if err == nil != test.ok { + if test.ok { + t.Errorf("#%d error: %s", i, err) + } else { + t.Errorf("#%d expected error", i) + } + continue + } + if err == nil && x.RatString() != test.out { + t.Errorf("#%d got %s want %s", i, x.RatString(), test.out) + } + } +} + +var floatStringTests = []struct { + in string + prec int + out string +}{ + {"0", 0, "0"}, + {"0", 4, "0.0000"}, + {"1", 0, "1"}, + {"1", 2, "1.00"}, + {"-1", 0, "-1"}, + {".25", 2, "0.25"}, + {".25", 1, "0.3"}, + {".25", 3, "0.250"}, + {"-1/3", 3, "-0.333"}, + {"-2/3", 4, "-0.6667"}, + {"0.96", 1, "1.0"}, + {"0.999", 2, "1.00"}, + {"0.9", 0, "1"}, + {".25", -1, "0"}, + {".55", -1, "1"}, +} + +func TestFloatString(t *testing.T) { + for i, test := range floatStringTests { + x, _ := new(Rat).SetString(test.in) + + if x.FloatString(test.prec) != test.out { + t.Errorf("#%d got %s want %s", i, x.FloatString(test.prec), test.out) + } + } +} + +func TestRatSign(t *testing.T) { + zero := NewRat(0, 1) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + s := x.Sign() + e := x.Cmp(zero) + if s != e { + t.Errorf("got %d; want %d for z = %v", s, e, &x) + } + } +} + +var ratCmpTests = []struct { + rat1, rat2 string + out int +}{ + {"0", "0/1", 0}, + 
{"1/1", "1", 0}, + {"-1", "-2/2", 0}, + {"1", "0", 1}, + {"0/1", "1/1", -1}, + {"-5/1434770811533343057144", "-5/1434770811533343057145", -1}, + {"49832350382626108453/8964749413", "49832350382626108454/8964749413", -1}, + {"-37414950961700930/7204075375675961", "37414950961700930/7204075375675961", -1}, + {"37414950961700930/7204075375675961", "74829901923401860/14408150751351922", 0}, +} + +func TestRatCmp(t *testing.T) { + for i, test := range ratCmpTests { + x, _ := new(Rat).SetString(test.rat1) + y, _ := new(Rat).SetString(test.rat2) + + out := x.Cmp(y) + if out != test.out { + t.Errorf("#%d got out = %v; want %v", i, out, test.out) + } + } +} + +func TestIsInt(t *testing.T) { + one := NewInt(1) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + i := x.IsInt() + e := x.Denom().Cmp(one) == 0 + if i != e { + t.Errorf("got IsInt(%v) == %v; want %v", x, i, e) + } + } +} + +func TestRatAbs(t *testing.T) { + zero := new(Rat) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + e := new(Rat).Set(x) + if e.Cmp(zero) < 0 { + e.Sub(zero, e) + } + z := new(Rat).Abs(x) + if z.Cmp(e) != 0 { + t.Errorf("got Abs(%v) = %v; want %v", x, z, e) + } + } +} + +func TestRatNeg(t *testing.T) { + zero := new(Rat) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + e := new(Rat).Sub(zero, x) + z := new(Rat).Neg(x) + if z.Cmp(e) != 0 { + t.Errorf("got Neg(%v) = %v; want %v", x, z, e) + } + } +} + +func TestRatInv(t *testing.T) { + zero := new(Rat) + for _, a := range setStringTests { + x, ok := new(Rat).SetString(a.in) + if !ok { + continue + } + if x.Cmp(zero) == 0 { + continue // avoid division by zero + } + e := new(Rat).SetFrac(x.Denom(), x.Num()) + z := new(Rat).Inv(x) + if z.Cmp(e) != 0 { + t.Errorf("got Inv(%v) = %v; want %v", x, z, e) + } + } +} + +type ratBinFun func(z, x, y *Rat) *Rat +type ratBinArg struct { + x, y, z string +} + 
+func testRatBin(t *testing.T, i int, name string, f ratBinFun, a ratBinArg) { + x, _ := new(Rat).SetString(a.x) + y, _ := new(Rat).SetString(a.y) + z, _ := new(Rat).SetString(a.z) + out := f(new(Rat), x, y) + + if out.Cmp(z) != 0 { + t.Errorf("%s #%d got %s want %s", name, i, out, z) + } +} + +var ratBinTests = []struct { + x, y string + sum, prod string +}{ + {"0", "0", "0", "0"}, + {"0", "1", "1", "0"}, + {"-1", "0", "-1", "0"}, + {"-1", "1", "0", "-1"}, + {"1", "1", "2", "1"}, + {"1/2", "1/2", "1", "1/4"}, + {"1/4", "1/3", "7/12", "1/12"}, + {"2/5", "-14/3", "-64/15", "-28/15"}, + {"4707/49292519774798173060", "-3367/70976135186689855734", "84058377121001851123459/1749296273614329067191168098769082663020", "-1760941/388732505247628681598037355282018369560"}, + {"-61204110018146728334/3", "-31052192278051565633/2", "-215564796870448153567/6", "950260896245257153059642991192710872711/3"}, + {"-854857841473707320655/4237645934602118692642972629634714039", "-18/31750379913563777419", "-27/133467566250814981", "15387441146526731771790/134546868362786310073779084329032722548987800600710485341"}, + {"618575745270541348005638912139/19198433543745179392300736", "-19948846211000086/637313996471", "27674141753240653/30123979153216", "-6169936206128396568797607742807090270137721977/6117715203873571641674006593837351328"}, + {"-3/26206484091896184128", "5/2848423294177090248", "15310893822118706237/9330894968229805033368778458685147968", "-5/24882386581946146755650075889827061248"}, + {"26946729/330400702820", "41563965/225583428284", "1238218672302860271/4658307703098666660055", "224002580204097/14906584649915733312176"}, + {"-8259900599013409474/7", "-84829337473700364773/56707961321161574960", "-468402123685491748914621885145127724451/396955729248131024720", "350340947706464153265156004876107029701/198477864624065512360"}, + {"575775209696864/1320203974639986246357", "29/712593081308", "410331716733912717985762465/940768218243776489278275419794956", 
"808/45524274987585732633"}, + {"1786597389946320496771/2066653520653241", "6269770/1992362624741777", "3559549865190272133656109052308126637/4117523232840525481453983149257", "8967230/3296219033"}, + {"-36459180403360509753/32150500941194292113930", "9381566963714/9633539", "301622077145533298008420642898530153/309723104686531919656937098270", "-3784609207827/3426986245"}, +} + +func TestRatBin(t *testing.T) { + for i, test := range ratBinTests { + arg := ratBinArg{test.x, test.y, test.sum} + testRatBin(t, i, "Add", (*Rat).Add, arg) + + arg = ratBinArg{test.y, test.x, test.sum} + testRatBin(t, i, "Add symmetric", (*Rat).Add, arg) + + arg = ratBinArg{test.sum, test.x, test.y} + testRatBin(t, i, "Sub", (*Rat).Sub, arg) + + arg = ratBinArg{test.sum, test.y, test.x} + testRatBin(t, i, "Sub symmetric", (*Rat).Sub, arg) + + arg = ratBinArg{test.x, test.y, test.prod} + testRatBin(t, i, "Mul", (*Rat).Mul, arg) + + arg = ratBinArg{test.y, test.x, test.prod} + testRatBin(t, i, "Mul symmetric", (*Rat).Mul, arg) + + if test.x != "0" { + arg = ratBinArg{test.prod, test.x, test.y} + testRatBin(t, i, "Quo", (*Rat).Quo, arg) + } + + if test.y != "0" { + arg = ratBinArg{test.prod, test.y, test.x} + testRatBin(t, i, "Quo symmetric", (*Rat).Quo, arg) + } + } +} + +func TestIssue820(t *testing.T) { + x := NewRat(3, 1) + y := NewRat(2, 1) + z := y.Quo(x, y) + q := NewRat(3, 2) + if z.Cmp(q) != 0 { + t.Errorf("got %s want %s", z, q) + } + + y = NewRat(3, 1) + x = NewRat(2, 1) + z = y.Quo(x, y) + q = NewRat(2, 3) + if z.Cmp(q) != 0 { + t.Errorf("got %s want %s", z, q) + } + + x = NewRat(3, 1) + z = x.Quo(x, x) + q = NewRat(3, 3) + if z.Cmp(q) != 0 { + t.Errorf("got %s want %s", z, q) + } +} + +var setFrac64Tests = []struct { + a, b int64 + out string +}{ + {0, 1, "0"}, + {0, -1, "0"}, + {1, 1, "1"}, + {-1, 1, "-1"}, + {1, -1, "-1"}, + {-1, -1, "1"}, + {-9223372036854775808, -9223372036854775808, "1"}, +} + +func TestRatSetFrac64Rat(t *testing.T) { + for i, test := range setFrac64Tests { 
+ x := new(Rat).SetFrac64(test.a, test.b) + if x.RatString() != test.out { + t.Errorf("#%d got %s want %s", i, x.RatString(), test.out) + } + } +} + +func TestRatGobEncoding(t *testing.T) { + var medium bytes.Buffer + enc := gob.NewEncoder(&medium) + dec := gob.NewDecoder(&medium) + for _, test := range encodingTests { + medium.Reset() // empty buffer for each test case (in case of failures) + var tx Rat + tx.SetString(test + ".14159265") + if err := enc.Encode(&tx); err != nil { + t.Errorf("encoding of %s failed: %s", &tx, err) + } + var rx Rat + if err := dec.Decode(&rx); err != nil { + t.Errorf("decoding of %s failed: %s", &tx, err) + } + if rx.Cmp(&tx) != 0 { + t.Errorf("transmission of %s failed: got %s want %s", &tx, &rx, &tx) + } + } +} + +// Sending a nil Rat pointer (inside a slice) on a round trip through gob should yield a zero. +// TODO: top-level nils. +func TestGobEncodingNilRatInSlice(t *testing.T) { + buf := new(bytes.Buffer) + enc := gob.NewEncoder(buf) + dec := gob.NewDecoder(buf) + + var in = make([]*Rat, 1) + err := enc.Encode(&in) + if err != nil { + t.Errorf("gob encode failed: %q", err) + } + var out []*Rat + err = dec.Decode(&out) + if err != nil { + t.Fatalf("gob decode failed: %q", err) + } + if len(out) != 1 { + t.Fatalf("wrong len; want 1 got %d", len(out)) + } + var zero Rat + if out[0].Cmp(&zero) != 0 { + t.Errorf("transmission of (*Int)(nill) failed: got %s want 0", out) + } +} + +var ratNums = []string{ + "-141592653589793238462643383279502884197169399375105820974944592307816406286", + "-1415926535897932384626433832795028841971", + "-141592653589793", + "-1", + "0", + "1", + "141592653589793", + "1415926535897932384626433832795028841971", + "141592653589793238462643383279502884197169399375105820974944592307816406286", +} + +var ratDenoms = []string{ + "1", + "718281828459045", + "7182818284590452353602874713526624977572", + "718281828459045235360287471352662497757247093699959574966967627724076630353", +} + +func TestRatJSONEncoding(t 
*testing.T) { + for _, num := range ratNums { + for _, denom := range ratDenoms { + var tx Rat + tx.SetString(num + "/" + denom) + b, err := json.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Rat + if err := json.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("JSON encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} + +func TestRatXMLEncoding(t *testing.T) { + for _, num := range ratNums { + for _, denom := range ratDenoms { + var tx Rat + tx.SetString(num + "/" + denom) + b, err := xml.Marshal(&tx) + if err != nil { + t.Errorf("marshaling of %s failed: %s", &tx, err) + continue + } + var rx Rat + if err := xml.Unmarshal(b, &rx); err != nil { + t.Errorf("unmarshaling of %s failed: %s", &tx, err) + continue + } + if rx.Cmp(&tx) != 0 { + t.Errorf("XML encoding of %s failed: got %s want %s", &tx, &rx, &tx) + } + } + } +} + +func TestIssue2379(t *testing.T) { + // 1) no aliasing + q := NewRat(3, 2) + x := new(Rat) + x.SetFrac(NewInt(3), NewInt(2)) + if x.Cmp(q) != 0 { + t.Errorf("1) got %s want %s", x, q) + } + + // 2) aliasing of numerator + x = NewRat(2, 3) + x.SetFrac(NewInt(3), x.Num()) + if x.Cmp(q) != 0 { + t.Errorf("2) got %s want %s", x, q) + } + + // 3) aliasing of denominator + x = NewRat(2, 3) + x.SetFrac(x.Denom(), NewInt(2)) + if x.Cmp(q) != 0 { + t.Errorf("3) got %s want %s", x, q) + } + + // 4) aliasing of numerator and denominator + x = NewRat(2, 3) + x.SetFrac(x.Denom(), x.Num()) + if x.Cmp(q) != 0 { + t.Errorf("4) got %s want %s", x, q) + } + + // 5) numerator and denominator are the same + q = NewRat(1, 1) + x = new(Rat) + n := NewInt(7) + x.SetFrac(n, n) + if x.Cmp(q) != 0 { + t.Errorf("5) got %s want %s", x, q) + } +} + +func TestIssue3521(t *testing.T) { + a := new(Int) + b := new(Int) + a.SetString("64375784358435883458348587", 0) + b.SetString("4789759874531", 0) + + // 0) a raw 
zero value has 1 as denominator + zero := new(Rat) + one := NewInt(1) + if zero.Denom().Cmp(one) != 0 { + t.Errorf("0) got %s want %s", zero.Denom(), one) + } + + // 1a) a zero value remains zero independent of denominator + x := new(Rat) + x.Denom().Set(new(Int).Neg(b)) + if x.Cmp(zero) != 0 { + t.Errorf("1a) got %s want %s", x, zero) + } + + // 1b) a zero value may have a denominator != 0 and != 1 + x.Num().Set(a) + qab := new(Rat).SetFrac(a, b) + if x.Cmp(qab) != 0 { + t.Errorf("1b) got %s want %s", x, qab) + } + + // 2a) an integral value becomes a fraction depending on denominator + x.SetFrac64(10, 2) + x.Denom().SetInt64(3) + q53 := NewRat(5, 3) + if x.Cmp(q53) != 0 { + t.Errorf("2a) got %s want %s", x, q53) + } + + // 2b) an integral value becomes a fraction depending on denominator + x = NewRat(10, 2) + x.Denom().SetInt64(3) + if x.Cmp(q53) != 0 { + t.Errorf("2b) got %s want %s", x, q53) + } + + // 3) changing the numerator/denominator of a Rat changes the Rat + x.SetFrac(a, b) + a = x.Num() + b = x.Denom() + a.SetInt64(5) + b.SetInt64(3) + if x.Cmp(q53) != 0 { + t.Errorf("3) got %s want %s", x, q53) + } +} + +// Test inputs to Rat.SetString. The prefix "long:" causes the test +// to be skipped in --test.short mode. (The threshold is about 500us.) +var float64inputs = []string{ + // Constants plundered from strconv/testfp.txt. 
+ + // Table 1: Stress Inputs for Conversion to 53-bit Binary, < 1/2 ULP + "5e+125", + "69e+267", + "999e-026", + "7861e-034", + "75569e-254", + "928609e-261", + "9210917e+080", + "84863171e+114", + "653777767e+273", + "5232604057e-298", + "27235667517e-109", + "653532977297e-123", + "3142213164987e-294", + "46202199371337e-072", + "231010996856685e-073", + "9324754620109615e+212", + "78459735791271921e+049", + "272104041512242479e+200", + "6802601037806061975e+198", + "20505426358836677347e-221", + "836168422905420598437e-234", + "4891559871276714924261e+222", + + // Table 2: Stress Inputs for Conversion to 53-bit Binary, > 1/2 ULP + "9e-265", + "85e-037", + "623e+100", + "3571e+263", + "81661e+153", + "920657e-023", + "4603285e-024", + "87575437e-309", + "245540327e+122", + "6138508175e+120", + "83356057653e+193", + "619534293513e+124", + "2335141086879e+218", + "36167929443327e-159", + "609610927149051e-255", + "3743626360493413e-165", + "94080055902682397e-242", + "899810892172646163e+283", + "7120190517612959703e+120", + "25188282901709339043e-252", + "308984926168550152811e-052", + "6372891218502368041059e+064", + + // Table 14: Stress Inputs for Conversion to 24-bit Binary, <1/2 ULP + "5e-20", + "67e+14", + "985e+15", + "7693e-42", + "55895e-16", + "996622e-44", + "7038531e-32", + "60419369e-46", + "702990899e-20", + "6930161142e-48", + "25933168707e+13", + "596428896559e+20", + + // Table 15: Stress Inputs for Conversion to 24-bit Binary, >1/2 ULP + "3e-23", + "57e+18", + "789e-35", + "2539e-18", + "76173e+28", + "887745e-11", + "5382571e-37", + "82381273e-35", + "750486563e-38", + "3752432815e-39", + "75224575729e-45", + "459926601011e+15", + + // Constants plundered from strconv/atof_test.go. 
+ + "0", + "1", + "+1", + "1e23", + "1E23", + "100000000000000000000000", + "1e-100", + "123456700", + "99999999999999974834176", + "100000000000000000000001", + "100000000000000008388608", + "100000000000000016777215", + "100000000000000016777216", + "-1", + "-0.1", + "-0", // NB: exception made for this input + "1e-20", + "625e-3", + + // largest float64 + "1.7976931348623157e308", + "-1.7976931348623157e308", + // next float64 - too large + "1.7976931348623159e308", + "-1.7976931348623159e308", + // the border is ...158079 + // borderline - okay + "1.7976931348623158e308", + "-1.7976931348623158e308", + // borderline - too large + "1.797693134862315808e308", + "-1.797693134862315808e308", + + // a little too large + "1e308", + "2e308", + "1e309", + + // way too large + "1e310", + "-1e310", + "1e400", + "-1e400", + "long:1e400000", + "long:-1e400000", + + // denormalized + "1e-305", + "1e-306", + "1e-307", + "1e-308", + "1e-309", + "1e-310", + "1e-322", + // smallest denormal + "5e-324", + "4e-324", + "3e-324", + // too small + "2e-324", + // way too small + "1e-350", + "long:1e-400000", + // way too small, negative + "-1e-350", + "long:-1e-400000", + + // try to overflow exponent + // [Disabled: too slow and memory-hungry with rationals.] + // "1e-4294967296", + // "1e+4294967296", + // "1e-18446744073709551616", + // "1e+18446744073709551616", + + // http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/ + "2.2250738585072012e-308", + // http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ + "2.2250738585072011e-308", + + // A very large number (initially wrongly parsed by the fast algorithm). + "4.630813248087435e+307", + + // A different kind of very large number. + "22.222222222222222", + "long:2." + strings.Repeat("2", 4000) + "e+1", + + // Exactly halfway between 1 and math.Nextafter(1, 2). + // Round to even (down). 
+ "1.00000000000000011102230246251565404236316680908203125", + // Slightly lower; still round down. + "1.00000000000000011102230246251565404236316680908203124", + // Slightly higher; round up. + "1.00000000000000011102230246251565404236316680908203126", + // Slightly higher, but you have to read all the way to the end. + "long:1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", + + // Smallest denormal, 2^(-1022-52) + "4.940656458412465441765687928682213723651e-324", + // Half of smallest denormal, 2^(-1022-53) + "2.470328229206232720882843964341106861825e-324", + // A little more than the exact half of smallest denormal + // 2^-1075 + 2^-1100. (Rounds to 1p-1074.) + "2.470328302827751011111470718709768633275e-324", + // The exact halfway between smallest normal and largest denormal: + // 2^-1022 - 2^-1075. (Rounds to 2^-1022.) + "2.225073858507201136057409796709131975935e-308", + + "1152921504606846975", // 1<<60 - 1 + "-1152921504606846975", // -(1<<60 - 1) + "1152921504606846977", // 1<<60 + 1 + "-1152921504606846977", // -(1<<60 + 1) + + "1/3", +} + +// isFinite reports whether f represents a finite rational value. +// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0). +func isFinite(f float64) bool { + return math.Abs(f) <= math.MaxFloat64 +} + +func TestFloat32SpecialCases(t *testing.T) { + for _, input := range float64inputs { + if strings.HasPrefix(input, "long:") { + if testing.Short() { + continue + } + input = input[len("long:"):] + } + + r, ok := new(Rat).SetString(input) + if !ok { + t.Errorf("Rat.SetString(%q) failed", input) + continue + } + f, exact := r.Float32() + + // 1. Check string -> Rat -> float32 conversions are + // consistent with strconv.ParseFloat. + // Skip this check if the input uses "a/b" rational syntax. 
+ if !strings.Contains(input, "/") { + e64, _ := strconv.ParseFloat(input, 32) + e := float32(e64) + + // Careful: negative Rats too small for + // float64 become -0, but Rat obviously cannot + // preserve the sign from SetString("-0"). + switch { + case math.Float32bits(e) == math.Float32bits(f): + // Ok: bitwise equal. + case f == 0 && r.Num().BitLen() == 0: + // Ok: Rat(0) is equivalent to both +/- float64(0). + default: + t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e) + } + } + + if !isFinite(float64(f)) { + continue + } + + // 2. Check f is best approximation to r. + if !checkIsBestApprox32(t, f, r) { + // Append context information. + t.Errorf("(input was %q)", input) + } + + // 3. Check f->R->f roundtrip is non-lossy. + checkNonLossyRoundtrip32(t, f) + + // 4. Check exactness using slow algorithm. + if wasExact := new(Rat).SetFloat64(float64(f)).Cmp(r) == 0; wasExact != exact { + t.Errorf("Rat.SetString(%q).Float32().exact = %t, want %t", input, exact, wasExact) + } + } +} + +func TestFloat64SpecialCases(t *testing.T) { + for _, input := range float64inputs { + if strings.HasPrefix(input, "long:") { + if testing.Short() { + continue + } + input = input[len("long:"):] + } + + r, ok := new(Rat).SetString(input) + if !ok { + t.Errorf("Rat.SetString(%q) failed", input) + continue + } + f, exact := r.Float64() + + // 1. Check string -> Rat -> float64 conversions are + // consistent with strconv.ParseFloat. + // Skip this check if the input uses "a/b" rational syntax. + if !strings.Contains(input, "/") { + e, _ := strconv.ParseFloat(input, 64) + + // Careful: negative Rats too small for + // float64 become -0, but Rat obviously cannot + // preserve the sign from SetString("-0"). + switch { + case math.Float64bits(e) == math.Float64bits(f): + // Ok: bitwise equal. + case f == 0 && r.Num().BitLen() == 0: + // Ok: Rat(0) is equivalent to both +/- float64(0). 
+ default: + t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e) + } + } + + if !isFinite(f) { + continue + } + + // 2. Check f is best approximation to r. + if !checkIsBestApprox64(t, f, r) { + // Append context information. + t.Errorf("(input was %q)", input) + } + + // 3. Check f->R->f roundtrip is non-lossy. + checkNonLossyRoundtrip64(t, f) + + // 4. Check exactness using slow algorithm. + if wasExact := new(Rat).SetFloat64(f).Cmp(r) == 0; wasExact != exact { + t.Errorf("Rat.SetString(%q).Float64().exact = %t, want %t", input, exact, wasExact) + } + } +} + +func TestFloat32Distribution(t *testing.T) { + // Generate a distribution of (sign, mantissa, exp) values + // broader than the float32 range, and check Rat.Float32() + // always picks the closest float32 approximation. + var add = []int64{ + 0, + 1, + 3, + 5, + 7, + 9, + 11, + } + var winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64) + if testing.Short() { + winc, einc = 5, 15 // quick test (~60ms on x86-64) + } + + for _, sign := range "+-" { + for _, a := range add { + for wid := uint64(0); wid < 30; wid += winc { + b := 1<<wid + a + if sign == '-' { + b = -b + } + for exp := -150; exp < 150; exp += einc { + num, den := NewInt(b), NewInt(1) + if exp > 0 { + num.Lsh(num, uint(exp)) + } else { + den.Lsh(den, uint(-exp)) + } + r := new(Rat).SetFrac(num, den) + f, _ := r.Float32() + + if !checkIsBestApprox32(t, f, r) { + // Append context information. + t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)", + b, exp, f, f, math.Ldexp(float64(b), exp), r) + } + + checkNonLossyRoundtrip32(t, f) + } + } + } + } +} + +func TestFloat64Distribution(t *testing.T) { + // Generate a distribution of (sign, mantissa, exp) values + // broader than the float64 range, and check Rat.Float64() + // always picks the closest float64 approximation. 
+ var add = []int64{ + 0, + 1, + 3, + 5, + 7, + 9, + 11, + } + var winc, einc = uint64(1), 1 // soak test (~75s on x86-64) + if testing.Short() { + winc, einc = 10, 500 // quick test (~12ms on x86-64) + } + + for _, sign := range "+-" { + for _, a := range add { + for wid := uint64(0); wid < 60; wid += winc { + b := 1<<wid + a + if sign == '-' { + b = -b + } + for exp := -1100; exp < 1100; exp += einc { + num, den := NewInt(b), NewInt(1) + if exp > 0 { + num.Lsh(num, uint(exp)) + } else { + den.Lsh(den, uint(-exp)) + } + r := new(Rat).SetFrac(num, den) + f, _ := r.Float64() + + if !checkIsBestApprox64(t, f, r) { + // Append context information. + t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)", + b, exp, f, f, math.Ldexp(float64(b), exp), r) + } + + checkNonLossyRoundtrip64(t, f) + } + } + } + } +} + +// TestSetFloat64NonFinite checks that SetFloat64 of a non-finite value +// returns nil. +func TestSetFloat64NonFinite(t *testing.T) { + for _, f := range []float64{math.NaN(), math.Inf(+1), math.Inf(-1)} { + var r Rat + if r2 := r.SetFloat64(f); r2 != nil { + t.Errorf("SetFloat64(%g) was %v, want nil", f, r2) + } + } +} + +// checkNonLossyRoundtrip32 checks that a float->Rat->float roundtrip is +// non-lossy for finite f. +func checkNonLossyRoundtrip32(t *testing.T, f float32) { + if !isFinite(float64(f)) { + return + } + r := new(Rat).SetFloat64(float64(f)) + if r == nil { + t.Errorf("Rat.SetFloat64(float64(%g) (%b)) == nil", f, f) + return + } + f2, exact := r.Float32() + if f != f2 || !exact { + t.Errorf("Rat.SetFloat64(float64(%g)).Float32() = %g (%b), %v, want %g (%b), %v; delta = %b", + f, f2, f2, exact, f, f, true, f2-f) + } +} + +// checkNonLossyRoundtrip64 checks that a float->Rat->float roundtrip is +// non-lossy for finite f. 
+func checkNonLossyRoundtrip64(t *testing.T, f float64) { + if !isFinite(f) { + return + } + r := new(Rat).SetFloat64(f) + if r == nil { + t.Errorf("Rat.SetFloat64(%g (%b)) == nil", f, f) + return + } + f2, exact := r.Float64() + if f != f2 || !exact { + t.Errorf("Rat.SetFloat64(%g).Float64() = %g (%b), %v, want %g (%b), %v; delta = %b", + f, f2, f2, exact, f, f, true, f2-f) + } +} + +// delta returns the absolute difference between r and f. +func delta(r *Rat, f float64) *Rat { + d := new(Rat).Sub(r, new(Rat).SetFloat64(f)) + return d.Abs(d) +} + +// checkIsBestApprox32 checks that f is the best possible float32 +// approximation of r. +// Returns true on success. +func checkIsBestApprox32(t *testing.T, f float32, r *Rat) bool { + if math.Abs(float64(f)) >= math.MaxFloat32 { + // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat32). + // But we have tests for these special cases. + return true + } + + // r must be strictly between f0 and f1, the floats bracketing f. + f0 := math.Nextafter32(f, float32(math.Inf(-1))) + f1 := math.Nextafter32(f, float32(math.Inf(+1))) + + // For f to be correct, r must be closer to f than to f0 or f1. + df := delta(r, float64(f)) + df0 := delta(r, float64(f0)) + df1 := delta(r, float64(f1)) + if df.Cmp(df0) > 0 { + t.Errorf("Rat(%v).Float32() = %g (%b), but previous float32 %g (%b) is closer", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) > 0 { + t.Errorf("Rat(%v).Float32() = %g (%b), but next float32 %g (%b) is closer", r, f, f, f1, f1) + return false + } + if df.Cmp(df0) == 0 && !isEven32(f) { + t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) == 0 && !isEven32(f) { + t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1) + return false + } + return true +} + +// checkIsBestApprox64 checks that f is the best possible float64 +// approximation of r. 
+// Returns true on success. +func checkIsBestApprox64(t *testing.T, f float64, r *Rat) bool { + if math.Abs(f) >= math.MaxFloat64 { + // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat64). + // But we have tests for these special cases. + return true + } + + // r must be strictly between f0 and f1, the floats bracketing f. + f0 := math.Nextafter(f, math.Inf(-1)) + f1 := math.Nextafter(f, math.Inf(+1)) + + // For f to be correct, r must be closer to f than to f0 or f1. + df := delta(r, f) + df0 := delta(r, f0) + df1 := delta(r, f1) + if df.Cmp(df0) > 0 { + t.Errorf("Rat(%v).Float64() = %g (%b), but previous float64 %g (%b) is closer", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) > 0 { + t.Errorf("Rat(%v).Float64() = %g (%b), but next float64 %g (%b) is closer", r, f, f, f1, f1) + return false + } + if df.Cmp(df0) == 0 && !isEven64(f) { + t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) == 0 && !isEven64(f) { + t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1) + return false + } + return true +} + +func isEven32(f float32) bool { return math.Float32bits(f)&1 == 0 } +func isEven64(f float64) bool { return math.Float64bits(f)&1 == 0 } + +func TestIsFinite(t *testing.T) { + finites := []float64{ + 1.0 / 3, + 4891559871276714924261e+222, + math.MaxFloat64, + math.SmallestNonzeroFloat64, + -math.MaxFloat64, + -math.SmallestNonzeroFloat64, + } + for _, f := range finites { + if !isFinite(f) { + t.Errorf("!IsFinite(%g (%b))", f, f) + } + } + nonfinites := []float64{ + math.NaN(), + math.Inf(-1), + math.Inf(+1), + } + for _, f := range nonfinites { + if isFinite(f) { + t.Errorf("IsFinite(%g, (%b))", f, f) + } + } +} diff --git a/src/math/bits.go b/src/math/bits.go new file mode 100644 index 000000000..d85ee9cb1 --- /dev/null +++ b/src/math/bits.go @@ -0,0 +1,59 @@ +// Copyright 2009 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +const ( + uvnan = 0x7FF8000000000001 + uvinf = 0x7FF0000000000000 + uvneginf = 0xFFF0000000000000 + mask = 0x7FF + shift = 64 - 11 - 1 + bias = 1023 +) + +// Inf returns positive infinity if sign >= 0, negative infinity if sign < 0. +func Inf(sign int) float64 { + var v uint64 + if sign >= 0 { + v = uvinf + } else { + v = uvneginf + } + return Float64frombits(v) +} + +// NaN returns an IEEE 754 ``not-a-number'' value. +func NaN() float64 { return Float64frombits(uvnan) } + +// IsNaN reports whether f is an IEEE 754 ``not-a-number'' value. +func IsNaN(f float64) (is bool) { + // IEEE 754 says that only NaNs satisfy f != f. + // To avoid the floating-point hardware, could use: + // x := Float64bits(f); + // return uint32(x>>shift)&mask == mask && x != uvinf && x != uvneginf + return f != f +} + +// IsInf reports whether f is an infinity, according to sign. +// If sign > 0, IsInf reports whether f is positive infinity. +// If sign < 0, IsInf reports whether f is negative infinity. +// If sign == 0, IsInf reports whether f is either infinity. +func IsInf(f float64, sign int) bool { + // Test for infinity by comparing against maximum float. + // To avoid the floating-point hardware, could use: + // x := Float64bits(f); + // return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf; + return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64 +} + +// normalize returns a normal number y and exponent exp +// satisfying x == y × 2**exp. It assumes x is finite and non-zero. 
+func normalize(x float64) (y float64, exp int) { + const SmallestNormal = 2.2250738585072014e-308 // 2**-1022 + if Abs(x) < SmallestNormal { + return x * (1 << 52), -52 + } + return x, 0 +} diff --git a/src/math/cbrt.go b/src/math/cbrt.go new file mode 100644 index 000000000..272e30923 --- /dev/null +++ b/src/math/cbrt.go @@ -0,0 +1,76 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + The algorithm is based in part on "Optimal Partitioning of + Newton's Method for Calculating Roots", by Gunter Meinardus + and G. D. Taylor, Mathematics of Computation © 1980 American + Mathematical Society. + (http://www.jstor.org/stable/2006387?seq=9, accessed 11-Feb-2010) +*/ + +// Cbrt returns the cube root of x. +// +// Special cases are: +// Cbrt(±0) = ±0 +// Cbrt(±Inf) = ±Inf +// Cbrt(NaN) = NaN +func Cbrt(x float64) float64 { + const ( + A1 = 1.662848358e-01 + A2 = 1.096040958e+00 + A3 = 4.105032829e-01 + A4 = 5.649335816e-01 + B1 = 2.639607233e-01 + B2 = 8.699282849e-01 + B3 = 1.629083358e-01 + B4 = 2.824667908e-01 + C1 = 4.190115298e-01 + C2 = 6.904625373e-01 + C3 = 6.46502159e-02 + C4 = 1.412333954e-01 + ) + // special cases + switch { + case x == 0 || IsNaN(x) || IsInf(x, 0): + return x + } + sign := false + if x < 0 { + x = -x + sign = true + } + // Reduce argument and estimate cube root + f, e := Frexp(x) // 0.5 <= f < 1.0 + m := e % 3 + if m > 0 { + m -= 3 + e -= m // e is multiple of 3 + } + switch m { + case 0: // 0.5 <= f < 1.0 + f = A1*f + A2 - A3/(A4+f) + case -1: + f *= 0.5 // 0.25 <= f < 0.5 + f = B1*f + B2 - B3/(B4+f) + default: // m == -2 + f *= 0.25 // 0.125 <= f < 0.25 + f = C1*f + C2 - C3/(C4+f) + } + y := Ldexp(f, e/3) // e/3 = exponent of cube root + + // Iterate + s := y * y * y + t := s + x + y *= (t + x) / (s + t) + // Reiterate + s = (y*y*y - x) / x + y -= y * (((14.0/81.0)*s-(2.0/9.0))*s + (1.0 / 3.0)) * s + if 
sign { + y = -y + } + return y +} diff --git a/src/math/cmplx/abs.go b/src/math/cmplx/abs.go new file mode 100644 index 000000000..f3cd1073e --- /dev/null +++ b/src/math/cmplx/abs.go @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cmplx provides basic constants and mathematical functions for +// complex numbers. +package cmplx + +import "math" + +// Abs returns the absolute value (also called the modulus) of x. +func Abs(x complex128) float64 { return math.Hypot(real(x), imag(x)) } diff --git a/src/math/cmplx/asin.go b/src/math/cmplx/asin.go new file mode 100644 index 000000000..61880a257 --- /dev/null +++ b/src/math/cmplx/asin.go @@ -0,0 +1,170 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. 
Moshier +// moshier@na-net.ornl.gov + +// Complex circular arc sine +// +// DESCRIPTION: +// +// Inverse complex sine: +// 2 +// w = -i clog( iz + csqrt( 1 - z ) ). +// +// casin(z) = -i casinh(iz) +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 10100 2.1e-15 3.4e-16 +// IEEE -10,+10 30000 2.2e-14 2.7e-15 +// Larger relative error can be observed for z near zero. +// Also tested by csin(casin(z)) = z. + +// Asin returns the inverse sine of x. +func Asin(x complex128) complex128 { + if imag(x) == 0 { + if math.Abs(real(x)) > 1 { + return complex(math.Pi/2, 0) // DOMAIN error + } + return complex(math.Asin(real(x)), 0) + } + ct := complex(-imag(x), real(x)) // i * x + xx := x * x + x1 := complex(1-real(xx), -imag(xx)) // 1 - x*x + x2 := Sqrt(x1) // x2 = sqrt(1 - x*x) + w := Log(ct + x2) + return complex(imag(w), -real(w)) // -i * w +} + +// Asinh returns the inverse hyperbolic sine of x. +func Asinh(x complex128) complex128 { + // TODO check range + if imag(x) == 0 { + if math.Abs(real(x)) > 1 { + return complex(math.Pi/2, 0) // DOMAIN error + } + return complex(math.Asinh(real(x)), 0) + } + xx := x * x + x1 := complex(1+real(xx), imag(xx)) // 1 + x*x + return Log(x + Sqrt(x1)) // log(x + sqrt(1 + x*x)) +} + +// Complex circular arc cosine +// +// DESCRIPTION: +// +// w = arccos z = PI/2 - arcsin z. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 5200 1.6e-15 2.8e-16 +// IEEE -10,+10 30000 1.8e-14 2.2e-15 + +// Acos returns the inverse cosine of x. +func Acos(x complex128) complex128 { + w := Asin(x) + return complex(math.Pi/2-real(w), -imag(w)) +} + +// Acosh returns the inverse hyperbolic cosine of x. 
+func Acosh(x complex128) complex128 { + w := Acos(x) + if imag(w) <= 0 { + return complex(-imag(w), real(w)) // i * w + } + return complex(imag(w), -real(w)) // -i * w +} + +// Complex circular arc tangent +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// 1 ( 2x ) +// Re w = - arctan(-----------) + k PI +// 2 ( 2 2) +// (1 - x - y ) +// +// ( 2 2) +// 1 (x + (y+1) ) +// Im w = - log(------------) +// 4 ( 2 2) +// (x + (y-1) ) +// +// Where k is an arbitrary integer. +// +// catan(z) = -i catanh(iz). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 5900 1.3e-16 7.8e-18 +// IEEE -10,+10 30000 2.3e-15 8.5e-17 +// The check catan( ctan(z) ) = z, with |x| and |y| < PI/2, +// had peak relative error 1.5e-16, rms relative error +// 2.9e-17. See also clog(). + +// Atan returns the inverse tangent of x. +func Atan(x complex128) complex128 { + if real(x) == 0 && imag(x) > 1 { + return NaN() + } + + x2 := real(x) * real(x) + a := 1 - x2 - imag(x)*imag(x) + if a == 0 { + return NaN() + } + t := 0.5 * math.Atan2(2*real(x), a) + w := reducePi(t) + + t = imag(x) - 1 + b := x2 + t*t + if b == 0 { + return NaN() + } + t = imag(x) + 1 + c := (x2 + t*t) / b + return complex(w, 0.25*math.Log(c)) +} + +// Atanh returns the inverse hyperbolic tangent of x. +func Atanh(x complex128) complex128 { + z := complex(-imag(x), real(x)) // z = i * x + z = Atan(z) + return complex(imag(z), -real(z)) // z = -i * z +} diff --git a/src/math/cmplx/cmath_test.go b/src/math/cmplx/cmath_test.go new file mode 100644 index 000000000..f285646af --- /dev/null +++ b/src/math/cmplx/cmath_test.go @@ -0,0 +1,866 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package cmplx + +import ( + "math" + "testing" +) + +var vc26 = []complex128{ + (4.97901192488367350108546816 + 7.73887247457810456552351752i), + (7.73887247457810456552351752 - 0.27688005719200159404635997i), + (-0.27688005719200159404635997 - 5.01060361827107492160848778i), + (-5.01060361827107492160848778 + 9.63629370719841737980004837i), + (9.63629370719841737980004837 + 2.92637723924396464525443662i), + (2.92637723924396464525443662 + 5.22908343145930665230025625i), + (5.22908343145930665230025625 + 2.72793991043601025126008608i), + (2.72793991043601025126008608 + 1.82530809168085506044576505i), + (1.82530809168085506044576505 - 8.68592476857560136238589621i), + (-8.68592476857560136238589621 + 4.97901192488367350108546816i), +} +var vc = []complex128{ + (4.9790119248836735e+00 + 7.7388724745781045e+00i), + (7.7388724745781045e+00 - 2.7688005719200159e-01i), + (-2.7688005719200159e-01 - 5.0106036182710749e+00i), + (-5.0106036182710749e+00 + 9.6362937071984173e+00i), + (9.6362937071984173e+00 + 2.9263772392439646e+00i), + (2.9263772392439646e+00 + 5.2290834314593066e+00i), + (5.2290834314593066e+00 + 2.7279399104360102e+00i), + (2.7279399104360102e+00 + 1.8253080916808550e+00i), + (1.8253080916808550e+00 - 8.6859247685756013e+00i), + (-8.6859247685756013e+00 + 4.9790119248836735e+00i), +} + +// The expected results below were computed by the high precision calculators +// at http://keisan.casio.com/. More exact input values (array vc[], above) +// were obtained by printing them with "%.26f". The answers were calculated +// to 26 digits (by using the "Digit number" drop-down control of each +// calculator). 
+ +var abs = []float64{ + 9.2022120669932650313380972e+00, + 7.7438239742296106616261394e+00, + 5.0182478202557746902556648e+00, + 1.0861137372799545160704002e+01, + 1.0070841084922199607011905e+01, + 5.9922447613166942183705192e+00, + 5.8978784056736762299945176e+00, + 3.2822866700678709020367184e+00, + 8.8756430028990417290744307e+00, + 1.0011785496777731986390856e+01, +} + +var acos = []complex128{ + (1.0017679804707456328694569 - 2.9138232718554953784519807i), + (0.03606427612041407369636057 + 2.7358584434576260925091256i), + (1.6249365462333796703711823 + 2.3159537454335901187730929i), + (2.0485650849650740120660391 - 3.0795576791204117911123886i), + (0.29621132089073067282488147 - 3.0007392508200622519398814i), + (1.0664555914934156601503632 - 2.4872865024796011364747111i), + (0.48681307452231387690013905 - 2.463655912283054555225301i), + (0.6116977071277574248407752 - 1.8734458851737055262693056i), + (1.3649311280370181331184214 + 2.8793528632328795424123832i), + (2.6189310485682988308904501 - 2.9956543302898767795858704i), +} +var acosh = []complex128{ + (2.9138232718554953784519807 + 1.0017679804707456328694569i), + (2.7358584434576260925091256 - 0.03606427612041407369636057i), + (2.3159537454335901187730929 - 1.6249365462333796703711823i), + (3.0795576791204117911123886 + 2.0485650849650740120660391i), + (3.0007392508200622519398814 + 0.29621132089073067282488147i), + (2.4872865024796011364747111 + 1.0664555914934156601503632i), + (2.463655912283054555225301 + 0.48681307452231387690013905i), + (1.8734458851737055262693056 + 0.6116977071277574248407752i), + (2.8793528632328795424123832 - 1.3649311280370181331184214i), + (2.9956543302898767795858704 + 2.6189310485682988308904501i), +} +var asin = []complex128{ + (0.56902834632415098636186476 + 2.9138232718554953784519807i), + (1.5347320506744825455349611 - 2.7358584434576260925091256i), + (-0.054140219438483051139860579 - 2.3159537454335901187730929i), + (-0.47776875817017739283471738 + 
3.0795576791204117911123886i), + (1.2745850059041659464064402 + 3.0007392508200622519398814i), + (0.50434073530148095908095852 + 2.4872865024796011364747111i), + (1.0839832522725827423311826 + 2.463655912283054555225301i), + (0.9590986196671391943905465 + 1.8734458851737055262693056i), + (0.20586519875787848611290031 - 2.8793528632328795424123832i), + (-1.0481347217734022116591284 + 2.9956543302898767795858704i), +} +var asinh = []complex128{ + (2.9113760469415295679342185 + 0.99639459545704326759805893i), + (2.7441755423994259061579029 - 0.035468308789000500601119392i), + (-2.2962136462520690506126678 - 1.5144663565690151885726707i), + (-3.0771233459295725965402455 + 1.0895577967194013849422294i), + (3.0048366100923647417557027 + 0.29346979169819220036454168i), + (2.4800059370795363157364643 + 1.0545868606049165710424232i), + (2.4718773838309585611141821 + 0.47502344364250803363708842i), + (1.8910743588080159144378396 + 0.56882925572563602341139174i), + (2.8735426423367341878069406 - 1.362376149648891420997548i), + (-2.9981750586172477217567878 + 0.5183571985225367505624207i), +} +var atan = []complex128{ + (1.5115747079332741358607654 + 0.091324403603954494382276776i), + (1.4424504323482602560806727 - 0.0045416132642803911503770933i), + (-1.5593488703630532674484026 - 0.20163295409248362456446431i), + (-1.5280619472445889867794105 + 0.081721556230672003746956324i), + (1.4759909163240799678221039 + 0.028602969320691644358773586i), + (1.4877353772046548932715555 + 0.14566877153207281663773599i), + (1.4206983927779191889826 + 0.076830486127880702249439993i), + (1.3162236060498933364869556 + 0.16031313000467530644933363i), + (1.5473450684303703578810093 - 0.11064907507939082484935782i), + (-1.4841462340185253987375812 + 0.049341850305024399493142411i), +} +var atanh = []complex128{ + (0.058375027938968509064640438 + 1.4793488495105334458167782i), + (0.12977343497790381229915667 - 1.5661009410463561327262499i), + (-0.010576456067347252072200088 - 
1.3743698658402284549750563i), + (-0.042218595678688358882784918 + 1.4891433968166405606692604i), + (0.095218997991316722061828397 + 1.5416884098777110330499698i), + (0.079965459366890323857556487 + 1.4252510353873192700350435i), + (0.15051245471980726221708301 + 1.4907432533016303804884461i), + (0.25082072933993987714470373 + 1.392057665392187516442986i), + (0.022896108815797135846276662 - 1.4609224989282864208963021i), + (-0.08665624101841876130537396 + 1.5207902036935093480142159i), +} +var conj = []complex128{ + (4.9790119248836735e+00 - 7.7388724745781045e+00i), + (7.7388724745781045e+00 + 2.7688005719200159e-01i), + (-2.7688005719200159e-01 + 5.0106036182710749e+00i), + (-5.0106036182710749e+00 - 9.6362937071984173e+00i), + (9.6362937071984173e+00 - 2.9263772392439646e+00i), + (2.9263772392439646e+00 - 5.2290834314593066e+00i), + (5.2290834314593066e+00 - 2.7279399104360102e+00i), + (2.7279399104360102e+00 - 1.8253080916808550e+00i), + (1.8253080916808550e+00 + 8.6859247685756013e+00i), + (-8.6859247685756013e+00 - 4.9790119248836735e+00i), +} +var cos = []complex128{ + (3.024540920601483938336569e+02 + 1.1073797572517071650045357e+03i), + (1.192858682649064973252758e-01 + 2.7857554122333065540970207e-01i), + (7.2144394304528306603857962e+01 - 2.0500129667076044169954205e+01i), + (2.24921952538403984190541e+03 - 7.317363745602773587049329e+03i), + (-9.148222970032421760015498e+00 + 1.953124661113563541862227e+00i), + (-9.116081175857732248227078e+01 - 1.992669213569952232487371e+01i), + (3.795639179042704640002918e+00 + 6.623513350981458399309662e+00i), + (-2.9144840732498869560679084e+00 - 1.214620271628002917638748e+00i), + (-7.45123482501299743872481e+02 + 2.8641692314488080814066734e+03i), + (-5.371977967039319076416747e+01 + 4.893348341339375830564624e+01i), +} +var cosh = []complex128{ + (8.34638383523018249366948e+00 + 7.2181057886425846415112064e+01i), + (1.10421967379919366952251e+03 - 3.1379638689277575379469861e+02i), + 
(3.051485206773701584738512e-01 - 2.6805384730105297848044485e-01i), + (-7.33294728684187933370938e+01 + 1.574445942284918251038144e+01i), + (-7.478643293945957535757355e+03 + 1.6348382209913353929473321e+03i), + (4.622316522966235701630926e+00 - 8.088695185566375256093098e+00i), + (-8.544333183278877406197712e+01 + 3.7505836120128166455231717e+01i), + (-1.934457815021493925115198e+00 + 7.3725859611767228178358673e+00i), + (-2.352958770061749348353548e+00 - 2.034982010440878358915409e+00i), + (7.79756457532134748165069e+02 + 2.8549350716819176560377717e+03i), +} +var exp = []complex128{ + (1.669197736864670815125146e+01 + 1.4436895109507663689174096e+02i), + (2.2084389286252583447276212e+03 - 6.2759289284909211238261917e+02i), + (2.227538273122775173434327e-01 + 7.2468284028334191250470034e-01i), + (-6.5182985958153548997881627e-03 - 1.39965837915193860879044e-03i), + (-1.4957286524084015746110777e+04 + 3.269676455931135688988042e+03i), + (9.218158701983105935659273e+00 - 1.6223985291084956009304582e+01i), + (-1.7088175716853040841444505e+02 + 7.501382609870410713795546e+01i), + (-3.852461315830959613132505e+00 + 1.4808420423156073221970892e+01i), + (-4.586775503301407379786695e+00 - 4.178501081246873415144744e+00i), + (4.451337963005453491095747e-05 - 1.62977574205442915935263e-04i), +} +var log = []complex128{ + (2.2194438972179194425697051e+00 + 9.9909115046919291062461269e-01i), + (2.0468956191154167256337289e+00 - 3.5762575021856971295156489e-02i), + (1.6130808329853860438751244e+00 - 1.6259990074019058442232221e+00i), + (2.3851910394823008710032651e+00 + 2.0502936359659111755031062e+00i), + (2.3096442270679923004800651e+00 + 2.9483213155446756211881774e-01i), + (1.7904660933974656106951860e+00 + 1.0605860367252556281902109e+00i), + (1.7745926939841751666177512e+00 + 4.8084556083358307819310911e-01i), + (1.1885403350045342425648780e+00 + 5.8969634164776659423195222e-01i), + (2.1833107837679082586772505e+00 - 1.3636647724582455028314573e+00i), + 
(2.3037629487273259170991671e+00 + 2.6210913895386013290915234e+00i), +} +var log10 = []complex128{ + (9.6389223745559042474184943e-01 + 4.338997735671419492599631e-01i), + (8.8895547241376579493490892e-01 - 1.5531488990643548254864806e-02i), + (7.0055210462945412305244578e-01 - 7.0616239649481243222248404e-01i), + (1.0358753067322445311676952e+00 + 8.9043121238134980156490909e-01i), + (1.003065742975330237172029e+00 + 1.2804396782187887479857811e-01i), + (7.7758954439739162532085157e-01 + 4.6060666333341810869055108e-01i), + (7.7069581462315327037689152e-01 + 2.0882857371769952195512475e-01i), + (5.1617650901191156135137239e-01 + 2.5610186717615977620363299e-01i), + (9.4819982567026639742663212e-01 - 5.9223208584446952284914289e-01i), + (1.0005115362454417135973429e+00 + 1.1383255270407412817250921e+00i), +} + +type ff struct { + r, theta float64 +} + +var polar = []ff{ + {9.2022120669932650313380972e+00, 9.9909115046919291062461269e-01}, + {7.7438239742296106616261394e+00, -3.5762575021856971295156489e-02}, + {5.0182478202557746902556648e+00, -1.6259990074019058442232221e+00}, + {1.0861137372799545160704002e+01, 2.0502936359659111755031062e+00}, + {1.0070841084922199607011905e+01, 2.9483213155446756211881774e-01}, + {5.9922447613166942183705192e+00, 1.0605860367252556281902109e+00}, + {5.8978784056736762299945176e+00, 4.8084556083358307819310911e-01}, + {3.2822866700678709020367184e+00, 5.8969634164776659423195222e-01}, + {8.8756430028990417290744307e+00, -1.3636647724582455028314573e+00}, + {1.0011785496777731986390856e+01, 2.6210913895386013290915234e+00}, +} +var pow = []complex128{ + (-2.499956739197529585028819e+00 + 1.759751724335650228957144e+00i), + (7.357094338218116311191939e+04 - 5.089973412479151648145882e+04i), + (1.320777296067768517259592e+01 - 3.165621914333901498921986e+01i), + (-3.123287828297300934072149e-07 - 1.9849567521490553032502223E-7i), + (8.0622651468477229614813e+04 - 7.80028727944573092944363e+04i), + (-1.0268824572103165858577141e+00 
- 4.716844738244989776610672e-01i), + (-4.35953819012244175753187e+01 + 2.2036445974645306917648585e+02i), + (8.3556092283250594950239e-01 - 1.2261571947167240272593282e+01i), + (1.582292972120769306069625e+03 + 1.273564263524278244782512e+04i), + (6.592208301642122149025369e-08 + 2.584887236651661903526389e-08i), +} +var sin = []complex128{ + (-1.1073801774240233539648544e+03 + 3.024539773002502192425231e+02i), + (1.0317037521400759359744682e+00 - 3.2208979799929570242818e-02i), + (-2.0501952097271429804261058e+01 - 7.2137981348240798841800967e+01i), + (7.3173638080346338642193078e+03 + 2.249219506193664342566248e+03i), + (-1.964375633631808177565226e+00 - 9.0958264713870404464159683e+00i), + (1.992783647158514838337674e+01 - 9.11555769410191350416942e+01i), + (-6.680335650741921444300349e+00 + 3.763353833142432513086117e+00i), + (1.2794028166657459148245993e+00 - 2.7669092099795781155109602e+00i), + (2.8641693949535259594188879e+03 + 7.451234399649871202841615e+02i), + (-4.893811726244659135553033e+01 - 5.371469305562194635957655e+01i), +} +var sinh = []complex128{ + (8.34559353341652565758198e+00 + 7.2187893208650790476628899e+01i), + (1.1042192548260646752051112e+03 - 3.1379650595631635858792056e+02i), + (-8.239469336509264113041849e-02 + 9.9273668758439489098514519e-01i), + (7.332295456982297798219401e+01 - 1.574585908122833444899023e+01i), + (-7.4786432301380582103534216e+03 + 1.63483823493980029604071e+03i), + (4.595842179016870234028347e+00 - 8.135290105518580753211484e+00i), + (-8.543842533574163435246793e+01 + 3.750798997857594068272375e+01i), + (-1.918003500809465688017307e+00 + 7.4358344619793504041350251e+00i), + (-2.233816733239658031433147e+00 - 2.143519070805995056229335e+00i), + (-7.797564130187551181105341e+02 - 2.8549352346594918614806877e+03i), +} +var sqrt = []complex128{ + (2.6628203086086130543813948e+00 + 1.4531345674282185229796902e+00i), + (2.7823278427251986247149295e+00 - 4.9756907317005224529115567e-02i), + 
(1.5397025302089642757361015e+00 - 1.6271336573016637535695727e+00i), + (1.7103411581506875260277898e+00 + 2.8170677122737589676157029e+00i), + (3.1390392472953103383607947e+00 + 4.6612625849858653248980849e-01i), + (2.1117080764822417640789287e+00 + 1.2381170223514273234967850e+00i), + (2.3587032281672256703926939e+00 + 5.7827111903257349935720172e-01i), + (1.7335262588873410476661577e+00 + 5.2647258220721269141550382e-01i), + (2.3131094974708716531499282e+00 - 1.8775429304303785570775490e+00i), + (8.1420535745048086240947359e-01 + 3.0575897587277248522656113e+00i), +} +var tan = []complex128{ + (-1.928757919086441129134525e-07 + 1.0000003267499169073251826e+00i), + (1.242412685364183792138948e+00 - 3.17149693883133370106696e+00i), + (-4.6745126251587795225571826e-05 - 9.9992439225263959286114298e-01i), + (4.792363401193648192887116e-09 + 1.0000000070589333451557723e+00i), + (2.345740824080089140287315e-03 + 9.947733046570988661022763e-01i), + (-2.396030789494815566088809e-05 + 9.9994781345418591429826779e-01i), + (-7.370204836644931340905303e-03 + 1.0043553413417138987717748e+00i), + (-3.691803847992048527007457e-02 + 9.6475071993469548066328894e-01i), + (-2.781955256713729368401878e-08 - 1.000000049848910609006646e+00i), + (9.4281590064030478879791249e-05 + 9.9999119340863718183758545e-01i), +} +var tanh = []complex128{ + (1.0000921981225144748819918e+00 + 2.160986245871518020231507e-05i), + (9.9999967727531993209562591e-01 - 1.9953763222959658873657676e-07i), + (-1.765485739548037260789686e+00 + 1.7024216325552852445168471e+00i), + (-9.999189442732736452807108e-01 + 3.64906070494473701938098e-05i), + (9.9999999224622333738729767e-01 - 3.560088949517914774813046e-09i), + (1.0029324933367326862499343e+00 - 4.948790309797102353137528e-03i), + (9.9996113064788012488693567e-01 - 4.226995742097032481451259e-05i), + (1.0074784189316340029873945e+00 - 4.194050814891697808029407e-03i), + (9.9385534229718327109131502e-01 + 5.144217985914355502713437e-02i), + 
(-1.0000000491604982429364892e+00 - 2.901873195374433112227349e-08i), +} + +// special cases +var vcAbsSC = []complex128{ + NaN(), +} +var absSC = []float64{ + math.NaN(), +} +var vcAcosSC = []complex128{ + NaN(), +} +var acosSC = []complex128{ + NaN(), +} +var vcAcoshSC = []complex128{ + NaN(), +} +var acoshSC = []complex128{ + NaN(), +} +var vcAsinSC = []complex128{ + NaN(), +} +var asinSC = []complex128{ + NaN(), +} +var vcAsinhSC = []complex128{ + NaN(), +} +var asinhSC = []complex128{ + NaN(), +} +var vcAtanSC = []complex128{ + NaN(), +} +var atanSC = []complex128{ + NaN(), +} +var vcAtanhSC = []complex128{ + NaN(), +} +var atanhSC = []complex128{ + NaN(), +} +var vcConjSC = []complex128{ + NaN(), +} +var conjSC = []complex128{ + NaN(), +} +var vcCosSC = []complex128{ + NaN(), +} +var cosSC = []complex128{ + NaN(), +} +var vcCoshSC = []complex128{ + NaN(), +} +var coshSC = []complex128{ + NaN(), +} +var vcExpSC = []complex128{ + NaN(), +} +var expSC = []complex128{ + NaN(), +} +var vcIsNaNSC = []complex128{ + complex(math.Inf(-1), math.Inf(-1)), + complex(math.Inf(-1), math.NaN()), + complex(math.NaN(), math.Inf(-1)), + complex(0, math.NaN()), + complex(math.NaN(), 0), + complex(math.Inf(1), math.Inf(1)), + complex(math.Inf(1), math.NaN()), + complex(math.NaN(), math.Inf(1)), + complex(math.NaN(), math.NaN()), +} +var isNaNSC = []bool{ + false, + false, + false, + true, + true, + false, + false, + false, + true, +} +var vcLogSC = []complex128{ + NaN(), +} +var logSC = []complex128{ + NaN(), +} +var vcLog10SC = []complex128{ + NaN(), +} +var log10SC = []complex128{ + NaN(), +} +var vcPolarSC = []complex128{ + NaN(), +} +var polarSC = []ff{ + {math.NaN(), math.NaN()}, +} +var vcPowSC = [][2]complex128{ + {NaN(), NaN()}, +} +var powSC = []complex128{ + NaN(), +} +var vcSinSC = []complex128{ + NaN(), +} +var sinSC = []complex128{ + NaN(), +} +var vcSinhSC = []complex128{ + NaN(), +} +var sinhSC = []complex128{ + NaN(), +} +var vcSqrtSC = []complex128{ + NaN(), +} 
+var sqrtSC = []complex128{ + NaN(), +} +var vcTanSC = []complex128{ + NaN(), +} +var tanSC = []complex128{ + NaN(), +} +var vcTanhSC = []complex128{ + NaN(), +} +var tanhSC = []complex128{ + NaN(), +} + +// functions borrowed from pkg/math/all_test.go +func tolerance(a, b, e float64) bool { + d := a - b + if d < 0 { + d = -d + } + + if a != 0 { + e = e * a + if e < 0 { + e = -e + } + } + return d < e +} +func soclose(a, b, e float64) bool { return tolerance(a, b, e) } +func veryclose(a, b float64) bool { return tolerance(a, b, 4e-16) } +func alike(a, b float64) bool { + switch { + case a != a && b != b: // math.IsNaN(a) && math.IsNaN(b): + return true + case a == b: + return math.Signbit(a) == math.Signbit(b) + } + return false +} + +func cTolerance(a, b complex128, e float64) bool { + d := Abs(a - b) + if a != 0 { + e = e * Abs(a) + if e < 0 { + e = -e + } + } + return d < e +} +func cSoclose(a, b complex128, e float64) bool { return cTolerance(a, b, e) } +func cVeryclose(a, b complex128) bool { return cTolerance(a, b, 4e-16) } +func cAlike(a, b complex128) bool { + switch { + case IsNaN(a) && IsNaN(b): + return true + case a == b: + return math.Signbit(real(a)) == math.Signbit(real(b)) && math.Signbit(imag(a)) == math.Signbit(imag(b)) + } + return false +} + +func TestAbs(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Abs(vc[i]); !veryclose(abs[i], f) { + t.Errorf("Abs(%g) = %g, want %g", vc[i], f, abs[i]) + } + } + for i := 0; i < len(vcAbsSC); i++ { + if f := Abs(vcAbsSC[i]); !alike(absSC[i], f) { + t.Errorf("Abs(%g) = %g, want %g", vcAbsSC[i], f, absSC[i]) + } + } +} +func TestAcos(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Acos(vc[i]); !cSoclose(acos[i], f, 1e-14) { + t.Errorf("Acos(%g) = %g, want %g", vc[i], f, acos[i]) + } + } + for i := 0; i < len(vcAcosSC); i++ { + if f := Acos(vcAcosSC[i]); !cAlike(acosSC[i], f) { + t.Errorf("Acos(%g) = %g, want %g", vcAcosSC[i], f, acosSC[i]) + } + } +} +func TestAcosh(t *testing.T) { + for 
i := 0; i < len(vc); i++ { + if f := Acosh(vc[i]); !cSoclose(acosh[i], f, 1e-14) { + t.Errorf("Acosh(%g) = %g, want %g", vc[i], f, acosh[i]) + } + } + for i := 0; i < len(vcAcoshSC); i++ { + if f := Acosh(vcAcoshSC[i]); !cAlike(acoshSC[i], f) { + t.Errorf("Acosh(%g) = %g, want %g", vcAcoshSC[i], f, acoshSC[i]) + } + } +} +func TestAsin(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Asin(vc[i]); !cSoclose(asin[i], f, 1e-14) { + t.Errorf("Asin(%g) = %g, want %g", vc[i], f, asin[i]) + } + } + for i := 0; i < len(vcAsinSC); i++ { + if f := Asin(vcAsinSC[i]); !cAlike(asinSC[i], f) { + t.Errorf("Asin(%g) = %g, want %g", vcAsinSC[i], f, asinSC[i]) + } + } +} +func TestAsinh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Asinh(vc[i]); !cSoclose(asinh[i], f, 4e-15) { + t.Errorf("Asinh(%g) = %g, want %g", vc[i], f, asinh[i]) + } + } + for i := 0; i < len(vcAsinhSC); i++ { + if f := Asinh(vcAsinhSC[i]); !cAlike(asinhSC[i], f) { + t.Errorf("Asinh(%g) = %g, want %g", vcAsinhSC[i], f, asinhSC[i]) + } + } +} +func TestAtan(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Atan(vc[i]); !cVeryclose(atan[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vc[i], f, atan[i]) + } + } + for i := 0; i < len(vcAtanSC); i++ { + if f := Atan(vcAtanSC[i]); !cAlike(atanSC[i], f) { + t.Errorf("Atan(%g) = %g, want %g", vcAtanSC[i], f, atanSC[i]) + } + } +} +func TestAtanh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Atanh(vc[i]); !cVeryclose(atanh[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", vc[i], f, atanh[i]) + } + } + for i := 0; i < len(vcAtanhSC); i++ { + if f := Atanh(vcAtanhSC[i]); !cAlike(atanhSC[i], f) { + t.Errorf("Atanh(%g) = %g, want %g", vcAtanhSC[i], f, atanhSC[i]) + } + } +} +func TestConj(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Conj(vc[i]); !cVeryclose(conj[i], f) { + t.Errorf("Conj(%g) = %g, want %g", vc[i], f, conj[i]) + } + } + for i := 0; i < len(vcConjSC); i++ { + if f := Conj(vcConjSC[i]); !cAlike(conjSC[i], f) 
{ + t.Errorf("Conj(%g) = %g, want %g", vcConjSC[i], f, conjSC[i]) + } + } +} +func TestCos(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Cos(vc[i]); !cSoclose(cos[i], f, 3e-15) { + t.Errorf("Cos(%g) = %g, want %g", vc[i], f, cos[i]) + } + } + for i := 0; i < len(vcCosSC); i++ { + if f := Cos(vcCosSC[i]); !cAlike(cosSC[i], f) { + t.Errorf("Cos(%g) = %g, want %g", vcCosSC[i], f, cosSC[i]) + } + } +} +func TestCosh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Cosh(vc[i]); !cSoclose(cosh[i], f, 2e-15) { + t.Errorf("Cosh(%g) = %g, want %g", vc[i], f, cosh[i]) + } + } + for i := 0; i < len(vcCoshSC); i++ { + if f := Cosh(vcCoshSC[i]); !cAlike(coshSC[i], f) { + t.Errorf("Cosh(%g) = %g, want %g", vcCoshSC[i], f, coshSC[i]) + } + } +} +func TestExp(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Exp(vc[i]); !cSoclose(exp[i], f, 1e-15) { + t.Errorf("Exp(%g) = %g, want %g", vc[i], f, exp[i]) + } + } + for i := 0; i < len(vcExpSC); i++ { + if f := Exp(vcExpSC[i]); !cAlike(expSC[i], f) { + t.Errorf("Exp(%g) = %g, want %g", vcExpSC[i], f, expSC[i]) + } + } +} +func TestIsNaN(t *testing.T) { + for i := 0; i < len(vcIsNaNSC); i++ { + if f := IsNaN(vcIsNaNSC[i]); isNaNSC[i] != f { + t.Errorf("IsNaN(%v) = %v, want %v", vcIsNaNSC[i], f, isNaNSC[i]) + } + } +} +func TestLog(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Log(vc[i]); !cVeryclose(log[i], f) { + t.Errorf("Log(%g) = %g, want %g", vc[i], f, log[i]) + } + } + for i := 0; i < len(vcLogSC); i++ { + if f := Log(vcLogSC[i]); !cAlike(logSC[i], f) { + t.Errorf("Log(%g) = %g, want %g", vcLogSC[i], f, logSC[i]) + } + } +} +func TestLog10(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Log10(vc[i]); !cVeryclose(log10[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vc[i], f, log10[i]) + } + } + for i := 0; i < len(vcLog10SC); i++ { + if f := Log10(vcLog10SC[i]); !cAlike(log10SC[i], f) { + t.Errorf("Log10(%g) = %g, want %g", vcLog10SC[i], f, log10SC[i]) + } + } +} +func 
TestPolar(t *testing.T) { + for i := 0; i < len(vc); i++ { + if r, theta := Polar(vc[i]); !veryclose(polar[i].r, r) && !veryclose(polar[i].theta, theta) { + t.Errorf("Polar(%g) = %g, %g want %g, %g", vc[i], r, theta, polar[i].r, polar[i].theta) + } + } + for i := 0; i < len(vcPolarSC); i++ { + if r, theta := Polar(vcPolarSC[i]); !alike(polarSC[i].r, r) && !alike(polarSC[i].theta, theta) { + t.Errorf("Polar(%g) = %g, %g, want %g, %g", vcPolarSC[i], r, theta, polarSC[i].r, polarSC[i].theta) + } + } +} +func TestPow(t *testing.T) { + // Special cases for Pow(0, c). + var zero = complex(0, 0) + zeroPowers := [][2]complex128{ + {0, 1 + 0i}, + {1.5, 0 + 0i}, + {-1.5, complex(math.Inf(0), 0)}, + {-1.5 + 1.5i, Inf()}, + } + for _, zp := range zeroPowers { + if f := Pow(zero, zp[0]); f != zp[1] { + t.Errorf("Pow(%g, %g) = %g, want %g", zero, zp[0], f, zp[1]) + } + } + var a = complex(3.0, 3.0) + for i := 0; i < len(vc); i++ { + if f := Pow(a, vc[i]); !cSoclose(pow[i], f, 4e-15) { + t.Errorf("Pow(%g, %g) = %g, want %g", a, vc[i], f, pow[i]) + } + } + for i := 0; i < len(vcPowSC); i++ { + if f := Pow(vcPowSC[i][0], vcPowSC[i][0]); !cAlike(powSC[i], f) { + t.Errorf("Pow(%g, %g) = %g, want %g", vcPowSC[i][0], vcPowSC[i][0], f, powSC[i]) + } + } +} +func TestRect(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Rect(polar[i].r, polar[i].theta); !cVeryclose(vc[i], f) { + t.Errorf("Rect(%g, %g) = %g want %g", polar[i].r, polar[i].theta, f, vc[i]) + } + } + for i := 0; i < len(vcPolarSC); i++ { + if f := Rect(polarSC[i].r, polarSC[i].theta); !cAlike(vcPolarSC[i], f) { + t.Errorf("Rect(%g, %g) = %g, want %g", polarSC[i].r, polarSC[i].theta, f, vcPolarSC[i]) + } + } +} +func TestSin(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Sin(vc[i]); !cSoclose(sin[i], f, 2e-15) { + t.Errorf("Sin(%g) = %g, want %g", vc[i], f, sin[i]) + } + } + for i := 0; i < len(vcSinSC); i++ { + if f := Sin(vcSinSC[i]); !cAlike(sinSC[i], f) { + t.Errorf("Sin(%g) = %g, want %g", 
vcSinSC[i], f, sinSC[i]) + } + } +} +func TestSinh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Sinh(vc[i]); !cSoclose(sinh[i], f, 2e-15) { + t.Errorf("Sinh(%g) = %g, want %g", vc[i], f, sinh[i]) + } + } + for i := 0; i < len(vcSinhSC); i++ { + if f := Sinh(vcSinhSC[i]); !cAlike(sinhSC[i], f) { + t.Errorf("Sinh(%g) = %g, want %g", vcSinhSC[i], f, sinhSC[i]) + } + } +} +func TestSqrt(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Sqrt(vc[i]); !cVeryclose(sqrt[i], f) { + t.Errorf("Sqrt(%g) = %g, want %g", vc[i], f, sqrt[i]) + } + } + for i := 0; i < len(vcSqrtSC); i++ { + if f := Sqrt(vcSqrtSC[i]); !cAlike(sqrtSC[i], f) { + t.Errorf("Sqrt(%g) = %g, want %g", vcSqrtSC[i], f, sqrtSC[i]) + } + } +} +func TestTan(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Tan(vc[i]); !cSoclose(tan[i], f, 3e-15) { + t.Errorf("Tan(%g) = %g, want %g", vc[i], f, tan[i]) + } + } + for i := 0; i < len(vcTanSC); i++ { + if f := Tan(vcTanSC[i]); !cAlike(tanSC[i], f) { + t.Errorf("Tan(%g) = %g, want %g", vcTanSC[i], f, tanSC[i]) + } + } +} +func TestTanh(t *testing.T) { + for i := 0; i < len(vc); i++ { + if f := Tanh(vc[i]); !cSoclose(tanh[i], f, 2e-15) { + t.Errorf("Tanh(%g) = %g, want %g", vc[i], f, tanh[i]) + } + } + for i := 0; i < len(vcTanhSC); i++ { + if f := Tanh(vcTanhSC[i]); !cAlike(tanhSC[i], f) { + t.Errorf("Tanh(%g) = %g, want %g", vcTanhSC[i], f, tanhSC[i]) + } + } +} + +func BenchmarkAbs(b *testing.B) { + for i := 0; i < b.N; i++ { + Abs(complex(2.5, 3.5)) + } +} +func BenchmarkAcos(b *testing.B) { + for i := 0; i < b.N; i++ { + Acos(complex(2.5, 3.5)) + } +} +func BenchmarkAcosh(b *testing.B) { + for i := 0; i < b.N; i++ { + Acosh(complex(2.5, 3.5)) + } +} +func BenchmarkAsin(b *testing.B) { + for i := 0; i < b.N; i++ { + Asin(complex(2.5, 3.5)) + } +} +func BenchmarkAsinh(b *testing.B) { + for i := 0; i < b.N; i++ { + Asinh(complex(2.5, 3.5)) + } +} +func BenchmarkAtan(b *testing.B) { + for i := 0; i < b.N; i++ { + Atan(complex(2.5, 3.5)) 
+ } +} +func BenchmarkAtanh(b *testing.B) { + for i := 0; i < b.N; i++ { + Atanh(complex(2.5, 3.5)) + } +} +func BenchmarkConj(b *testing.B) { + for i := 0; i < b.N; i++ { + Conj(complex(2.5, 3.5)) + } +} +func BenchmarkCos(b *testing.B) { + for i := 0; i < b.N; i++ { + Cos(complex(2.5, 3.5)) + } +} +func BenchmarkCosh(b *testing.B) { + for i := 0; i < b.N; i++ { + Cosh(complex(2.5, 3.5)) + } +} +func BenchmarkExp(b *testing.B) { + for i := 0; i < b.N; i++ { + Exp(complex(2.5, 3.5)) + } +} +func BenchmarkLog(b *testing.B) { + for i := 0; i < b.N; i++ { + Log(complex(2.5, 3.5)) + } +} +func BenchmarkLog10(b *testing.B) { + for i := 0; i < b.N; i++ { + Log10(complex(2.5, 3.5)) + } +} +func BenchmarkPhase(b *testing.B) { + for i := 0; i < b.N; i++ { + Phase(complex(2.5, 3.5)) + } +} +func BenchmarkPolar(b *testing.B) { + for i := 0; i < b.N; i++ { + Polar(complex(2.5, 3.5)) + } +} +func BenchmarkPow(b *testing.B) { + for i := 0; i < b.N; i++ { + Pow(complex(2.5, 3.5), complex(2.5, 3.5)) + } +} +func BenchmarkRect(b *testing.B) { + for i := 0; i < b.N; i++ { + Rect(2.5, 1.5) + } +} +func BenchmarkSin(b *testing.B) { + for i := 0; i < b.N; i++ { + Sin(complex(2.5, 3.5)) + } +} +func BenchmarkSinh(b *testing.B) { + for i := 0; i < b.N; i++ { + Sinh(complex(2.5, 3.5)) + } +} +func BenchmarkSqrt(b *testing.B) { + for i := 0; i < b.N; i++ { + Sqrt(complex(2.5, 3.5)) + } +} +func BenchmarkTan(b *testing.B) { + for i := 0; i < b.N; i++ { + Tan(complex(2.5, 3.5)) + } +} +func BenchmarkTanh(b *testing.B) { + for i := 0; i < b.N; i++ { + Tanh(complex(2.5, 3.5)) + } +} diff --git a/src/math/cmplx/conj.go b/src/math/cmplx/conj.go new file mode 100644 index 000000000..34a4277c1 --- /dev/null +++ b/src/math/cmplx/conj.go @@ -0,0 +1,8 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +// Conj returns the complex conjugate of x. 
+func Conj(x complex128) complex128 { return complex(real(x), -imag(x)) } diff --git a/src/math/cmplx/exp.go b/src/math/cmplx/exp.go new file mode 100644 index 000000000..485ed2c78 --- /dev/null +++ b/src/math/cmplx/exp.go @@ -0,0 +1,55 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex exponential function +// +// DESCRIPTION: +// +// Returns the complex exponential of the complex argument z. +// +// If +// z = x + iy, +// r = exp(x), +// then +// w = r cos y + i r sin y. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 8700 3.7e-17 1.1e-17 +// IEEE -10,+10 30000 3.0e-16 8.7e-17 + +// Exp returns e**x, the base-e exponential of x. 
func Exp(x complex128) complex128 {
	// e**(a+bi) = e**a * (cos b + i sin b).
	magnitude := math.Exp(real(x))
	sin, cos := math.Sincos(imag(x))
	return complex(magnitude*cos, magnitude*sin)
}

// IsInf reports whether the real part or the imaginary part of x
// is an infinity of either sign.
func IsInf(x complex128) bool {
	re, im := real(x), imag(x)
	return math.IsInf(re, 0) || math.IsInf(im, 0)
}

// Inf returns the complex infinity complex(+Inf, +Inf).
func Inf() complex128 {
	return complex(math.Inf(1), math.Inf(1))
}

// IsNaN reports whether x has a NaN component while having no
// infinite component: an infinity in either part takes precedence
// over a NaN in the other.
func IsNaN(x complex128) bool {
	re, im := real(x), imag(x)
	if math.IsInf(re, 0) || math.IsInf(im, 0) {
		return false
	}
	return math.IsNaN(re) || math.IsNaN(im)
}

// NaN returns a complex ``not-a-number'' value whose real and
// imaginary parts are both NaN.
func NaN() complex128 {
	return complex(math.NaN(), math.NaN())
}
+ +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex natural logarithm +// +// DESCRIPTION: +// +// Returns complex logarithm to the base e (2.718...) of +// the complex argument z. +// +// If +// z = x + iy, r = sqrt( x**2 + y**2 ), +// then +// w = log(r) + i arctan(y/x). +// +// The arctangent ranges from -PI to +PI. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 7000 8.5e-17 1.9e-17 +// IEEE -10,+10 30000 5.0e-15 1.1e-16 +// +// Larger relative error can be observed for z near 1 +i0. +// In IEEE arithmetic the peak absolute error is 5.2e-16, rms +// absolute error 1.0e-16. + +// Log returns the natural logarithm of x. +func Log(x complex128) complex128 { + return complex(math.Log(Abs(x)), Phase(x)) +} + +// Log10 returns the decimal logarithm of x. 
+func Log10(x complex128) complex128 { + return math.Log10E * Log(x) +} diff --git a/src/math/cmplx/phase.go b/src/math/cmplx/phase.go new file mode 100644 index 000000000..03cece8a5 --- /dev/null +++ b/src/math/cmplx/phase.go @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// Phase returns the phase (also called the argument) of x. +// The returned value is in the range [-Pi, Pi]. +func Phase(x complex128) float64 { return math.Atan2(imag(x), real(x)) } diff --git a/src/math/cmplx/polar.go b/src/math/cmplx/polar.go new file mode 100644 index 000000000..9b192bc62 --- /dev/null +++ b/src/math/cmplx/polar.go @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +// Polar returns the absolute value r and phase θ of x, +// such that x = r * e**θi. +// The phase is in the range [-Pi, Pi]. +func Polar(x complex128) (r, θ float64) { + return Abs(x), Phase(x) +} diff --git a/src/math/cmplx/pow.go b/src/math/cmplx/pow.go new file mode 100644 index 000000000..1630b879b --- /dev/null +++ b/src/math/cmplx/pow.go @@ -0,0 +1,78 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. 
Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex power function +// +// DESCRIPTION: +// +// Raises complex A to the complex Zth power. +// Definition is per AMS55 # 4.2.8, +// analytically equivalent to cpow(a,z) = cexp(z clog(a)). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -10,+10 30000 9.4e-15 1.5e-15 + +// Pow returns x**y, the base-x exponential of y. +// For generalized compatibility with math.Pow: +// Pow(0, ±0) returns 1+0i +// Pow(0, c) for real(c)<0 returns Inf+0i if imag(c) is zero, otherwise Inf+Inf i. +func Pow(x, y complex128) complex128 { + if x == 0 { // Guaranteed also true for x == -0. + r, i := real(y), imag(y) + switch { + case r == 0: + return 1 + case r < 0: + if i == 0 { + return complex(math.Inf(1), 0) + } + return Inf() + case r > 0: + return 0 + } + panic("not reached") + } + modulus := Abs(x) + if modulus == 0 { + return complex(0, 0) + } + r := math.Pow(modulus, real(y)) + arg := Phase(x) + theta := real(y) * arg + if imag(y) != 0 { + r *= math.Exp(-imag(y) * arg) + theta += imag(y) * math.Log(modulus) + } + s, c := math.Sincos(theta) + return complex(r*c, r*s) +} diff --git a/src/math/cmplx/rect.go b/src/math/cmplx/rect.go new file mode 100644 index 000000000..bf94d787e --- /dev/null +++ b/src/math/cmplx/rect.go @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// Rect returns the complex number x with polar coordinates r, θ. +func Rect(r, θ float64) complex128 { + s, c := math.Sincos(θ) + return complex(r*c, r*s) +} diff --git a/src/math/cmplx/sin.go b/src/math/cmplx/sin.go new file mode 100644 index 000000000..2c57536ed --- /dev/null +++ b/src/math/cmplx/sin.go @@ -0,0 +1,132 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex circular sine +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// w = sin x cosh y + i cos x sinh y. +// +// csin(z) = -i csinh(iz). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 8400 5.3e-17 1.3e-17 +// IEEE -10,+10 30000 3.8e-16 1.0e-16 +// Also tested by csin(casin(z)) = z. 

// Sin returns the sine of x.
// For x = a + bi the result is sin a cosh b + i cos a sinh b.
func Sin(x complex128) complex128 {
	sinRe, cosRe := math.Sincos(real(x))
	sinhIm, coshIm := sinhcosh(imag(x))
	return complex(sinRe*coshIm, cosRe*sinhIm)
}

// Complex hyperbolic sine
//
// DESCRIPTION:
//
// csinh z = (cexp(z) - cexp(-z))/2
//         = sinh x * cos y  +  i cosh x * sin y .
//
// ACCURACY:
//
//                      Relative error:
// arithmetic   domain     # trials      peak         rms
//    IEEE      -10,+10     30000       3.1e-16     8.2e-17

// Sinh returns the hyperbolic sine of x.
// For x = a + bi the result is sinh a cos b + i cosh a sin b.
func Sinh(x complex128) complex128 {
	sinIm, cosIm := math.Sincos(imag(x))
	sinhRe, coshRe := sinhcosh(real(x))
	return complex(cosIm*sinhRe, sinIm*coshRe)
}

// Complex circular cosine
//
// DESCRIPTION:
//
// If
//     z = x + iy,
//
// then
//
//     w = cos x  cosh y  -  i sin x sinh y.
//
// ACCURACY:
//
//                      Relative error:
// arithmetic   domain     # trials      peak         rms
//    DEC       -10,+10      8400       4.5e-17     1.3e-17
//    IEEE      -10,+10     30000       3.8e-16     1.0e-16

// Cos returns the cosine of x.
// For x = a + bi the result is cos a cosh b - i sin a sinh b.
func Cos(x complex128) complex128 {
	sinRe, cosRe := math.Sincos(real(x))
	sinhIm, coshIm := sinhcosh(imag(x))
	return complex(cosRe*coshIm, -sinRe*sinhIm)
}

// Complex hyperbolic cosine
//
// DESCRIPTION:
//
// ccosh(z) = cosh x  cos y + i sinh x sin y .
//
// ACCURACY:
//
//                      Relative error:
// arithmetic   domain     # trials      peak         rms
//    IEEE      -10,+10     30000       2.9e-16     8.1e-17

// Cosh returns the hyperbolic cosine of x.
// For x = a + bi the result is cosh a cos b + i sinh a sin b.
func Cosh(x complex128) complex128 {
	sinIm, cosIm := math.Sincos(imag(x))
	sinhRe, coshRe := sinhcosh(real(x))
	return complex(cosIm*coshRe, sinIm*sinhRe)
}

// sinhcosh returns sinh(x) and cosh(x) together.
// For |x| <= 0.5 it defers to math.Sinh and math.Cosh; otherwise both
// values are assembled from a single math.Exp evaluation as
// (e**x ∓ e**-x) / 2.
func sinhcosh(x float64) (sh, ch float64) {
	if math.Abs(x) <= 0.5 {
		return math.Sinh(x), math.Cosh(x)
	}
	exp := math.Exp(x)
	halfNeg := 0.5 / exp // e**-x / 2
	halfPos := 0.5 * exp // e**x / 2
	return halfPos - halfNeg, halfPos + halfNeg
}
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cmplx + +import "math" + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. +// The go code is a simplified version of the original C. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex square root +// +// DESCRIPTION: +// +// If z = x + iy, r = |z|, then +// +// 1/2 +// Re w = [ (r + x)/2 ] , +// +// 1/2 +// Im w = [ (r - x)/2 ] . +// +// Cancellation error in r-x or r+x is avoided by using the +// identity 2 Re w Im w = y. +// +// Note that -w is also a square root of z. The root chosen +// is always in the right half plane and Im w has the same sign as y. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 25000 3.2e-17 9.6e-18 +// IEEE -10,+10 1,000,000 2.9e-16 6.1e-17 + +// Sqrt returns the square root of x. +// The result r is chosen so that real(r) ≥ 0 and imag(r) has the same sign as imag(x). 
func Sqrt(x complex128) complex128 {
	if imag(x) == 0 {
		// Purely real argument. The imaginary part may be +0 or -0; its
		// sign is carried into the result so that imag(r) always has the
		// same sign as imag(x), as the documentation promises.
		if real(x) == 0 {
			return complex(0, imag(x))
		}
		if real(x) < 0 {
			// Root lies on the imaginary axis; pick the half selected by
			// the sign of the zero imaginary part.
			return complex(0, math.Copysign(math.Sqrt(-real(x)), imag(x)))
		}
		return complex(math.Sqrt(real(x)), imag(x))
	}
	if math.IsInf(imag(x), 0) {
		// C99 Annex G: csqrt(a ± i*Inf) is +Inf ± i*Inf for every a.
		// The general path below would produce Inf/Inf = NaN here.
		return complex(math.Inf(1), imag(x))
	}
	if real(x) == 0 {
		// Purely imaginary argument: sqrt(±bi) = sqrt(b/2) * (1 ± i).
		if imag(x) < 0 {
			r := math.Sqrt(-0.5 * imag(x))
			return complex(r, -r)
		}
		r := math.Sqrt(0.5 * imag(x))
		return complex(r, r)
	}
	a := real(x)
	b := imag(x)
	var scale float64
	// Rescale to avoid internal overflow or underflow.
	if math.Abs(a) > 4 || math.Abs(b) > 4 {
		a *= 0.25
		b *= 0.25
		scale = 2
	} else {
		a *= 1.8014398509481984e16 // 2**54
		b *= 1.8014398509481984e16
		scale = 7.450580596923828125e-9 // 2**-27
	}
	r := math.Hypot(a, b)
	var t float64
	// Compute the larger of the two parts with a square root and derive
	// the other from 2 * Re w * Im w = b, which avoids cancellation in
	// r-a or r+a.
	if a > 0 {
		t = math.Sqrt(0.5*r + 0.5*a)
		r = scale * math.Abs((0.5*b)/t)
		t *= scale
	} else {
		r = math.Sqrt(0.5*r - 0.5*a)
		t = scale * math.Abs((0.5*b)/r)
		r *= scale
	}
	if b < 0 {
		return complex(t, -r)
	}
	return complex(t, r)
}
+// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +// Complex circular tangent +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// sin 2x + i sinh 2y +// w = --------------------. +// cos 2x + cosh 2y +// +// On the real axis the denominator is zero at odd multiples +// of PI/2. The denominator is evaluated by its Taylor +// series near these points. +// +// ctan(z) = -i ctanh(iz). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 5200 7.1e-17 1.6e-17 +// IEEE -10,+10 30000 7.2e-16 1.2e-16 +// Also tested by ctan * ccot = 1 and catan(ctan(z)) = z. + +// Tan returns the tangent of x. +func Tan(x complex128) complex128 { + d := math.Cos(2*real(x)) + math.Cosh(2*imag(x)) + if math.Abs(d) < 0.25 { + d = tanSeries(x) + } + if d == 0 { + return Inf() + } + return complex(math.Sin(2*real(x))/d, math.Sinh(2*imag(x))/d) +} + +// Complex hyperbolic tangent +// +// DESCRIPTION: +// +// tanh z = (sinh 2x + i sin 2y) / (cosh 2x + cos 2y) . +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -10,+10 30000 1.7e-14 2.4e-16 + +// Tanh returns the hyperbolic tangent of x. +func Tanh(x complex128) complex128 { + d := math.Cosh(2*real(x)) + math.Cos(2*imag(x)) + if d == 0 { + return Inf() + } + return complex(math.Sinh(2*real(x))/d, math.Sin(2*imag(x))/d) +} + +// Program to subtract nearest integer multiple of PI +func reducePi(x float64) float64 { + const ( + // extended precision value of PI: + DP1 = 3.14159265160560607910E0 // ?? 0x400921fb54000000 + DP2 = 1.98418714791870343106E-9 // ?? 0x3e210b4610000000 + DP3 = 1.14423774522196636802E-17 // ?? 
0x3c6a62633145c06e + ) + t := x / math.Pi + if t >= 0 { + t += 0.5 + } else { + t -= 0.5 + } + t = float64(int64(t)) // int64(t) = the multiple + return ((x - t*DP1) - t*DP2) - t*DP3 +} + +// Taylor series expansion for cosh(2y) - cos(2x) +func tanSeries(z complex128) float64 { + const MACHEP = 1.0 / (1 << 53) + x := math.Abs(2 * real(z)) + y := math.Abs(2 * imag(z)) + x = reducePi(x) + x = x * x + y = y * y + x2 := 1.0 + y2 := 1.0 + f := 1.0 + rn := 0.0 + d := 0.0 + for { + rn += 1 + f *= rn + rn += 1 + f *= rn + x2 *= x + y2 *= y + t := y2 + x2 + t /= f + d += t + + rn += 1 + f *= rn + rn += 1 + f *= rn + x2 *= x + y2 *= y + t = y2 - x2 + t /= f + d += t + if math.Abs(t/d) <= MACHEP { + break + } + } + return d +} + +// Complex circular cotangent +// +// DESCRIPTION: +// +// If +// z = x + iy, +// +// then +// +// sin 2x - i sinh 2y +// w = --------------------. +// cosh 2y - cos 2x +// +// On the real axis, the denominator has zeros at even +// multiples of PI/2. Near these points it is evaluated +// by a Taylor series. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -10,+10 3000 6.5e-17 1.6e-17 +// IEEE -10,+10 30000 9.2e-16 1.2e-16 +// Also tested by ctan * ccot = 1 + i0. + +// Cot returns the cotangent of x. +func Cot(x complex128) complex128 { + d := math.Cosh(2*imag(x)) - math.Cos(2*real(x)) + if math.Abs(d) < 0.25 { + d = tanSeries(x) + } + if d == 0 { + return Inf() + } + return complex(math.Sin(2*real(x))/d, -math.Sinh(2*imag(x))/d) +} diff --git a/src/math/const.go b/src/math/const.go new file mode 100644 index 000000000..f1247c383 --- /dev/null +++ b/src/math/const.go @@ -0,0 +1,51 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package math provides basic constants and mathematical functions. +package math + +// Mathematical constants. 
+// Reference: http://oeis.org/Axxxxxx +const ( + E = 2.71828182845904523536028747135266249775724709369995957496696763 // A001113 + Pi = 3.14159265358979323846264338327950288419716939937510582097494459 // A000796 + Phi = 1.61803398874989484820458683436563811772030917980576286213544862 // A001622 + + Sqrt2 = 1.41421356237309504880168872420969807856967187537694807317667974 // A002193 + SqrtE = 1.64872127070012814684865078781416357165377610071014801157507931 // A019774 + SqrtPi = 1.77245385090551602729816748334114518279754945612238712821380779 // A002161 + SqrtPhi = 1.27201964951406896425242246173749149171560804184009624861664038 // A139339 + + Ln2 = 0.693147180559945309417232121458176568075500134360255254120680009 // A002162 + Log2E = 1 / Ln2 + Ln10 = 2.30258509299404568401799145468436420760110148862877297603332790 // A002392 + Log10E = 1 / Ln10 +) + +// Floating-point limit values. +// Max is the largest finite value representable by the type. +// SmallestNonzero is the smallest positive, non-zero value representable by the type. +const ( + MaxFloat32 = 3.40282346638528859811704183484516925440e+38 // 2**127 * (2**24 - 1) / 2**23 + SmallestNonzeroFloat32 = 1.401298464324817070923729583289916131280e-45 // 1 / 2**(127 - 1 + 23) + + MaxFloat64 = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52 + SmallestNonzeroFloat64 = 4.940656458412465441765687928682213723651e-324 // 1 / 2**(1023 - 1 + 52) +) + +// Integer limit values. +const ( + MaxInt8 = 1<<7 - 1 + MinInt8 = -1 << 7 + MaxInt16 = 1<<15 - 1 + MinInt16 = -1 << 15 + MaxInt32 = 1<<31 - 1 + MinInt32 = -1 << 31 + MaxInt64 = 1<<63 - 1 + MinInt64 = -1 << 63 + MaxUint8 = 1<<8 - 1 + MaxUint16 = 1<<16 - 1 + MaxUint32 = 1<<32 - 1 + MaxUint64 = 1<<64 - 1 +) diff --git a/src/math/copysign.go b/src/math/copysign.go new file mode 100644 index 000000000..719c64b9e --- /dev/null +++ b/src/math/copysign.go @@ -0,0 +1,12 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Copysign returns a value with the magnitude +// of x and the sign of y. +func Copysign(x, y float64) float64 { + const sign = 1 << 63 + return Float64frombits(Float64bits(x)&^sign | Float64bits(y)&sign) +} diff --git a/src/math/dim.go b/src/math/dim.go new file mode 100644 index 000000000..1c634d415 --- /dev/null +++ b/src/math/dim.go @@ -0,0 +1,72 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Dim returns the maximum of x-y or 0. +// +// Special cases are: +// Dim(+Inf, +Inf) = NaN +// Dim(-Inf, -Inf) = NaN +// Dim(x, NaN) = Dim(NaN, x) = NaN +func Dim(x, y float64) float64 + +func dim(x, y float64) float64 { + return max(x-y, 0) +} + +// Max returns the larger of x or y. +// +// Special cases are: +// Max(x, +Inf) = Max(+Inf, x) = +Inf +// Max(x, NaN) = Max(NaN, x) = NaN +// Max(+0, ±0) = Max(±0, +0) = +0 +// Max(-0, -0) = -0 +func Max(x, y float64) float64 + +func max(x, y float64) float64 { + // special cases + switch { + case IsInf(x, 1) || IsInf(y, 1): + return Inf(1) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0 && x == y: + if Signbit(x) { + return y + } + return x + } + if x > y { + return x + } + return y +} + +// Min returns the smaller of x or y. 
+// +// Special cases are: +// Min(x, -Inf) = Min(-Inf, x) = -Inf +// Min(x, NaN) = Min(NaN, x) = NaN +// Min(-0, ±0) = Min(±0, -0) = -0 +func Min(x, y float64) float64 + +func min(x, y float64) float64 { + // special cases + switch { + case IsInf(x, -1) || IsInf(y, -1): + return Inf(-1) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0 && x == y: + if Signbit(x) { + return x + } + return y + } + if x < y { + return x + } + return y +} diff --git a/src/math/dim_386.s b/src/math/dim_386.s new file mode 100644 index 000000000..c8194fed8 --- /dev/null +++ b/src/math/dim_386.s @@ -0,0 +1,14 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Dim(SB),NOSPLIT,$0 + JMP ·dim(SB) + +TEXT ·Max(SB),NOSPLIT,$0 + JMP ·max(SB) + +TEXT ·Min(SB),NOSPLIT,$0 + JMP ·min(SB) diff --git a/src/math/dim_amd64.s b/src/math/dim_amd64.s new file mode 100644 index 000000000..622cc3fba --- /dev/null +++ b/src/math/dim_amd64.s @@ -0,0 +1,144 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 + +// func Dim(x, y float64) float64 +TEXT ·Dim(SB),NOSPLIT,$0 + // (+Inf, +Inf) special case + MOVQ x+0(FP), BX + MOVQ y+8(FP), CX + MOVQ $PosInf, AX + CMPQ AX, BX + JNE dim2 + CMPQ AX, CX + JEQ bothInf +dim2: // (-Inf, -Inf) special case + MOVQ $NegInf, AX + CMPQ AX, BX + JNE dim3 + CMPQ AX, CX + JEQ bothInf +dim3: // (NaN, x) or (x, NaN) + MOVQ $~(1<<63), DX + MOVQ $NaN, AX + ANDQ DX, BX // x = |x| + CMPQ AX, BX + JLE isDimNaN + ANDQ DX, CX // y = |y| + CMPQ AX, CX + JLE isDimNaN + + MOVSD x+0(FP), X0 + SUBSD y+8(FP), X0 + MOVSD $(0.0), X1 + MAXSD X1, X0 + MOVSD X0, ret+16(FP) + RET +bothInf: // Dim(-Inf, -Inf) or Dim(+Inf, +Inf) + MOVQ $NaN, AX +isDimNaN: + MOVQ AX, ret+16(FP) + RET + +// func ·Max(x, y float64) float64 +TEXT ·Max(SB),NOSPLIT,$0 + // +Inf special cases + MOVQ $PosInf, AX + MOVQ x+0(FP), R8 + CMPQ AX, R8 + JEQ isPosInf + MOVQ y+8(FP), R9 + CMPQ AX, R9 + JEQ isPosInf + // NaN special cases + MOVQ $~(1<<63), DX // bit mask + MOVQ $NaN, AX + MOVQ R8, BX + ANDQ DX, BX // x = |x| + CMPQ AX, BX + JLE isMaxNaN + MOVQ R9, CX + ANDQ DX, CX // y = |y| + CMPQ AX, CX + JLE isMaxNaN + // ±0 special cases + ORQ CX, BX + JEQ isMaxZero + + MOVQ R8, X0 + MOVQ R9, X1 + MAXSD X1, X0 + MOVSD X0, ret+16(FP) + RET +isMaxNaN: // return NaN +isPosInf: // return +Inf + MOVQ AX, ret+16(FP) + RET +isMaxZero: + MOVQ $(1<<63), AX // -0.0 + CMPQ AX, R8 + JEQ +3(PC) + MOVQ R8, ret+16(FP) // return 0 + RET + MOVQ R9, ret+16(FP) // return other 0 + RET + +/* + MOVQ $0, AX + CMPQ AX, R8 + JNE +3(PC) + MOVQ R8, ret+16(FP) // return 0 + RET + MOVQ R9, ret+16(FP) // return other 0 + RET +*/ + +// func Min(x, y float64) float64 +TEXT ·Min(SB),NOSPLIT,$0 + // -Inf special cases + MOVQ $NegInf, AX + MOVQ x+0(FP), R8 + CMPQ AX, R8 + JEQ isNegInf + MOVQ y+8(FP), R9 + CMPQ AX, R9 + JEQ isNegInf + // NaN special cases + MOVQ $~(1<<63), DX + MOVQ $NaN, AX + MOVQ 
R8, BX + ANDQ DX, BX // x = |x| + CMPQ AX, BX + JLE isMinNaN + MOVQ R9, CX + ANDQ DX, CX // y = |y| + CMPQ AX, CX + JLE isMinNaN + // ±0 special cases + ORQ CX, BX + JEQ isMinZero + + MOVQ R8, X0 + MOVQ R9, X1 + MINSD X1, X0 + MOVSD X0, ret+16(FP) + RET +isMinNaN: // return NaN +isNegInf: // return -Inf + MOVQ AX, ret+16(FP) + RET +isMinZero: + MOVQ $(1<<63), AX // -0.0 + CMPQ AX, R8 + JEQ +3(PC) + MOVQ R9, ret+16(FP) // return other 0 + RET + MOVQ R8, ret+16(FP) // return -0 + RET + diff --git a/src/math/dim_amd64p32.s b/src/math/dim_amd64p32.s new file mode 100644 index 000000000..e5e34479d --- /dev/null +++ b/src/math/dim_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "dim_amd64.s" diff --git a/src/math/dim_arm.s b/src/math/dim_arm.s new file mode 100644 index 000000000..be6695068 --- /dev/null +++ b/src/math/dim_arm.s @@ -0,0 +1,14 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Dim(SB),NOSPLIT,$0 + B ·dim(SB) + +TEXT ·Min(SB),NOSPLIT,$0 + B ·min(SB) + +TEXT ·Max(SB),NOSPLIT,$0 + B ·max(SB) diff --git a/src/math/erf.go b/src/math/erf.go new file mode 100644 index 000000000..4cd80f80c --- /dev/null +++ b/src/math/erf.go @@ -0,0 +1,335 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point error function and complementary error function. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/s_erf.c and +// came with this notice. The go code is a simplified +// version of the original C. 
+// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// double erf(double x) +// double erfc(double x) +// x +// 2 |\ +// erf(x) = --------- | exp(-t*t)dt +// sqrt(pi) \| +// 0 +// +// erfc(x) = 1-erf(x) +// Note that +// erf(-x) = -erf(x) +// erfc(-x) = 2 - erfc(x) +// +// Method: +// 1. For |x| in [0, 0.84375] +// erf(x) = x + x*R(x**2) +// erfc(x) = 1 - erf(x) if x in [-.84375,0.25] +// = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] +// where R = P/Q where P is an odd poly of degree 8 and +// Q is an odd poly of degree 10. +// -57.90 +// | R - (erf(x)-x)/x | <= 2 +// +// +// Remark. The formula is derived by noting +// erf(x) = (2/sqrt(pi))*(x - x**3/3 + x**5/10 - x**7/42 + ....) +// and that +// 2/sqrt(pi) = 1.128379167095512573896158903121545171688 +// is close to one. The interval is chosen because the fix +// point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is +// near 0.6174), and by some experiment, 0.84375 is chosen to +// guarantee the error is less than one ulp for erf. +// +// 2. For |x| in [0.84375,1.25], let s = |x| - 1, and +// c = 0.84506291151 rounded to single (24 bits) +// erf(x) = sign(x) * (c + P1(s)/Q1(s)) +// erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 +// 1+(c+P1(s)/Q1(s)) if x < 0 +// |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 +// Remark: here we use the taylor series expansion at x=1. +// erf(1+s) = erf(1) + s*Poly(s) +// = 0.845.. + P1(s)/Q1(s) +// That is, we use rational approximation to approximate +// erf(1+s) - (c = (single)0.84506291151) +// Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] +// where +// P1(s) = degree 6 poly in s +// Q1(s) = degree 6 poly in s +// +// 3. 
For x in [1.25,1/0.35(~2.857143)], +// erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) +// erf(x) = 1 - erfc(x) +// where +// R1(z) = degree 7 poly in z, (z=1/x**2) +// S1(z) = degree 8 poly in z +// +// 4. For x in [1/0.35,28] +// erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 +// = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6<x<0 +// = 2.0 - tiny (if x <= -6) +// erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6, else +// erf(x) = sign(x)*(1.0 - tiny) +// where +// R2(z) = degree 6 poly in z, (z=1/x**2) +// S2(z) = degree 7 poly in z +// +// Note1: +// To compute exp(-x*x-0.5625+R/S), let s be a single +// precision number and s := x; then +// -x*x = -s*s + (s-x)*(s+x) +// exp(-x*x-0.5625+R/S) = +// exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S); +// Note2: +// Here 4 and 5 make use of the asymptotic series +// exp(-x*x) +// erfc(x) ~ ---------- * ( 1 + Poly(1/x**2) ) +// x*sqrt(pi) +// We use rational approximation to approximate +// g(s)=f(1/x**2) = log(erfc(x)*x) - x*x + 0.5625 +// Here is the error bound for R1/S1 and R2/S2 +// |R1/S1 - f(x)| < 2**(-62.57) +// |R2/S2 - f(x)| < 2**(-61.52) +// +// 5. For inf > x >= 28 +// erf(x) = sign(x) *(1 - tiny) (raise inexact) +// erfc(x) = tiny*tiny (raise underflow) if x > 0 +// = 2 - tiny if x<0 +// +// 7.
Special case: +// erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, +// erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, +// erfc/erf(NaN) is NaN + +const ( + erx = 8.45062911510467529297e-01 // 0x3FEB0AC160000000 + // Coefficients for approximation to erf in [0, 0.84375] + efx = 1.28379167095512586316e-01 // 0x3FC06EBA8214DB69 + efx8 = 1.02703333676410069053e+00 // 0x3FF06EBA8214DB69 + pp0 = 1.28379167095512558561e-01 // 0x3FC06EBA8214DB68 + pp1 = -3.25042107247001499370e-01 // 0xBFD4CD7D691CB913 + pp2 = -2.84817495755985104766e-02 // 0xBF9D2A51DBD7194F + pp3 = -5.77027029648944159157e-03 // 0xBF77A291236668E4 + pp4 = -2.37630166566501626084e-05 // 0xBEF8EAD6120016AC + qq1 = 3.97917223959155352819e-01 // 0x3FD97779CDDADC09 + qq2 = 6.50222499887672944485e-02 // 0x3FB0A54C5536CEBA + qq3 = 5.08130628187576562776e-03 // 0x3F74D022C4D36B0F + qq4 = 1.32494738004321644526e-04 // 0x3F215DC9221C1A10 + qq5 = -3.96022827877536812320e-06 // 0xBED09C4342A26120 + // Coefficients for approximation to erf in [0.84375, 1.25] + pa0 = -2.36211856075265944077e-03 // 0xBF6359B8BEF77538 + pa1 = 4.14856118683748331666e-01 // 0x3FDA8D00AD92B34D + pa2 = -3.72207876035701323847e-01 // 0xBFD7D240FBB8C3F1 + pa3 = 3.18346619901161753674e-01 // 0x3FD45FCA805120E4 + pa4 = -1.10894694282396677476e-01 // 0xBFBC63983D3E28EC + pa5 = 3.54783043256182359371e-02 // 0x3FA22A36599795EB + pa6 = -2.16637559486879084300e-03 // 0xBF61BF380A96073F + qa1 = 1.06420880400844228286e-01 // 0x3FBB3E6618EEE323 + qa2 = 5.40397917702171048937e-01 // 0x3FE14AF092EB6F33 + qa3 = 7.18286544141962662868e-02 // 0x3FB2635CD99FE9A7 + qa4 = 1.26171219808761642112e-01 // 0x3FC02660E763351F + qa5 = 1.36370839120290507362e-02 // 0x3F8BEDC26B51DD1C + qa6 = 1.19844998467991074170e-02 // 0x3F888B545735151D + // Coefficients for approximation to erfc in [1.25, 1/0.35] + ra0 = -9.86494403484714822705e-03 // 0xBF843412600D6435 + ra1 = -6.93858572707181764372e-01 // 0xBFE63416E4BA7360 + ra2 = -1.05586262253232909814e+01 // 0xC0251E0441B0E726 + ra3 
= -6.23753324503260060396e+01 // 0xC04F300AE4CBA38D + ra4 = -1.62396669462573470355e+02 // 0xC0644CB184282266 + ra5 = -1.84605092906711035994e+02 // 0xC067135CEBCCABB2 + ra6 = -8.12874355063065934246e+01 // 0xC054526557E4D2F2 + ra7 = -9.81432934416914548592e+00 // 0xC023A0EFC69AC25C + sa1 = 1.96512716674392571292e+01 // 0x4033A6B9BD707687 + sa2 = 1.37657754143519042600e+02 // 0x4061350C526AE721 + sa3 = 4.34565877475229228821e+02 // 0x407B290DD58A1A71 + sa4 = 6.45387271733267880336e+02 // 0x40842B1921EC2868 + sa5 = 4.29008140027567833386e+02 // 0x407AD02157700314 + sa6 = 1.08635005541779435134e+02 // 0x405B28A3EE48AE2C + sa7 = 6.57024977031928170135e+00 // 0x401A47EF8E484A93 + sa8 = -6.04244152148580987438e-02 // 0xBFAEEFF2EE749A62 + // Coefficients for approximation to erfc in [1/.35, 28] + rb0 = -9.86494292470009928597e-03 // 0xBF84341239E86F4A + rb1 = -7.99283237680523006574e-01 // 0xBFE993BA70C285DE + rb2 = -1.77579549177547519889e+01 // 0xC031C209555F995A + rb3 = -1.60636384855821916062e+02 // 0xC064145D43C5ED98 + rb4 = -6.37566443368389627722e+02 // 0xC083EC881375F228 + rb5 = -1.02509513161107724954e+03 // 0xC09004616A2E5992 + rb6 = -4.83519191608651397019e+02 // 0xC07E384E9BDC383F + sb1 = 3.03380607434824582924e+01 // 0x403E568B261D5190 + sb2 = 3.25792512996573918826e+02 // 0x40745CAE221B9F0A + sb3 = 1.53672958608443695994e+03 // 0x409802EB189D5118 + sb4 = 3.19985821950859553908e+03 // 0x40A8FFB7688C246A + sb5 = 2.55305040643316442583e+03 // 0x40A3F219CEDF3BE6 + sb6 = 4.74528541206955367215e+02 // 0x407DA874E79FE763 + sb7 = -2.24409524465858183362e+01 // 0xC03670E242712D62 +) + +// Erf returns the error function of x. 
+// +// Special cases are: +// Erf(+Inf) = 1 +// Erf(-Inf) = -1 +// Erf(NaN) = NaN +func Erf(x float64) float64 { + const ( + VeryTiny = 2.848094538889218e-306 // 0x0080000000000000 + Small = 1.0 / (1 << 28) // 2**-28 + ) + // special cases + switch { + case IsNaN(x): + return NaN() + case IsInf(x, 1): + return 1 + case IsInf(x, -1): + return -1 + } + sign := false + if x < 0 { + x = -x + sign = true + } + if x < 0.84375 { // |x| < 0.84375 + var temp float64 + if x < Small { // |x| < 2**-28 + if x < VeryTiny { + temp = 0.125 * (8.0*x + efx8*x) // avoid underflow + } else { + temp = x + efx*x + } + } else { + z := x * x + r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4))) + s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + y := r / s + temp = x + x*y + } + if sign { + return -temp + } + return temp + } + if x < 1.25 { // 0.84375 <= |x| < 1.25 + s := x - 1 + P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))) + Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))) + if sign { + return -erx - P/Q + } + return erx + P/Q + } + if x >= 6 { // inf > |x| >= 6 + if sign { + return -1 + } + return 1 + } + s := 1 / (x * x) + var R, S float64 + if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143 + R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7)))))) + S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8))))))) + } else { // |x| >= 1 / 0.35 ~ 2.857143 + R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6))))) + S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7)))))) + } + z := Float64frombits(Float64bits(x) & 0xffffffff00000000) // pseudo-single (20-bit) precision x + r := Exp(-z*z-0.5625) * Exp((z-x)*(z+x)+R/S) + if sign { + return r/x - 1 + } + return 1 - r/x +} + +// Erfc returns the complementary error function of x. 
+// +// Special cases are: +// Erfc(+Inf) = 0 +// Erfc(-Inf) = 2 +// Erfc(NaN) = NaN +func Erfc(x float64) float64 { + const Tiny = 1.0 / (1 << 56) // 2**-56 + // special cases + switch { + case IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + case IsInf(x, -1): + return 2 + } + sign := false + if x < 0 { + x = -x + sign = true + } + if x < 0.84375 { // |x| < 0.84375 + var temp float64 + if x < Tiny { // |x| < 2**-56 + temp = x + } else { + z := x * x + r := pp0 + z*(pp1+z*(pp2+z*(pp3+z*pp4))) + s := 1 + z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5)))) + y := r / s + if x < 0.25 { // |x| < 1/4 + temp = x + x*y + } else { + temp = 0.5 + (x*y + (x - 0.5)) + } + } + if sign { + return 1 + temp + } + return 1 - temp + } + if x < 1.25 { // 0.84375 <= |x| < 1.25 + s := x - 1 + P := pa0 + s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6))))) + Q := 1 + s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6))))) + if sign { + return 1 + erx + P/Q + } + return 1 - erx - P/Q + + } + if x < 28 { // |x| < 28 + s := 1 / (x * x) + var R, S float64 + if x < 1/0.35 { // |x| < 1 / 0.35 ~ 2.857143 + R = ra0 + s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*ra7)))))) + S = 1 + s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+s*sa8))))))) + } else { // |x| >= 1 / 0.35 ~ 2.857143 + if sign && x > 6 { + return 2 // x < -6 + } + R = rb0 + s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*rb6))))) + S = 1 + s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7)))))) + } + z := Float64frombits(Float64bits(x) & 0xffffffff00000000) // pseudo-single (20-bit) precision x + r := Exp(-z*z-0.5625) * Exp((z-x)*(z+x)+R/S) + if sign { + return 2 - r/x + } + return r / x + } + if sign { + return 2 + } + return 0 +} diff --git a/src/math/exp.go b/src/math/exp.go new file mode 100644 index 000000000..f31585fa7 --- /dev/null +++ b/src/math/exp.go @@ -0,0 +1,191 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package math + +// Exp returns e**x, the base-e exponential of x. +// +// Special cases are: +// Exp(+Inf) = +Inf +// Exp(NaN) = NaN +// Very large values overflow to 0 or +Inf. +// Very small values underflow to 1. +func Exp(x float64) float64 + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_exp.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. +// +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// exp(x) +// Returns the exponential of x. +// +// Method +// 1. Argument reduction: +// Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. +// Given x, find r and integer k such that +// +// x = k*ln2 + r, |r| <= 0.5*ln2. +// +// Here r will be represented as r = hi-lo for better +// accuracy. +// +// 2. Approximation of exp(r) by a special rational function on +// the interval [0,0.34658]: +// Write +// R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... +// We use a special Remes algorithm on [0,0.34658] to generate +// a polynomial of degree 5 to approximate R. The maximum error +// of this polynomial approximation is bounded by 2**-59. In +// other words, +// R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 +// (where z=r*r, and the values of P1 to P5 are listed below) +// and +// | 5 | -59 +// | 2.0+P1*z+...+P5*z - R(z) | <= 2 +// | | +// The computation of exp(r) thus becomes +// 2*r +// exp(r) = 1 + ------- +// R - r +// r*R1(r) +// = 1 + r + ----------- (for better accuracy) +// 2 - R1(r) +// where +// 2 4 10 +// R1(r) = r - (P1*r + P2*r + ... + P5*r ). +// +// 3. 
// Scale back to obtain exp(x):
//	From step 1, we have
//	   exp(x) = 2**k * exp(r)
//
// Special cases:
//	exp(INF) is INF, exp(NaN) is NaN;
//	exp(-INF) is 0, and
//	for finite argument, only exp(0)=1 is exact.
//
// Accuracy:
//	according to an error analysis, the error is always less than
//	1 ulp (unit in the last place).
//
// Misc. info.
//	For IEEE double
//	    if x >  7.09782712893383973096e+02 then exp(x) overflow
//	    if x < -7.45133219101941108420e+02 then exp(x) underflow
//
// Constants:
// The hexadecimal values are the intended ones for the following
// constants. The decimal values may be used, provided that the
// compiler will convert from decimal to binary accurately enough
// to produce the hexadecimal values shown.

// exp is the portable implementation of e**x.
//
// NaN and +Inf are returned unchanged, -Inf and arguments below the
// underflow threshold give 0, arguments above the overflow threshold
// give +Inf, and |x| < 2**-28 gives 1 + x. Everything else is reduced to
// x = k*ln2 + (hi - lo) and handed to expmulti.
func exp(x float64) float64 {
	const (
		Ln2Hi = 6.93147180369123816490e-01
		Ln2Lo = 1.90821492927058770002e-10
		Log2e = 1.44269504088896338700e+00

		Overflow  = 7.09782712893383973096e+02
		Underflow = -7.45133219101941108420e+02
		NearZero  = 1.0 / (1 << 28) // 2**-28
	)

	// Arguments whose result needs no polynomial work.
	if IsNaN(x) || IsInf(x, 1) {
		return x
	}
	if IsInf(x, -1) || x < Underflow {
		return 0
	}
	if x > Overflow {
		return Inf(1)
	}
	if -NearZero < x && x < NearZero {
		return 1 + x
	}

	// Reduce: n is x*log2(e) rounded to the nearest integer (the ±0.5
	// followed by truncation rounds halves away from zero); the
	// remainder is kept as hi - lo for extra precision.
	n := 0
	if x < 0 {
		n = int(Log2e*x - 0.5)
	} else if x > 0 {
		n = int(Log2e*x + 0.5)
	}
	scale := float64(n)
	hi := x - scale*Ln2Hi
	lo := scale * Ln2Lo

	return expmulti(hi, lo, n)
}

// exp2 is the portable implementation of 2**x, the base-2 exponential
// of x.
//
// Special cases are the same as for exp: NaN and +Inf are returned
// unchanged, -Inf and arguments below -1074 give 0, and arguments above
// 1023.9999999999999 give +Inf.
func exp2(x float64) float64 {
	const (
		Ln2Hi = 6.93147180369123816490e-01
		Ln2Lo = 1.90821492927058770002e-10

		Overflow  = 1.0239999999999999e+03
		Underflow = -1.0740e+03
	)

	// Arguments whose result needs no polynomial work.
	if IsNaN(x) || IsInf(x, 1) {
		return x
	}
	if IsInf(x, -1) || x < Underflow {
		return 0
	}
	if x > Overflow {
		return Inf(1)
	}

	// Argument reduction: n is x rounded to the nearest integer, and the
	// fractional remainder is rescaled by ln2 so that 2**frac == e**r
	// with r = hi - lo and |r| ≤ ln(2)/2.
	n := 0
	if x > 0 {
		n = int(x + 0.5)
	} else if x < 0 {
		n = int(x - 0.5)
	}
	frac := x - float64(n)
	hi := frac * Ln2Hi
	lo := -frac * Ln2Lo

	return expmulti(hi, lo, n)
}

// expmulti returns e**r × 2**k where r = hi - lo and |r| ≤ ln(2)/2.
func expmulti(hi, lo float64, k int) float64 {
	const (
		P1 = 1.66666666666666019037e-01  /* 0x3FC55555; 0x5555553E */
		P2 = -2.77777777770155933842e-03 /* 0xBF66C16C; 0x16BEBD93 */
		P3 = 6.61375632143793436117e-05  /* 0x3F11566A; 0xAF25DE2C */
		P4 = -1.65339022054652515390e-06 /* 0xBEBBBD41; 0xC5D26BF1 */
		P5 = 4.13813679705723846039e-08  /* 0x3E663769; 0x72BEA4D0 */
	)

	r := hi - lo
	rr := r * r
	// c is r minus the degree-5 polynomial in r*r built from P1..P5.
	c := r - rr*(P1+rr*(P2+rr*(P3+rr*(P4+rr*P5))))
	// e**r, arranged so that hi and lo enter separately.
	y := 1 - ((lo - (r*c)/(2-c)) - hi)
	// TODO(rsc): make sure Ldexp can handle boundary k
	return Ldexp(y, k)
}
+ +#include "textflag.h" + +// func Exp2(x float64) float64 +TEXT ·Exp2(SB),NOSPLIT,$0 +// test bits for not-finite + MOVL x_hi+4(FP), AX + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + FMOVD x+0(FP), F0 // F0=x + FMOVD F0, F1 // F0=x, F1=x + FRNDINT // F0=int(x), F1=x + FSUBD F0, F1 // F0=int(x), F1=x-int(x) + FXCHD F0, F1 // F0=x-int(x), F1=int(x) + F2XM1 // F0=2**(x-int(x))-1, F1=int(x) + FLD1 // F0=1, F1=2**(x-int(x))-1, F2=int(x) + FADDDP F0, F1 // F0=2**(x-int(x)), F1=int(x) + FSCALE // F0=2**x, F1=int(x) + FMOVDP F0, F1 // F0=2**x + FMOVDP F0, ret+8(FP) + RET +not_finite: +// test bits for -Inf + MOVL x_hi+4(FP), BX + MOVL x_lo+0(FP), CX + CMPL BX, $0xfff00000 + JNE not_neginf + CMPL CX, $0 + JNE not_neginf + MOVL $0, ret_lo+8(FP) + MOVL $0, ret_hi+12(FP) + RET +not_neginf: + MOVL CX, ret_lo+8(FP) + MOVL BX, ret_hi+12(FP) + RET diff --git a/src/math/exp2_amd64.s b/src/math/exp2_amd64.s new file mode 100644 index 000000000..903c83589 --- /dev/null +++ b/src/math/exp2_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Exp2(SB),NOSPLIT,$0 + JMP ·exp2(SB) diff --git a/src/math/exp2_amd64p32.s b/src/math/exp2_amd64p32.s new file mode 100644 index 000000000..4d3830914 --- /dev/null +++ b/src/math/exp2_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "exp2_amd64.s" diff --git a/src/math/exp2_arm.s b/src/math/exp2_arm.s new file mode 100644 index 000000000..58283cd08 --- /dev/null +++ b/src/math/exp2_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·Exp2(SB),NOSPLIT,$0 + B ·exp2(SB) diff --git a/src/math/exp_386.s b/src/math/exp_386.s new file mode 100644 index 000000000..6a478a5e6 --- /dev/null +++ b/src/math/exp_386.s @@ -0,0 +1,41 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Exp(x float64) float64 +TEXT ·Exp(SB),NOSPLIT,$0 +// test bits for not-finite + MOVL x_hi+4(FP), AX + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + FLDL2E // F0=log2(e) + FMULD x+0(FP), F0 // F0=x*log2(e) + FMOVD F0, F1 // F0=x*log2(e), F1=x*log2(e) + FRNDINT // F0=int(x*log2(e)), F1=x*log2(e) + FSUBD F0, F1 // F0=int(x*log2(e)), F1=x*log2(e)-int(x*log2(e)) + FXCHD F0, F1 // F0=x*log2(e)-int(x*log2(e)), F1=int(x*log2(e)) + F2XM1 // F0=2**(x*log2(e)-int(x*log2(e)))-1, F1=int(x*log2(e)) + FLD1 // F0=1, F1=2**(x*log2(e)-int(x*log2(e)))-1, F2=int(x*log2(e)) + FADDDP F0, F1 // F0=2**(x*log2(e)-int(x*log2(e))), F1=int(x*log2(e)) + FSCALE // F0=e**x, F1=int(x*log2(e)) + FMOVDP F0, F1 // F0=e**x + FMOVDP F0, ret+8(FP) + RET +not_finite: +// test bits for -Inf + MOVL x_hi+4(FP), BX + MOVL x_lo+0(FP), CX + CMPL BX, $0xfff00000 + JNE not_neginf + CMPL CX, $0 + JNE not_neginf + FLDZ // F0=0 + FMOVDP F0, ret+8(FP) + RET +not_neginf: + MOVL CX, ret_lo+8(FP) + MOVL BX, ret_hi+12(FP) + RET diff --git a/src/math/exp_amd64.s b/src/math/exp_amd64.s new file mode 100644 index 000000000..d9cf8fd86 --- /dev/null +++ b/src/math/exp_amd64.s @@ -0,0 +1,114 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// The method is based on a paper by Naoki Shibata: "Efficient evaluation +// methods of elementary functions suitable for SIMD computation", Proc. +// of International Supercomputing Conference 2010 (ISC'10), pp. 
25 -- 32 +// (May 2010). The paper is available at +// http://www.springerlink.com/content/340228x165742104/ +// +// The original code and the constants below are from the author's +// implementation available at http://freshmeat.net/projects/sleef. +// The README file says, "The software is in public domain. +// You can use the software without any obligation." +// +// This code is a simplified version of the original. + +#define LN2 0.6931471805599453094172321214581766 // log_e(2) +#define LOG2E 1.4426950408889634073599246810018920 // 1/LN2 +#define LN2U 0.69314718055966295651160180568695068359375 // upper half LN2 +#define LN2L 0.28235290563031577122588448175013436025525412068e-12 // lower half LN2 +#define T0 1.0 +#define T1 0.5 +#define T2 1.6666666666666666667e-1 +#define T3 4.1666666666666666667e-2 +#define T4 8.3333333333333333333e-3 +#define T5 1.3888888888888888889e-3 +#define T6 1.9841269841269841270e-4 +#define T7 2.4801587301587301587e-5 +#define PosInf 0x7FF0000000000000 +#define NegInf 0xFFF0000000000000 + +// func Exp(x float64) float64 +TEXT ·Exp(SB),NOSPLIT,$0 +// test bits for not-finite + MOVQ x+0(FP), BX + MOVQ $~(1<<63), AX // sign bit mask + MOVQ BX, DX + ANDQ AX, DX + MOVQ $PosInf, AX + CMPQ AX, DX + JLE notFinite + MOVQ BX, X0 + MOVSD $LOG2E, X1 + MULSD X0, X1 + CVTSD2SL X1, BX // BX = exponent + CVTSL2SD BX, X1 + MOVSD $LN2U, X2 + MULSD X1, X2 + SUBSD X2, X0 + MOVSD $LN2L, X2 + MULSD X1, X2 + SUBSD X2, X0 + // reduce argument + MULSD $0.0625, X0 + // Taylor series evaluation + MOVSD $T7, X1 + MULSD X0, X1 + ADDSD $T6, X1 + MULSD X0, X1 + ADDSD $T5, X1 + MULSD X0, X1 + ADDSD $T4, X1 + MULSD X0, X1 + ADDSD $T3, X1 + MULSD X0, X1 + ADDSD $T2, X1 + MULSD X0, X1 + ADDSD $T1, X1 + MULSD X0, X1 + ADDSD $T0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + MOVSD $2.0, X1 + ADDSD X0, X1 + MULSD X1, X0 + ADDSD $1.0, X0 + // return fr * 
2**exponent + MOVL $0x3FF, AX // bias + ADDL AX, BX + JLE underflow + CMPL BX, $0x7FF + JGE overflow + MOVL $52, CX + SHLQ CX, BX + MOVQ BX, X1 + MULSD X1, X0 + MOVSD X0, ret+8(FP) + RET +notFinite: + // test bits for -Inf + MOVQ $NegInf, AX + CMPQ AX, BX + JNE notNegInf + // -Inf, return 0 +underflow: // return 0 + MOVQ $0, AX + MOVQ AX, ret+8(FP) + RET +overflow: // return +Inf + MOVQ $PosInf, BX +notNegInf: // NaN or +Inf, return x + MOVQ BX, ret+8(FP) + RET diff --git a/src/math/exp_amd64p32.s b/src/math/exp_amd64p32.s new file mode 100644 index 000000000..98ac2e91e --- /dev/null +++ b/src/math/exp_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "exp_amd64.s" diff --git a/src/math/exp_arm.s b/src/math/exp_arm.s new file mode 100644 index 000000000..ce36d03ca --- /dev/null +++ b/src/math/exp_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Exp(SB),NOSPLIT,$0 + B ·exp(SB) diff --git a/src/math/expm1.go b/src/math/expm1.go new file mode 100644 index 000000000..8f56e15cc --- /dev/null +++ b/src/math/expm1.go @@ -0,0 +1,237 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. 
+// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// expm1(x) +// Returns exp(x)-1, the exponential of x minus 1. +// +// Method +// 1. Argument reduction: +// Given x, find r and integer k such that +// +// x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 +// +// Here a correction term c will be computed to compensate +// the error in r when rounded to a floating-point number. +// +// 2. Approximating expm1(r) by a special rational function on +// the interval [0,0.34658]: +// Since +// r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ... +// we define R1(r*r) by +// r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r) +// That is, +// R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) +// = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) +// = 1 - r**2/60 + r**4/2520 - r**6/100800 + ... +// We use a special Reme algorithm on [0,0.347] to generate +// a polynomial of degree 5 in r*r to approximate R1. The +// maximum error of this polynomial approximation is bounded +// by 2**-61. In other words, +// R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 +// where Q1 = -1.6666666666666567384E-2, +// Q2 = 3.9682539681370365873E-4, +// Q3 = -9.9206344733435987357E-6, +// Q4 = 2.5051361420808517002E-7, +// Q5 = -6.2843505682382617102E-9; +// (where z=r*r, and the values of Q1 to Q5 are listed below) +// with error bounded by +// | 5 | -61 +// | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 +// | | +// +// expm1(r) = exp(r)-1 is then computed by the following +// specific way which minimize the accumulation rounding error: +// 2 3 +// r r [ 3 - (R1 + R1*r/2) ] +// expm1(r) = r + --- + --- * [--------------------] +// 2 2 [ 6 - r*(3 - R1*r/2) ] +// +// To compensate the error in the argument reduction, we use +// expm1(r+c) = expm1(r) + c + expm1(r)*c +// ~ expm1(r) + c + r*c +// Thus c+r*c will be added in as the correction terms for +// expm1(r+c). 
Now rearrange the term to avoid optimization +// screw up: +// ( 2 2 ) +// ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) +// expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) +// ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) +// ( ) +// +// = r - E +// 3. Scale back to obtain expm1(x): +// From step 1, we have +// expm1(x) = either 2**k*[expm1(r)+1] - 1 +// = or 2**k*[expm1(r) + (1-2**-k)] +// 4. Implementation notes: +// (A). To save one multiplication, we scale the coefficient Qi +// to Qi*2**i, and replace z by (x**2)/2. +// (B). To achieve maximum accuracy, we compute expm1(x) by +// (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) +// (ii) if k=0, return r-E +// (iii) if k=-1, return 0.5*(r-E)-0.5 +// (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) +// else return 1.0+2.0*(r-E); +// (v) if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1) +// (vi) if k <= 20, return 2**k((1-2**-k)-(E-r)), else +// (vii) return 2**k(1-((E+2**-k)-r)) +// +// Special cases: +// expm1(INF) is INF, expm1(NaN) is NaN; +// expm1(-INF) is -1, and +// for finite argument, only expm1(0)=0 is exact. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Misc. info. +// For IEEE double +// if x > 7.09782712893383973096e+02 then expm1(x) overflow +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// + +// Expm1 returns e**x - 1, the base-e exponential of x minus 1. +// It is more accurate than Exp(x) - 1 when x is near zero. +// +// Special cases are: +// Expm1(+Inf) = +Inf +// Expm1(-Inf) = -1 +// Expm1(NaN) = NaN +// Very large values overflow to -1 or +Inf. 
// expm1 is the portable implementation of e**x - 1. It is more accurate
// than exp(x) - 1 when x is near zero.
//
// Special cases are:
//	expm1(+Inf) = +Inf
//	expm1(-Inf) = -1
//	expm1(NaN) = NaN
// Arguments at or below -56*ln2 return -1; arguments at or above
// 709.78... overflow to +Inf.
func expm1(x float64) float64 {
	const (
		Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF
		Ln2X56     = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1
		Ln2HalfX3  = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73
		Ln2Half    = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef
		Ln2Hi      = 6.93147180369123816490e-01 // 0x3fe62e42fee00000
		Ln2Lo      = 1.90821492927058770002e-10 // 0x3dea39ef35793c76
		InvLn2     = 1.44269504088896338700e+00 // 0x3ff71547652b82fe
		Tiny       = 1.0 / (1 << 54)            // 2**-54 = 0x3c90000000000000
		// scaled coefficients related to expm1
		Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4
		Q2 = 1.58730158725481460165e-03  // 0x3F5A01A019FE5585
		Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7
		Q4 = 4.00821782732936239552e-06  // 0x3ED0CFCA86E65239
		Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D
	)

	// special cases
	switch {
	case IsInf(x, 1) || IsNaN(x):
		return x
	case IsInf(x, -1):
		return -1
	}

	absx := x
	sign := false
	if x < 0 {
		absx = -absx
		sign = true
	}

	// filter out huge argument
	if absx >= Ln2X56 { // if |x| >= 56 * ln2
		// The sign must be examined before the overflow threshold:
		// a large negative argument saturates at -1 and must never be
		// reported as an overflow to +Inf.
		if sign {
			return -1 // x <= -56*ln2, return -1.0
		}
		if absx >= Othreshold { // if x >= 709.78...
			return Inf(1) // overflow
		}
	}

	// argument reduction: x = k*ln2 + r, with c the rounding error of r
	var c float64
	var k int // stays 0 when |x| <= 0.5 * ln2
	if absx > Ln2Half { // if |x| > 0.5 * ln2
		var hi, lo float64
		if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2
			if !sign {
				hi = x - Ln2Hi
				lo = Ln2Lo
				k = 1
			} else {
				hi = x + Ln2Hi
				lo = -Ln2Lo
				k = -1
			}
		} else {
			if !sign {
				k = int(InvLn2*x + 0.5)
			} else {
				k = int(InvLn2*x - 0.5)
			}
			fk := float64(k)
			hi = x - fk*Ln2Hi // fk * Ln2Hi is exact here
			lo = fk * Ln2Lo
		}
		x = hi - lo
		c = (hi - x) - lo
	} else if absx < Tiny { // when |x| < 2**-54, return x
		return x
	}

	// x is now in primary range
	hfx := 0.5 * x
	hxs := x * hfx
	r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))))
	t := 3 - r1*hfx
	e := hxs * ((r1 - t) / (6.0 - x*t))
	if k == 0 {
		return x - (x*e - hxs) // c is 0
	}

	e = (x*(e-c) - c)
	e -= hxs
	switch {
	case k == -1:
		return 0.5*(x-e) - 0.5
	case k == 1:
		if x < -0.25 {
			return -2 * (e - (x + 0.5))
		}
		return 1 + 2*(x-e)
	case k <= -2 || k > 56: // suffice to return exp(x)-1
		y := 1 - (e - x)
		y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
		return y - 1
	}
	if k < 20 {
		oneMinus := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // 1-2**-k
		y := oneMinus - (e - x)
		y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
		return y
	}
	// Convert to uint64 before shifting: shifting an int by 52 yields 0
	// where int is 32 bits wide, which would silently drop the 2**-k term.
	twoNegK := Float64frombits(uint64(0x3ff-k) << 52) // 2**-k
	y := x - (e + twoNegK)
	y += 1
	y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
	return y
}
+ +#include "textflag.h" + +// func Expm1(x float64) float64 +TEXT ·Expm1(SB),NOSPLIT,$0 + FLDLN2 // F0=log(2) = 1/log2(e) ~ 0.693147 + FMOVD x+0(FP), F0 // F0=x, F1=1/log2(e) + FABS // F0=|x|, F1=1/log2(e) + FUCOMPP F0, F1 // compare F0 to F1 + FSTSW AX + SAHF + JCC use_exp // jump if F0 >= F1 + FLDL2E // F0=log2(e) + FMULD x+0(FP), F0 // F0=x*log2(e) (-1<F0<1) + F2XM1 // F0=e**x-1 = 2**(x*log2(e))-1 + FMOVDP F0, ret+8(FP) + RET +use_exp: +// test bits for not-finite + MOVL x_hi+4(FP), AX + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + FLDL2E // F0=log2(e) + FMULD x+0(FP), F0 // F0=x*log2(e) + FMOVD F0, F1 // F0=x*log2(e), F1=x*log2(e) + FRNDINT // F0=int(x*log2(e)), F1=x*log2(e) + FSUBD F0, F1 // F0=int(x*log2(e)), F1=x*log2(e)-int(x*log2(e)) + FXCHD F0, F1 // F0=x*log2(e)-int(x*log2(e)), F1=int(x*log2(e)) + F2XM1 // F0=2**(x*log2(e)-int(x*log2(e)))-1, F1=int(x*log2(e)) + FLD1 // F0=1, F1=2**(x*log2(e)-int(x*log2(e)))-1, F2=int(x*log2(e)) + FADDDP F0, F1 // F0=2**(x*log2(e)-int(x*log2(e))), F1=int(x*log2(e)) + FSCALE // F0=e**x, F1=int(x*log2(e)) + FMOVDP F0, F1 // F0=e**x + FLD1 // F0=1, F1=e**x + FSUBDP F0, F1 // F0=e**x-1 + FMOVDP F0, ret+8(FP) + RET +not_finite: +// test bits for -Inf + MOVL x_hi+4(FP), BX + MOVL x_lo+0(FP), CX + CMPL BX, $0xfff00000 + JNE not_neginf + CMPL CX, $0 + JNE not_neginf + FLD1 // F0=1 + FCHS // F0=-1 + FMOVDP F0, ret+8(FP) + RET +not_neginf: + MOVL CX, ret_lo+8(FP) + MOVL BX, ret_hi+12(FP) + RET diff --git a/src/math/expm1_amd64.s b/src/math/expm1_amd64.s new file mode 100644 index 000000000..b7d5a3be0 --- /dev/null +++ b/src/math/expm1_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·Expm1(SB),NOSPLIT,$0 + JMP ·expm1(SB) diff --git a/src/math/expm1_amd64p32.s b/src/math/expm1_amd64p32.s new file mode 100644 index 000000000..709ebefcb --- /dev/null +++ b/src/math/expm1_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "expm1_amd64.s" diff --git a/src/math/expm1_arm.s b/src/math/expm1_arm.s new file mode 100644 index 000000000..5f80d872f --- /dev/null +++ b/src/math/expm1_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Expm1(SB),NOSPLIT,$0 + B ·expm1(SB) diff --git a/src/math/export_test.go b/src/math/export_test.go new file mode 100644 index 000000000..02992d70e --- /dev/null +++ b/src/math/export_test.go @@ -0,0 +1,11 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Export internal functions for testing. +var ExpGo = exp +var Exp2Go = exp2 +var HypotGo = hypot +var SqrtGo = sqrt diff --git a/src/math/floor.go b/src/math/floor.go new file mode 100644 index 000000000..9d30629c5 --- /dev/null +++ b/src/math/floor.go @@ -0,0 +1,56 @@ +// Copyright 2009-2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Floor returns the greatest integer value less than or equal to x. 
// floor is the portable implementation of Floor: it returns the
// greatest integer value less than or equal to x.
//
// Special cases are:
//	floor(±0) = ±0
//	floor(±Inf) = ±Inf
//	floor(NaN) = NaN
func floor(x float64) float64 {
	switch {
	case x == 0 || IsNaN(x) || IsInf(x, 0):
		// Returned as is, which keeps the sign of a zero.
		return x
	case x < 0:
		// Split |x|; any fractional part pushes the result one further
		// away from zero.
		whole, frac := Modf(-x)
		if frac != 0.0 {
			whole = whole + 1
		}
		return -whole
	}
	whole, _ := Modf(x)
	return whole
}

// ceil is the portable implementation of Ceil: it returns the least
// integer value greater than or equal to x, computed as -Floor(-x).
//
// Special cases are:
//	ceil(±0) = ±0
//	ceil(±Inf) = ±Inf
//	ceil(NaN) = NaN
func ceil(x float64) float64 {
	mirrored := Floor(-x)
	return -mirrored
}

// trunc is the portable implementation of Trunc: it returns the integer
// value of x, i.e. x with its fractional part dropped.
//
// Special cases are:
//	trunc(±0) = ±0
//	trunc(±Inf) = ±Inf
//	trunc(NaN) = NaN
func trunc(x float64) float64 {
	if x != 0 && !IsNaN(x) && !IsInf(x, 0) {
		whole, _ := Modf(x)
		return whole
	}
	return x
}
+ +#include "textflag.h" + +// func Ceil(x float64) float64 +TEXT ·Ceil(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ANDW $0xf3ff, AX + ORW $0x0800, AX // Rounding Control set to +Inf + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=Ceil(x) + FLDCW -2(SP) // load old Control Word + FMOVDP F0, ret+8(FP) + RET + +// func Floor(x float64) float64 +TEXT ·Floor(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ANDW $0xf3ff, AX + ORW $0x0400, AX // Rounding Control set to -Inf + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=Floor(x) + FLDCW -2(SP) // load old Control Word + FMOVDP F0, ret+8(FP) + RET + +// func Trunc(x float64) float64 +TEXT ·Trunc(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ORW $0x0c00, AX // Rounding Control set to truncate + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=Trunc(x) + FLDCW -2(SP) // load old Control Word + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/floor_amd64.s b/src/math/floor_amd64.s new file mode 100644 index 000000000..67b7cdec0 --- /dev/null +++ b/src/math/floor_amd64.s @@ -0,0 +1,76 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define Big 0x4330000000000000 // 2**52 + +// func Floor(x float64) float64 +TEXT ·Floor(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + ANDQ AX,DX // DX = |x| + SUBQ $1,DX + MOVQ $(Big - 1), CX // if |x| >= 2**52-1 or IsNaN(x) or |x| == 0, return x + CMPQ DX,CX + JAE isBig_floor + MOVQ AX, X0 // X0 = x + CVTTSD2SQ X0, AX + CVTSQ2SD AX, X1 // X1 = float(int(x)) + CMPSD X1, X0, 1 // compare LT; X0 = 0xffffffffffffffff or 0 + MOVSD $(-1.0), X2 + ANDPD X2, X0 // if x < float(int(x)) {X0 = -1} else {X0 = 0} + ADDSD X1, X0 + MOVSD X0, ret+8(FP) + RET +isBig_floor: + MOVQ AX, ret+8(FP) // return x + RET + +// func Ceil(x float64) float64 +TEXT ·Ceil(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + MOVQ AX, BX // BX = copy of x + ANDQ DX, BX // BX = |x| + MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x + CMPQ BX, CX + JAE isBig_ceil + MOVQ AX, X0 // X0 = x + MOVQ DX, X2 // X2 = sign bit mask + CVTTSD2SQ X0, AX + ANDNPD X0, X2 // X2 = sign + CVTSQ2SD AX, X1 // X1 = float(int(x)) + CMPSD X1, X0, 2 // compare LE; X0 = 0xffffffffffffffff or 0 + ORPD X2, X1 // if X1 = 0.0, incorporate sign + MOVSD $1.0, X3 + ANDNPD X3, X0 + ORPD X2, X0 // if float(int(x)) <= x {X0 = 1} else {X0 = -0} + ADDSD X1, X0 + MOVSD X0, ret+8(FP) + RET +isBig_ceil: + MOVQ AX, ret+8(FP) + RET + +// func Trunc(x float64) float64 +TEXT ·Trunc(SB),NOSPLIT,$0 + MOVQ x+0(FP), AX + MOVQ $~(1<<63), DX // sign bit mask + MOVQ AX, BX // BX = copy of x + ANDQ DX, BX // BX = |x| + MOVQ $Big, CX // if |x| >= 2**52 or IsNaN(x), return x + CMPQ BX, CX + JAE isBig_trunc + MOVQ AX, X0 + MOVQ DX, X2 // X2 = sign bit mask + CVTTSD2SQ X0, AX + ANDNPD X0, X2 // X2 = sign + CVTSQ2SD AX, X0 // X0 = float(int(x)) + ORPD X2, X0 // if X0 = 0.0, incorporate sign + MOVSD X0, ret+8(FP) + RET +isBig_trunc: + MOVQ AX, ret+8(FP) // return x + RET diff --git a/src/math/floor_amd64p32.s b/src/math/floor_amd64p32.s new file mode 100644 index 
000000000..5b87d7a40 --- /dev/null +++ b/src/math/floor_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "floor_amd64.s" diff --git a/src/math/floor_arm.s b/src/math/floor_arm.s new file mode 100644 index 000000000..59091765b --- /dev/null +++ b/src/math/floor_arm.s @@ -0,0 +1,14 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Floor(SB),NOSPLIT,$0 + B ·floor(SB) + +TEXT ·Ceil(SB),NOSPLIT,$0 + B ·ceil(SB) + +TEXT ·Trunc(SB),NOSPLIT,$0 + B ·trunc(SB) diff --git a/src/math/frexp.go b/src/math/frexp.go new file mode 100644 index 000000000..0e26feb66 --- /dev/null +++ b/src/math/frexp.go @@ -0,0 +1,33 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Frexp breaks f into a normalized fraction +// and an integral power of two. +// It returns frac and exp satisfying f == frac × 2**exp, +// with the absolute value of frac in the interval [½, 1). +// +// Special cases are: +// Frexp(±0) = ±0, 0 +// Frexp(±Inf) = ±Inf, 0 +// Frexp(NaN) = NaN, 0 +func Frexp(f float64) (frac float64, exp int) + +func frexp(f float64) (frac float64, exp int) { + // special cases + switch { + case f == 0: + return f, 0 // correctly return -0 + case IsInf(f, 0) || IsNaN(f): + return f, 0 + } + f, exp = normalize(f) + x := Float64bits(f) + exp += int((x>>shift)&mask) - bias + 1 + x &^= mask << shift + x |= (-1 + bias) << shift + frac = Float64frombits(x) + return +} diff --git a/src/math/frexp_386.s b/src/math/frexp_386.s new file mode 100644 index 000000000..5bff7e215 --- /dev/null +++ b/src/math/frexp_386.s @@ -0,0 +1,25 @@ +// Copyright 2010 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Frexp(f float64) (frac float64, exp int) +TEXT ·Frexp(SB),NOSPLIT,$0 + FMOVD f+0(FP), F0 // F0=f + FXAM + FSTSW AX + SAHF + JNP nan_zero_inf + JCS nan_zero_inf + FXTRACT // F0=f (0<=f<1), F1=e + FMULD $(0.5), F0 // F0=f (0.5<=f<1), F1=e + FMOVDP F0, frac+8(FP) // F0=e + FLD1 // F0=1, F1=e + FADDDP F0, F1 // F0=e+1 + FMOVLP F0, exp+16(FP) // (int=int32) + RET +nan_zero_inf: + FMOVDP F0, frac+8(FP) // F0=e + MOVL $0, exp+16(FP) // exp=0 + RET diff --git a/src/math/frexp_amd64.s b/src/math/frexp_amd64.s new file mode 100644 index 000000000..93a321039 --- /dev/null +++ b/src/math/frexp_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Frexp(SB),NOSPLIT,$0 + JMP ·frexp(SB) diff --git a/src/math/frexp_amd64p32.s b/src/math/frexp_amd64p32.s new file mode 100644 index 000000000..fbb564539 --- /dev/null +++ b/src/math/frexp_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "frexp_amd64.s" diff --git a/src/math/frexp_arm.s b/src/math/frexp_arm.s new file mode 100644 index 000000000..7842eca59 --- /dev/null +++ b/src/math/frexp_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Frexp(SB),NOSPLIT,$0 + B ·frexp(SB) diff --git a/src/math/gamma.go b/src/math/gamma.go new file mode 100644 index 000000000..164f54f33 --- /dev/null +++ b/src/math/gamma.go @@ -0,0 +1,202 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from http://netlib.sandia.gov/cephes/cprob/gamma.c. +// The go code is a simplified version of the original C. +// +// tgamma.c +// +// Gamma function +// +// SYNOPSIS: +// +// double x, y, tgamma(); +// extern int signgam; +// +// y = tgamma( x ); +// +// DESCRIPTION: +// +// Returns gamma function of the argument. The result is +// correctly signed, and the sign (+1 or -1) is also +// returned in a global (extern) variable named signgam. +// This variable is also filled in by the logarithmic gamma +// function lgamma(). +// +// Arguments |x| <= 34 are reduced by recurrence and the function +// approximated by a rational function of degree 6/7 in the +// interval (2,3). Large arguments are handled by Stirling's +// formula. Large negative arguments are made positive using +// a reflection formula. +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC -34, 34 10000 1.3e-16 2.5e-17 +// IEEE -170,-33 20000 2.3e-15 3.3e-16 +// IEEE -33, 33 20000 9.4e-16 2.2e-16 +// IEEE 33, 171.6 20000 2.3e-15 3.2e-16 +// +// Error for arguments outside the test range will be larger +// owing to error amplification by the exponential function. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. 
+// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. Moshier +// moshier@na-net.ornl.gov + +var _gamP = [...]float64{ + 1.60119522476751861407e-04, + 1.19135147006586384913e-03, + 1.04213797561761569935e-02, + 4.76367800457137231464e-02, + 2.07448227648435975150e-01, + 4.94214826801497100753e-01, + 9.99999999999999996796e-01, +} +var _gamQ = [...]float64{ + -2.31581873324120129819e-05, + 5.39605580493303397842e-04, + -4.45641913851797240494e-03, + 1.18139785222060435552e-02, + 3.58236398605498653373e-02, + -2.34591795718243348568e-01, + 7.14304917030273074085e-02, + 1.00000000000000000320e+00, +} +var _gamS = [...]float64{ + 7.87311395793093628397e-04, + -2.29549961613378126380e-04, + -2.68132617805781232825e-03, + 3.47222221605458667310e-03, + 8.33333333333482257126e-02, +} + +// Gamma function computed by Stirling's formula. +// The polynomial is valid for 33 <= x <= 172. +func stirling(x float64) float64 { + const ( + SqrtTwoPi = 2.506628274631000502417 + MaxStirling = 143.01608 + ) + w := 1 / x + w = 1 + w*((((_gamS[0]*w+_gamS[1])*w+_gamS[2])*w+_gamS[3])*w+_gamS[4]) + y := Exp(x) + if x > MaxStirling { // avoid Pow() overflow + v := Pow(x, 0.5*x-0.25) + y = v * (v / y) + } else { + y = Pow(x, x-0.5) / y + } + y = SqrtTwoPi * y * w + return y +} + +// Gamma returns the Gamma function of x. 
+// +// Special cases are: +// Gamma(+Inf) = +Inf +// Gamma(+0) = +Inf +// Gamma(-0) = -Inf +// Gamma(x) = NaN for integer x < 0 +// Gamma(-Inf) = NaN +// Gamma(NaN) = NaN +func Gamma(x float64) float64 { + const Euler = 0.57721566490153286060651209008240243104215933593992 // A001620 + // special cases + switch { + case isNegInt(x) || IsInf(x, -1) || IsNaN(x): + return NaN() + case x == 0: + if Signbit(x) { + return Inf(-1) + } + return Inf(1) + case x < -170.5674972726612 || x > 171.61447887182298: + return Inf(1) + } + q := Abs(x) + p := Floor(q) + if q > 33 { + if x >= 0 { + return stirling(x) + } + signgam := 1 + if ip := int(p); ip&1 == 0 { + signgam = -1 + } + z := q - p + if z > 0.5 { + p = p + 1 + z = q - p + } + z = q * Sin(Pi*z) + if z == 0 { + return Inf(signgam) + } + z = Pi / (Abs(z) * stirling(q)) + return float64(signgam) * z + } + + // Reduce argument + z := 1.0 + for x >= 3 { + x = x - 1 + z = z * x + } + for x < 0 { + if x > -1e-09 { + goto small + } + z = z / x + x = x + 1 + } + for x < 2 { + if x < 1e-09 { + goto small + } + z = z / x + x = x + 1 + } + + if x == 2 { + return z + } + + x = x - 2 + p = (((((x*_gamP[0]+_gamP[1])*x+_gamP[2])*x+_gamP[3])*x+_gamP[4])*x+_gamP[5])*x + _gamP[6] + q = ((((((x*_gamQ[0]+_gamQ[1])*x+_gamQ[2])*x+_gamQ[3])*x+_gamQ[4])*x+_gamQ[5])*x+_gamQ[6])*x + _gamQ[7] + return z * p / q + +small: + if x == 0 { + return Inf(1) + } + return z / ((1 + Euler*x) * x) +} + +func isNegInt(x float64) bool { + if x < 0 { + _, xf := Modf(x) + return xf == 0 + } + return false +} diff --git a/src/math/hypot.go b/src/math/hypot.go new file mode 100644 index 000000000..2087cb05b --- /dev/null +++ b/src/math/hypot.go @@ -0,0 +1,43 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Hypot -- sqrt(p*p + q*q), but overflows only if the result does. 
+*/ + +// Hypot returns Sqrt(p*p + q*q), taking care to avoid +// unnecessary overflow and underflow. +// +// Special cases are: +// Hypot(±Inf, q) = +Inf +// Hypot(p, ±Inf) = +Inf +// Hypot(NaN, q) = NaN +// Hypot(p, NaN) = NaN +func Hypot(p, q float64) float64 + +func hypot(p, q float64) float64 { + // special cases + switch { + case IsInf(p, 0) || IsInf(q, 0): + return Inf(1) + case IsNaN(p) || IsNaN(q): + return NaN() + } + if p < 0 { + p = -p + } + if q < 0 { + q = -q + } + if p < q { + p, q = q, p + } + if p == 0 { + return 0 + } + q = q / p + return p * Sqrt(1+q*q) +} diff --git a/src/math/hypot_386.s b/src/math/hypot_386.s new file mode 100644 index 000000000..d321f465b --- /dev/null +++ b/src/math/hypot_386.s @@ -0,0 +1,59 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Hypot(p, q float64) float64 +TEXT ·Hypot(SB),NOSPLIT,$0 +// test bits for not-finite + MOVL p_hi+4(FP), AX // high word p + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + MOVL q_hi+12(FP), AX // high word q + ANDL $0x7ff00000, AX + CMPL AX, $0x7ff00000 + JEQ not_finite + FMOVD p+0(FP), F0 // F0=p + FABS // F0=|p| + FMOVD q+8(FP), F0 // F0=q, F1=|p| + FABS // F0=|q|, F1=|p| + FUCOMI F0, F1 // compare F0 to F1 + JCC 2(PC) // jump if F0 >= F1 + FXCHD F0, F1 // F0=|p| (larger), F1=|q| (smaller) + FTST // compare F0 to 0 + FSTSW AX + ANDW $0x4000, AX + JNE 10(PC) // jump if F0 = 0 + FXCHD F0, F1 // F0=q (smaller), F1=p (larger) + FDIVD F1, F0 // F0=q(=q/p), F1=p + FMULD F0, F0 // F0=q*q, F1=p + FLD1 // F0=1, F1=q*q, F2=p + FADDDP F0, F1 // F0=1+q*q, F1=p + FSQRT // F0=sqrt(1+q*q), F1=p + FMULDP F0, F1 // F0=p*sqrt(1+q*q) + FMOVDP F0, ret+16(FP) + RET + FMOVDP F0, F1 // F0=0 + FMOVDP F0, ret+16(FP) + RET +not_finite: +// test bits for -Inf or +Inf + MOVL p_hi+4(FP), AX // high word p + ORL p_lo+0(FP), AX // low word p + ANDL 
$0x7fffffff, AX + CMPL AX, $0x7ff00000 + JEQ is_inf + MOVL q_hi+12(FP), AX // high word q + ORL q_lo+8(FP), AX // low word q + ANDL $0x7fffffff, AX + CMPL AX, $0x7ff00000 + JEQ is_inf + MOVL $0x7ff80000, ret_hi+20(FP) // return NaN = 0x7FF8000000000001 + MOVL $0x00000001, ret_lo+16(FP) + RET +is_inf: + MOVL AX, ret_hi+20(FP) // return +Inf = 0x7FF0000000000000 + MOVL $0x00000000, ret_lo+16(FP) + RET diff --git a/src/math/hypot_amd64.s b/src/math/hypot_amd64.s new file mode 100644 index 000000000..a68eebc8c --- /dev/null +++ b/src/math/hypot_amd64.s @@ -0,0 +1,52 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 + +// func Hypot(p, q float64) float64 +TEXT ·Hypot(SB),NOSPLIT,$0 + // test bits for special cases + MOVQ p+0(FP), BX + MOVQ $~(1<<63), AX + ANDQ AX, BX // p = |p| + MOVQ q+8(FP), CX + ANDQ AX, CX // q = |q| + MOVQ $PosInf, AX + CMPQ AX, BX + JLE isInfOrNaN + CMPQ AX, CX + JLE isInfOrNaN + // hypot = max * sqrt(1 + (min/max)**2) + MOVQ BX, X0 + MOVQ CX, X1 + ORQ CX, BX + JEQ isZero + MOVAPD X0, X2 + MAXSD X1, X0 + MINSD X2, X1 + DIVSD X0, X1 + MULSD X1, X1 + ADDSD $1.0, X1 + SQRTSD X1, X1 + MULSD X1, X0 + MOVSD X0, ret+16(FP) + RET +isInfOrNaN: + CMPQ AX, BX + JEQ isInf + CMPQ AX, CX + JEQ isInf + MOVQ $NaN, AX + MOVQ AX, ret+16(FP) // return NaN + RET +isInf: + MOVQ AX, ret+16(FP) // return +Inf + RET +isZero: + MOVQ $0, AX + MOVQ AX, ret+16(FP) // return 0 + RET diff --git a/src/math/hypot_amd64p32.s b/src/math/hypot_amd64p32.s new file mode 100644 index 000000000..b84542ae3 --- /dev/null +++ b/src/math/hypot_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "hypot_amd64.s" diff --git a/src/math/hypot_arm.s b/src/math/hypot_arm.s new file mode 100644 index 000000000..9c8abca13 --- /dev/null +++ b/src/math/hypot_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Hypot(SB),NOSPLIT,$0 + B ·hypot(SB) diff --git a/src/math/j0.go b/src/math/j0.go new file mode 100644 index 000000000..c20a9b22a --- /dev/null +++ b/src/math/j0.go @@ -0,0 +1,429 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Bessel function of the first and second kinds of order zero. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_j0.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_j0(x), __ieee754_y0(x) +// Bessel function of the first and second kinds of order zero. +// Method -- j0(x): +// 1. For tiny x, we use j0(x) = 1 - x**2/4 + x**4/64 - ... +// 2. Reduce x to |x| since j0(x)=j0(-x), and +// for x in (0,2) +// j0(x) = 1-z/4+ z**2*R0/S0, where z = x*x; +// (precision: |j0-1+z/4-z**2R0/S0 |<2**-63.67 ) +// for x in (2,inf) +// j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) +// where x0 = x-pi/4. 
It is better to compute sin(x0),cos(x0) +// as follow: +// cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) +// = 1/sqrt(2) * (cos(x) + sin(x)) +// sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) +// = 1/sqrt(2) * (sin(x) - cos(x)) +// (To avoid cancellation, use +// sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) +// to compute the worse one.) +// +// 3 Special cases +// j0(nan)= nan +// j0(0) = 1 +// j0(inf) = 0 +// +// Method -- y0(x): +// 1. For x<2. +// Since +// y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x**2/4 - ...) +// therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. +// We use the following function to approximate y0, +// y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x**2 +// where +// U(z) = u00 + u01*z + ... + u06*z**6 +// V(z) = 1 + v01*z + ... + v04*z**4 +// with absolute approximation error bounded by 2**-72. +// Note: For tiny x, U/V = u0 and j0(x)~1, hence +// y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) +// 2. For x>=2. +// y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) +// where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) +// by the method mentioned above. +// 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. +// + +// J0 returns the order-zero Bessel function of the first kind. 
//
// Special cases are:
//	J0(±Inf) = 0
//	J0(0) = 1
//	J0(NaN) = NaN
//
// The sign of x is discarded before evaluation. For |x| >= 2 the result
// is assembled from Sincos(x) and the pzero/qzero helpers; below that a
// rational function R/S in z = x*x is used, with two shortcuts for very
// small arguments.
func J0(x float64) float64 {
	const (
		Huge   = 1e300                // not referenced in this function
		TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000
		TwoM13 = 1.0 / (1 << 13) // 2**-13 0x3f20000000000000
		Two129 = 1 << 129        // 2**129 0x4800000000000000
		// R0/S0 on [0, 2]
		R02 = 1.56249999999999947958e-02  // 0x3F8FFFFFFFFFFFFD
		R03 = -1.89979294238854721751e-04 // 0xBF28E6A5B61AC6E9
		R04 = 1.82954049532700665670e-06  // 0x3EBEB1D10C503919
		R05 = -4.61832688532103189199e-09 // 0xBE33D5E773D63FCE
		S01 = 1.56191029464890010492e-02  // 0x3F8FFCE882C8C2A4
		S02 = 1.16926784663337450260e-04  // 0x3F1EA6D2DD57DBF4
		S03 = 5.13546550207318111446e-07  // 0x3EA13B54CE84D5A9
		S04 = 1.16614003333790000205e-09  // 0x3E1408BCF4745D8F
	)
	// special cases
	switch {
	case IsNaN(x):
		return x
	case IsInf(x, 0):
		return 0
	case x == 0:
		return 1
	}

	// Everything below works on |x|.
	if x < 0 {
		x = -x
	}
	if x >= 2 {
		s, c := Sincos(x)
		ss := s - c
		cc := s + c

		// make sure x+x does not overflow
		if x < MaxFloat64/2 {
			// Recompute whichever of ss, cc suffers cancellation from
			// -cos(2x) and the other one.
			z := -Cos(x + x)
			if s*c < 0 {
				cc = z / ss
			} else {
				ss = z / cc
			}
		}

		// j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
		// y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)

		var z float64
		if x > Two129 { // |x| > ~6.8056e+38
			// pzero and qzero are not evaluated this far out; only the
			// cc term is kept.
			z = (1 / SqrtPi) * cc / Sqrt(x)
		} else {
			u := pzero(x)
			v := qzero(x)
			z = (1 / SqrtPi) * (u*cc - v*ss) / Sqrt(x)
		}
		return z // |x| >= 2.0
	}
	if x < TwoM13 { // |x| < ~1.2207e-4
		if x < TwoM27 {
			return 1 // |x| < ~7.4506e-9
		}
		return 1 - 0.25*x*x // ~7.4506e-9 < |x| < ~1.2207e-4
	}
	z := x * x
	r := z * (R02 + z*(R03+z*(R04+z*R05)))
	s := 1 + z*(S01+z*(S02+z*(S03+z*S04)))
	if x < 1 {
		return 1 + z*(-0.25+(r/s)) // |x| < 1.00
	}
	// Same value as above, with 1 - z/4 written as (1+u)*(1-u) for u = x/2.
	u := 0.5 * x
	return (1+u)*(1-u) + z*(r/s) // 1.0 < |x| < 2.0
}

// Y0 returns the order-zero Bessel function of the second kind.
//
// Special cases are:
//	Y0(+Inf) = 0
//	Y0(0) = -Inf
//	Y0(x < 0) = NaN
//	Y0(NaN) = NaN
//
// For x >= 2 the result is assembled from Sincos(x) and the pzero/qzero
// helpers; below that it is U(z)/V(z) + (2/Pi)*J0(x)*Log(x) with z = x*x.
func Y0(x float64) float64 {
	const (
		TwoM27 = 1.0 / (1 << 27)             // 2**-27 0x3e40000000000000
		Two129 = 1 << 129                    // 2**129 0x4800000000000000
		U00    = -7.38042951086872317523e-02 // 0xBFB2E4D699CBD01F
		U01    = 1.76666452509181115538e-01  // 0x3FC69D019DE9E3FC
		U02    = -1.38185671945596898896e-02 // 0xBF8C4CE8B16CFA97
		U03    = 3.47453432093683650238e-04  // 0x3F36C54D20B29B6B
		U04    = -3.81407053724364161125e-06 // 0xBECFFEA773D25CAD
		U05    = 1.95590137035022920206e-08  // 0x3E5500573B4EABD4
		U06    = -3.98205194132103398453e-11 // 0xBDC5E43D693FB3C8
		V01    = 1.27304834834123699328e-02  // 0x3F8A127091C9C71A
		V02    = 7.60068627350353253702e-05  // 0x3F13ECBBF578C6C1
		V03    = 2.59150851840457805467e-07  // 0x3E91642D7FF202FD
		V04    = 4.41110311332675467403e-10  // 0x3DFE50183BD6D9EF
	)
	// special cases
	switch {
	case x < 0 || IsNaN(x):
		return NaN()
	case IsInf(x, 1):
		return 0
	case x == 0:
		return Inf(-1)
	}

	if x >= 2 { // |x| >= 2.0

		// y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
		//     where x0 = x-pi/4
		// Better formula:
		//     cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
		//             =  1/sqrt(2) * (sin(x) + cos(x))
		//     sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4)
		//             =  1/sqrt(2) * (sin(x) - cos(x))
		// To avoid cancellation, use
		//     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
		// to compute the worse one.

		s, c := Sincos(x)
		ss := s - c
		cc := s + c

		// j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
		// y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)

		// make sure x+x does not overflow
		if x < MaxFloat64/2 {
			z := -Cos(x + x)
			if s*c < 0 {
				cc = z / ss
			} else {
				ss = z / cc
			}
		}
		var z float64
		if x > Two129 { // |x| > ~6.8056e+38
			// pzero and qzero are not evaluated this far out; only the
			// ss term is kept.
			z = (1 / SqrtPi) * ss / Sqrt(x)
		} else {
			u := pzero(x)
			v := qzero(x)
			z = (1 / SqrtPi) * (u*ss + v*cc) / Sqrt(x)
		}
		return z // |x| >= 2.0
	}
	if x <= TwoM27 {
		// U/V is replaced by its constant term U00 and J0(x) by 1.
		return U00 + (2/Pi)*Log(x) // |x| < ~7.4506e-9
	}
	z := x * x
	u := U00 + z*(U01+z*(U02+z*(U03+z*(U04+z*(U05+z*U06)))))
	v := 1 + z*(V01+z*(V02+z*(V03+z*V04)))
	return u/v + (2/Pi)*J0(x)*Log(x) // ~7.4506e-9 < |x| < 2.0
}

// The asymptotic expansions of pzero is
//      1 - 9/128 s**2 + 11025/98304 s**4 - ...,        where s = 1/x.
// For x >= 2, We approximate pzero by
//      pzero(x) = 1 + (R/S)
// where  R = pR0 + pR1*s**2 + pR2*s**4 + ... + pR5*s**10
//        S = 1 + pS0*s**2 + ...
//                             + pS4*s**10
// and
//      | pzero(x)-1-R/S | <= 2  ** ( -60.26)

// for x in [inf, 8]=1/[0,0.125]
var p0R8 = [6]float64{
	0.00000000000000000000e+00,  // 0x0000000000000000
	-7.03124999999900357484e-02, // 0xBFB1FFFFFFFFFD32
	-8.08167041275349795626e+00, // 0xC02029D0B44FA779
	-2.57063105679704847262e+02, // 0xC07011027B19E863
	-2.48521641009428822144e+03, // 0xC0A36A6ECD4DCAFC
	-5.25304380490729545272e+03, // 0xC0B4850B36CC643D
}
var p0S8 = [5]float64{
	1.16534364619668181717e+02, // 0x405D223307A96751
	3.83374475364121826715e+03, // 0x40ADF37D50596938
	4.05978572648472545552e+04, // 0x40E3D2BB6EB6B05F
	1.16752972564375915681e+05, // 0x40FC810F8F9FA9BD
	4.76277284146730962675e+04, // 0x40E741774F2C49DC
}

// for x in [8,4.5454]=1/[0.125,0.22001]
var p0R5 = [6]float64{
	-1.14125464691894502584e-11, // 0xBDA918B147E495CC
	-7.03124940873599280078e-02, // 0xBFB1FFFFE69AFBC6
	-4.15961064470587782438e+00, // 0xC010A370F90C6BBF
	-6.76747652265167261021e+01, // 0xC050EB2F5A7D1783
	-3.31231299649172967747e+02, // 0xC074B3B36742CC63
	-3.46433388365604912451e+02, // 0xC075A6EF28A38BD7
}
var p0S5 = [5]float64{
	6.07539382692300335975e+01, // 0x404E60810C98C5DE
	1.05125230595704579173e+03, // 0x40906D025C7E2864
	5.97897094333855784498e+03, // 0x40B75AF88FBE1D60
	9.62544514357774460223e+03, // 0x40C2CCB8FA76FA38
	2.40605815922939109441e+03, // 0x40A2CC1DC70BE864
}

// for x in [4.547,2.8571]=1/[0.2199,0.35001]
var p0R3 = [6]float64{
	-2.54704601771951915620e-09, // 0xBE25E1036FE1AA86
	-7.03119616381481654654e-02, // 0xBFB1FFF6F7C0E24B
	-2.40903221549529611423e+00, // 0xC00345B2AEA48074
	-2.19659774734883086467e+01, // 0xC035F74A4CB94E14
	-5.80791704701737572236e+01, // 0xC04D0A22420A1A45
	-3.14479470594888503854e+01, // 0xC03F72ACA892D80F
}
var p0S3 = [5]float64{
	3.58560338055209726349e+01, // 0x4041ED9284077DD3
	3.61513983050303863820e+02, // 0x40769839464A7C0E
	1.19360783792111533330e+03, // 0x4092A66E6D1061D6
	1.12799679856907414432e+03, // 0x40919FFCB8C39B7E
	1.73580930813335754692e+02, // 0x4065B296FC379081
}

// for x in [2.8570,2]=1/[0.3499,0.5]
var p0R2 = [6]float64{
	-8.87534333032526411254e-08, // 0xBE77D316E927026D
	-7.03030995483624743247e-02, // 0xBFB1FF62495E1E42
	-1.45073846780952986357e+00, // 0xBFF736398A24A843
	-7.63569613823527770791e+00, // 0xC01E8AF3EDAFA7F3
	-1.11931668860356747786e+01, // 0xC02662E6C5246303
	-3.23364579351335335033e+00, // 0xC009DE81AF8FE70F
}
var p0S2 = [5]float64{
	2.22202997532088808441e+01, // 0x40363865908B5959
	1.36206794218215208048e+02, // 0x4061069E0EE8878F
	2.70470278658083486789e+02, // 0x4070E78642EA079B
	1.53875394208320329881e+02, // 0x40633C033AB6FAFF
	1.46576176948256193810e+01, // 0x402D50B344391809
}

// pzero returns 1 + R(z)/S(z) with z = 1/(x*x), where R and S are the
// polynomials whose coefficients are stored in the p0R*/p0S* table pair
// matching x (x >= 8, >= 4.5454, >= 2.8571, >= 2).
//
// The tables are selected through pointers so that no coefficient array
// is copied per call. Arguments outside the fitted domain (x < 2 or NaN)
// keep the historical result of evaluating with all-zero tables: 1, or
// NaN when 1/(x*x) is not finite.
func pzero(x float64) float64 {
	var p *[6]float64
	var q *[5]float64
	switch {
	case x >= 8:
		p, q = &p0R8, &p0S8
	case x >= 4.5454:
		p, q = &p0R5, &p0S5
	case x >= 2.8571:
		p, q = &p0R3, &p0S3
	case x >= 2:
		p, q = &p0R2, &p0S2
	default:
		// With zero coefficients R collapses to z*0 and S to 1.
		z := 1 / (x * x)
		return 1 + z*0
	}
	z := 1 / (x * x)
	r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))
	s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))))
	return 1 + r/s
}

// For x >= 8, the asymptotic expansions of qzero is
//      -1/8 s + 75/1024 s**3 - ..., where s = 1/x.
// We approximate qzero by
//      qzero(x) = s*(-0.125 + (R/S))
// where  R = qR0 + qR1*s**2 + qR2*s**4 + ... + qR5*s**10
//        S = 1 + qS0*s**2 + ...
//                             + qS5*s**12
// and
//      | qzero(x)/s +0.125-R/S | <= 2**(-61.22)

// for x in [inf, 8]=1/[0,0.125]
var q0R8 = [6]float64{
	0.00000000000000000000e+00, // 0x0000000000000000
	7.32421874999935051953e-02, // 0x3FB2BFFFFFFFFE2C
	1.17682064682252693899e+01, // 0x402789525BB334D6
	5.57673380256401856059e+02, // 0x40816D6315301825
	8.85919720756468632317e+03, // 0x40C14D993E18F46D
	3.70146267776887834771e+04, // 0x40E212D40E901566
}
var q0S8 = [6]float64{
	1.63776026895689824414e+02,  // 0x406478D5365B39BC
	8.09834494656449805916e+03,  // 0x40BFA2584E6B0563
	1.42538291419120476348e+05,  // 0x4101665254D38C3F
	8.03309257119514397345e+05,  // 0x412883DA83A52B43
	8.40501579819060512818e+05,  // 0x4129A66B28DE0B3D
	-3.43899293537866615225e+05, // 0xC114FD6D2C9530C5
}

// for x in [8,4.5454]=1/[0.125,0.22001]
var q0R5 = [6]float64{
	1.84085963594515531381e-11, // 0x3DB43D8F29CC8CD9
	7.32421766612684765896e-02, // 0x3FB2BFFFD172B04C
	5.83563508962056953777e+00, // 0x401757B0B9953DD3
	1.35111577286449829671e+02, // 0x4060E3920A8788E9
	1.02724376596164097464e+03, // 0x40900CF99DC8C481
	1.98997785864605384631e+03, // 0x409F17E953C6E3A6
}
var q0S5 = [6]float64{
	8.27766102236537761883e+01,  // 0x4054B1B3FB5E1543
	2.07781416421392987104e+03,  // 0x40A03BA0DA21C0CE
	1.88472887785718085070e+04,  // 0x40D267D27B591E6D
	5.67511122894947329769e+04,  // 0x40EBB5E397E02372
	3.59767538425114471465e+04,  // 0x40E191181F7A54A0
	-5.35434275601944773371e+03, // 0xC0B4EA57BEDBC609
}

// for x in [4.547,2.8571]=1/[0.2199,0.35001]
var q0R3 = [6]float64{
	4.37741014089738620906e-09, // 0x3E32CD036ADECB82
	7.32411180042911447163e-02, // 0x3FB2BFEE0E8D0842
	3.34423137516170720929e+00, // 0x400AC0FC61149CF5
	4.26218440745412650017e+01, // 0x40454F98962DAEDD
	1.70808091340565596283e+02, // 0x406559DBE25EFD1F
	1.66733948696651168575e+02, // 0x4064D77C81FA21E0
}
var q0S3 = [6]float64{
	4.87588729724587182091e+01,  // 0x40486122BFE343A6
	7.09689221056606015736e+02,  // 0x40862D8386544EB3
	3.70414822620111362994e+03,  // 0x40ACF04BE44DFC63
	6.46042516752568917582e+03,  // 0x40B93C6CD7C76A28
	2.51633368920368957333e+03,  // 0x40A3A8AAD94FB1C0
	-1.49247451836156386662e+02, // 0xC062A7EB201CF40F
}

// for x in [2.8570,2]=1/[0.3499,0.5]
var q0R2 = [6]float64{
	1.50444444886983272379e-07, // 0x3E84313B54F76BDB
	7.32234265963079278272e-02, // 0x3FB2BEC53E883E34
	1.99819174093815998816e+00, // 0x3FFFF897E727779C
	1.44956029347885735348e+01, // 0x402CFDBFAAF96FE5
	3.16662317504781540833e+01, // 0x403FAA8E29FBDC4A
	1.62527075710929267416e+01, // 0x403040B171814BB4
}
var q0S2 = [6]float64{
	3.03655848355219184498e+01,  // 0x403E5D96F7C07AED
	2.69348118608049844624e+02,  // 0x4070D591E4D14B40
	8.44783757595320139444e+02,  // 0x408A664522B3BF22
	8.82935845112488550512e+02,  // 0x408B977C9C5CC214
	2.12666388511798828631e+02,  // 0x406A95530E001365
	-5.31095493882666946917e+00, // 0xC0153E6AF8B32931
}

// qzero returns (-0.125 + R(z)/S(z)) / x with z = 1/(x*x), where R and S
// are the polynomials whose coefficients are stored in the q0R*/q0S*
// table pair matching x (x >= 8, >= 4.5454, >= 2.8571, >= 2).
//
// The tables are selected through pointers so that no coefficient array
// is copied per call. Arguments outside the fitted domain (x < 2 or NaN)
// keep the historical result of evaluating with all-zero tables:
// -0.125/x, or NaN when 1/(x*x) is not finite.
func qzero(x float64) float64 {
	var p, q *[6]float64
	switch {
	case x >= 8:
		p, q = &q0R8, &q0S8
	case x >= 4.5454:
		p, q = &q0R5, &q0S5
	case x >= 2.8571:
		p, q = &q0R3, &q0S3
	case x >= 2:
		p, q = &q0R2, &q0S2
	default:
		// With zero coefficients R/S collapses to z*0.
		z := 1 / (x * x)
		return (-0.125 + z*0) / x
	}
	z := 1 / (x * x)
	r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))))
	s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))))
	return (-0.125 + r/s) / x
}
+// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_j1(x), __ieee754_y1(x) +// Bessel function of the first and second kinds of order one. +// Method -- j1(x): +// 1. For tiny x, we use j1(x) = x/2 - x**3/16 + x**5/384 - ... +// 2. Reduce x to |x| since j1(x)=-j1(-x), and +// for x in (0,2) +// j1(x) = x/2 + x*z*R0/S0, where z = x*x; +// (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) +// for x in (2,inf) +// j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) +// y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) +// where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) +// as follow: +// cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) +// = 1/sqrt(2) * (sin(x) - cos(x)) +// sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) +// = -1/sqrt(2) * (sin(x) + cos(x)) +// (To avoid cancellation, use +// sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) +// to compute the worse one.) +// +// 3 Special cases +// j1(nan)= nan +// j1(0) = 0 +// j1(inf) = 0 +// +// Method -- y1(x): +// 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN +// 2. For x<2. +// Since +// y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x**3-...) +// therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. +// We use the following function to approximate y1, +// y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x**2 +// where for x in [0,2] (abs err less than 2**-65.89) +// U(z) = U0[0] + U0[1]*z + ... + U0[4]*z**4 +// V(z) = 1 + v0[0]*z + ... + v0[4]*z**5 +// Note: For tiny x, 1/x dominate y1 and hence +// y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) +// 3. For x>=2. +// y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) +// where x1 = x-3*pi/4. 
It is better to compute sin(x1),cos(x1) +// by method mentioned above. + +// J1 returns the order-one Bessel function of the first kind. +// +// Special cases are: +// J1(±Inf) = 0 +// J1(NaN) = NaN +func J1(x float64) float64 { + const ( + TwoM27 = 1.0 / (1 << 27) // 2**-27 0x3e40000000000000 + Two129 = 1 << 129 // 2**129 0x4800000000000000 + // R0/S0 on [0, 2] + R00 = -6.25000000000000000000e-02 // 0xBFB0000000000000 + R01 = 1.40705666955189706048e-03 // 0x3F570D9F98472C61 + R02 = -1.59955631084035597520e-05 // 0xBEF0C5C6BA169668 + R03 = 4.96727999609584448412e-08 // 0x3E6AAAFA46CA0BD9 + S01 = 1.91537599538363460805e-02 // 0x3F939D0B12637E53 + S02 = 1.85946785588630915560e-04 // 0x3F285F56B9CDF664 + S03 = 1.17718464042623683263e-06 // 0x3EB3BFF8333F8498 + S04 = 5.04636257076217042715e-09 // 0x3E35AC88C97DFF2C + S05 = 1.23542274426137913908e-11 // 0x3DAB2ACFCFB97ED8 + ) + // special cases + switch { + case IsNaN(x): + return x + case IsInf(x, 0) || x == 0: + return 0 + } + + sign := false + if x < 0 { + x = -x + sign = true + } + if x >= 2 { + s, c := Sincos(x) + ss := -s - c + cc := s - c + + // make sure x+x does not overflow + if x < MaxFloat64/2 { + z := Cos(x + x) + if s*c > 0 { + cc = z / ss + } else { + ss = z / cc + } + } + + // j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x) + // y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x) + + var z float64 + if x > Two129 { + z = (1 / SqrtPi) * cc / Sqrt(x) + } else { + u := pone(x) + v := qone(x) + z = (1 / SqrtPi) * (u*cc - v*ss) / Sqrt(x) + } + if sign { + return -z + } + return z + } + if x < TwoM27 { // |x|<2**-27 + return 0.5 * x // inexact if x!=0 necessary + } + z := x * x + r := z * (R00 + z*(R01+z*(R02+z*R03))) + s := 1.0 + z*(S01+z*(S02+z*(S03+z*(S04+z*S05)))) + r *= x + z = 0.5*x + r/s + if sign { + return -z + } + return z +} + +// Y1 returns the order-one Bessel function of the second kind. 
+// +// Special cases are: +// Y1(+Inf) = 0 +// Y1(0) = -Inf +// Y1(x < 0) = NaN +// Y1(NaN) = NaN +func Y1(x float64) float64 { + const ( + TwoM54 = 1.0 / (1 << 54) // 2**-54 0x3c90000000000000 + Two129 = 1 << 129 // 2**129 0x4800000000000000 + U00 = -1.96057090646238940668e-01 // 0xBFC91866143CBC8A + U01 = 5.04438716639811282616e-02 // 0x3FA9D3C776292CD1 + U02 = -1.91256895875763547298e-03 // 0xBF5F55E54844F50F + U03 = 2.35252600561610495928e-05 // 0x3EF8AB038FA6B88E + U04 = -9.19099158039878874504e-08 // 0xBE78AC00569105B8 + V00 = 1.99167318236649903973e-02 // 0x3F94650D3F4DA9F0 + V01 = 2.02552581025135171496e-04 // 0x3F2A8C896C257764 + V02 = 1.35608801097516229404e-06 // 0x3EB6C05A894E8CA6 + V03 = 6.22741452364621501295e-09 // 0x3E3ABF1D5BA69A86 + V04 = 1.66559246207992079114e-11 // 0x3DB25039DACA772A + ) + // special cases + switch { + case x < 0 || IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + case x == 0: + return Inf(-1) + } + + if x >= 2 { + s, c := Sincos(x) + ss := -s - c + cc := s - c + + // make sure x+x does not overflow + if x < MaxFloat64/2 { + z := Cos(x + x) + if s*c > 0 { + cc = z / ss + } else { + ss = z / cc + } + } + // y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0)) + // where x0 = x-3pi/4 + // Better formula: + // cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) + // = 1/sqrt(2) * (sin(x) - cos(x)) + // sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) + // = -1/sqrt(2) * (cos(x) + sin(x)) + // To avoid cancellation, use + // sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + // to compute the worse one. 
+ + var z float64 + if x > Two129 { + z = (1 / SqrtPi) * ss / Sqrt(x) + } else { + u := pone(x) + v := qone(x) + z = (1 / SqrtPi) * (u*ss + v*cc) / Sqrt(x) + } + return z + } + if x <= TwoM54 { // x < 2**-54 + return -(2 / Pi) / x + } + z := x * x + u := U00 + z*(U01+z*(U02+z*(U03+z*U04))) + v := 1 + z*(V00+z*(V01+z*(V02+z*(V03+z*V04)))) + return x*(u/v) + (2/Pi)*(J1(x)*Log(x)-1/x) +} + +// For x >= 8, the asymptotic expansions of pone is +// 1 + 15/128 s**2 - 4725/2**15 s**4 - ..., where s = 1/x. +// We approximate pone by +// pone(x) = 1 + (R/S) +// where R = pr0 + pr1*s**2 + pr2*s**4 + ... + pr5*s**10 +// S = 1 + ps0*s**2 + ... + ps4*s**10 +// and +// | pone(x)-1-R/S | <= 2**(-60.06) + +// for x in [inf, 8]=1/[0,0.125] +var p1R8 = [6]float64{ + 0.00000000000000000000e+00, // 0x0000000000000000 + 1.17187499999988647970e-01, // 0x3FBDFFFFFFFFFCCE + 1.32394806593073575129e+01, // 0x402A7A9D357F7FCE + 4.12051854307378562225e+02, // 0x4079C0D4652EA590 + 3.87474538913960532227e+03, // 0x40AE457DA3A532CC + 7.91447954031891731574e+03, // 0x40BEEA7AC32782DD +} +var p1S8 = [5]float64{ + 1.14207370375678408436e+02, // 0x405C8D458E656CAC + 3.65093083420853463394e+03, // 0x40AC85DC964D274F + 3.69562060269033463555e+04, // 0x40E20B8697C5BB7F + 9.76027935934950801311e+04, // 0x40F7D42CB28F17BB + 3.08042720627888811578e+04, // 0x40DE1511697A0B2D +} + +// for x in [8,4.5454] = 1/[0.125,0.22001] +var p1R5 = [6]float64{ + 1.31990519556243522749e-11, // 0x3DAD0667DAE1CA7D + 1.17187493190614097638e-01, // 0x3FBDFFFFE2C10043 + 6.80275127868432871736e+00, // 0x401B36046E6315E3 + 1.08308182990189109773e+02, // 0x405B13B9452602ED + 5.17636139533199752805e+02, // 0x40802D16D052D649 + 5.28715201363337541807e+02, // 0x408085B8BB7E0CB7 +} +var p1S5 = [5]float64{ + 5.92805987221131331921e+01, // 0x404DA3EAA8AF633D + 9.91401418733614377743e+02, // 0x408EFB361B066701 + 5.35326695291487976647e+03, // 0x40B4E9445706B6FB + 7.84469031749551231769e+03, // 0x40BEA4B0B8A5BB15 + 
1.50404688810361062679e+03, // 0x40978030036F5E51 +} + +// for x in[4.5453,2.8571] = 1/[0.2199,0.35001] +var p1R3 = [6]float64{ + 3.02503916137373618024e-09, // 0x3E29FC21A7AD9EDD + 1.17186865567253592491e-01, // 0x3FBDFFF55B21D17B + 3.93297750033315640650e+00, // 0x400F76BCE85EAD8A + 3.51194035591636932736e+01, // 0x40418F489DA6D129 + 9.10550110750781271918e+01, // 0x4056C3854D2C1837 + 4.85590685197364919645e+01, // 0x4048478F8EA83EE5 +} +var p1S3 = [5]float64{ + 3.47913095001251519989e+01, // 0x40416549A134069C + 3.36762458747825746741e+02, // 0x40750C3307F1A75F + 1.04687139975775130551e+03, // 0x40905B7C5037D523 + 8.90811346398256432622e+02, // 0x408BD67DA32E31E9 + 1.03787932439639277504e+02, // 0x4059F26D7C2EED53 +} + +// for x in [2.8570,2] = 1/[0.3499,0.5] +var p1R2 = [6]float64{ + 1.07710830106873743082e-07, // 0x3E7CE9D4F65544F4 + 1.17176219462683348094e-01, // 0x3FBDFF42BE760D83 + 2.36851496667608785174e+00, // 0x4002F2B7F98FAEC0 + 1.22426109148261232917e+01, // 0x40287C377F71A964 + 1.76939711271687727390e+01, // 0x4031B1A8177F8EE2 + 5.07352312588818499250e+00, // 0x40144B49A574C1FE +} +var p1S2 = [5]float64{ + 2.14364859363821409488e+01, // 0x40356FBD8AD5ECDC + 1.25290227168402751090e+02, // 0x405F529314F92CD5 + 2.32276469057162813669e+02, // 0x406D08D8D5A2DBD9 + 1.17679373287147100768e+02, // 0x405D6B7ADA1884A9 + 8.36463893371618283368e+00, // 0x4020BAB1F44E5192 +} + +func pone(x float64) float64 { + var p [6]float64 + var q [5]float64 + if x >= 8 { + p = p1R8 + q = p1S8 + } else if x >= 4.5454 { + p = p1R5 + q = p1S5 + } else if x >= 2.8571 { + p = p1R3 + q = p1S3 + } else if x >= 2 { + p = p1R2 + q = p1S2 + } + z := 1 / (x * x) + r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) + s := 1.0 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))) + return 1 + r/s +} + +// For x >= 8, the asymptotic expansions of qone is +// 3/8 s - 105/1024 s**3 - ..., where s = 1/x. 
+// We approximate qone by +// qone(x) = s*(0.375 + (R/S)) +// where R = qr1*s**2 + qr2*s**4 + ... + qr5*s**10 +// S = 1 + qs1*s**2 + ... + qs6*s**12 +// and +// | qone(x)/s -0.375-R/S | <= 2**(-61.13) + +// for x in [inf, 8] = 1/[0,0.125] +var q1R8 = [6]float64{ + 0.00000000000000000000e+00, // 0x0000000000000000 + -1.02539062499992714161e-01, // 0xBFBA3FFFFFFFFDF3 + -1.62717534544589987888e+01, // 0xC0304591A26779F7 + -7.59601722513950107896e+02, // 0xC087BCD053E4B576 + -1.18498066702429587167e+04, // 0xC0C724E740F87415 + -4.84385124285750353010e+04, // 0xC0E7A6D065D09C6A +} +var q1S8 = [6]float64{ + 1.61395369700722909556e+02, // 0x40642CA6DE5BCDE5 + 7.82538599923348465381e+03, // 0x40BE9162D0D88419 + 1.33875336287249578163e+05, // 0x4100579AB0B75E98 + 7.19657723683240939863e+05, // 0x4125F65372869C19 + 6.66601232617776375264e+05, // 0x412457D27719AD5C + -2.94490264303834643215e+05, // 0xC111F9690EA5AA18 +} + +// for x in [8,4.5454] = 1/[0.125,0.22001] +var q1R5 = [6]float64{ + -2.08979931141764104297e-11, // 0xBDB6FA431AA1A098 + -1.02539050241375426231e-01, // 0xBFBA3FFFCB597FEF + -8.05644828123936029840e+00, // 0xC0201CE6CA03AD4B + -1.83669607474888380239e+02, // 0xC066F56D6CA7B9B0 + -1.37319376065508163265e+03, // 0xC09574C66931734F + -2.61244440453215656817e+03, // 0xC0A468E388FDA79D +} +var q1S5 = [6]float64{ + 8.12765501384335777857e+01, // 0x405451B2FF5A11B2 + 1.99179873460485964642e+03, // 0x409F1F31E77BF839 + 1.74684851924908907677e+04, // 0x40D10F1F0D64CE29 + 4.98514270910352279316e+04, // 0x40E8576DAABAD197 + 2.79480751638918118260e+04, // 0x40DB4B04CF7C364B + -4.71918354795128470869e+03, // 0xC0B26F2EFCFFA004 +} + +// for x in [4.5454,2.8571] = 1/[0.2199,0.35001] ??? 
+var q1R3 = [6]float64{ + -5.07831226461766561369e-09, // 0xBE35CFA9D38FC84F + -1.02537829820837089745e-01, // 0xBFBA3FEB51AEED54 + -4.61011581139473403113e+00, // 0xC01270C23302D9FF + -5.78472216562783643212e+01, // 0xC04CEC71C25D16DA + -2.28244540737631695038e+02, // 0xC06C87D34718D55F + -2.19210128478909325622e+02, // 0xC06B66B95F5C1BF6 +} +var q1S3 = [6]float64{ + 4.76651550323729509273e+01, // 0x4047D523CCD367E4 + 6.73865112676699709482e+02, // 0x40850EEBC031EE3E + 3.38015286679526343505e+03, // 0x40AA684E448E7C9A + 5.54772909720722782367e+03, // 0x40B5ABBAA61D54A6 + 1.90311919338810798763e+03, // 0x409DBC7A0DD4DF4B + -1.35201191444307340817e+02, // 0xC060E670290A311F +} + +// for x in [2.8570,2] = 1/[0.3499,0.5] +var q1R2 = [6]float64{ + -1.78381727510958865572e-07, // 0xBE87F12644C626D2 + -1.02517042607985553460e-01, // 0xBFBA3E8E9148B010 + -2.75220568278187460720e+00, // 0xC006048469BB4EDA + -1.96636162643703720221e+01, // 0xC033A9E2C168907F + -4.23253133372830490089e+01, // 0xC04529A3DE104AAA + -2.13719211703704061733e+01, // 0xC0355F3639CF6E52 +} +var q1S2 = [6]float64{ + 2.95333629060523854548e+01, // 0x403D888A78AE64FF + 2.52981549982190529136e+02, // 0x406F9F68DB821CBA + 7.57502834868645436472e+02, // 0x4087AC05CE49A0F7 + 7.39393205320467245656e+02, // 0x40871B2548D4C029 + 1.55949003336666123687e+02, // 0x40637E5E3C3ED8D4 + -4.95949898822628210127e+00, // 0xC013D686E71BE86B +} + +func qone(x float64) float64 { + var p, q [6]float64 + if x >= 8 { + p = q1R8 + q = q1S8 + } else if x >= 4.5454 { + p = q1R5 + q = q1S5 + } else if x >= 2.8571 { + p = q1R3 + q = q1S3 + } else if x >= 2 { + p = q1R2 + q = q1S2 + } + z := 1 / (x * x) + r := p[0] + z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))) + s := 1 + z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))) + return (0.375 + r/s) / x +} diff --git a/src/math/jn.go b/src/math/jn.go new file mode 100644 index 000000000..a7909eb24 --- /dev/null +++ b/src/math/jn.go @@ -0,0 +1,306 @@ +// Copyright 2010 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Bessel function of the first and second kinds of order n. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_jn.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_jn(n, x), __ieee754_yn(n, x) +// floating point Bessel's function of the 1st and 2nd kind +// of order n +// +// Special cases: +// y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; +// y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. +// Note 2. About jn(n,x), yn(n,x) +// For n=0, j0(x) is called, +// for n=1, j1(x) is called, +// for n<x, forward recursion is used starting +// from values of j0(x) and j1(x). +// for n>x, a continued fraction approximation to +// j(n,x)/j(n-1,x) is evaluated and then backward +// recursion is used starting from a supposed value +// for j(n,x). The resulting value of j(0,x) is +// compared with the actual value to correct the +// supposed value of j(n,x). +// +// yn(n,x) is similar in all respects, except +// that forward recursion is used for all +// values of n>1. + +// Jn returns the order-n Bessel function of the first kind. 
+// +// Special cases are: +// Jn(n, ±Inf) = 0 +// Jn(n, NaN) = NaN +func Jn(n int, x float64) float64 { + const ( + TwoM29 = 1.0 / (1 << 29) // 2**-29 0x3e10000000000000 + Two302 = 1 << 302 // 2**302 0x52D0000000000000 + ) + // special cases + switch { + case IsNaN(x): + return x + case IsInf(x, 0): + return 0 + } + // J(-n, x) = (-1)**n * J(n, x), J(n, -x) = (-1)**n * J(n, x) + // Thus, J(-n, x) = J(n, -x) + + if n == 0 { + return J0(x) + } + if x == 0 { + return 0 + } + if n < 0 { + n, x = -n, -x + } + if n == 1 { + return J1(x) + } + sign := false + if x < 0 { + x = -x + if n&1 == 1 { + sign = true // odd n and negative x + } + } + var b float64 + if float64(n) <= x { + // Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) + if x >= Two302 { // x > 2**302 + + // (x >> n**2) + // Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) + // Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) + // Let s=sin(x), c=cos(x), + // xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + // + // n sin(xn)*sqt2 cos(xn)*sqt2 + // ---------------------------------- + // 0 s-c c+s + // 1 -s-c -c+s + // 2 -s+c -c-s + // 3 s+c c-s + + var temp float64 + switch n & 3 { + case 0: + temp = Cos(x) + Sin(x) + case 1: + temp = -Cos(x) + Sin(x) + case 2: + temp = -Cos(x) - Sin(x) + case 3: + temp = Cos(x) - Sin(x) + } + b = (1 / SqrtPi) * temp / Sqrt(x) + } else { + b = J1(x) + for i, a := 1, J0(x); i < n; i++ { + a, b = b, b*(float64(i+i)/x)-a // avoid underflow + } + } + } else { + if x < TwoM29 { // x < 2**-29 + // x is tiny, return the first Taylor expansion of J(n,x) + // J(n,x) = 1/n!*(x/2)**n - ... + + if n > 33 { // underflow + b = 0 + } else { + temp := x * 0.5 + b = temp + a := 1.0 + for i := 2; i <= n; i++ { + a *= float64(i) // a = n! + b *= temp // b = (x/2)**n + } + b /= a + } + } else { + // use backward recurrence + // x x**2 x**2 + // J(n,x)/J(n-1,x) = ---- ------ ------ ..... + // 2n - 2(n+1) - 2(n+2) + // + // 1 1 1 + // (for large x) = ---- ------ ------ ..... 
+ // 2n 2(n+1) 2(n+2) + // -- - ------ - ------ - + // x x x + // + // Let w = 2n/x and h=2/x, then the above quotient + // is equal to the continued fraction: + // 1 + // = ----------------------- + // 1 + // w - ----------------- + // 1 + // w+h - --------- + // w+2h - ... + // + // To determine how many terms needed, let + // Q(0) = w, Q(1) = w(w+h) - 1, + // Q(k) = (w+k*h)*Q(k-1) - Q(k-2), + // When Q(k) > 1e4 good for single + // When Q(k) > 1e9 good for double + // When Q(k) > 1e17 good for quadruple + + // determine k + w := float64(n+n) / x + h := 2 / x + q0 := w + z := w + h + q1 := w*z - 1 + k := 1 + for q1 < 1e9 { + k += 1 + z += h + q0, q1 = q1, z*q1-q0 + } + m := n + n + t := 0.0 + for i := 2 * (n + k); i >= m; i -= 2 { + t = 1 / (float64(i)/x - t) + } + a := t + b = 1 + // estimate log((2/x)**n*n!) = n*log(2/x)+n*ln(n) + // Hence, if n*(log(2n/x)) > ... + // single 8.8722839355e+01 + // double 7.09782712893383973096e+02 + // long double 1.1356523406294143949491931077970765006170e+04 + // then recurrent value may overflow and the result is + // likely underflow to zero + + tmp := float64(n) + v := 2 / x + tmp = tmp * Log(Abs(v*tmp)) + if tmp < 7.09782712893383973096e+02 { + for i := n - 1; i > 0; i-- { + di := float64(i + i) + a, b = b, b*di/x-a + di -= 2 + } + } else { + for i := n - 1; i > 0; i-- { + di := float64(i + i) + a, b = b, b*di/x-a + di -= 2 + // scale b to avoid spurious overflow + if b > 1e100 { + a /= b + t /= b + b = 1 + } + } + } + b = t * J0(x) / b + } + } + if sign { + return -b + } + return b +} + +// Yn returns the order-n Bessel function of the second kind. 
+// +// Special cases are: +// Yn(n, +Inf) = 0 +// Yn(n > 0, 0) = -Inf +// Yn(n < 0, 0) = +Inf if n is odd, -Inf if n is even +// Yn(n, x < 0) = NaN +// Yn(n, NaN) = NaN +func Yn(n int, x float64) float64 { + const Two302 = 1 << 302 // 2**302 0x52D0000000000000 + // special cases + switch { + case x < 0 || IsNaN(x): + return NaN() + case IsInf(x, 1): + return 0 + } + + if n == 0 { + return Y0(x) + } + if x == 0 { + if n < 0 && n&1 == 1 { + return Inf(1) + } + return Inf(-1) + } + sign := false + if n < 0 { + n = -n + if n&1 == 1 { + sign = true // sign true if n < 0 && |n| odd + } + } + if n == 1 { + if sign { + return -Y1(x) + } + return Y1(x) + } + var b float64 + if x >= Two302 { // x >= 2**302 + // (x >> n**2) + // Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/(pi*x)) + // Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/(pi*x)) + // Let s=sin(x), c=cos(x), + // xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then + // + // n sin(xn)*sqt2 cos(xn)*sqt2 + // ---------------------------------- + // 0 s-c c+s + // 1 -s-c -c+s + // 2 -s+c -c-s + // 3 s+c c-s + + var temp float64 + switch n & 3 { + case 0: + temp = Sin(x) - Cos(x) + case 1: + temp = -Sin(x) - Cos(x) + case 2: + temp = -Sin(x) + Cos(x) + case 3: + temp = Sin(x) + Cos(x) + } + b = (1 / SqrtPi) * temp / Sqrt(x) + } else { + a := Y0(x) + b = Y1(x) + // quit if b is -inf + for i := 1; i < n && !IsInf(b, -1); i++ { + a, b = b, (float64(i+i)/x)*b-a + } + } + if sign { + return -b + } + return b +} diff --git a/src/math/ldexp.go b/src/math/ldexp.go new file mode 100644 index 000000000..b5d2a5e7e --- /dev/null +++ b/src/math/ldexp.go @@ -0,0 +1,45 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Ldexp is the inverse of Frexp. +// It returns frac × 2**exp.
+// +// Special cases are: +// Ldexp(±0, exp) = ±0 +// Ldexp(±Inf, exp) = ±Inf +// Ldexp(NaN, exp) = NaN +func Ldexp(frac float64, exp int) float64 + +func ldexp(frac float64, exp int) float64 { + // special cases + switch { + case frac == 0: + return frac // correctly return -0 + case IsInf(frac, 0) || IsNaN(frac): + return frac + } + frac, e := normalize(frac) + exp += e + x := Float64bits(frac) + exp += int(x>>shift)&mask - bias + if exp < -1074 { + return Copysign(0, frac) // underflow + } + if exp > 1023 { // overflow + if frac < 0 { + return Inf(-1) + } + return Inf(1) + } + var m float64 = 1 + if exp < -1022 { // denormal + exp += 52 + m = 1.0 / (1 << 52) // 2**-52 + } + x &^= mask << shift + x |= uint64(exp+bias) << shift + return m * Float64frombits(x) +} diff --git a/src/math/ldexp_386.s b/src/math/ldexp_386.s new file mode 100644 index 000000000..ac8e8ba54 --- /dev/null +++ b/src/math/ldexp_386.s @@ -0,0 +1,14 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Ldexp(frac float64, exp int) float64 +TEXT ·Ldexp(SB),NOSPLIT,$0 + FMOVL exp+8(FP), F0 // F0=exp + FMOVD frac+0(FP), F0 // F0=frac, F1=e + FSCALE // F0=x*2**e, F1=e + FMOVDP F0, F1 // F0=x*2**e + FMOVDP F0, ret+12(FP) + RET diff --git a/src/math/ldexp_amd64.s b/src/math/ldexp_amd64.s new file mode 100644 index 000000000..6063a6480 --- /dev/null +++ b/src/math/ldexp_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Ldexp(SB),NOSPLIT,$0 + JMP ·ldexp(SB) diff --git a/src/math/ldexp_amd64p32.s b/src/math/ldexp_amd64p32.s new file mode 100644 index 000000000..9aa9d9da3 --- /dev/null +++ b/src/math/ldexp_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "ldexp_amd64.s" diff --git a/src/math/ldexp_arm.s b/src/math/ldexp_arm.s new file mode 100644 index 000000000..fcffa2e0f --- /dev/null +++ b/src/math/ldexp_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Ldexp(SB),NOSPLIT,$0 + B ·ldexp(SB) diff --git a/src/math/lgamma.go b/src/math/lgamma.go new file mode 100644 index 000000000..6a02c412d --- /dev/null +++ b/src/math/lgamma.go @@ -0,0 +1,365 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point logarithm of the Gamma function. +*/ + +// The original C code and the long comment below are +// from FreeBSD's /usr/src/lib/msun/src/e_lgamma_r.c and +// came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_lgamma_r(x, signgamp) +// Reentrant version of the logarithm of the Gamma function +// with user provided pointer for the sign of Gamma(x). +// +// Method: +// 1. 
Argument Reduction for 0 < x <= 8 +// Since gamma(1+s)=s*gamma(s), for x in [0,8], we may +// reduce x to a number in [1.5,2.5] by +// lgamma(1+s) = log(s) + lgamma(s) +// for example, +// lgamma(7.3) = log(6.3) + lgamma(6.3) +// = log(6.3*5.3) + lgamma(5.3) +// = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) +// 2. Polynomial approximation of lgamma around its +// minimum (ymin=1.461632144968362245) to maintain monotonicity. +// On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use +// Let z = x-ymin; +// lgamma(x) = -0.1214862905358496078218 + z**2*poly(z) +// poly(z) is a 14 degree polynomial. +// 3. Rational approximation in the primary interval [2,3] +// We use the following approximation: +// s = x-2.0; +// lgamma(x) = 0.5*s + s*P(s)/Q(s) +// with accuracy +// |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 +// Our algorithms are based on the following observation +// +// lgamma(2+s) = s*(1-Euler) +// + (zeta(2)-1)/2 * s**2 +// - (zeta(3)-1)/3 * s**3 + ... +// +// where Euler = 0.5772156649... is the Euler constant, which +// is very close to 0.5. +// +// 4. For x>=8, we have +// lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... +// (better formula: +// lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) +// Let z = 1/x, then we approximate +// f(z) = lgamma(x) - (x-0.5)(log(x)-1) +// by +// w = w0 + w1*z + w2*z**3 + w3*z**5 +// + ... + w6*z**11 +// where +// |w - f(z)| < 2**-58.74 +// +// 5. For negative x, since (G is gamma function) +// -x*G(-x)*G(x) = pi/sin(pi*x), +// we have +// G(x) = pi/(sin(pi*x)*(-x)*G(-x)) +// since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0 +// Hence, for x<0, signgam = sign(sin(pi*x)) and +// lgamma(x) = log(|Gamma(x)|) +// = log(pi/(|x*sin(pi*x)|)) - lgamma(-x); +// Note: one should avoid computing pi*(-x) directly in the +// computation of sin(pi*(-x)). +// +// 6.
Special Cases +// lgamma(2+s) ~ s*(1-Euler) for tiny s +// lgamma(1)=lgamma(2)=0 +// lgamma(x) ~ -log(x) for tiny x +// lgamma(0) = lgamma(inf) = inf +// lgamma(-integer) = +-inf +// +// + +var _lgamA = [...]float64{ + 7.72156649015328655494e-02, // 0x3FB3C467E37DB0C8 + 3.22467033424113591611e-01, // 0x3FD4A34CC4A60FAD + 6.73523010531292681824e-02, // 0x3FB13E001A5562A7 + 2.05808084325167332806e-02, // 0x3F951322AC92547B + 7.38555086081402883957e-03, // 0x3F7E404FB68FEFE8 + 2.89051383673415629091e-03, // 0x3F67ADD8CCB7926B + 1.19270763183362067845e-03, // 0x3F538A94116F3F5D + 5.10069792153511336608e-04, // 0x3F40B6C689B99C00 + 2.20862790713908385557e-04, // 0x3F2CF2ECED10E54D + 1.08011567247583939954e-04, // 0x3F1C5088987DFB07 + 2.52144565451257326939e-05, // 0x3EFA7074428CFA52 + 4.48640949618915160150e-05, // 0x3F07858E90A45837 +} +var _lgamR = [...]float64{ + 1.0, // placeholder + 1.39200533467621045958e+00, // 0x3FF645A762C4AB74 + 7.21935547567138069525e-01, // 0x3FE71A1893D3DCDC + 1.71933865632803078993e-01, // 0x3FC601EDCCFBDF27 + 1.86459191715652901344e-02, // 0x3F9317EA742ED475 + 7.77942496381893596434e-04, // 0x3F497DDACA41A95B + 7.32668430744625636189e-06, // 0x3EDEBAF7A5B38140 +} +var _lgamS = [...]float64{ + -7.72156649015328655494e-02, // 0xBFB3C467E37DB0C8 + 2.14982415960608852501e-01, // 0x3FCB848B36E20878 + 3.25778796408930981787e-01, // 0x3FD4D98F4F139F59 + 1.46350472652464452805e-01, // 0x3FC2BB9CBEE5F2F7 + 2.66422703033638609560e-02, // 0x3F9B481C7E939961 + 1.84028451407337715652e-03, // 0x3F5E26B67368F239 + 3.19475326584100867617e-05, // 0x3F00BFECDD17E945 +} +var _lgamT = [...]float64{ + 4.83836122723810047042e-01, // 0x3FDEF72BC8EE38A2 + -1.47587722994593911752e-01, // 0xBFC2E4278DC6C509 + 6.46249402391333854778e-02, // 0x3FB08B4294D5419B + -3.27885410759859649565e-02, // 0xBFA0C9A8DF35B713 + 1.79706750811820387126e-02, // 0x3F9266E7970AF9EC + -1.03142241298341437450e-02, // 0xBF851F9FBA91EC6A + 6.10053870246291332635e-03, // 0x3F78FCE0E370E344 
+ -3.68452016781138256760e-03, // 0xBF6E2EFFB3E914D7 + 2.25964780900612472250e-03, // 0x3F6282D32E15C915 + -1.40346469989232843813e-03, // 0xBF56FE8EBF2D1AF1 + 8.81081882437654011382e-04, // 0x3F4CDF0CEF61A8E9 + -5.38595305356740546715e-04, // 0xBF41A6109C73E0EC + 3.15632070903625950361e-04, // 0x3F34AF6D6C0EBBF7 + -3.12754168375120860518e-04, // 0xBF347F24ECC38C38 + 3.35529192635519073543e-04, // 0x3F35FD3EE8C2D3F4 +} +var _lgamU = [...]float64{ + -7.72156649015328655494e-02, // 0xBFB3C467E37DB0C8 + 6.32827064025093366517e-01, // 0x3FE4401E8B005DFF + 1.45492250137234768737e+00, // 0x3FF7475CD119BD6F + 9.77717527963372745603e-01, // 0x3FEF497644EA8450 + 2.28963728064692451092e-01, // 0x3FCD4EAEF6010924 + 1.33810918536787660377e-02, // 0x3F8B678BBF2BAB09 +} +var _lgamV = [...]float64{ + 1.0, + 2.45597793713041134822e+00, // 0x4003A5D7C2BD619C + 2.12848976379893395361e+00, // 0x40010725A42B18F5 + 7.69285150456672783825e-01, // 0x3FE89DFBE45050AF + 1.04222645593369134254e-01, // 0x3FBAAE55D6537C88 + 3.21709242282423911810e-03, // 0x3F6A5ABB57D0CF61 +} +var _lgamW = [...]float64{ + 4.18938533204672725052e-01, // 0x3FDACFE390C97D69 + 8.33333333333329678849e-02, // 0x3FB555555555553B + -2.77777777728775536470e-03, // 0xBF66C16C16B02E5C + 7.93650558643019558500e-04, // 0x3F4A019F98CF38B6 + -5.95187557450339963135e-04, // 0xBF4380CB8C0FE741 + 8.36339918996282139126e-04, // 0x3F4B67BA4CDAD5D1 + -1.63092934096575273989e-03, // 0xBF5AB89D0B9E43E4 +} + +// Lgamma returns the natural logarithm and sign (-1 or +1) of Gamma(x). 
+// +// Special cases are: +// Lgamma(+Inf) = +Inf +// Lgamma(0) = +Inf +// Lgamma(-integer) = +Inf +// Lgamma(-Inf) = -Inf +// Lgamma(NaN) = NaN +func Lgamma(x float64) (lgamma float64, sign int) { + const ( + Ymin = 1.461632144968362245 + Two52 = 1 << 52 // 0x4330000000000000 ~4.5036e+15 + Two53 = 1 << 53 // 0x4340000000000000 ~9.0072e+15 + Two58 = 1 << 58 // 0x4390000000000000 ~2.8823e+17 + Tiny = 1.0 / (1 << 70) // 0x3b90000000000000 ~8.47033e-22 + Tc = 1.46163214496836224576e+00 // 0x3FF762D86356BE3F + Tf = -1.21486290535849611461e-01 // 0xBFBF19B9BCC38A42 + // Tt = -(tail of Tf) + Tt = -3.63867699703950536541e-18 // 0xBC50C7CAA48A971F + ) + // special cases + sign = 1 + switch { + case IsNaN(x): + lgamma = x + return + case IsInf(x, 0): + lgamma = x + return + case x == 0: + lgamma = Inf(1) + return + } + + neg := false + if x < 0 { + x = -x + neg = true + } + + if x < Tiny { // if |x| < 2**-70, return -log(|x|) + if neg { + sign = -1 + } + lgamma = -Log(x) + return + } + var nadj float64 + if neg { + if x >= Two52 { // |x| >= 2**52, must be -integer + lgamma = Inf(1) + return + } + t := sinPi(x) + if t == 0 { + lgamma = Inf(1) // -integer + return + } + nadj = Log(Pi / Abs(t*x)) + if t < 0 { + sign = -1 + } + } + + switch { + case x == 1 || x == 2: // purge off 1 and 2 + lgamma = 0 + return + case x < 2: // use lgamma(x) = lgamma(x+1) - log(x) + var y float64 + var i int + if x <= 0.9 { + lgamma = -Log(x) + switch { + case x >= (Ymin - 1 + 0.27): // 0.7316 <= x <= 0.9 + y = 1 - x + i = 0 + case x >= (Ymin - 1 - 0.27): // 0.2316 <= x < 0.7316 + y = x - (Tc - 1) + i = 1 + default: // 0 < x < 0.2316 + y = x + i = 2 + } + } else { + lgamma = 0 + switch { + case x >= (Ymin + 0.27): // 1.7316 <= x < 2 + y = 2 - x + i = 0 + case x >= (Ymin - 0.27): // 1.2316 <= x < 1.7316 + y = x - Tc + i = 1 + default: // 0.9 < x < 1.2316 + y = x - 1 + i = 2 + } + } + switch i { + case 0: + z := y * y + p1 := _lgamA[0] + 
z*(_lgamA[2]+z*(_lgamA[4]+z*(_lgamA[6]+z*(_lgamA[8]+z*_lgamA[10])))) + p2 := z * (_lgamA[1] + z*(+_lgamA[3]+z*(_lgamA[5]+z*(_lgamA[7]+z*(_lgamA[9]+z*_lgamA[11]))))) + p := y*p1 + p2 + lgamma += (p - 0.5*y) + case 1: + z := y * y + w := z * y + p1 := _lgamT[0] + w*(_lgamT[3]+w*(_lgamT[6]+w*(_lgamT[9]+w*_lgamT[12]))) // parallel comp + p2 := _lgamT[1] + w*(_lgamT[4]+w*(_lgamT[7]+w*(_lgamT[10]+w*_lgamT[13]))) + p3 := _lgamT[2] + w*(_lgamT[5]+w*(_lgamT[8]+w*(_lgamT[11]+w*_lgamT[14]))) + p := z*p1 - (Tt - w*(p2+y*p3)) + lgamma += (Tf + p) + case 2: + p1 := y * (_lgamU[0] + y*(_lgamU[1]+y*(_lgamU[2]+y*(_lgamU[3]+y*(_lgamU[4]+y*_lgamU[5]))))) + p2 := 1 + y*(_lgamV[1]+y*(_lgamV[2]+y*(_lgamV[3]+y*(_lgamV[4]+y*_lgamV[5])))) + lgamma += (-0.5*y + p1/p2) + } + case x < 8: // 2 <= x < 8 + i := int(x) + y := x - float64(i) + p := y * (_lgamS[0] + y*(_lgamS[1]+y*(_lgamS[2]+y*(_lgamS[3]+y*(_lgamS[4]+y*(_lgamS[5]+y*_lgamS[6])))))) + q := 1 + y*(_lgamR[1]+y*(_lgamR[2]+y*(_lgamR[3]+y*(_lgamR[4]+y*(_lgamR[5]+y*_lgamR[6]))))) + lgamma = 0.5*y + p/q + z := 1.0 // Lgamma(1+s) = Log(s) + Lgamma(s) + switch i { + case 7: + z *= (y + 6) + fallthrough + case 6: + z *= (y + 5) + fallthrough + case 5: + z *= (y + 4) + fallthrough + case 4: + z *= (y + 3) + fallthrough + case 3: + z *= (y + 2) + lgamma += Log(z) + } + case x < Two58: // 8 <= x < 2**58 + t := Log(x) + z := 1 / x + y := z * z + w := _lgamW[0] + z*(_lgamW[1]+y*(_lgamW[2]+y*(_lgamW[3]+y*(_lgamW[4]+y*(_lgamW[5]+y*_lgamW[6]))))) + lgamma = (x-0.5)*(t-1) + w + default: // 2**58 <= x <= Inf + lgamma = x * (Log(x) - 1) + } + if neg { + lgamma = nadj - lgamma + } + return +} + +// sinPi(x) is a helper function for negative x +func sinPi(x float64) float64 { + const ( + Two52 = 1 << 52 // 0x4330000000000000 ~4.5036e+15 + Two53 = 1 << 53 // 0x4340000000000000 ~9.0072e+15 + ) + if x < 0.25 { + return -Sin(Pi * x) + } + + // argument reduction + z := Floor(x) + var n int + if z != x { // inexact + x = Mod(x, 2) + n = int(x * 4) + } else { + 
if x >= Two53 { // x must be even + x = 0 + n = 0 + } else { + if x < Two52 { + z = x + Two52 // exact + } + n = int(1 & Float64bits(z)) + x = float64(n) + n <<= 2 + } + } + switch n { + case 0: + x = Sin(Pi * x) + case 1, 2: + x = Cos(Pi * (0.5 - x)) + case 3, 4: + x = Sin(Pi * (1 - x)) + case 5, 6: + x = -Cos(Pi * (x - 1.5)) + default: + x = Sin(Pi * (x - 2)) + } + return -x +} diff --git a/src/math/log.go b/src/math/log.go new file mode 100644 index 000000000..818f00a73 --- /dev/null +++ b/src/math/log.go @@ -0,0 +1,123 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point logarithm. +*/ + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/e_log.c +// and came with this notice. The go code is a simpler +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_log(x) +// Return the logarithm of x +// +// Method : +// 1. Argument Reduction: find k and f such that +// x = 2**k * (1+f), +// where sqrt(2)/2 < 1+f < sqrt(2) . +// +// 2. Approximation of log(1+f). +// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) +// = 2s + 2/3 s**3 + 2/5 s**5 + ....., +// = 2s + s*R +// We use a special Reme algorithm on [0,0.1716] to generate +// a polynomial of degree 14 to approximate R. The maximum error +// of this polynomial approximation is bounded by 2**-58.45. 
In +// other words, +// R(z) ~ L1*s**2 + L2*s**4 + L3*s**6 + L4*s**8 +// + L5*s**10 + L6*s**12 + L7*s**14 +// (the values of L1 to L7 are listed in the program) and +// | L1*s**2 + ... + L7*s**14 - R(z) | +// <= 2**-58.45 +// +// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. +// In order to guarantee error in log below 1ulp, we compute log by +// log(1+f) = f - s*(f - R) (if f is not too large) +// log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) +// +// 3. Finally, log(x) = k*Ln2 + log(1+f). +// = k*Ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*Ln2_lo))) +// Here Ln2 is split into two floating point numbers: +// Ln2_hi + Ln2_lo, +// where n*Ln2_hi is always exact for |n| < 2000. +// +// Special cases: +// log(x) is NaN with signal if x < 0 (including -INF) ; +// log(+INF) is +INF; log(0) is -INF with signal; +// log(NaN) is that NaN with no signal. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. + +// Log returns the natural logarithm of x.
+// +// Special cases are: +// Log(+Inf) = +Inf +// Log(0) = -Inf +// Log(x < 0) = NaN +// Log(NaN) = NaN +func Log(x float64) float64 + +func log(x float64) float64 { + const ( + Ln2Hi = 6.93147180369123816490e-01 /* 3fe62e42 fee00000 */ + Ln2Lo = 1.90821492927058770002e-10 /* 3dea39ef 35793c76 */ + L1 = 6.666666666666735130e-01 /* 3FE55555 55555593 */ + L2 = 3.999999999940941908e-01 /* 3FD99999 9997FA04 */ + L3 = 2.857142874366239149e-01 /* 3FD24924 94229359 */ + L4 = 2.222219843214978396e-01 /* 3FCC71C5 1D8E78AF */ + L5 = 1.818357216161805012e-01 /* 3FC74664 96CB03DE */ + L6 = 1.531383769920937332e-01 /* 3FC39A09 D078C69F */ + L7 = 1.479819860511658591e-01 /* 3FC2F112 DF3E5244 */ + ) + + // special cases + switch { + case IsNaN(x) || IsInf(x, 1): + return x + case x < 0: + return NaN() + case x == 0: + return Inf(-1) + } + + // reduce + f1, ki := Frexp(x) + if f1 < Sqrt2/2 { + f1 *= 2 + ki-- + } + f := f1 - 1 + k := float64(ki) + + // compute + s := f / (2 + f) + s2 := s * s + s4 := s2 * s2 + t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) + t2 := s4 * (L2 + s4*(L4+s4*L6)) + R := t1 + t2 + hfsq := 0.5 * f * f + return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f) +} diff --git a/src/math/log10.go b/src/math/log10.go new file mode 100644 index 000000000..95cfbf47c --- /dev/null +++ b/src/math/log10.go @@ -0,0 +1,22 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Log10 returns the decimal logarithm of x. +// The special cases are the same as for Log. +func Log10(x float64) float64 + +func log10(x float64) float64 { + return Log(x) * (1 / Ln10) +} + +// Log2 returns the binary logarithm of x. +// The special cases are the same as for Log. 
+func Log2(x float64) float64 + +func log2(x float64) float64 { + frac, exp := Frexp(x) + return Log(frac)*(1/Ln2) + float64(exp) +} diff --git a/src/math/log10_386.s b/src/math/log10_386.s new file mode 100644 index 000000000..2897f3c15 --- /dev/null +++ b/src/math/log10_386.s @@ -0,0 +1,21 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Log10(x float64) float64 +TEXT ·Log10(SB),NOSPLIT,$0 + FLDLG2 // F0=log10(2) + FMOVD x+0(FP), F0 // F0=x, F1=log10(2) + FYL2X // F0=log10(x)=log2(x)*log10(2) + FMOVDP F0, ret+8(FP) + RET + +// func Log2(x float64) float64 +TEXT ·Log2(SB),NOSPLIT,$0 + FLD1 // F0=1 + FMOVD x+0(FP), F0 // F0=x, F1=1 + FYL2X // F0=log2(x) + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/log10_amd64.s b/src/math/log10_amd64.s new file mode 100644 index 000000000..8382ba7ae --- /dev/null +++ b/src/math/log10_amd64.s @@ -0,0 +1,11 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Log10(SB),NOSPLIT,$0 + JMP ·log10(SB) + +TEXT ·Log2(SB),NOSPLIT,$0 + JMP ·log2(SB) diff --git a/src/math/log10_amd64p32.s b/src/math/log10_amd64p32.s new file mode 100644 index 000000000..bf43841e2 --- /dev/null +++ b/src/math/log10_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "log10_amd64.s" diff --git a/src/math/log10_arm.s b/src/math/log10_arm.s new file mode 100644 index 000000000..dbcb8351c --- /dev/null +++ b/src/math/log10_arm.s @@ -0,0 +1,11 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·Log10(SB),NOSPLIT,$0 + B ·log10(SB) + +TEXT ·Log2(SB),NOSPLIT,$0 + B ·log2(SB) diff --git a/src/math/log1p.go b/src/math/log1p.go new file mode 100644 index 000000000..12b98684c --- /dev/null +++ b/src/math/log1p.go @@ -0,0 +1,200 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below are from FreeBSD's /usr/src/lib/msun/src/s_log1p.c +// and came with this notice. The go code is a simplified +// version of the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// +// double log1p(double x) +// +// Method : +// 1. Argument Reduction: find k and f such that +// 1+x = 2**k * (1+f), +// where sqrt(2)/2 < 1+f < sqrt(2) . +// +// Note. If k=0, then f=x is exact. However, if k!=0, then f +// may not be representable exactly. In that case, a correction +// term is needed. Let u=1+x rounded. Let c = (1+x)-u, then +// log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), +// and add back the correction term c/u. +// (Note: when x > 2**53, one can simply return log(x)) +// +// 2. Approximation of log1p(f). +// Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) +// = 2s + 2/3 s**3 + 2/5 s**5 + ....., +// = 2s + s*R +// We use a special Remez algorithm on [0,0.1716] to generate +// a polynomial of degree 14 to approximate R. The maximum error +// of this polynomial approximation is bounded by 2**-58.45.
In +// other words, +// +// R(z) ~ Lp1*s**2 +Lp2*s**4 +Lp3*s**6 +Lp4*s**8 +Lp5*s**10 +Lp6*s**12 +Lp7*s**14 +// (the values of Lp1 to Lp7 are listed in the program) +// and +// +// | Lp1*s**2 +...+Lp7*s**14 - R(z) | <= 2**-58.45 +// +// Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. +// In order to guarantee error in log below 1ulp, we compute log +// by +// log1p(f) = f - (hfsq - s*(hfsq+R)). +// +// 3. Finally, log1p(x) = k*ln2 + log1p(f). +// = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) +// Here ln2 is split into two floating point numbers: +// ln2_hi + ln2_lo, +// where n*ln2_hi is always exact for |n| < 2000. +// +// Special cases: +// log1p(x) is NaN with signal if x < -1 (including -INF) ; +// log1p(+INF) is +INF; log1p(-1) is -INF with signal; +// log1p(NaN) is that NaN with no signal. +// +// Accuracy: +// according to an error analysis, the error is always less than +// 1 ulp (unit in the last place). +// +// Constants: +// The hexadecimal values are the intended ones for the following +// constants. The decimal values may be used, provided that the +// compiler will convert from decimal to binary accurately enough +// to produce the hexadecimal values shown. +// +// Note: Assuming log() returns an accurate answer, the following +// algorithm can be used to compute log1p(x) to within a few ULP: +// +// u = 1+x; +// if(u==1.0) return x ; else +// return log(u)*(x/(u-1.0)); +// +// See HP-15C Advanced Functions Handbook, p.193. + +// Log1p returns the natural logarithm of 1 plus its argument x. +// It is more accurate than Log(1 + x) when x is near zero.
+// +// Special cases are: +// Log1p(+Inf) = +Inf +// Log1p(±0) = ±0 +// Log1p(-1) = -Inf +// Log1p(x < -1) = NaN +// Log1p(NaN) = NaN +func Log1p(x float64) float64 + +func log1p(x float64) float64 { + const ( + Sqrt2M1 = 4.142135623730950488017e-01 // Sqrt(2)-1 = 0x3fda827999fcef34 + Sqrt2HalfM1 = -2.928932188134524755992e-01 // Sqrt(2)/2-1 = 0xbfd2bec333018866 + Small = 1.0 / (1 << 29) // 2**-29 = 0x3e20000000000000 + Tiny = 1.0 / (1 << 54) // 2**-54 + Two53 = 1 << 53 // 2**53 + Ln2Hi = 6.93147180369123816490e-01 // 3fe62e42fee00000 + Ln2Lo = 1.90821492927058770002e-10 // 3dea39ef35793c76 + Lp1 = 6.666666666666735130e-01 // 3FE5555555555593 + Lp2 = 3.999999999940941908e-01 // 3FD999999997FA04 + Lp3 = 2.857142874366239149e-01 // 3FD2492494229359 + Lp4 = 2.222219843214978396e-01 // 3FCC71C51D8E78AF + Lp5 = 1.818357216161805012e-01 // 3FC7466496CB03DE + Lp6 = 1.531383769920937332e-01 // 3FC39A09D078C69F + Lp7 = 1.479819860511658591e-01 // 3FC2F112DF3E5244 + ) + + // special cases + switch { + case x < -1 || IsNaN(x): // includes -Inf + return NaN() + case x == -1: + return Inf(-1) + case IsInf(x, 1): + return Inf(1) + } + + absx := x + if absx < 0 { + absx = -absx + } + + var f float64 + var iu uint64 + k := 1 + if absx < Sqrt2M1 { // |x| < Sqrt(2)-1 + if absx < Small { // |x| < 2**-29 + if absx < Tiny { // |x| < 2**-54 + return x + } + return x - x*x*0.5 + } + if x > Sqrt2HalfM1 { // Sqrt(2)/2-1 < x + // (Sqrt(2)/2-1) < x < (Sqrt(2)-1) + k = 0 + f = x + iu = 1 + } + } + var c float64 + if k != 0 { + var u float64 + if absx < Two53 { // 1<<53 + u = 1.0 + x + iu = Float64bits(u) + k = int((iu >> 52) - 1023) + if k > 0 { + c = 1.0 - (u - x) + } else { + c = x - (u - 1.0) // correction term + c /= u + } + } else { + u = x + iu = Float64bits(u) + k = int((iu >> 52) - 1023) + c = 0 + } + iu &= 0x000fffffffffffff + if iu < 0x0006a09e667f3bcd { // mantissa of Sqrt(2) + u = Float64frombits(iu | 0x3ff0000000000000) // normalize u + } else { + k += 1 + u = Float64frombits(iu 
| 0x3fe0000000000000) // normalize u/2 + iu = (0x0010000000000000 - iu) >> 2 + } + f = u - 1.0 // Sqrt(2)/2 < u < Sqrt(2) + } + hfsq := 0.5 * f * f + var s, R, z float64 + if iu == 0 { // |f| < 2**-20 + if f == 0 { + if k == 0 { + return 0 + } else { + c += float64(k) * Ln2Lo + return float64(k)*Ln2Hi + c + } + } + R = hfsq * (1.0 - 0.66666666666666666*f) // avoid division + if k == 0 { + return f - R + } + return float64(k)*Ln2Hi - ((R - (float64(k)*Ln2Lo + c)) - f) + } + s = f / (2.0 + f) + z = s * s + R = z * (Lp1 + z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7)))))) + if k == 0 { + return f - (hfsq - s*(hfsq+R)) + } + return float64(k)*Ln2Hi - ((hfsq - (s*(hfsq+R) + (float64(k)*Ln2Lo + c))) - f) +} diff --git a/src/math/log1p_386.s b/src/math/log1p_386.s new file mode 100644 index 000000000..1c2d683a8 --- /dev/null +++ b/src/math/log1p_386.s @@ -0,0 +1,27 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Log1p(x float64) float64 +TEXT ·Log1p(SB),NOSPLIT,$0 + FMOVD $(2.928932188134524e-01), F0 + FMOVD x+0(FP), F0 // F0=x, F1=1-sqrt(2)/2 = 0.29289321881345247559915564 + FABS // F0=|x|, F1=1-sqrt(2)/2 + FUCOMPP F0, F1 // compare F0 to F1 + FSTSW AX + FLDLN2 // F0=log(2) + ANDW $0x0100, AX + JEQ use_fyl2x // jump if F0 >= F1 + FMOVD x+0(FP), F0 // F0=x, F1=log(2) + FYL2XP1 // F0=log(1+x)=log2(1+x)*log(2) + FMOVDP F0, ret+8(FP) + RET +use_fyl2x: + FLD1 // F0=1, F2=log(2) + FADDD x+0(FP), F0 // F0=1+x, F1=log(2) + FYL2X // F0=log(1+x)=log2(1+x)*log(2) + FMOVDP F0, ret+8(FP) + RET + diff --git a/src/math/log1p_amd64.s b/src/math/log1p_amd64.s new file mode 100644 index 000000000..1e58fb110 --- /dev/null +++ b/src/math/log1p_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +TEXT ·Log1p(SB),NOSPLIT,$0 + JMP ·log1p(SB) diff --git a/src/math/log1p_amd64p32.s b/src/math/log1p_amd64p32.s new file mode 100644 index 000000000..a14b5e38a --- /dev/null +++ b/src/math/log1p_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "log1p_amd64.s" diff --git a/src/math/log1p_arm.s b/src/math/log1p_arm.s new file mode 100644 index 000000000..95d549678 --- /dev/null +++ b/src/math/log1p_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Log1p(SB),NOSPLIT,$0 + B ·log1p(SB) diff --git a/src/math/log_386.s b/src/math/log_386.s new file mode 100644 index 000000000..ff998afb4 --- /dev/null +++ b/src/math/log_386.s @@ -0,0 +1,13 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Log(x float64) float64 +TEXT ·Log(SB),NOSPLIT,$0 + FLDLN2 // F0=log(2) + FMOVD x+0(FP), F0 // F0=x, F1=log(2) + FYL2X // F0=log(x)=log2(x)*log(2) + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/log_amd64.s b/src/math/log_amd64.s new file mode 100644 index 000000000..84c60ab4d --- /dev/null +++ b/src/math/log_amd64.s @@ -0,0 +1,111 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +#define HSqrt2 7.07106781186547524401e-01 // sqrt(2)/2 +#define Ln2Hi 6.93147180369123816490e-01 // 0x3fe62e42fee00000 +#define Ln2Lo 1.90821492927058770002e-10 // 0x3dea39ef35793c76 +#define L1 6.666666666666735130e-01 // 0x3FE5555555555593 +#define L2 3.999999999940941908e-01 // 0x3FD999999997FA04 +#define L3 2.857142874366239149e-01 // 0x3FD2492494229359 +#define L4 2.222219843214978396e-01 // 0x3FCC71C51D8E78AF +#define L5 1.818357216161805012e-01 // 0x3FC7466496CB03DE +#define L6 1.531383769920937332e-01 // 0x3FC39A09D078C69F +#define L7 1.479819860511658591e-01 // 0x3FC2F112DF3E5244 +#define NaN 0x7FF8000000000001 +#define NegInf 0xFFF0000000000000 +#define PosInf 0x7FF0000000000000 + +// func Log(x float64) float64 +TEXT ·Log(SB),NOSPLIT,$0 + // test bits for special cases + MOVQ x+0(FP), BX + MOVQ $~(1<<63), AX // sign bit mask + ANDQ BX, AX + JEQ isZero + MOVQ $0, AX + CMPQ AX, BX + JGT isNegative + MOVQ $PosInf, AX + CMPQ AX, BX + JLE isInfOrNaN + // f1, ki := math.Frexp(x); k := float64(ki) + MOVQ BX, X0 + MOVQ $0x000FFFFFFFFFFFFF, AX + MOVQ AX, X2 + ANDPD X0, X2 + MOVSD $0.5, X0 // 0x3FE0000000000000 + ORPD X0, X2 // X2= f1 + SHRQ $52, BX + ANDL $0x7FF, BX + SUBL $0x3FE, BX + CVTSL2SD BX, X1 // x1= k, x2= f1 + // if f1 < math.Sqrt2/2 { k -= 1; f1 *= 2 } + MOVSD $HSqrt2, X0 // x0= 0.7071, x1= k, x2= f1 + CMPSD X2, X0, 5 // cmpnlt; x0= 0 or ^0, x1= k, x2 = f1 + MOVSD $1.0, X3 // x0= 0 or ^0, x1= k, x2 = f1, x3= 1 + ANDPD X0, X3 // x0= 0 or ^0, x1= k, x2 = f1, x3= 0 or 1 + SUBSD X3, X1 // x0= 0 or ^0, x1= k, x2 = f1, x3= 0 or 1 + MOVSD $1.0, X0 // x0= 1, x1= k, x2= f1, x3= 0 or 1 + ADDSD X0, X3 // x0= 1, x1= k, x2= f1, x3= 1 or 2 + MULSD X3, X2 // x0= 1, x1= k, x2= f1 + // f := f1 - 1 + SUBSD X0, X2 // x1= k, x2= f + // s := f / (2 + f) + MOVSD $2.0, X0 + ADDSD X2, X0 + MOVAPD X2, X3 + DIVSD X0, X3 // x1=k, x2= f, x3= s + // s2 := s * s + MOVAPD X3, X4 // x1= k, x2= f, x3= s + MULSD X4, X4 // x1= k, x2= f, x3= s, x4= s2 + // s4 
:= s2 * s2 + MOVAPD X4, X5 // x1= k, x2= f, x3= s, x4= s2 + MULSD X5, X5 // x1= k, x2= f, x3= s, x4= s2, x5= s4 + // t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) + MOVSD $L7, X6 + MULSD X5, X6 + ADDSD $L5, X6 + MULSD X5, X6 + ADDSD $L3, X6 + MULSD X5, X6 + ADDSD $L1, X6 + MULSD X6, X4 // x1= k, x2= f, x3= s, x4= t1, x5= s4 + // t2 := s4 * (L2 + s4*(L4+s4*L6)) + MOVSD $L6, X6 + MULSD X5, X6 + ADDSD $L4, X6 + MULSD X5, X6 + ADDSD $L2, X6 + MULSD X6, X5 // x1= k, x2= f, x3= s, x4= t1, x5= t2 + // R := t1 + t2 + ADDSD X5, X4 // x1= k, x2= f, x3= s, x4= R + // hfsq := 0.5 * f * f + MOVSD $0.5, X0 + MULSD X2, X0 + MULSD X2, X0 // x0= hfsq, x1= k, x2= f, x3= s, x4= R + // return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f) + ADDSD X0, X4 // x0= hfsq, x1= k, x2= f, x3= s, x4= hfsq+R + MULSD X4, X3 // x0= hfsq, x1= k, x2= f, x3= s*(hfsq+R) + MOVSD $Ln2Lo, X4 + MULSD X1, X4 // x4= k*Ln2Lo + ADDSD X4, X3 // x0= hfsq, x1= k, x2= f, x3= s*(hfsq+R)+k*Ln2Lo + SUBSD X3, X0 // x0= hfsq-(s*(hfsq+R)+k*Ln2Lo), x1= k, x2= f + SUBSD X2, X0 // x0= (hfsq-(s*(hfsq+R)+k*Ln2Lo))-f, x1= k + MULSD $Ln2Hi, X1 // x0= (hfsq-(s*(hfsq+R)+k*Ln2Lo))-f, x1= k*Ln2Hi + SUBSD X0, X1 // x1= k*Ln2Hi-((hfsq-(s*(hfsq+R)+k*Ln2Lo))-f) + MOVSD X1, ret+8(FP) + RET +isInfOrNaN: + MOVQ BX, ret+8(FP) // +Inf or NaN, return x + RET +isNegative: + MOVQ $NaN, AX + MOVQ AX, ret+8(FP) // return NaN + RET +isZero: + MOVQ $NegInf, AX + MOVQ AX, ret+8(FP) // return -Inf + RET diff --git a/src/math/log_amd64p32.s b/src/math/log_amd64p32.s new file mode 100644 index 000000000..5058d607e --- /dev/null +++ b/src/math/log_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "log_amd64.s" diff --git a/src/math/log_arm.s b/src/math/log_arm.s new file mode 100644 index 000000000..e21d0366e --- /dev/null +++ b/src/math/log_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Log(SB),NOSPLIT,$0 + B ·log(SB) diff --git a/src/math/logb.go b/src/math/logb.go new file mode 100644 index 000000000..f2769d4fd --- /dev/null +++ b/src/math/logb.go @@ -0,0 +1,50 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Logb returns the binary exponent of x. +// +// Special cases are: +// Logb(±Inf) = +Inf +// Logb(0) = -Inf +// Logb(NaN) = NaN +func Logb(x float64) float64 { + // special cases + switch { + case x == 0: + return Inf(-1) + case IsInf(x, 0): + return Inf(1) + case IsNaN(x): + return x + } + return float64(ilogb(x)) +} + +// Ilogb returns the binary exponent of x as an integer. +// +// Special cases are: +// Ilogb(±Inf) = MaxInt32 +// Ilogb(0) = MinInt32 +// Ilogb(NaN) = MaxInt32 +func Ilogb(x float64) int { + // special cases + switch { + case x == 0: + return MinInt32 + case IsNaN(x): + return MaxInt32 + case IsInf(x, 0): + return MaxInt32 + } + return ilogb(x) +} + +// ilogb returns the binary exponent of x. It assumes x is finite and +// non-zero. +func ilogb(x float64) int { + x, exp := normalize(x) + return int((Float64bits(x)>>shift)&mask) - bias + exp +} diff --git a/src/math/mod.go b/src/math/mod.go new file mode 100644 index 000000000..e1a414e5f --- /dev/null +++ b/src/math/mod.go @@ -0,0 +1,50 @@ +// Copyright 2009-2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point mod function. +*/ + +// Mod returns the floating-point remainder of x/y. +// The magnitude of the result is less than y and its +// sign agrees with that of x.
+// +// Special cases are: +// Mod(±Inf, y) = NaN +// Mod(NaN, y) = NaN +// Mod(x, 0) = NaN +// Mod(x, ±Inf) = x +// Mod(x, NaN) = NaN +func Mod(x, y float64) float64 + +func mod(x, y float64) float64 { + if y == 0 || IsInf(x, 0) || IsNaN(x) || IsNaN(y) { + return NaN() + } + if y < 0 { + y = -y + } + + yfr, yexp := Frexp(y) + sign := false + r := x + if x < 0 { + r = -x + sign = true + } + + for r >= y { + rfr, rexp := Frexp(r) + if rfr < yfr { + rexp = rexp - 1 + } + r = r - Ldexp(y, rexp-yexp) + } + if sign { + r = -r + } + return r +} diff --git a/src/math/mod_386.s b/src/math/mod_386.s new file mode 100644 index 000000000..10ad98be3 --- /dev/null +++ b/src/math/mod_386.s @@ -0,0 +1,17 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Mod(x, y float64) float64 +TEXT ·Mod(SB),NOSPLIT,$0 + FMOVD y+8(FP), F0 // F0=y + FMOVD x+0(FP), F0 // F0=x, F1=y + FPREM // F0=reduced_x, F1=y + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE -3(PC) // jump if reduction incomplete + FMOVDP F0, F1 // F0=x-q*y + FMOVDP F0, ret+16(FP) + RET diff --git a/src/math/mod_amd64.s b/src/math/mod_amd64.s new file mode 100644 index 000000000..f99dbe293 --- /dev/null +++ b/src/math/mod_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Mod(SB),NOSPLIT,$0 + JMP ·mod(SB) diff --git a/src/math/mod_amd64p32.s b/src/math/mod_amd64p32.s new file mode 100644 index 000000000..c1b231124 --- /dev/null +++ b/src/math/mod_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "mod_amd64.s" diff --git a/src/math/mod_arm.s b/src/math/mod_arm.s new file mode 100644 index 000000000..5afb3594d --- /dev/null +++ b/src/math/mod_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Mod(SB),NOSPLIT,$0 + B ·mod(SB) diff --git a/src/math/modf.go b/src/math/modf.go new file mode 100644 index 000000000..1e8376a93 --- /dev/null +++ b/src/math/modf.go @@ -0,0 +1,34 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Modf returns integer and fractional floating-point numbers +// that sum to f. Both values have the same sign as f. +// +// Special cases are: +// Modf(±Inf) = ±Inf, NaN +// Modf(NaN) = NaN, NaN +func Modf(f float64) (int float64, frac float64) + +func modf(f float64) (int float64, frac float64) { + if f < 1 { + if f < 0 { + int, frac = Modf(-f) + return -int, -frac + } + return 0, f + } + + x := Float64bits(f) + e := uint(x>>shift)&mask - bias + + // Keep the top 12+e bits, the integer part; clear the rest. + if e < 64-12 { + x &^= 1<<(64-12-e) - 1 + } + int = Float64frombits(x) + frac = f - int + return +} diff --git a/src/math/modf_386.s b/src/math/modf_386.s new file mode 100644 index 000000000..3debd3b95 --- /dev/null +++ b/src/math/modf_386.s @@ -0,0 +1,21 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func Modf(f float64) (int float64, frac float64) +TEXT ·Modf(SB),NOSPLIT,$0 + FMOVD f+0(FP), F0 // F0=f + FMOVD F0, F1 // F0=f, F1=f + FSTCW -2(SP) // save old Control Word + MOVW -2(SP), AX + ORW $0x0c00, AX // Rounding Control set to truncate + MOVW AX, -4(SP) // store new Control Word + FLDCW -4(SP) // load new Control Word + FRNDINT // F0=trunc(f), F1=f + FLDCW -2(SP) // load old Control Word + FSUBD F0, F1 // F0=trunc(f), F1=f-trunc(f) + FMOVDP F0, int+8(FP) // F0=f-trunc(f) + FMOVDP F0, frac+16(FP) + RET diff --git a/src/math/modf_amd64.s b/src/math/modf_amd64.s new file mode 100644 index 000000000..701cf72a3 --- /dev/null +++ b/src/math/modf_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Modf(SB),NOSPLIT,$0 + JMP ·modf(SB) diff --git a/src/math/modf_amd64p32.s b/src/math/modf_amd64p32.s new file mode 100644 index 000000000..5508c2547 --- /dev/null +++ b/src/math/modf_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "modf_amd64.s" diff --git a/src/math/modf_arm.s b/src/math/modf_arm.s new file mode 100644 index 000000000..ea3c8dc74 --- /dev/null +++ b/src/math/modf_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Modf(SB),NOSPLIT,$0 + B ·modf(SB) diff --git a/src/math/nextafter.go b/src/math/nextafter.go new file mode 100644 index 000000000..bbb139986 --- /dev/null +++ b/src/math/nextafter.go @@ -0,0 +1,47 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Nextafter32 returns the next representable float32 value after x towards y. +// Special cases: +// Nextafter32(x, x) = x +// Nextafter32(NaN, y) = NaN +// Nextafter32(x, NaN) = NaN +func Nextafter32(x, y float32) (r float32) { + switch { + case IsNaN(float64(x)) || IsNaN(float64(y)): // special case + r = float32(NaN()) + case x == y: + r = x + case x == 0: + r = float32(Copysign(float64(Float32frombits(1)), float64(y))) + case (y > x) == (x > 0): + r = Float32frombits(Float32bits(x) + 1) + default: + r = Float32frombits(Float32bits(x) - 1) + } + return +} + +// Nextafter returns the next representable float64 value after x towards y. +// Special cases: +// Nextafter(x, x) = x +// Nextafter(NaN, y) = NaN +// Nextafter(x, NaN) = NaN +func Nextafter(x, y float64) (r float64) { + switch { + case IsNaN(x) || IsNaN(y): // special case + r = NaN() + case x == y: + r = x + case x == 0: + r = Copysign(Float64frombits(1), y) + case (y > x) == (x > 0): + r = Float64frombits(Float64bits(x) + 1) + default: + r = Float64frombits(Float64bits(x) - 1) + } + return +} diff --git a/src/math/pow.go b/src/math/pow.go new file mode 100644 index 000000000..77af25648 --- /dev/null +++ b/src/math/pow.go @@ -0,0 +1,137 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +func isOddInt(x float64) bool { + xi, xf := Modf(x) + return xf == 0 && int64(xi)&1 == 1 +} + +// Special cases taken from FreeBSD's /usr/src/lib/msun/src/e_pow.c +// updated by IEEE Std. 754-2008 "Section 9.2.1 Special values". + +// Pow returns x**y, the base-x exponential of y.
+// +// Special cases are (in order): +// Pow(x, ±0) = 1 for any x +// Pow(1, y) = 1 for any y +// Pow(x, 1) = x for any x +// Pow(NaN, y) = NaN +// Pow(x, NaN) = NaN +// Pow(±0, y) = ±Inf for y an odd integer < 0 +// Pow(±0, -Inf) = +Inf +// Pow(±0, +Inf) = +0 +// Pow(±0, y) = +Inf for finite y < 0 and not an odd integer +// Pow(±0, y) = ±0 for y an odd integer > 0 +// Pow(±0, y) = +0 for finite y > 0 and not an odd integer +// Pow(-1, ±Inf) = 1 +// Pow(x, +Inf) = +Inf for |x| > 1 +// Pow(x, -Inf) = +0 for |x| > 1 +// Pow(x, +Inf) = +0 for |x| < 1 +// Pow(x, -Inf) = +Inf for |x| < 1 +// Pow(+Inf, y) = +Inf for y > 0 +// Pow(+Inf, y) = +0 for y < 0 +// Pow(-Inf, y) = Pow(-0, -y) +// Pow(x, y) = NaN for finite x < 0 and finite non-integer y +func Pow(x, y float64) float64 { + switch { + case y == 0 || x == 1: + return 1 + case y == 1: + return x + case y == 0.5: + return Sqrt(x) + case y == -0.5: + return 1 / Sqrt(x) + case IsNaN(x) || IsNaN(y): + return NaN() + case x == 0: + switch { + case y < 0: + if isOddInt(y) { + return Copysign(Inf(1), x) + } + return Inf(1) + case y > 0: + if isOddInt(y) { + return x + } + return 0 + } + case IsInf(y, 0): + switch { + case x == -1: + return 1 + case (Abs(x) < 1) == IsInf(y, 1): + return 0 + default: + return Inf(1) + } + case IsInf(x, 0): + if IsInf(x, -1) { + return Pow(1/x, -y) // Pow(-0, -y) + } + switch { + case y < 0: + return 0 + case y > 0: + return Inf(1) + } + } + + absy := y + flip := false + if absy < 0 { + absy = -absy + flip = true + } + yi, yf := Modf(absy) + if yf != 0 && x < 0 { + return NaN() + } + if yi >= 1<<63 { + return Exp(y * Log(x)) + } + + // ans = a1 * 2**ae (= 1 for now). + a1 := 1.0 + ae := 0 + + // ans *= x**yf + if yf != 0 { + if yf > 0.5 { + yf-- + yi++ + } + a1 = Exp(yf * Log(x)) + } + + // ans *= x**yi + // by multiplying in successive squarings + // of x according to bits of yi. + // accumulate powers of two into exp. 
+ x1, xe := Frexp(x) + for i := int64(yi); i != 0; i >>= 1 { + if i&1 == 1 { + a1 *= x1 + ae += xe + } + x1 *= x1 + xe <<= 1 + if x1 < .5 { + x1 += x1 + xe-- + } + } + + // ans = a1*2**ae + // if flip { ans = 1 / ans } + // but in the opposite order + if flip { + a1 = 1 / a1 + ae = -ae + } + return Ldexp(a1, ae) +} diff --git a/src/math/pow10.go b/src/math/pow10.go new file mode 100644 index 000000000..f5ad28bb4 --- /dev/null +++ b/src/math/pow10.go @@ -0,0 +1,40 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// This table might overflow 127-bit exponent representations. +// In that case, truncate it after 1.0e38. +var pow10tab [70]float64 + +// Pow10 returns 10**e, the base-10 exponential of e. +// +// Special cases are: +// Pow10(e) = +Inf for e > 309 +// Pow10(e) = 0 for e < -324 +func Pow10(e int) float64 { + if e <= -325 { + return 0 + } else if e > 309 { + return Inf(1) + } + + if e < 0 { + return 1 / Pow10(-e) + } + if e < len(pow10tab) { + return pow10tab[e] + } + m := e / 2 + return Pow10(m) * Pow10(e-m) +} + +func init() { + pow10tab[0] = 1.0e0 + pow10tab[1] = 1.0e1 + for i := 2; i < len(pow10tab); i++ { + m := i / 2 + pow10tab[i] = pow10tab[m] * pow10tab[i-m] + } +} diff --git a/src/math/rand/example_test.go b/src/math/rand/example_test.go new file mode 100644 index 000000000..f42991453 --- /dev/null +++ b/src/math/rand/example_test.go @@ -0,0 +1,97 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand_test + +import ( + "fmt" + "math/rand" + "os" + "text/tabwriter" +) + +// These tests serve as an example but also make sure we don't change +// the output of the random number generator when given a fixed seed. + +func Example() { + rand.Seed(42) // Try changing this number! 
+ answers := []string{ + "It is certain", + "It is decidedly so", + "Without a doubt", + "Yes definitely", + "You may rely on it", + "As I see it yes", + "Most likely", + "Outlook good", + "Yes", + "Signs point to yes", + "Reply hazy try again", + "Ask again later", + "Better not tell you now", + "Cannot predict now", + "Concentrate and ask again", + "Don't count on it", + "My reply is no", + "My sources say no", + "Outlook not so good", + "Very doubtful", + } + fmt.Println("Magic 8-Ball says:", answers[rand.Intn(len(answers))]) + // Output: Magic 8-Ball says: As I see it yes +} + +// This example shows the use of each of the methods on a *Rand. +// The use of the global functions is the same, without the receiver. +func Example_rand() { + // Create and seed the generator. + // Typically a non-fixed seed should be used, such as time.Now().UnixNano(). + // Using a fixed seed will produce the same output on every run. + r := rand.New(rand.NewSource(99)) + + // The tabwriter here helps us generate aligned output. + w := tabwriter.NewWriter(os.Stdout, 1, 1, 1, ' ', 0) + defer w.Flush() + show := func(name string, v1, v2, v3 interface{}) { + fmt.Fprintf(w, "%s\t%v\t%v\t%v\n", name, v1, v2, v3) + } + + // Float32 and Float64 values are in [0, 1). + show("Float32", r.Float32(), r.Float32(), r.Float32()) + show("Float64", r.Float64(), r.Float64(), r.Float64()) + + // ExpFloat64 values have an average of 1 but decay exponentially. + show("ExpFloat64", r.ExpFloat64(), r.ExpFloat64(), r.ExpFloat64()) + + // NormFloat64 values have an average of 0 and a standard deviation of 1. + show("NormFloat64", r.NormFloat64(), r.NormFloat64(), r.NormFloat64()) + + // Int31, Int63, and Uint32 generate values of the given width. + // The Int method (not shown) is like either Int31 or Int63 + // depending on the size of 'int'. 
+ show("Int31", r.Int31(), r.Int31(), r.Int31()) + show("Int63", r.Int63(), r.Int63(), r.Int63()) + show("Uint32", r.Int63(), r.Int63(), r.Int63()) + + // Intn, Int31n, and Int63n limit their output to be < n. + // They do so more carefully than using r.Int()%n. + show("Intn(10)", r.Intn(10), r.Intn(10), r.Intn(10)) + show("Int31n(10)", r.Int31n(10), r.Int31n(10), r.Int31n(10)) + show("Int63n(10)", r.Int63n(10), r.Int63n(10), r.Int63n(10)) + + // Perm generates a random permutation of the numbers [0, n). + show("Perm", r.Perm(5), r.Perm(5), r.Perm(5)) + // Output: + // Float32 0.2635776 0.6358173 0.6718283 + // Float64 0.628605430454327 0.4504798828572669 0.9562755949377957 + // ExpFloat64 0.3362240648200941 1.4256072328483647 0.24354758816173044 + // NormFloat64 0.17233959114940064 1.577014951434847 0.04259129641113857 + // Int31 1501292890 1486668269 182840835 + // Int63 3546343826724305832 5724354148158589552 5239846799706671610 + // Uint32 5927547564735367388 637072299495207830 4128311955958246186 + // Intn(10) 1 2 5 + // Int31n(10) 4 7 8 + // Int63n(10) 7 6 3 + // Perm [1 4 2 3 0] [4 2 1 3 0] [1 2 4 0 3] +} diff --git a/src/math/rand/exp.go b/src/math/rand/exp.go new file mode 100644 index 000000000..4bc110f91 --- /dev/null +++ b/src/math/rand/exp.go @@ -0,0 +1,222 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +import ( + "math" +) + +/* + * Exponential distribution + * + * See "The Ziggurat Method for Generating Random Variables" + * (Marsaglia & Tsang, 2000) + * http://www.jstatsoft.org/v05/i08/paper [pdf] + */ + +const ( + re = 7.69711747013104972 +) + +// ExpFloat64 returns an exponentially distributed float64 in the range +// (0, +math.MaxFloat64] with an exponential distribution whose rate parameter +// (lambda) is 1 and whose mean is 1/lambda (1). 
+// To produce a distribution with a different rate parameter, +// callers can adjust the output using: +// +// sample = ExpFloat64() / desiredRateParameter +// +func (r *Rand) ExpFloat64() float64 { + for { + j := r.Uint32() + i := j & 0xFF + x := float64(j) * float64(we[i]) + if j < ke[i] { + return x + } + if i == 0 { + return re - math.Log(r.Float64()) + } + if fe[i]+float32(r.Float64())*(fe[i-1]-fe[i]) < float32(math.Exp(-x)) { + return x + } + } +} + +var ke = [256]uint32{ + 0xe290a139, 0x0, 0x9beadebc, 0xc377ac71, 0xd4ddb990, + 0xde893fb8, 0xe4a8e87c, 0xe8dff16a, 0xebf2deab, 0xee49a6e8, + 0xf0204efd, 0xf19bdb8e, 0xf2d458bb, 0xf3da104b, 0xf4b86d78, + 0xf577ad8a, 0xf61de83d, 0xf6afb784, 0xf730a573, 0xf7a37651, + 0xf80a5bb6, 0xf867189d, 0xf8bb1b4f, 0xf9079062, 0xf94d70ca, + 0xf98d8c7d, 0xf9c8928a, 0xf9ff175b, 0xfa319996, 0xfa6085f8, + 0xfa8c3a62, 0xfab5084e, 0xfadb36c8, 0xfaff0410, 0xfb20a6ea, + 0xfb404fb4, 0xfb5e2951, 0xfb7a59e9, 0xfb95038c, 0xfbae44ba, + 0xfbc638d8, 0xfbdcf892, 0xfbf29a30, 0xfc0731df, 0xfc1ad1ed, + 0xfc2d8b02, 0xfc3f6c4d, 0xfc5083ac, 0xfc60ddd1, 0xfc708662, + 0xfc7f8810, 0xfc8decb4, 0xfc9bbd62, 0xfca9027c, 0xfcb5c3c3, + 0xfcc20864, 0xfccdd70a, 0xfcd935e3, 0xfce42ab0, 0xfceebace, + 0xfcf8eb3b, 0xfd02c0a0, 0xfd0c3f59, 0xfd156b7b, 0xfd1e48d6, + 0xfd26daff, 0xfd2f2552, 0xfd372af7, 0xfd3eeee5, 0xfd4673e7, + 0xfd4dbc9e, 0xfd54cb85, 0xfd5ba2f2, 0xfd62451b, 0xfd68b415, + 0xfd6ef1da, 0xfd750047, 0xfd7ae120, 0xfd809612, 0xfd8620b4, + 0xfd8b8285, 0xfd90bcf5, 0xfd95d15e, 0xfd9ac10b, 0xfd9f8d36, + 0xfda43708, 0xfda8bf9e, 0xfdad2806, 0xfdb17141, 0xfdb59c46, + 0xfdb9a9fd, 0xfdbd9b46, 0xfdc170f6, 0xfdc52bd8, 0xfdc8ccac, + 0xfdcc542d, 0xfdcfc30b, 0xfdd319ef, 0xfdd6597a, 0xfdd98245, + 0xfddc94e5, 0xfddf91e6, 0xfde279ce, 0xfde54d1f, 0xfde80c52, + 0xfdeab7de, 0xfded5034, 0xfdefd5be, 0xfdf248e3, 0xfdf4aa06, + 0xfdf6f984, 0xfdf937b6, 0xfdfb64f4, 0xfdfd818d, 0xfdff8dd0, + 0xfe018a08, 0xfe03767a, 0xfe05536c, 0xfe07211c, 0xfe08dfc9, + 0xfe0a8fab, 0xfe0c30fb, 
0xfe0dc3ec, 0xfe0f48b1, 0xfe10bf76, + 0xfe122869, 0xfe1383b4, 0xfe14d17c, 0xfe1611e7, 0xfe174516, + 0xfe186b2a, 0xfe19843e, 0xfe1a9070, 0xfe1b8fd6, 0xfe1c8289, + 0xfe1d689b, 0xfe1e4220, 0xfe1f0f26, 0xfe1fcfbc, 0xfe2083ed, + 0xfe212bc3, 0xfe21c745, 0xfe225678, 0xfe22d95f, 0xfe234ffb, + 0xfe23ba4a, 0xfe241849, 0xfe2469f2, 0xfe24af3c, 0xfe24e81e, + 0xfe25148b, 0xfe253474, 0xfe2547c7, 0xfe254e70, 0xfe25485a, + 0xfe25356a, 0xfe251586, 0xfe24e88f, 0xfe24ae64, 0xfe2466e1, + 0xfe2411df, 0xfe23af34, 0xfe233eb4, 0xfe22c02c, 0xfe22336b, + 0xfe219838, 0xfe20ee58, 0xfe20358c, 0xfe1f6d92, 0xfe1e9621, + 0xfe1daef0, 0xfe1cb7ac, 0xfe1bb002, 0xfe1a9798, 0xfe196e0d, + 0xfe1832fd, 0xfe16e5fe, 0xfe15869d, 0xfe141464, 0xfe128ed3, + 0xfe10f565, 0xfe0f478c, 0xfe0d84b1, 0xfe0bac36, 0xfe09bd73, + 0xfe07b7b5, 0xfe059a40, 0xfe03644c, 0xfe011504, 0xfdfeab88, + 0xfdfc26e9, 0xfdf98629, 0xfdf6c83b, 0xfdf3ec01, 0xfdf0f04a, + 0xfdedd3d1, 0xfdea953d, 0xfde7331e, 0xfde3abe9, 0xfddffdfb, + 0xfddc2791, 0xfdd826cd, 0xfdd3f9a8, 0xfdcf9dfc, 0xfdcb1176, + 0xfdc65198, 0xfdc15bb3, 0xfdbc2ce2, 0xfdb6c206, 0xfdb117be, + 0xfdab2a63, 0xfda4f5fd, 0xfd9e7640, 0xfd97a67a, 0xfd908192, + 0xfd8901f2, 0xfd812182, 0xfd78d98e, 0xfd7022bb, 0xfd66f4ed, + 0xfd5d4732, 0xfd530f9c, 0xfd48432b, 0xfd3cd59a, 0xfd30b936, + 0xfd23dea4, 0xfd16349e, 0xfd07a7a3, 0xfcf8219b, 0xfce7895b, + 0xfcd5c220, 0xfcc2aadb, 0xfcae1d5e, 0xfc97ed4e, 0xfc7fe6d4, + 0xfc65ccf3, 0xfc495762, 0xfc2a2fc8, 0xfc07ee19, 0xfbe213c1, + 0xfbb8051a, 0xfb890078, 0xfb5411a5, 0xfb180005, 0xfad33482, + 0xfa839276, 0xfa263b32, 0xf9b72d1c, 0xf930a1a2, 0xf889f023, + 0xf7b577d2, 0xf69c650c, 0xf51530f0, 0xf2cb0e3c, 0xeeefb15d, + 0xe6da6ecf, +} +var we = [256]float32{ + 2.0249555e-09, 1.486674e-11, 2.4409617e-11, 3.1968806e-11, + 3.844677e-11, 4.4228204e-11, 4.9516443e-11, 5.443359e-11, + 5.905944e-11, 6.344942e-11, 6.7643814e-11, 7.1672945e-11, + 7.556032e-11, 7.932458e-11, 8.298079e-11, 8.654132e-11, + 9.0016515e-11, 9.3415074e-11, 9.674443e-11, 1.0001099e-10, + 
1.03220314e-10, 1.06377254e-10, 1.09486115e-10, 1.1255068e-10, + 1.1557435e-10, 1.1856015e-10, 1.2151083e-10, 1.2442886e-10, + 1.2731648e-10, 1.3017575e-10, 1.3300853e-10, 1.3581657e-10, + 1.3860142e-10, 1.4136457e-10, 1.4410738e-10, 1.4683108e-10, + 1.4953687e-10, 1.5222583e-10, 1.54899e-10, 1.5755733e-10, + 1.6020171e-10, 1.6283301e-10, 1.6545203e-10, 1.6805951e-10, + 1.7065617e-10, 1.732427e-10, 1.7581973e-10, 1.7838787e-10, + 1.8094774e-10, 1.8349985e-10, 1.8604476e-10, 1.8858298e-10, + 1.9111498e-10, 1.9364126e-10, 1.9616223e-10, 1.9867835e-10, + 2.0119004e-10, 2.0369768e-10, 2.0620168e-10, 2.087024e-10, + 2.1120022e-10, 2.136955e-10, 2.1618855e-10, 2.1867974e-10, + 2.2116936e-10, 2.2365775e-10, 2.261452e-10, 2.2863202e-10, + 2.311185e-10, 2.3360494e-10, 2.360916e-10, 2.3857874e-10, + 2.4106667e-10, 2.4355562e-10, 2.4604588e-10, 2.485377e-10, + 2.5103128e-10, 2.5352695e-10, 2.560249e-10, 2.585254e-10, + 2.6102867e-10, 2.6353494e-10, 2.6604446e-10, 2.6855745e-10, + 2.7107416e-10, 2.7359479e-10, 2.761196e-10, 2.7864877e-10, + 2.8118255e-10, 2.8372119e-10, 2.8626485e-10, 2.888138e-10, + 2.9136826e-10, 2.939284e-10, 2.9649452e-10, 2.9906677e-10, + 3.016454e-10, 3.0423064e-10, 3.0682268e-10, 3.0942177e-10, + 3.1202813e-10, 3.1464195e-10, 3.1726352e-10, 3.19893e-10, + 3.2253064e-10, 3.251767e-10, 3.2783135e-10, 3.3049485e-10, + 3.3316744e-10, 3.3584938e-10, 3.3854083e-10, 3.4124212e-10, + 3.4395342e-10, 3.46675e-10, 3.4940711e-10, 3.5215003e-10, + 3.5490397e-10, 3.5766917e-10, 3.6044595e-10, 3.6323455e-10, + 3.660352e-10, 3.6884823e-10, 3.7167386e-10, 3.745124e-10, + 3.773641e-10, 3.802293e-10, 3.8310827e-10, 3.860013e-10, + 3.8890866e-10, 3.918307e-10, 3.9476775e-10, 3.9772008e-10, + 4.0068804e-10, 4.0367196e-10, 4.0667217e-10, 4.09689e-10, + 4.1272286e-10, 4.1577405e-10, 4.1884296e-10, 4.2192994e-10, + 4.250354e-10, 4.281597e-10, 4.313033e-10, 4.3446652e-10, + 4.3764986e-10, 4.408537e-10, 4.4407847e-10, 4.4732465e-10, + 4.5059267e-10, 4.5388301e-10, 4.571962e-10, 
4.6053267e-10, + 4.6389292e-10, 4.6727755e-10, 4.70687e-10, 4.741219e-10, + 4.7758275e-10, 4.810702e-10, 4.845848e-10, 4.8812715e-10, + 4.9169796e-10, 4.9529775e-10, 4.989273e-10, 5.0258725e-10, + 5.0627835e-10, 5.100013e-10, 5.1375687e-10, 5.1754584e-10, + 5.21369e-10, 5.2522725e-10, 5.2912136e-10, 5.330522e-10, + 5.370208e-10, 5.4102806e-10, 5.45075e-10, 5.491625e-10, + 5.532918e-10, 5.5746385e-10, 5.616799e-10, 5.6594107e-10, + 5.7024857e-10, 5.746037e-10, 5.7900773e-10, 5.834621e-10, + 5.8796823e-10, 5.925276e-10, 5.971417e-10, 6.018122e-10, + 6.065408e-10, 6.113292e-10, 6.1617933e-10, 6.2109295e-10, + 6.260722e-10, 6.3111916e-10, 6.3623595e-10, 6.4142497e-10, + 6.4668854e-10, 6.5202926e-10, 6.5744976e-10, 6.6295286e-10, + 6.6854156e-10, 6.742188e-10, 6.79988e-10, 6.858526e-10, + 6.9181616e-10, 6.978826e-10, 7.04056e-10, 7.103407e-10, + 7.167412e-10, 7.2326256e-10, 7.2990985e-10, 7.366886e-10, + 7.4360473e-10, 7.5066453e-10, 7.5787476e-10, 7.6524265e-10, + 7.7277595e-10, 7.80483e-10, 7.883728e-10, 7.9645507e-10, + 8.047402e-10, 8.1323964e-10, 8.219657e-10, 8.309319e-10, + 8.401528e-10, 8.496445e-10, 8.594247e-10, 8.6951274e-10, + 8.799301e-10, 8.9070046e-10, 9.018503e-10, 9.134092e-10, + 9.254101e-10, 9.378904e-10, 9.508923e-10, 9.644638e-10, + 9.786603e-10, 9.935448e-10, 1.0091913e-09, 1.025686e-09, + 1.0431306e-09, 1.0616465e-09, 1.08138e-09, 1.1025096e-09, + 1.1252564e-09, 1.1498986e-09, 1.1767932e-09, 1.206409e-09, + 1.2393786e-09, 1.276585e-09, 1.3193139e-09, 1.3695435e-09, + 1.4305498e-09, 1.508365e-09, 1.6160854e-09, 1.7921248e-09, +} +var fe = [256]float32{ + 1, 0.9381437, 0.90046996, 0.87170434, 0.8477855, 0.8269933, + 0.8084217, 0.7915276, 0.77595687, 0.7614634, 0.7478686, + 0.7350381, 0.72286767, 0.71127474, 0.70019263, 0.6895665, + 0.67935055, 0.6695063, 0.66000086, 0.65080583, 0.6418967, + 0.63325197, 0.6248527, 0.6166822, 0.60872537, 0.60096896, + 0.5934009, 0.58601034, 0.5787874, 0.57172304, 0.5648092, + 0.5580383, 0.5514034, 0.5448982, 
0.5385169, 0.53225386, + 0.5261042, 0.52006316, 0.5141264, 0.50828975, 0.5025495, + 0.496902, 0.49134386, 0.485872, 0.48048335, 0.4751752, + 0.46994483, 0.46478975, 0.45970762, 0.45469615, 0.44975325, + 0.44487688, 0.44006512, 0.43531612, 0.43062815, 0.42599955, + 0.42142874, 0.4169142, 0.41245446, 0.40804818, 0.403694, + 0.3993907, 0.39513698, 0.39093173, 0.38677382, 0.38266218, + 0.37859577, 0.37457356, 0.37059465, 0.3666581, 0.362763, + 0.35890847, 0.35509375, 0.351318, 0.3475805, 0.34388044, + 0.34021714, 0.3365899, 0.33299807, 0.32944095, 0.32591796, + 0.3224285, 0.3189719, 0.31554767, 0.31215525, 0.30879408, + 0.3054636, 0.3021634, 0.29889292, 0.2956517, 0.29243928, + 0.28925523, 0.28609908, 0.28297043, 0.27986884, 0.27679393, + 0.2737453, 0.2707226, 0.2677254, 0.26475343, 0.26180625, + 0.25888354, 0.25598502, 0.2531103, 0.25025907, 0.24743107, + 0.24462597, 0.24184346, 0.23908329, 0.23634516, 0.23362878, + 0.23093392, 0.2282603, 0.22560766, 0.22297576, 0.22036438, + 0.21777324, 0.21520215, 0.21265087, 0.21011916, 0.20760682, + 0.20511365, 0.20263945, 0.20018397, 0.19774707, 0.19532852, + 0.19292815, 0.19054577, 0.1881812, 0.18583426, 0.18350479, + 0.1811926, 0.17889754, 0.17661946, 0.17435817, 0.17211354, + 0.1698854, 0.16767362, 0.16547804, 0.16329853, 0.16113494, + 0.15898713, 0.15685499, 0.15473837, 0.15263714, 0.15055119, + 0.14848037, 0.14642459, 0.14438373, 0.14235765, 0.14034624, + 0.13834943, 0.13636707, 0.13439907, 0.13244532, 0.13050574, + 0.1285802, 0.12666863, 0.12477092, 0.12288698, 0.12101672, + 0.119160056, 0.1173169, 0.115487166, 0.11367077, 0.11186763, + 0.11007768, 0.10830083, 0.10653701, 0.10478614, 0.10304816, + 0.101323, 0.09961058, 0.09791085, 0.09622374, 0.09454919, + 0.09288713, 0.091237515, 0.08960028, 0.087975375, 0.08636274, + 0.08476233, 0.083174095, 0.081597984, 0.08003395, 0.07848195, + 0.076941945, 0.07541389, 0.07389775, 0.072393484, 0.07090106, + 0.069420435, 0.06795159, 0.066494495, 0.06504912, 0.063615434, + 0.062193416, 
0.060783047, 0.059384305, 0.057997175, + 0.05662164, 0.05525769, 0.053905312, 0.052564494, 0.051235236, + 0.049917534, 0.048611384, 0.047316793, 0.046033762, 0.0447623, + 0.043502413, 0.042254124, 0.041017443, 0.039792392, + 0.038578995, 0.037377283, 0.036187284, 0.035009038, + 0.033842582, 0.032687962, 0.031545233, 0.030414443, 0.02929566, + 0.02818895, 0.027094385, 0.026012046, 0.024942026, 0.023884421, + 0.022839336, 0.021806888, 0.020787204, 0.019780423, 0.0187867, + 0.0178062, 0.016839107, 0.015885621, 0.014945968, 0.014020392, + 0.013109165, 0.012212592, 0.011331013, 0.01046481, 0.009614414, + 0.008780315, 0.007963077, 0.0071633533, 0.006381906, + 0.0056196423, 0.0048776558, 0.004157295, 0.0034602648, + 0.0027887989, 0.0021459677, 0.0015362998, 0.0009672693, + 0.00045413437, +} diff --git a/src/math/rand/normal.go b/src/math/rand/normal.go new file mode 100644 index 000000000..ba4ea54ca --- /dev/null +++ b/src/math/rand/normal.go @@ -0,0 +1,157 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +import ( + "math" +) + +/* + * Normal distribution + * + * See "The Ziggurat Method for Generating Random Variables" + * (Marsaglia & Tsang, 2000) + * http://www.jstatsoft.org/v05/i08/paper [pdf] + */ + +const ( + rn = 3.442619855899 +) + +func absInt32(i int32) uint32 { + if i < 0 { + return uint32(-i) + } + return uint32(i) +} + +// NormFloat64 returns a normally distributed float64 in the range +// [-math.MaxFloat64, +math.MaxFloat64] with +// standard normal distribution (mean = 0, stddev = 1). 
+// To produce a different normal distribution, callers can +// adjust the output using: +// +// sample = NormFloat64() * desiredStdDev + desiredMean +// +func (r *Rand) NormFloat64() float64 { + for { + j := int32(r.Uint32()) // Possibly negative + i := j & 0x7F + x := float64(j) * float64(wn[i]) + if absInt32(j) < kn[i] { + // This case should be hit better than 99% of the time. + return x + } + + if i == 0 { + // This extra work is only required for the base strip. + for { + x = -math.Log(r.Float64()) * (1.0 / rn) + y := -math.Log(r.Float64()) + if y+y >= x*x { + break + } + } + if j > 0 { + return rn + x + } + return -rn - x + } + if fn[i]+float32(r.Float64())*(fn[i-1]-fn[i]) < float32(math.Exp(-.5*x*x)) { + return x + } + } +} + +var kn = [128]uint32{ + 0x76ad2212, 0x0, 0x600f1b53, 0x6ce447a6, 0x725b46a2, + 0x7560051d, 0x774921eb, 0x789a25bd, 0x799045c3, 0x7a4bce5d, + 0x7adf629f, 0x7b5682a6, 0x7bb8a8c6, 0x7c0ae722, 0x7c50cce7, + 0x7c8cec5b, 0x7cc12cd6, 0x7ceefed2, 0x7d177e0b, 0x7d3b8883, + 0x7d5bce6c, 0x7d78dd64, 0x7d932886, 0x7dab0e57, 0x7dc0dd30, + 0x7dd4d688, 0x7de73185, 0x7df81cea, 0x7e07c0a3, 0x7e163efa, + 0x7e23b587, 0x7e303dfd, 0x7e3beec2, 0x7e46db77, 0x7e51155d, + 0x7e5aabb3, 0x7e63abf7, 0x7e6c222c, 0x7e741906, 0x7e7b9a18, + 0x7e82adfa, 0x7e895c63, 0x7e8fac4b, 0x7e95a3fb, 0x7e9b4924, + 0x7ea0a0ef, 0x7ea5b00d, 0x7eaa7ac3, 0x7eaf04f3, 0x7eb3522a, + 0x7eb765a5, 0x7ebb4259, 0x7ebeeafd, 0x7ec2620a, 0x7ec5a9c4, + 0x7ec8c441, 0x7ecbb365, 0x7ece78ed, 0x7ed11671, 0x7ed38d62, + 0x7ed5df12, 0x7ed80cb4, 0x7eda175c, 0x7edc0005, 0x7eddc78e, + 0x7edf6ebf, 0x7ee0f647, 0x7ee25ebe, 0x7ee3a8a9, 0x7ee4d473, + 0x7ee5e276, 0x7ee6d2f5, 0x7ee7a620, 0x7ee85c10, 0x7ee8f4cd, + 0x7ee97047, 0x7ee9ce59, 0x7eea0eca, 0x7eea3147, 0x7eea3568, + 0x7eea1aab, 0x7ee9e071, 0x7ee98602, 0x7ee90a88, 0x7ee86d08, + 0x7ee7ac6a, 0x7ee6c769, 0x7ee5bc9c, 0x7ee48a67, 0x7ee32efc, + 0x7ee1a857, 0x7edff42f, 0x7ede0ffa, 0x7edbf8d9, 0x7ed9ab94, + 0x7ed7248d, 0x7ed45fae, 0x7ed1585c, 0x7ece095f, 0x7eca6ccb, 
+ 0x7ec67be2, 0x7ec22eee, 0x7ebd7d1a, 0x7eb85c35, 0x7eb2c075, + 0x7eac9c20, 0x7ea5df27, 0x7e9e769f, 0x7e964c16, 0x7e8d44ba, + 0x7e834033, 0x7e781728, 0x7e6b9933, 0x7e5d8a1a, 0x7e4d9ded, + 0x7e3b737a, 0x7e268c2f, 0x7e0e3ff5, 0x7df1aa5d, 0x7dcf8c72, + 0x7da61a1e, 0x7d72a0fb, 0x7d30e097, 0x7cd9b4ab, 0x7c600f1a, + 0x7ba90bdc, 0x7a722176, 0x77d664e5, +} +var wn = [128]float32{ + 1.7290405e-09, 1.2680929e-10, 1.6897518e-10, 1.9862688e-10, + 2.2232431e-10, 2.4244937e-10, 2.601613e-10, 2.7611988e-10, + 2.9073963e-10, 3.042997e-10, 3.1699796e-10, 3.289802e-10, + 3.4035738e-10, 3.5121603e-10, 3.616251e-10, 3.7164058e-10, + 3.8130857e-10, 3.9066758e-10, 3.9975012e-10, 4.08584e-10, + 4.1719309e-10, 4.2559822e-10, 4.338176e-10, 4.418672e-10, + 4.497613e-10, 4.5751258e-10, 4.651324e-10, 4.7263105e-10, + 4.8001775e-10, 4.87301e-10, 4.944885e-10, 5.015873e-10, + 5.0860405e-10, 5.155446e-10, 5.2241467e-10, 5.2921934e-10, + 5.359635e-10, 5.426517e-10, 5.4928817e-10, 5.5587696e-10, + 5.624219e-10, 5.6892646e-10, 5.753941e-10, 5.818282e-10, + 5.882317e-10, 5.946077e-10, 6.00959e-10, 6.072884e-10, + 6.135985e-10, 6.19892e-10, 6.2617134e-10, 6.3243905e-10, + 6.386974e-10, 6.449488e-10, 6.511956e-10, 6.5744005e-10, + 6.6368433e-10, 6.699307e-10, 6.7618144e-10, 6.824387e-10, + 6.8870465e-10, 6.949815e-10, 7.012715e-10, 7.075768e-10, + 7.1389966e-10, 7.202424e-10, 7.266073e-10, 7.329966e-10, + 7.394128e-10, 7.4585826e-10, 7.5233547e-10, 7.58847e-10, + 7.653954e-10, 7.719835e-10, 7.7861395e-10, 7.852897e-10, + 7.920138e-10, 7.987892e-10, 8.0561924e-10, 8.125073e-10, + 8.194569e-10, 8.2647167e-10, 8.3355556e-10, 8.407127e-10, + 8.479473e-10, 8.55264e-10, 8.6266755e-10, 8.7016316e-10, + 8.777562e-10, 8.8545243e-10, 8.932582e-10, 9.0117996e-10, + 9.09225e-10, 9.174008e-10, 9.2571584e-10, 9.341788e-10, + 9.427997e-10, 9.515889e-10, 9.605579e-10, 9.697193e-10, + 9.790869e-10, 9.88676e-10, 9.985036e-10, 1.0085882e-09, + 1.0189509e-09, 1.0296151e-09, 1.0406069e-09, 1.0519566e-09, + 1.063698e-09, 
1.0758702e-09, 1.0885183e-09, 1.1016947e-09, + 1.1154611e-09, 1.1298902e-09, 1.1450696e-09, 1.1611052e-09, + 1.1781276e-09, 1.1962995e-09, 1.2158287e-09, 1.2369856e-09, + 1.2601323e-09, 1.2857697e-09, 1.3146202e-09, 1.347784e-09, + 1.3870636e-09, 1.4357403e-09, 1.5008659e-09, 1.6030948e-09, +} +var fn = [128]float32{ + 1, 0.9635997, 0.9362827, 0.9130436, 0.89228165, 0.87324303, + 0.8555006, 0.8387836, 0.8229072, 0.8077383, 0.793177, + 0.7791461, 0.7655842, 0.7524416, 0.73967725, 0.7272569, + 0.7151515, 0.7033361, 0.69178915, 0.68049186, 0.6694277, + 0.658582, 0.6479418, 0.63749546, 0.6272325, 0.6171434, + 0.6072195, 0.5974532, 0.58783704, 0.5783647, 0.56903, + 0.5598274, 0.5507518, 0.54179835, 0.5329627, 0.52424055, + 0.5156282, 0.50712204, 0.49871865, 0.49041483, 0.48220766, + 0.4740943, 0.46607214, 0.4581387, 0.45029163, 0.44252872, + 0.43484783, 0.427247, 0.41972435, 0.41227803, 0.40490642, + 0.39760786, 0.3903808, 0.3832238, 0.37613547, 0.36911446, + 0.3621595, 0.35526937, 0.34844297, 0.34167916, 0.33497685, + 0.3283351, 0.3217529, 0.3152294, 0.30876362, 0.30235484, + 0.29600215, 0.28970486, 0.2834622, 0.2772735, 0.27113807, + 0.2650553, 0.25902456, 0.2530453, 0.24711695, 0.241239, + 0.23541094, 0.22963232, 0.2239027, 0.21822165, 0.21258877, + 0.20700371, 0.20146611, 0.19597565, 0.19053204, 0.18513499, + 0.17978427, 0.17447963, 0.1692209, 0.16400786, 0.15884037, + 0.15371831, 0.14864157, 0.14361008, 0.13862377, 0.13368265, + 0.12878671, 0.12393598, 0.119130544, 0.11437051, 0.10965602, + 0.104987256, 0.10036444, 0.095787846, 0.0912578, 0.08677467, + 0.0823389, 0.077950984, 0.073611505, 0.06932112, 0.06508058, + 0.06089077, 0.056752663, 0.0526674, 0.048636295, 0.044660863, + 0.040742867, 0.03688439, 0.033087887, 0.029356318, + 0.025693292, 0.022103304, 0.018592102, 0.015167298, + 0.011839478, 0.008624485, 0.005548995, 0.0026696292, +} diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go new file mode 100644 index 000000000..3ffb5c4e5 --- /dev/null +++ 
b/src/math/rand/rand.go @@ -0,0 +1,246 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package rand implements pseudo-random number generators. +// +// Random numbers are generated by a Source. Top-level functions, such as +// Float64 and Int, use a default shared Source that produces a deterministic +// sequence of values each time a program is run. Use the Seed function to +// initialize the default Source if different behavior is required for each run. +// The default Source is safe for concurrent use by multiple goroutines. +package rand + +import "sync" + +// A Source represents a source of uniformly-distributed +// pseudo-random int64 values in the range [0, 1<<63). +type Source interface { + Int63() int64 + Seed(seed int64) +} + +// NewSource returns a new pseudo-random Source seeded with the given value. +func NewSource(seed int64) Source { + var rng rngSource + rng.Seed(seed) + return &rng +} + +// A Rand is a source of random numbers. +type Rand struct { + src Source +} + +// New returns a new Rand that uses random values from src +// to generate other random values. +func New(src Source) *Rand { return &Rand{src} } + +// Seed uses the provided seed value to initialize the generator to a deterministic state. +func (r *Rand) Seed(seed int64) { r.src.Seed(seed) } + +// Int63 returns a non-negative pseudo-random 63-bit integer as an int64. +func (r *Rand) Int63() int64 { return r.src.Int63() } + +// Uint32 returns a pseudo-random 32-bit value as a uint32. +func (r *Rand) Uint32() uint32 { return uint32(r.Int63() >> 31) } + +// Int31 returns a non-negative pseudo-random 31-bit integer as an int32. +func (r *Rand) Int31() int32 { return int32(r.Int63() >> 32) } + +// Int returns a non-negative pseudo-random int. 
+func (r *Rand) Int() int { + u := uint(r.Int63()) + return int(u << 1 >> 1) // clear sign bit if int == int32 +} + +// Int63n returns, as an int64, a non-negative pseudo-random number in [0,n). +// It panics if n <= 0. +func (r *Rand) Int63n(n int64) int64 { + if n <= 0 { + panic("invalid argument to Int63n") + } + if n&(n-1) == 0 { // n is power of two, can mask + return r.Int63() & (n - 1) + } + max := int64((1 << 63) - 1 - (1<<63)%uint64(n)) + v := r.Int63() + for v > max { + v = r.Int63() + } + return v % n +} + +// Int31n returns, as an int32, a non-negative pseudo-random number in [0,n). +// It panics if n <= 0. +func (r *Rand) Int31n(n int32) int32 { + if n <= 0 { + panic("invalid argument to Int31n") + } + if n&(n-1) == 0 { // n is power of two, can mask + return r.Int31() & (n - 1) + } + max := int32((1 << 31) - 1 - (1<<31)%uint32(n)) + v := r.Int31() + for v > max { + v = r.Int31() + } + return v % n +} + +// Intn returns, as an int, a non-negative pseudo-random number in [0,n). +// It panics if n <= 0. +func (r *Rand) Intn(n int) int { + if n <= 0 { + panic("invalid argument to Intn") + } + if n <= 1<<31-1 { + return int(r.Int31n(int32(n))) + } + return int(r.Int63n(int64(n))) +} + +// Float64 returns, as a float64, a pseudo-random number in [0.0,1.0). +func (r *Rand) Float64() float64 { + // A clearer, simpler implementation would be: + // return float64(r.Int63n(1<<53)) / (1<<53) + // However, Go 1 shipped with + // return float64(r.Int63()) / (1 << 63) + // and we want to preserve that value stream. + // + // There is one bug in the value stream: r.Int63() may be so close + // to 1<<63 that the division rounds up to 1.0, and we've guaranteed + // that the result is always less than 1.0. To fix that, we treat the + // range as cyclic and map 1 back to 0. This is justified by observing + // that while some of the values rounded down to 0, nothing was + // rounding up to 0, so 0 was underrepresented in the results. 
+ // Mapping 1 back to zero restores some balance. + // (The balance is not perfect because the implementation + // returns denormalized numbers for very small r.Int63(), + // and those steal from what would normally be 0 results.) + // The remapping only happens 1/2⁵³ of the time, so most clients + // will not observe it anyway. + f := float64(r.Int63()) / (1 << 63) + if f == 1 { + f = 0 + } + return f +} + +// Float32 returns, as a float32, a pseudo-random number in [0.0,1.0). +func (r *Rand) Float32() float32 { + // Same rationale as in Float64: we want to preserve the Go 1 value + // stream except we want to fix it not to return 1.0 + // There is a double rounding going on here, but the argument for + // mapping 1 to 0 still applies: 0 was underrepresented before, + // so mapping 1 to 0 doesn't cause too many 0s. + // This only happens 1/2²⁴ of the time (plus the 1/2⁵³ of the time in Float64). + f := float32(r.Float64()) + if f == 1 { + f = 0 + } + return f +} + +// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers [0,n). +func (r *Rand) Perm(n int) []int { + m := make([]int, n) + for i := 0; i < n; i++ { + j := r.Intn(i + 1) + m[i] = m[j] + m[j] = i + } + return m +} + +/* + * Top-level convenience functions + */ + +var globalRand = New(&lockedSource{src: NewSource(1)}) + +// Seed uses the provided seed value to initialize the default Source to a +// deterministic state. If Seed is not called, the generator behaves as +// if seeded by Seed(1). +func Seed(seed int64) { globalRand.Seed(seed) } + +// Int63 returns a non-negative pseudo-random 63-bit integer as an int64 +// from the default Source. +func Int63() int64 { return globalRand.Int63() } + +// Uint32 returns a pseudo-random 32-bit value as a uint32 +// from the default Source. +func Uint32() uint32 { return globalRand.Uint32() } + +// Int31 returns a non-negative pseudo-random 31-bit integer as an int32 +// from the default Source. 
+func Int31() int32 { return globalRand.Int31() } + +// Int returns a non-negative pseudo-random int from the default Source. +func Int() int { return globalRand.Int() } + +// Int63n returns, as an int64, a non-negative pseudo-random number in [0,n) +// from the default Source. +// It panics if n <= 0. +func Int63n(n int64) int64 { return globalRand.Int63n(n) } + +// Int31n returns, as an int32, a non-negative pseudo-random number in [0,n) +// from the default Source. +// It panics if n <= 0. +func Int31n(n int32) int32 { return globalRand.Int31n(n) } + +// Intn returns, as an int, a non-negative pseudo-random number in [0,n) +// from the default Source. +// It panics if n <= 0. +func Intn(n int) int { return globalRand.Intn(n) } + +// Float64 returns, as a float64, a pseudo-random number in [0.0,1.0) +// from the default Source. +func Float64() float64 { return globalRand.Float64() } + +// Float32 returns, as a float32, a pseudo-random number in [0.0,1.0) +// from the default Source. +func Float32() float32 { return globalRand.Float32() } + +// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers [0,n) +// from the default Source. +func Perm(n int) []int { return globalRand.Perm(n) } + +// NormFloat64 returns a normally distributed float64 in the range +// [-math.MaxFloat64, +math.MaxFloat64] with +// standard normal distribution (mean = 0, stddev = 1) +// from the default Source. +// To produce a different normal distribution, callers can +// adjust the output using: +// +// sample = NormFloat64() * desiredStdDev + desiredMean +// +func NormFloat64() float64 { return globalRand.NormFloat64() } + +// ExpFloat64 returns an exponentially distributed float64 in the range +// (0, +math.MaxFloat64] with an exponential distribution whose rate parameter +// (lambda) is 1 and whose mean is 1/lambda (1) from the default Source. 
+// To produce a distribution with a different rate parameter, +// callers can adjust the output using: +// +// sample = ExpFloat64() / desiredRateParameter +// +func ExpFloat64() float64 { return globalRand.ExpFloat64() } + +type lockedSource struct { + lk sync.Mutex + src Source +} + +func (r *lockedSource) Int63() (n int64) { + r.lk.Lock() + n = r.src.Int63() + r.lk.Unlock() + return +} + +func (r *lockedSource) Seed(seed int64) { + r.lk.Lock() + r.src.Seed(seed) + r.lk.Unlock() +} diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go new file mode 100644 index 000000000..ab0dc49b4 --- /dev/null +++ b/src/math/rand/rand_test.go @@ -0,0 +1,398 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +import ( + "errors" + "fmt" + "math" + "testing" +) + +const ( + numTestSamples = 10000 +) + +type statsResults struct { + mean float64 + stddev float64 + closeEnough float64 + maxError float64 +} + +func max(a, b float64) float64 { + if a > b { + return a + } + return b +} + +func nearEqual(a, b, closeEnough, maxError float64) bool { + absDiff := math.Abs(a - b) + if absDiff < closeEnough { // Necessary when one value is zero and one value is close to zero. + return true + } + return absDiff/max(math.Abs(a), math.Abs(b)) < maxError +} + +var testSeeds = []int64{1, 1754801282, 1698661970, 1550503961} + +// checkSimilarDistribution returns success if the mean and stddev of the +// two statsResults are similar. 
+func (this *statsResults) checkSimilarDistribution(expected *statsResults) error { + if !nearEqual(this.mean, expected.mean, expected.closeEnough, expected.maxError) { + s := fmt.Sprintf("mean %v != %v (allowed error %v, %v)", this.mean, expected.mean, expected.closeEnough, expected.maxError) + fmt.Println(s) + return errors.New(s) + } + if !nearEqual(this.stddev, expected.stddev, 0, expected.maxError) { + s := fmt.Sprintf("stddev %v != %v (allowed error %v, %v)", this.stddev, expected.stddev, expected.closeEnough, expected.maxError) + fmt.Println(s) + return errors.New(s) + } + return nil +} + +func getStatsResults(samples []float64) *statsResults { + res := new(statsResults) + var sum, squaresum float64 + for _, s := range samples { + sum += s + squaresum += s * s + } + res.mean = sum / float64(len(samples)) + res.stddev = math.Sqrt(squaresum/float64(len(samples)) - res.mean*res.mean) + return res +} + +func checkSampleDistribution(t *testing.T, samples []float64, expected *statsResults) { + actual := getStatsResults(samples) + err := actual.checkSimilarDistribution(expected) + if err != nil { + t.Errorf(err.Error()) + } +} + +func checkSampleSliceDistributions(t *testing.T, samples []float64, nslices int, expected *statsResults) { + chunk := len(samples) / nslices + for i := 0; i < nslices; i++ { + low := i * chunk + var high int + if i == nslices-1 { + high = len(samples) - 1 + } else { + high = (i + 1) * chunk + } + checkSampleDistribution(t, samples[low:high], expected) + } +} + +// +// Normal distribution tests +// + +func generateNormalSamples(nsamples int, mean, stddev float64, seed int64) []float64 { + r := New(NewSource(seed)) + samples := make([]float64, nsamples) + for i := range samples { + samples[i] = r.NormFloat64()*stddev + mean + } + return samples +} + +func testNormalDistribution(t *testing.T, nsamples int, mean, stddev float64, seed int64) { + //fmt.Printf("testing nsamples=%v mean=%v stddev=%v seed=%v\n", nsamples, mean, stddev, seed); + + 
samples := generateNormalSamples(nsamples, mean, stddev, seed) + errorScale := max(1.0, stddev) // Error scales with stddev + expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.08 * errorScale} + + // Make sure that the entire set matches the expected distribution. + checkSampleDistribution(t, samples, expected) + + // Make sure that each half of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 2, expected) + + // Make sure that each 7th of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 7, expected) +} + +// Actual tests + +func TestStandardNormalValues(t *testing.T) { + for _, seed := range testSeeds { + testNormalDistribution(t, numTestSamples, 0, 1, seed) + } +} + +func TestNonStandardNormalValues(t *testing.T) { + sdmax := 1000.0 + mmax := 1000.0 + if testing.Short() { + sdmax = 5 + mmax = 5 + } + for sd := 0.5; sd < sdmax; sd *= 2 { + for m := 0.5; m < mmax; m *= 2 { + for _, seed := range testSeeds { + testNormalDistribution(t, numTestSamples, m, sd, seed) + if testing.Short() { + break + } + } + } + } +} + +// +// Exponential distribution tests +// + +func generateExponentialSamples(nsamples int, rate float64, seed int64) []float64 { + r := New(NewSource(seed)) + samples := make([]float64, nsamples) + for i := range samples { + samples[i] = r.ExpFloat64() / rate + } + return samples +} + +func testExponentialDistribution(t *testing.T, nsamples int, rate float64, seed int64) { + //fmt.Printf("testing nsamples=%v rate=%v seed=%v\n", nsamples, rate, seed); + + mean := 1 / rate + stddev := mean + + samples := generateExponentialSamples(nsamples, rate, seed) + errorScale := max(1.0, 1/rate) // Error scales with the inverse of the rate + expected := &statsResults{mean, stddev, 0.10 * errorScale, 0.20 * errorScale} + + // Make sure that the entire set matches the expected distribution. 
+ checkSampleDistribution(t, samples, expected) + + // Make sure that each half of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 2, expected) + + // Make sure that each 7th of the set matches the expected distribution. + checkSampleSliceDistributions(t, samples, 7, expected) +} + +// Actual tests + +func TestStandardExponentialValues(t *testing.T) { + for _, seed := range testSeeds { + testExponentialDistribution(t, numTestSamples, 1, seed) + } +} + +func TestNonStandardExponentialValues(t *testing.T) { + for rate := 0.05; rate < 10; rate *= 2 { + for _, seed := range testSeeds { + testExponentialDistribution(t, numTestSamples, rate, seed) + if testing.Short() { + break + } + } + } +} + +// +// Table generation tests +// + +func initNorm() (testKn []uint32, testWn, testFn []float32) { + const m1 = 1 << 31 + var ( + dn float64 = rn + tn = dn + vn float64 = 9.91256303526217e-3 + ) + + testKn = make([]uint32, 128) + testWn = make([]float32, 128) + testFn = make([]float32, 128) + + q := vn / math.Exp(-0.5*dn*dn) + testKn[0] = uint32((dn / q) * m1) + testKn[1] = 0 + testWn[0] = float32(q / m1) + testWn[127] = float32(dn / m1) + testFn[0] = 1.0 + testFn[127] = float32(math.Exp(-0.5 * dn * dn)) + for i := 126; i >= 1; i-- { + dn = math.Sqrt(-2.0 * math.Log(vn/dn+math.Exp(-0.5*dn*dn))) + testKn[i+1] = uint32((dn / tn) * m1) + tn = dn + testFn[i] = float32(math.Exp(-0.5 * dn * dn)) + testWn[i] = float32(dn / m1) + } + return +} + +func initExp() (testKe []uint32, testWe, testFe []float32) { + const m2 = 1 << 32 + var ( + de float64 = re + te = de + ve float64 = 3.9496598225815571993e-3 + ) + + testKe = make([]uint32, 256) + testWe = make([]float32, 256) + testFe = make([]float32, 256) + + q := ve / math.Exp(-de) + testKe[0] = uint32((de / q) * m2) + testKe[1] = 0 + testWe[0] = float32(q / m2) + testWe[255] = float32(de / m2) + testFe[0] = 1.0 + testFe[255] = float32(math.Exp(-de)) + for i := 254; i >= 1; i-- { + de = -math.Log(ve/de 
+ math.Exp(-de)) + testKe[i+1] = uint32((de / te) * m2) + te = de + testFe[i] = float32(math.Exp(-de)) + testWe[i] = float32(de / m2) + } + return +} + +// compareUint32Slices returns the first index where the two slices +// disagree, or <0 if the lengths are the same and all elements +// are identical. +func compareUint32Slices(s1, s2 []uint32) int { + if len(s1) != len(s2) { + if len(s1) > len(s2) { + return len(s2) + 1 + } + return len(s1) + 1 + } + for i := range s1 { + if s1[i] != s2[i] { + return i + } + } + return -1 +} + +// compareFloat32Slices returns the first index where the two slices +// disagree, or <0 if the lengths are the same and all elements +// are identical. +func compareFloat32Slices(s1, s2 []float32) int { + if len(s1) != len(s2) { + if len(s1) > len(s2) { + return len(s2) + 1 + } + return len(s1) + 1 + } + for i := range s1 { + if !nearEqual(float64(s1[i]), float64(s2[i]), 0, 1e-7) { + return i + } + } + return -1 +} + +func TestNormTables(t *testing.T) { + testKn, testWn, testFn := initNorm() + if i := compareUint32Slices(kn[0:], testKn); i >= 0 { + t.Errorf("kn disagrees at index %v; %v != %v", i, kn[i], testKn[i]) + } + if i := compareFloat32Slices(wn[0:], testWn); i >= 0 { + t.Errorf("wn disagrees at index %v; %v != %v", i, wn[i], testWn[i]) + } + if i := compareFloat32Slices(fn[0:], testFn); i >= 0 { + t.Errorf("fn disagrees at index %v; %v != %v", i, fn[i], testFn[i]) + } +} + +func TestExpTables(t *testing.T) { + testKe, testWe, testFe := initExp() + if i := compareUint32Slices(ke[0:], testKe); i >= 0 { + t.Errorf("ke disagrees at index %v; %v != %v", i, ke[i], testKe[i]) + } + if i := compareFloat32Slices(we[0:], testWe); i >= 0 { + t.Errorf("we disagrees at index %v; %v != %v", i, we[i], testWe[i]) + } + if i := compareFloat32Slices(fe[0:], testFe); i >= 0 { + t.Errorf("fe disagrees at index %v; %v != %v", i, fe[i], testFe[i]) + } +} + +// For issue 6721, the problem came after 7533753 calls, so check 10e6. 
+func TestFloat32(t *testing.T) { + r := New(NewSource(1)) + for ct := 0; ct < 10e6; ct++ { + f := r.Float32() + if f >= 1 { + t.Fatal("Float32() should be in range [0,1). ct:", ct, "f:", f) + } + } +} + +// Benchmarks + +func BenchmarkInt63Threadsafe(b *testing.B) { + for n := b.N; n > 0; n-- { + Int63() + } +} + +func BenchmarkInt63Unthreadsafe(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Int63() + } +} + +func BenchmarkIntn1000(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Intn(1000) + } +} + +func BenchmarkInt63n1000(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Int63n(1000) + } +} + +func BenchmarkInt31n1000(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Int31n(1000) + } +} + +func BenchmarkFloat32(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Float32() + } +} + +func BenchmarkFloat64(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Float64() + } +} + +func BenchmarkPerm3(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Perm(3) + } +} + +func BenchmarkPerm30(b *testing.B) { + r := New(NewSource(1)) + for n := b.N; n > 0; n-- { + r.Perm(30) + } +} diff --git a/src/math/rand/regress_test.go b/src/math/rand/regress_test.go new file mode 100644 index 000000000..2b012af89 --- /dev/null +++ b/src/math/rand/regress_test.go @@ -0,0 +1,355 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test that random number sequences generated by a specific seed +// do not change from version to version. +// +// Do NOT make changes to the golden outputs. If bugs need to be fixed +// in the underlying code, find ways to fix them that do not affect the +// outputs. + +package rand_test + +import ( + "flag" + "fmt" + . 
"math/rand" + "reflect" + "testing" +) + +var printgolden = flag.Bool("printgolden", false, "print golden results for regression test") + +func TestRegress(t *testing.T) { + var int32s = []int32{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1} + var int64s = []int64{1, 10, 32, 1 << 20, 1<<20 + 1, 1000000000, 1 << 30, 1<<31 - 2, 1<<31 - 1, 1000000000000000000, 1 << 60, 1<<63 - 2, 1<<63 - 1} + var permSizes = []int{0, 1, 5, 8, 9, 10, 16} + r := New(NewSource(0)) + + rv := reflect.ValueOf(r) + n := rv.NumMethod() + p := 0 + if *printgolden { + fmt.Printf("var regressGolden = []interface{}{\n") + } + for i := 0; i < n; i++ { + m := rv.Type().Method(i) + mv := rv.Method(i) + mt := mv.Type() + if mt.NumOut() == 0 { + continue + } + if mt.NumOut() != 1 { + t.Fatalf("unexpected result count for r.%s", m.Name) + } + r.Seed(0) + for repeat := 0; repeat < 20; repeat++ { + var args []reflect.Value + var argstr string + if mt.NumIn() == 1 { + var x interface{} + switch mt.In(0).Kind() { + default: + t.Fatalf("unexpected argument type for r.%s", m.Name) + + case reflect.Int: + if m.Name == "Perm" { + x = permSizes[repeat%len(permSizes)] + break + } + big := int64s[repeat%len(int64s)] + if int64(int(big)) != big { + r.Int63n(big) // what would happen on 64-bit machine, to keep stream in sync + if *printgolden { + fmt.Printf("\tskipped, // must run printgolden on 64-bit machine\n") + } + p++ + continue + } + x = int(big) + + case reflect.Int32: + x = int32s[repeat%len(int32s)] + + case reflect.Int64: + x = int64s[repeat%len(int64s)] + } + argstr = fmt.Sprint(x) + args = append(args, reflect.ValueOf(x)) + } + out := mv.Call(args)[0].Interface() + if m.Name == "Int" || m.Name == "Intn" { + out = int64(out.(int)) + } + if *printgolden { + var val string + big := int64(1 << 60) + if int64(int(big)) != big && (m.Name == "Int" || m.Name == "Intn") { + // 32-bit machine cannot print 64-bit results + val = "truncated" + } else if reflect.TypeOf(out).Kind() == 
reflect.Slice { + val = fmt.Sprintf("%#v", out) + } else { + val = fmt.Sprintf("%T(%v)", out, out) + } + fmt.Printf("\t%s, // %s(%s)\n", val, m.Name, argstr) + } else { + want := regressGolden[p] + if m.Name == "Int" { + want = int64(int(uint(want.(int64)) << 1 >> 1)) + } + if !reflect.DeepEqual(out, want) { + t.Errorf("r.%s(%s) = %v, want %v", m.Name, argstr, out, want) + } + } + p++ + } + } + if *printgolden { + fmt.Printf("}\n") + } +} + +var regressGolden = []interface{}{ + float64(4.668112973579268), // ExpFloat64() + float64(0.1601593871172866), // ExpFloat64() + float64(3.0465834105636), // ExpFloat64() + float64(0.06385839451671879), // ExpFloat64() + float64(1.8578917487258961), // ExpFloat64() + float64(0.784676123472182), // ExpFloat64() + float64(0.11225477361256932), // ExpFloat64() + float64(0.20173283329802255), // ExpFloat64() + float64(0.3468619496201105), // ExpFloat64() + float64(0.35601103454384536), // ExpFloat64() + float64(0.888376329507869), // ExpFloat64() + float64(1.4081362450365698), // ExpFloat64() + float64(1.0077753823151994), // ExpFloat64() + float64(0.23594100766227588), // ExpFloat64() + float64(2.777245612300007), // ExpFloat64() + float64(0.5202997830662377), // ExpFloat64() + float64(1.2842705247770294), // ExpFloat64() + float64(0.030307408362776206), // ExpFloat64() + float64(2.204156824853721), // ExpFloat64() + float64(2.09891923895058), // ExpFloat64() + float32(0.94519615), // Float32() + float32(0.24496509), // Float32() + float32(0.65595627), // Float32() + float32(0.05434384), // Float32() + float32(0.3675872), // Float32() + float32(0.28948045), // Float32() + float32(0.1924386), // Float32() + float32(0.65533215), // Float32() + float32(0.8971697), // Float32() + float32(0.16735445), // Float32() + float32(0.28858566), // Float32() + float32(0.9026048), // Float32() + float32(0.84978026), // Float32() + float32(0.2730468), // Float32() + float32(0.6090802), // Float32() + float32(0.253656), // Float32() + 
float32(0.7746542), // Float32() + float32(0.017480763), // Float32() + float32(0.78707397), // Float32() + float32(0.7993937), // Float32() + float64(0.9451961492941164), // Float64() + float64(0.24496508529377975), // Float64() + float64(0.6559562651954052), // Float64() + float64(0.05434383959970039), // Float64() + float64(0.36758720663245853), // Float64() + float64(0.2894804331565928), // Float64() + float64(0.19243860967493215), // Float64() + float64(0.6553321508148324), // Float64() + float64(0.897169713149801), // Float64() + float64(0.16735444255905835), // Float64() + float64(0.2885856518054551), // Float64() + float64(0.9026048462705047), // Float64() + float64(0.8497802817628735), // Float64() + float64(0.2730468047134829), // Float64() + float64(0.6090801919903561), // Float64() + float64(0.25365600644283687), // Float64() + float64(0.7746542391859803), // Float64() + float64(0.017480762156647272), // Float64() + float64(0.7870739563039942), // Float64() + float64(0.7993936979594545), // Float64() + int64(8717895732742165505), // Int() + int64(2259404117704393152), // Int() + int64(6050128673802995827), // Int() + int64(501233450539197794), // Int() + int64(3390393562759376202), // Int() + int64(2669985732393126063), // Int() + int64(1774932891286980153), // Int() + int64(6044372234677422456), // Int() + int64(8274930044578894929), // Int() + int64(1543572285742637646), // Int() + int64(2661732831099943416), // Int() + int64(8325060299420976708), // Int() + int64(7837839688282259259), // Int() + int64(2518412263346885298), // Int() + int64(5617773211005988520), // Int() + int64(2339563716805116249), // Int() + int64(7144924247938981575), // Int() + int64(161231572858529631), // Int() + int64(7259475919510918339), // Int() + int64(7373105480197164748), // Int() + int32(2029793274), // Int31() + int32(526058514), // Int31() + int32(1408655353), // Int31() + int32(116702506), // Int31() + int32(789387515), // Int31() + int32(621654496), // Int31() + 
int32(413258767), // Int31() + int32(1407315077), // Int31() + int32(1926657288), // Int31() + int32(359390928), // Int31() + int32(619732968), // Int31() + int32(1938329147), // Int31() + int32(1824889259), // Int31() + int32(586363548), // Int31() + int32(1307989752), // Int31() + int32(544722126), // Int31() + int32(1663557311), // Int31() + int32(37539650), // Int31() + int32(1690228450), // Int31() + int32(1716684894), // Int31() + int32(0), // Int31n(1) + int32(4), // Int31n(10) + int32(25), // Int31n(32) + int32(310570), // Int31n(1048576) + int32(857611), // Int31n(1048577) + int32(621654496), // Int31n(1000000000) + int32(413258767), // Int31n(1073741824) + int32(1407315077), // Int31n(2147483646) + int32(1926657288), // Int31n(2147483647) + int32(0), // Int31n(1) + int32(8), // Int31n(10) + int32(27), // Int31n(32) + int32(367019), // Int31n(1048576) + int32(209005), // Int31n(1048577) + int32(307989752), // Int31n(1000000000) + int32(544722126), // Int31n(1073741824) + int32(1663557311), // Int31n(2147483646) + int32(37539650), // Int31n(2147483647) + int32(0), // Int31n(1) + int32(4), // Int31n(10) + int64(8717895732742165505), // Int63() + int64(2259404117704393152), // Int63() + int64(6050128673802995827), // Int63() + int64(501233450539197794), // Int63() + int64(3390393562759376202), // Int63() + int64(2669985732393126063), // Int63() + int64(1774932891286980153), // Int63() + int64(6044372234677422456), // Int63() + int64(8274930044578894929), // Int63() + int64(1543572285742637646), // Int63() + int64(2661732831099943416), // Int63() + int64(8325060299420976708), // Int63() + int64(7837839688282259259), // Int63() + int64(2518412263346885298), // Int63() + int64(5617773211005988520), // Int63() + int64(2339563716805116249), // Int63() + int64(7144924247938981575), // Int63() + int64(161231572858529631), // Int63() + int64(7259475919510918339), // Int63() + int64(7373105480197164748), // Int63() + int64(0), // Int63n(1) + int64(2), // Int63n(10) + 
int64(19), // Int63n(32) + int64(959842), // Int63n(1048576) + int64(688912), // Int63n(1048577) + int64(393126063), // Int63n(1000000000) + int64(89212473), // Int63n(1073741824) + int64(834026388), // Int63n(2147483646) + int64(1577188963), // Int63n(2147483647) + int64(543572285742637646), // Int63n(1000000000000000000) + int64(355889821886249464), // Int63n(1152921504606846976) + int64(8325060299420976708), // Int63n(9223372036854775806) + int64(7837839688282259259), // Int63n(9223372036854775807) + int64(0), // Int63n(1) + int64(0), // Int63n(10) + int64(25), // Int63n(32) + int64(679623), // Int63n(1048576) + int64(882178), // Int63n(1048577) + int64(510918339), // Int63n(1000000000) + int64(782454476), // Int63n(1073741824) + int64(0), // Intn(1) + int64(4), // Intn(10) + int64(25), // Intn(32) + int64(310570), // Intn(1048576) + int64(857611), // Intn(1048577) + int64(621654496), // Intn(1000000000) + int64(413258767), // Intn(1073741824) + int64(1407315077), // Intn(2147483646) + int64(1926657288), // Intn(2147483647) + int64(543572285742637646), // Intn(1000000000000000000) + int64(355889821886249464), // Intn(1152921504606846976) + int64(8325060299420976708), // Intn(9223372036854775806) + int64(7837839688282259259), // Intn(9223372036854775807) + int64(0), // Intn(1) + int64(2), // Intn(10) + int64(14), // Intn(32) + int64(515775), // Intn(1048576) + int64(839455), // Intn(1048577) + int64(690228450), // Intn(1000000000) + int64(642943070), // Intn(1073741824) + float64(-0.28158587086436215), // NormFloat64() + float64(0.570933095808067), // NormFloat64() + float64(-1.6920196326157044), // NormFloat64() + float64(0.1996229111693099), // NormFloat64() + float64(1.9195199291234621), // NormFloat64() + float64(0.8954838794918353), // NormFloat64() + float64(0.41457072128813166), // NormFloat64() + float64(-0.48700161491544713), // NormFloat64() + float64(-0.1684059662402393), // NormFloat64() + float64(0.37056410998929545), // NormFloat64() + 
float64(1.0156889027029008), // NormFloat64() + float64(-0.5174422210625114), // NormFloat64() + float64(-0.5565834214413804), // NormFloat64() + float64(0.778320596648391), // NormFloat64() + float64(-1.8970718197702225), // NormFloat64() + float64(0.5229525761688676), // NormFloat64() + float64(-1.5515595563231523), // NormFloat64() + float64(0.0182029289376123), // NormFloat64() + float64(-0.6820951356608795), // NormFloat64() + float64(-0.5987943422687668), // NormFloat64() + []int{}, // Perm(0) + []int{0}, // Perm(1) + []int{0, 4, 1, 3, 2}, // Perm(5) + []int{3, 1, 0, 4, 7, 5, 2, 6}, // Perm(8) + []int{5, 0, 3, 6, 7, 4, 2, 1, 8}, // Perm(9) + []int{4, 5, 0, 2, 6, 9, 3, 1, 8, 7}, // Perm(10) + []int{14, 2, 0, 8, 3, 5, 13, 12, 1, 4, 6, 7, 11, 9, 15, 10}, // Perm(16) + []int{}, // Perm(0) + []int{0}, // Perm(1) + []int{3, 0, 1, 2, 4}, // Perm(5) + []int{5, 1, 2, 0, 4, 7, 3, 6}, // Perm(8) + []int{4, 0, 6, 8, 1, 5, 2, 7, 3}, // Perm(9) + []int{8, 6, 1, 7, 5, 4, 3, 2, 9, 0}, // Perm(10) + []int{0, 3, 13, 2, 15, 4, 10, 1, 8, 14, 7, 6, 12, 9, 5, 11}, // Perm(16) + []int{}, // Perm(0) + []int{0}, // Perm(1) + []int{0, 4, 2, 1, 3}, // Perm(5) + []int{2, 1, 7, 0, 6, 3, 4, 5}, // Perm(8) + []int{8, 7, 5, 3, 4, 6, 0, 1, 2}, // Perm(9) + []int{1, 0, 2, 5, 7, 6, 9, 8, 3, 4}, // Perm(10) + uint32(4059586549), // Uint32() + uint32(1052117029), // Uint32() + uint32(2817310706), // Uint32() + uint32(233405013), // Uint32() + uint32(1578775030), // Uint32() + uint32(1243308993), // Uint32() + uint32(826517535), // Uint32() + uint32(2814630155), // Uint32() + uint32(3853314576), // Uint32() + uint32(718781857), // Uint32() + uint32(1239465936), // Uint32() + uint32(3876658295), // Uint32() + uint32(3649778518), // Uint32() + uint32(1172727096), // Uint32() + uint32(2615979505), // Uint32() + uint32(1089444252), // Uint32() + uint32(3327114623), // Uint32() + uint32(75079301), // Uint32() + uint32(3380456901), // Uint32() + uint32(3433369789), // Uint32() +} diff --git 
a/src/math/rand/rng.go b/src/math/rand/rng.go new file mode 100644 index 000000000..947c49f0f --- /dev/null +++ b/src/math/rand/rng.go @@ -0,0 +1,246 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand + +/* + * Uniform distribution + * + * algorithm by + * DP Mitchell and JA Reeds + */ + +const ( + _LEN = 607 + _TAP = 273 + _MAX = 1 << 63 + _MASK = _MAX - 1 + _A = 48271 + _M = (1 << 31) - 1 + _Q = 44488 + _R = 3399 +) + +var ( + // cooked random numbers + // the state of the rng + // after 780e10 iterations + rng_cooked [_LEN]int64 = [...]int64{ + 5041579894721019882, 4646389086726545243, 1395769623340756751, 5333664234075297259, + 2875692520355975054, 9033628115061424579, 7143218595135194537, 4812947590706362721, + 7937252194349799378, 5307299880338848416, 8209348851763925077, 2115741599318814044, + 4593015457530856296, 8140875735541888011, 3319429241265089026, 8619815648190321034, + 1727074043483619500, 113108499721038619, 4569519971459345583, 5062833859075314731, + 2387618771259064424, 2716131344356686112, 6559392774825876886, 7650093201692370310, + 7684323884043752161, 257867835996031390, 6593456519409015164, 271327514973697897, + 2789386447340118284, 1065192797246149621, 3344507881999356393, 4459797941780066633, + 7465081662728599889, 1014950805555097187, 4449440729345990775, 3481109366438502643, + 2418672789110888383, 5796562887576294778, 4484266064449540171, 3738982361971787048, + 4523597184512354423, 10530508058128498, 8633833783282346118, 2625309929628791628, + 8660405965245884302, 10162832508971942, 6540714680961817391, 7031802312784620857, + 6240911277345944669, 831864355460801054, 8004434137542152891, 2116287251661052151, + 2202309800992166967, 9161020366945053561, 4069299552407763864, 4936383537992622449, + 457351505131524928, 342195045928179354, 2847771682816600509, 2068020115986376518, + 4368649989588021065, 
887231587095185257, 5563591506886576496, 6816225200251950296, + 5616972787034086048, 8471809303394836566, 1686575021641186857, 4045484338074262002, + 4244156215201778923, 7848217333783577387, 5632136521049761902, 833283142057835272, + 9029726508369077193, 3243583134664087292, 4316371101804477087, 8937849979965997980, + 6446940406810434101, 1679342092332374735, 6050638460742422078, 6993520719509581582, + 7640877852514293609, 5881353426285907985, 812786550756860885, 4541845584483343330, + 2725470216277009086, 4980675660146853729, 5210769080603236061, 8894283318990530821, + 6326442804750084282, 1495812843684243920, 7069751578799128019, 7370257291860230865, + 6756929275356942261, 4706794511633873654, 7824520467827898663, 8549875090542453214, + 33650829478596156, 1328918435751322643, 7297902601803624459, 1011190183918857495, + 2238025036817854944, 5147159997473910359, 896512091560522982, 2659470849286379941, + 6097729358393448602, 1731725986304753684, 4106255841983812711, 8327155210721535508, + 8477511620686074402, 5803876044675762232, 8435417780860221662, 5988852856651071244, + 4715837297103951910, 7566171971264485114, 505808562678895611, 5070098180695063370, + 842110666775871513, 572156825025677802, 1791881013492340891, 3393267094866038768, + 3778721850472236509, 2352769483186201278, 1292459583847367458, 8897907043675088419, + 5781809037144163536, 2733958794029492513, 5092019688680754699, 8996124554772526841, + 4234737173186232084, 5027558287275472836, 4635198586344772304, 8687338893267139351, + 5907508150730407386, 784756255473944452, 972392927514829904, 5422057694808175112, + 5158420642969283891, 9048531678558643225, 2407211146698877100, 7583282216521099569, + 3940796514530962282, 3341174631045206375, 3095313889586102949, 7405321895688238710, + 5832080132947175283, 7890064875145919662, 8184139210799583195, 1149859861409226130, + 1464597243840211302, 4641648007187991873, 3516491885471466898, 956288521791657692, + 6657089965014657519, 5220884358887979358, 
1796677326474620641, 5340761970648932916, + 1147977171614181568, 5066037465548252321, 2574765911837859848, 1085848279845204775, + 3350107529868390359, 6116438694366558490, 2107701075971293812, 1803294065921269267, + 2469478054175558874, 7368243281019965984, 3791908367843677526, 185046971116456637, + 2257095756513439648, 7217693971077460129, 909049953079504259, 7196649268545224266, + 5637660345400869599, 3955544945427965183, 8057528650917418961, 4139268440301127643, + 6621926588513568059, 1373361136802681441, 6527366231383600011, 3507654575162700890, + 9202058512774729859, 1954818376891585542, 6640380907130175705, 8299563319178235687, + 3901867355218954373, 7046310742295574065, 6847195391333990232, 1572638100518868053, + 8850422670118399721, 3631909142291992901, 5158881091950831288, 2882958317343121593, + 4763258931815816403, 6280052734341785344, 4243789408204964850, 2043464728020827976, + 6545300466022085465, 4562580375758598164, 5495451168795427352, 1738312861590151095, + 553004618757816492, 6895160632757959823, 8233623922264685171, 7139506338801360852, + 8550891222387991669, 5535668688139305547, 2430933853350256242, 5401941257863201076, + 8159640039107728799, 6157493831600770366, 7632066283658143750, 6308328381617103346, + 3681878764086140361, 3289686137190109749, 6587997200611086848, 244714774258135476, + 4079788377417136100, 8090302575944624335, 2945117363431356361, 864324395848741045, + 3009039260312620700, 8430027460082534031, 401084700045993341, 7254622446438694921, + 4707864159563588614, 5640248530963493951, 5982507712689997893, 3315098242282210105, + 5503847578771918426, 3941971367175193882, 8118566580304798074, 3839261274019871296, + 7062410411742090847, 741381002980207668, 6027994129690250817, 2497829994150063930, + 6251390334426228834, 1368930247903518833, 8809096399316380241, 6492004350391900708, + 2462145737463489636, 404828418920299174, 4153026434231690595, 261785715255475940, + 5464715384600071357, 592710404378763017, 6764129236657751224, 
8513655718539357449, + 5820343663801914208, 385298524683789911, 5224135003438199467, 6303131641338802145, + 7150122561309371392, 368107899140673753, 3115186834558311558, 2915636353584281051, + 4782583894627718279, 6718292300699989587, 8387085186914375220, 3387513132024756289, + 4654329375432538231, 8930667561363381602, 5374373436876319273, 7623042350483453954, + 7725442901813263321, 9186225467561587250, 4091027289597503355, 2357631606492579800, + 2530936820058611833, 1636551876240043639, 5564664674334965799, 1452244145334316253, + 2061642381019690829, 1279580266495294036, 9108481583171221009, 6023278686734049809, + 5007630032676973346, 2153168792952589781, 6720334534964750538, 6041546491134794105, + 3433922409283786309, 2285479922797300912, 3110614940896576130, 6366559590722842893, + 5418791419666136509, 7163298419643543757, 4891138053923696990, 580618510277907015, + 1684034065251686769, 4429514767357295841, 330346578555450005, 1119637995812174675, + 7177515271653460134, 4589042248470800257, 7693288629059004563, 143607045258444228, + 246994305896273627, 866417324803099287, 6473547110565816071, 3092379936208876896, + 2058427839513754051, 5133784708526867938, 8785882556301281247, 6149332666841167611, + 8585842181454472135, 6137678347805511274, 2070447184436970006, 5708223427705576541, + 5999657892458244504, 4358391411789012426, 325123008708389849, 6837621693887290924, + 4843721905315627004, 6010651222149276415, 5398352198963874652, 4602025990114250980, + 1044646352569048800, 9106614159853161675, 829256115228593269, 4919284369102997000, + 2681532557646850893, 3681559472488511871, 5307999518958214035, 6334130388442829274, + 2658708232916537604, 1163313865052186287, 581945337509520675, 3648778920718647903, + 4423673246306544414, 1620799783996955743, 220828013409515943, 8150384699999389761, + 4287360518296753003, 4590000184845883843, 5513660857261085186, 6964829100392774275, + 478991688350776035, 8746140185685648781, 228500091334420247, 1356187007457302238, + 
3019253992034194581, 3152601605678500003, 430152752706002213, 5559581553696971176, + 4916432985369275664, 663574931734554391, 3420773838927732076, 2868348622579915573, + 1999319134044418520, 3328689518636282723, 2587672709781371173, 1517255313529399333, + 3092343956317362483, 3662252519007064108, 972445599196498113, 7664865435875959367, + 1708913533482282562, 6917817162668868494, 3217629022545312900, 2570043027221707107, + 8739788839543624613, 2488075924621352812, 4694002395387436668, 4559628481798514356, + 2997203966153298104, 1282559373026354493, 240113143146674385, 8665713329246516443, + 628141331766346752, 4571950817186770476, 1472811188152235408, 7596648026010355826, + 6091219417754424743, 7834161864828164065, 7103445518877254909, 4390861237357459201, + 4442653864240571734, 8903482404847331368, 622261699494173647, 6037261250297213248, + 504404948065709118, 7275215526217113061, 1011176780856001400, 2194750105623461063, + 2623071828615234808, 5157313728073836108, 3738405111966602044, 2539767524076729570, + 2467284396349269342, 5256026990536851868, 7841086888628396109, 6640857538655893162, + 1202087339038317498, 2113514992440715978, 7534350895342931403, 4925284734898484745, + 5145623771477493805, 8225140880134972332, 2719520354384050532, 9132346697815513771, + 4332154495710163773, 7137789594094346916, 6994721091344268833, 6667228574869048934, + 655440045726677499, 59934747298466858, 6124974028078036405, 8957774780655365418, + 2332206071942466437, 1701056712286369627, 3154897383618636503, 1637766181387607527, + 2460521277767576533, 197309393502684135, 643677854385267315, 2543179307861934850, + 4350769010207485119, 4754652089410667672, 2015595502641514512, 7999059458976458608, + 4287946071480840813, 8362686366770308971, 6486469209321732151, 3617727845841796026, + 7554353525834302244, 4450022655153542367, 1605195740213535749, 5327014565305508387, + 4626575813550328320, 2692222020597705149, 241045573717249868, 5098046974627094010, + 7916882295460730264, 
884817090297530579, 5329160409530630596, 7790979528857726136, + 4955070238059373407, 4918537275422674302, 3008076183950404629, 3007769226071157901, + 2470346235617803020, 8928702772696731736, 7856187920214445904, 4474874585391974885, + 7900176660600710914, 2140571127916226672, 2425445057265199971, 2486055153341847830, + 4186670094382025798, 1883939007446035042, 8808666044074867985, 3734134241178479257, + 4065968871360089196, 6953124200385847784, 1305686814738899057, 1637739099014457647, + 3656125660947993209, 3966759634633167020, 3106378204088556331, 6328899822778449810, + 4565385105440252958, 1979884289539493806, 2331793186920865425, 3783206694208922581, + 8464961209802336085, 2843963751609577687, 3030678195484896323, 4793717574095772604, + 4459239494808162889, 402587895800087237, 8057891408711167515, 4541888170938985079, + 1042662272908816815, 5557303057122568958, 2647678726283249984, 2144477441549833761, + 5806352215355387087, 7117771003473903623, 5916597177708541638, 462597715452321361, + 8833658097025758785, 5970273481425315300, 563813119381731307, 2768349550652697015, + 1598828206250873866, 5206393647403558110, 6235043485709261823, 3152217402014639496, + 8469693267274066490, 125672920241807416, 5311079624024060938, 6663754932310491587, + 8736848295048751716, 4488039774992061878, 5923302823487327109, 140891791083103236, + 7414942793393574290, 7990420780896957397, 4317817392807076702, 3625184369705367340, + 2740722765288122703, 5743100009702758344, 5997898640509039159, 8854493341352484163, + 5242208035432907801, 701338899890987198, 7609280429197514109, 3020985755112334161, + 6651322707055512866, 2635195723621160615, 5144520864246028816, 1035086515727829828, + 1567242097116389047, 8172389260191636581, 6337820351429292273, 2163012566996458925, + 2743190902890262681, 1906367633221323427, 6011544915663598137, 5932255307352610768, + 2241128460406315459, 895504896216695588, 3094483003111372717, 4583857460292963101, + 9079887171656594975, 8839289181930711403, 
5762740387243057873, 4225072055348026230, + 1838220598389033063, 3801620336801580414, 8823526620080073856, 1776617605585100335, + 7899055018877642622, 5421679761463003041, 5521102963086275121, 4248279443559365898, + 8735487530905098534, 1760527091573692978, 7142485049657745894, 8222656872927218123, + 4969531564923704323, 3394475942196872480, 6424174453260338141, 359248545074932887, + 3273651282831730598, 6797106199797138596, 3030918217665093212, 145600834617314036, + 6036575856065626233, 740416251634527158, 7080427635449935582, 6951781370868335478, + 399922722363687927, 294902314447253185, 7844950936339178523, 880320858634709042, + 6192655680808675579, 411604686384710388, 9026808440365124461, 6440783557497587732, + 4615674634722404292, 539897290441580544, 2096238225866883852, 8751955639408182687, + 1907224908052289603, 7381039757301768559, 6157238513393239656, 7749994231914157575, + 8629571604380892756, 5280433031239081479, 7101611890139813254, 2479018537985767835, + 7169176924412769570, 7942066497793203302, 1357759729055557688, 2278447439451174845, + 3625338785743880657, 6477479539006708521, 8976185375579272206, 5511371554711836120, + 1326024180520890843, 7537449876596048829, 5464680203499696154, 3189671183162196045, + 6346751753565857109, 241159987320630307, 3095793449658682053, 8978332846736310159, + 2902794662273147216, 7208698530190629697, 7276901792339343736, 1732385229314443140, + 4133292154170828382, 2918308698224194548, 1519461397937144458, 5293934712616591764, + 4922828954023452664, 2879211533496425641, 5896236396443472108, 8465043815351752425, + 7329020396871624740, 8915471717014488588, 2944902635677463047, 7052079073493465134, + 8382142935188824023, 9103922860780351547, 4152330101494654406, + } +) + +type rngSource struct { + tap int // index into vec + feed int // index into vec + vec [_LEN]int64 // current feedback register +} + +// seed rng x[n+1] = 48271 * x[n] mod (2**31 - 1) +func seedrand(x int32) int32 { + hi := x / _Q + lo := x % _Q + x = _A*lo 
- _R*hi + if x < 0 { + x += _M + } + return x +} + +// Seed uses the provided seed value to initialize the generator to a deterministic state. +func (rng *rngSource) Seed(seed int64) { + rng.tap = 0 + rng.feed = _LEN - _TAP + + seed = seed % _M + if seed < 0 { + seed += _M + } + if seed == 0 { + seed = 89482311 + } + + x := int32(seed) + for i := -20; i < _LEN; i++ { + x = seedrand(x) + if i >= 0 { + var u int64 + u = int64(x) << 40 + x = seedrand(x) + u ^= int64(x) << 20 + x = seedrand(x) + u ^= int64(x) + u ^= rng_cooked[i] + rng.vec[i] = u & _MASK + } + } +} + +// Int63 returns a non-negative pseudo-random 63-bit integer as an int64. +func (rng *rngSource) Int63() int64 { + rng.tap-- + if rng.tap < 0 { + rng.tap += _LEN + } + + rng.feed-- + if rng.feed < 0 { + rng.feed += _LEN + } + + x := (rng.vec[rng.feed] + rng.vec[rng.tap]) & _MASK + rng.vec[rng.feed] = x + return x +} diff --git a/src/math/rand/zipf.go b/src/math/rand/zipf.go new file mode 100644 index 000000000..8db2c6f5b --- /dev/null +++ b/src/math/rand/zipf.go @@ -0,0 +1,75 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// W.Hormann, G.Derflinger: +// "Rejection-Inversion to Generate Variates +// from Monotone Discrete Distributions" +// http://eeyore.wu-wien.ac.at/papers/96-04-04.wh-der.ps.gz + +package rand + +import "math" + +// A Zipf generates Zipf distributed variates. +type Zipf struct { + r *Rand + imax float64 + v float64 + q float64 + s float64 + oneminusQ float64 + oneminusQinv float64 + hxm float64 + hx0minusHxm float64 +} + +func (z *Zipf) h(x float64) float64 { + return math.Exp(z.oneminusQ*math.Log(z.v+x)) * z.oneminusQinv +} + +func (z *Zipf) hinv(x float64) float64 { + return math.Exp(z.oneminusQinv*math.Log(z.oneminusQ*x)) - z.v +} + +// NewZipf returns a Zipf generating variates p(k) on [0, imax] +// proportional to (v+k)**(-s) where s>1 and k>=0, and v>=1. 
+func NewZipf(r *Rand, s float64, v float64, imax uint64) *Zipf { + z := new(Zipf) + if s <= 1.0 || v < 1 { + return nil + } + z.r = r + z.imax = float64(imax) + z.v = v + z.q = s + z.oneminusQ = 1.0 - z.q + z.oneminusQinv = 1.0 / z.oneminusQ + z.hxm = z.h(z.imax + 0.5) + z.hx0minusHxm = z.h(0.5) - math.Exp(math.Log(z.v)*(-z.q)) - z.hxm + z.s = 1 - z.hinv(z.h(1.5)-math.Exp(-z.q*math.Log(z.v+1.0))) + return z +} + +// Uint64 returns a value drawn from the Zipf distribution described +// by the Zipf object. +func (z *Zipf) Uint64() uint64 { + if z == nil { + panic("rand: nil Zipf") + } + k := 0.0 + + for { + r := z.r.Float64() // r on [0,1] + ur := z.hxm + r*z.hx0minusHxm + x := z.hinv(ur) + k = math.Floor(x + 0.5) + if k-x <= z.s { + break + } + if ur >= z.h(k+0.5)-math.Exp(-math.Log(k+z.v)*z.q) { + break + } + } + return uint64(k) +} diff --git a/src/math/remainder.go b/src/math/remainder.go new file mode 100644 index 000000000..9a4e4154c --- /dev/null +++ b/src/math/remainder.go @@ -0,0 +1,85 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code and the comment below are from +// FreeBSD's /usr/src/lib/msun/src/e_remainder.c and came +// with this notice. The go code is a simplified version of +// the original C. +// +// ==================================================== +// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +// +// Developed at SunPro, a Sun Microsystems, Inc. business. +// Permission to use, copy, modify, and distribute this +// software is freely granted, provided that this notice +// is preserved. +// ==================================================== +// +// __ieee754_remainder(x,y) +// Return : +// returns x REM y = x - [x/y]*y as if in infinite +// precision arithmetic, where [x/y] is the (infinite bit) +// integer nearest x/y (in half way cases, choose the even one). 
+// Method : +// Based on Mod() returning x - [x/y]chopped * y exactly. + +// Remainder returns the IEEE 754 floating-point remainder of x/y. +// +// Special cases are: +// Remainder(±Inf, y) = NaN +// Remainder(NaN, y) = NaN +// Remainder(x, 0) = NaN +// Remainder(x, ±Inf) = x +// Remainder(x, NaN) = NaN +func Remainder(x, y float64) float64 + +func remainder(x, y float64) float64 { + const ( + Tiny = 4.45014771701440276618e-308 // 0x0020000000000000 + HalfMax = MaxFloat64 / 2 + ) + // special cases + switch { + case IsNaN(x) || IsNaN(y) || IsInf(x, 0) || y == 0: + return NaN() + case IsInf(y, 0): + return x + } + sign := false + if x < 0 { + x = -x + sign = true + } + if y < 0 { + y = -y + } + if x == y { + return 0 + } + if y <= HalfMax { + x = Mod(x, y+y) // now x < 2y + } + if y < Tiny { + if x+x > y { + x -= y + if x+x >= y { + x -= y + } + } + } else { + yHalf := 0.5 * y + if x > yHalf { + x -= y + if x >= yHalf { + x -= y + } + } + } + if sign { + x = -x + } + return x +} diff --git a/src/math/remainder_386.s b/src/math/remainder_386.s new file mode 100644 index 000000000..318fa2c46 --- /dev/null +++ b/src/math/remainder_386.s @@ -0,0 +1,17 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Remainder(x, y float64) float64 +TEXT ·Remainder(SB),NOSPLIT,$0 + FMOVD y+8(FP), F0 // F0=y + FMOVD x+0(FP), F0 // F0=x, F1=y + FPREM1 // F0=reduced_x, F1=y + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE -3(PC) // jump if reduction incomplete + FMOVDP F0, F1 // F0=x-q*y + FMOVDP F0, ret+16(FP) + RET diff --git a/src/math/remainder_amd64.s b/src/math/remainder_amd64.s new file mode 100644 index 000000000..f7fda99d8 --- /dev/null +++ b/src/math/remainder_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Remainder(SB),NOSPLIT,$0 + JMP ·remainder(SB) diff --git a/src/math/remainder_amd64p32.s b/src/math/remainder_amd64p32.s new file mode 100644 index 000000000..cd5cf55ff --- /dev/null +++ b/src/math/remainder_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "remainder_amd64.s" diff --git a/src/math/remainder_arm.s b/src/math/remainder_arm.s new file mode 100644 index 000000000..1ae597a60 --- /dev/null +++ b/src/math/remainder_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Remainder(SB),NOSPLIT,$0 + B ·remainder(SB) diff --git a/src/math/signbit.go b/src/math/signbit.go new file mode 100644 index 000000000..670cc1a66 --- /dev/null +++ b/src/math/signbit.go @@ -0,0 +1,10 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Signbit returns true if x is negative or negative zero. +func Signbit(x float64) bool { + return Float64bits(x)&(1<<63) != 0 +} diff --git a/src/math/sin.go b/src/math/sin.go new file mode 100644 index 000000000..ed85f21be --- /dev/null +++ b/src/math/sin.go @@ -0,0 +1,224 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point sine and cosine. 
+*/ + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/sin.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The go code is a simplified version of the original C. +// +// sin.c +// +// Circular sine +// +// SYNOPSIS: +// +// double x, y, sin(); +// y = sin( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the sine is approximated by +// x + x**3 P(x**2). +// Between pi/4 and pi/2 the cosine is represented as +// 1 - x**2 Q(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// DEC 0, 10 150000 3.0e-17 7.8e-18 +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// +// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss +// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may +// be meaningless for x > 2**49 = 5.6e14. +// +// cos.c +// +// Circular cosine +// +// SYNOPSIS: +// +// double x, y, cos(); +// y = cos( x ); +// +// DESCRIPTION: +// +// Range reduction is into intervals of pi/4. The reduction error is nearly +// eliminated by contriving an extended precision modular arithmetic. +// +// Two polynomial approximating functions are employed. +// Between 0 and pi/4 the cosine is approximated by +// 1 - x**2 Q(x**2). +// Between pi/4 and pi/2 the sine is represented as +// x + x**3 P(x**2). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -1.07e9,+1.07e9 130000 2.1e-16 5.4e-17 +// DEC 0,+1.07e9 17000 3.0e-17 7.2e-18 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. 
// Moshier
//
// The readme file at http://netlib.sandia.gov/cephes/ says:
//    Some software in this archive may be from the book _Methods and
// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
// International, 1989) or from the Cephes Mathematical Library, a
// commercial product. In either event, it is copyrighted by the author.
// What you see here may be used freely but it comes with no support or
// guarantee.
//
//   The two known misprints in the book are repaired here in the
// source listings for the gamma function and the incomplete beta
// integral.
//
//   Stephen L. Moshier
//   moshier@na-net.ornl.gov

// sin coefficients
//
// _sin holds the coefficients of the polynomial P in the approximation
// sin(z) = z + z**3 * P(z**2). They are listed from the highest-degree
// term down to the constant term, which is the order in which the
// Horner evaluations in sin, cos and sincos read them.
var _sin = [...]float64{
	1.58962301576546568060E-10, // 0x3de5d8fd1fd19ccd
	-2.50507477628578072866E-8, // 0xbe5ae5e5a9291f5d
	2.75573136213857245213E-6,  // 0x3ec71de3567d48a1
	-1.98412698295895385996E-4, // 0xbf2a01a019bfdf03
	8.33333333332211858878E-3,  // 0x3f8111111110f7d0
	-1.66666666666666307295E-1, // 0xbfc5555555555548
}

// cos coefficients
//
// _cos holds the coefficients of the polynomial Q in the approximation
// cos(z) = 1 - z**2/2 + z**4 * Q(z**2), again from the highest-degree
// term down to the constant term.
var _cos = [...]float64{
	-1.13585365213876817300E-11, // 0xbda8fa49a0861a9b
	2.08757008419747316778E-9,   // 0x3e21ee9d7b4e3f05
	-2.75573141792967388112E-7,  // 0xbe927e4f7eac4bc6
	2.48015872888517045348E-5,   // 0x3efa01a019c844f5
	-1.38888888888730564116E-3,  // 0xbf56c16c16c14f91
	4.16666666666665929218E-2,   // 0x3fa555555555554b
}

// Cos returns the cosine of the radian argument x.
+// +// Special cases are: +// Cos(±Inf) = NaN +// Cos(NaN) = NaN +func Cos(x float64) float64 + +func cos(x float64) float64 { + const ( + PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170, + M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi + ) + // special cases + switch { + case IsNaN(x) || IsInf(x, 0): + return NaN() + } + + // make argument positive + sign := false + if x < 0 { + x = -x + } + + j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle + y := float64(j) // integer part of x/(Pi/4), as float + + // map zeros to origin + if j&1 == 1 { + j += 1 + y += 1 + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + if j > 3 { + j -= 4 + sign = !sign + } + if j > 1 { + sign = !sign + } + + z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + zz := z * z + if j == 1 || j == 2 { + y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + } else { + y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + } + if sign { + y = -y + } + return y +} + +// Sin returns the sine of the radian argument x. 
+// +// Special cases are: +// Sin(±0) = ±0 +// Sin(±Inf) = NaN +// Sin(NaN) = NaN +func Sin(x float64) float64 + +func sin(x float64) float64 { + const ( + PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170, + M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi + ) + // special cases + switch { + case x == 0 || IsNaN(x): + return x // return ±0 || NaN() + case IsInf(x, 0): + return NaN() + } + + // make argument positive but save the sign + sign := false + if x < 0 { + x = -x + sign = true + } + + j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle + y := float64(j) // integer part of x/(Pi/4), as float + + // map zeros to origin + if j&1 == 1 { + j += 1 + y += 1 + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + // reflect in x axis + if j > 3 { + sign = !sign + j -= 4 + } + + z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + zz := z * z + if j == 1 || j == 2 { + y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + } else { + y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + } + if sign { + y = -y + } + return y +} diff --git a/src/math/sin_386.s b/src/math/sin_386.s new file mode 100644 index 000000000..ccc8e64be --- /dev/null +++ b/src/math/sin_386.s @@ -0,0 +1,47 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func Cos(x float64) float64 +TEXT ·Cos(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FCOS // F0=cos(x) if -2**63 < x < 2**63 + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE 3(PC) // jump if x outside range + FMOVDP F0, ret+8(FP) + RET + FLDPI // F0=Pi, F1=x + FADDD F0, F0 // F0=2*Pi, F1=x + FXCHD F0, F1 // F0=x, F1=2*Pi + FPREM1 // F0=reduced_x, F1=2*Pi + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE -3(PC) // jump if reduction incomplete + FMOVDP F0, F1 // F0=reduced_x + FCOS // F0=cos(reduced_x) + FMOVDP F0, ret+8(FP) + RET + +// func Sin(x float64) float64 +TEXT ·Sin(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSIN // F0=sin(x) if -2**63 < x < 2**63 + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE 3(PC) // jump if x outside range + FMOVDP F0, ret+8(FP) + RET + FLDPI // F0=Pi, F1=x + FADDD F0, F0 // F0=2*Pi, F1=x + FXCHD F0, F1 // F0=x, F1=2*Pi + FPREM1 // F0=reduced_x, F1=2*Pi + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE -3(PC) // jump if reduction incomplete + FMOVDP F0, F1 // F0=reduced_x + FSIN // F0=sin(reduced_x) + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/sin_amd64.s b/src/math/sin_amd64.s new file mode 100644 index 000000000..0c33cecef --- /dev/null +++ b/src/math/sin_amd64.s @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Sin(SB),NOSPLIT,$0 + JMP ·sin(SB) + +TEXT ·Cos(SB),NOSPLIT,$0 + JMP ·cos(SB) diff --git a/src/math/sin_amd64p32.s b/src/math/sin_amd64p32.s new file mode 100644 index 000000000..9f93eba20 --- /dev/null +++ b/src/math/sin_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "sin_amd64.s" diff --git a/src/math/sin_arm.s b/src/math/sin_arm.s new file mode 100644 index 000000000..467af3dea --- /dev/null +++ b/src/math/sin_arm.s @@ -0,0 +1,11 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Sin(SB),NOSPLIT,$0 + B ·sin(SB) + +TEXT ·Cos(SB),NOSPLIT,$0 + B ·cos(SB) diff --git a/src/math/sincos.go b/src/math/sincos.go new file mode 100644 index 000000000..718030319 --- /dev/null +++ b/src/math/sincos.go @@ -0,0 +1,69 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// Coefficients _sin[] and _cos[] are found in pkg/math/sin.go. + +// Sincos returns Sin(x), Cos(x). +// +// Special cases are: +// Sincos(±0) = ±0, 1 +// Sincos(±Inf) = NaN, NaN +// Sincos(NaN) = NaN, NaN +func Sincos(x float64) (sin, cos float64) + +func sincos(x float64) (sin, cos float64) { + const ( + PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170, + M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi + ) + // special cases + switch { + case x == 0: + return x, 1 // return ±0.0, 1.0 + case IsNaN(x) || IsInf(x, 0): + return NaN(), NaN() + } + + // make argument positive + sinSign, cosSign := false, false + if x < 0 { + x = -x + sinSign = true + } + + j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle + y := float64(j) // integer part of x/(Pi/4), as float + + if j&1 == 1 { // map zeros to origin + j += 1 + y += 1 + } + j &= 7 // octant modulo 2Pi radians (360 degrees) + if j > 3 { // reflect in x axis + j -= 4 + sinSign, cosSign = !sinSign, !cosSign + } + if j > 1 { + cosSign = 
!cosSign + } + + z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic + zz := z * z + cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5]) + sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5]) + if j == 1 || j == 2 { + sin, cos = cos, sin + } + if cosSign { + cos = -cos + } + if sinSign { + sin = -sin + } + return +} diff --git a/src/math/sincos_386.s b/src/math/sincos_386.s new file mode 100644 index 000000000..83af5016e --- /dev/null +++ b/src/math/sincos_386.s @@ -0,0 +1,28 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Sincos(x float64) (sin, cos float64) +TEXT ·Sincos(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FSINCOS // F0=cos(x), F1=sin(x) if -2**63 < x < 2**63 + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE 4(PC) // jump if x outside range + FMOVDP F0, cos+16(FP) // F0=sin(x) + FMOVDP F0, sin+8(FP) + RET + FLDPI // F0=Pi, F1=x + FADDD F0, F0 // F0=2*Pi, F1=x + FXCHD F0, F1 // F0=x, F1=2*Pi + FPREM1 // F0=reduced_x, F1=2*Pi + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE -3(PC) // jump if reduction incomplete + FMOVDP F0, F1 // F0=reduced_x + FSINCOS // F0=cos(reduced_x), F1=sin(reduced_x) + FMOVDP F0, cos+16(FP) // F0=sin(reduced_x) + FMOVDP F0, sin+8(FP) + RET diff --git a/src/math/sincos_amd64.s b/src/math/sincos_amd64.s new file mode 100644 index 000000000..59bf55f58 --- /dev/null +++ b/src/math/sincos_amd64.s @@ -0,0 +1,142 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// The method is based on a paper by Naoki Shibata: "Efficient evaluation +// methods of elementary functions suitable for SIMD computation", Proc. 
+// of International Supercomputing Conference 2010 (ISC'10), pp. 25 -- 32 +// (May 2010). The paper is available at +// http://www.springerlink.com/content/340228x165742104/ +// +// The original code and the constants below are from the author's +// implementation available at http://freshmeat.net/projects/sleef. +// The README file says, "The software is in public domain. +// You can use the software without any obligation." +// +// This code is a simplified version of the original. + +#define PosOne 0x3FF0000000000000 +#define PosInf 0x7FF0000000000000 +#define NaN 0x7FF8000000000001 +#define PI4A 0.7853981554508209228515625 // pi/4 split into three parts +#define PI4B 0.794662735614792836713604629039764404296875e-8 +#define PI4C 0.306161699786838294306516483068750264552437361480769e-16 +#define M4PI 1.273239544735162542821171882678754627704620361328125 // 4/pi +#define T0 1.0 +#define T1 -8.33333333333333333333333e-02 // (-1.0/12) +#define T2 2.77777777777777777777778e-03 // (+1.0/360) +#define T3 -4.96031746031746031746032e-05 // (-1.0/20160) +#define T4 5.51146384479717813051146e-07 // (+1.0/1814400) + +// func Sincos(d float64) (sin, cos float64) +TEXT ·Sincos(SB),NOSPLIT,$0 + // test for special cases + MOVQ $~(1<<63), DX // sign bit mask + MOVQ x+0(FP), BX + ANDQ BX, DX + JEQ isZero + MOVQ $PosInf, AX + CMPQ AX, DX + JLE isInfOrNaN + // Reduce argument + MOVQ BX, X7 // x7= d + MOVQ DX, X0 // x0= |d| + MOVSD $M4PI, X2 + MULSD X0, X2 + CVTTSD2SQ X2, BX // bx= q + MOVQ $1, AX + ANDQ BX, AX + ADDQ BX, AX + CVTSQ2SD AX, X2 + MOVSD $PI4A, X3 + MULSD X2, X3 + SUBSD X3, X0 + MOVSD $PI4B, X3 + MULSD X2, X3 + SUBSD X3, X0 + MOVSD $PI4C, X3 + MULSD X2, X3 + SUBSD X3, X0 + MULSD $0.125, X0 // x0= x, x7= d, bx= q + // Evaluate Taylor series + MULSD X0, X0 + MOVSD $T4, X2 + MULSD X0, X2 + ADDSD $T3, X2 + MULSD X0, X2 + ADDSD $T2, X2 + MULSD X0, X2 + ADDSD $T1, X2 + MULSD X0, X2 + ADDSD $T0, X2 + MULSD X2, X0 // x0= x, x7= d, bx= q + // Apply double angle formula + MOVSD 
$4.0, X2 + SUBSD X0, X2 + MULSD X2, X0 + MOVSD $4.0, X2 + SUBSD X0, X2 + MULSD X2, X0 + MOVSD $4.0, X2 + SUBSD X0, X2 + MULSD X2, X0 + MULSD $0.5, X0 // x0= x, x7= d, bx= q + // sin = sqrt((2 - x) * x) + MOVSD $2.0, X2 + SUBSD X0, X2 + MULSD X0, X2 + SQRTSD X2, X2 // x0= x, x2= z, x7= d, bx= q + // cos = 1 - x + MOVSD $1.0, X1 + SUBSD X0, X1 // x1= x, x2= z, x7= d, bx= q + // if ((q + 1) & 2) != 0 { sin, cos = cos, sin } + MOVQ $1, DX + ADDQ BX, DX + ANDQ $2, DX + SHRQ $1, DX + SUBQ $1, DX + MOVQ DX, X3 + // sin = (y & z) | (^y & x) + MOVAPD X2, X0 + ANDPD X3, X0 // x0= sin + MOVAPD X3, X4 + ANDNPD X1, X4 + ORPD X4, X0 // x0= sin, x1= x, x2= z, x3= y, x7= d, bx= q + // cos = (y & x) | (^y & z) + ANDPD X3, X1 // x1= cos + ANDNPD X2, X3 + ORPD X3, X1 // x0= sin, x1= cos, x7= d, bx= q + // if ((q & 4) != 0) != (d < 0) { sin = -sin } + MOVQ BX, AX + MOVQ $61, CX + SHLQ CX, AX + MOVQ AX, X3 + XORPD X7, X3 + MOVQ $(1<<63), AX + MOVQ AX, X2 // x2= -0.0 + ANDPD X2, X3 + ORPD X3, X0 // x0= sin, x1= cos, x2= -0.0, bx= q + // if ((q + 2) & 4) != 0 { cos = -cos } + MOVQ $2, AX + ADDQ AX, BX + MOVQ $61, CX + SHLQ CX, BX + MOVQ BX, X3 + ANDPD X2, X3 + ORPD X3, X1 // x0= sin, x1= cos + // return (sin, cos) + MOVSD X0, sin+8(FP) + MOVSD X1, cos+16(FP) + RET +isZero: // return (±0.0, 1.0) + MOVQ BX, sin+8(FP) + MOVQ $PosOne, AX + MOVQ AX, cos+16(FP) + RET +isInfOrNaN: // return (NaN, NaN) + MOVQ $NaN, AX + MOVQ AX, sin+8(FP) + MOVQ AX, cos+16(FP) + RET diff --git a/src/math/sincos_amd64p32.s b/src/math/sincos_amd64p32.s new file mode 100644 index 000000000..360e94d09 --- /dev/null +++ b/src/math/sincos_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "sincos_amd64.s" diff --git a/src/math/sincos_arm.s b/src/math/sincos_arm.s new file mode 100644 index 000000000..9fe048248 --- /dev/null +++ b/src/math/sincos_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Sincos(SB),NOSPLIT,$0 + B ·sincos(SB) diff --git a/src/math/sinh.go b/src/math/sinh.go new file mode 100644 index 000000000..139b911fe --- /dev/null +++ b/src/math/sinh.go @@ -0,0 +1,77 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point hyperbolic sine and cosine. + + The exponential func is called for arguments + greater in magnitude than 0.5. + + A series is used for arguments smaller in magnitude than 0.5. + + Cosh(x) is computed from the exponential func for + all arguments. +*/ + +// Sinh returns the hyperbolic sine of x. +// +// Special cases are: +// Sinh(±0) = ±0 +// Sinh(±Inf) = ±Inf +// Sinh(NaN) = NaN +func Sinh(x float64) float64 { + // The coefficients are #2029 from Hart & Cheney. (20.36D) + const ( + P0 = -0.6307673640497716991184787251e+6 + P1 = -0.8991272022039509355398013511e+5 + P2 = -0.2894211355989563807284660366e+4 + P3 = -0.2630563213397497062819489e+2 + Q0 = -0.6307673640497716991212077277e+6 + Q1 = 0.1521517378790019070696485176e+5 + Q2 = -0.173678953558233699533450911e+3 + ) + + sign := false + if x < 0 { + x = -x + sign = true + } + + var temp float64 + switch true { + case x > 21: + temp = Exp(x) / 2 + + case x > 0.5: + temp = (Exp(x) - Exp(-x)) / 2 + + default: + sq := x * x + temp = (((P3*sq+P2)*sq+P1)*sq + P0) * x + temp = temp / (((sq+Q2)*sq+Q1)*sq + Q0) + } + + if sign { + temp = -temp + } + return temp +} + +// Cosh returns the hyperbolic cosine of x. 
//
// Special cases are:
//	Cosh(±0) = 1
//	Cosh(±Inf) = +Inf
//	Cosh(NaN) = NaN
func Cosh(x float64) float64 {
	// The result depends only on |x|.
	if x < 0 {
		x = -x
	}
	// Above 21 the Exp(-x) term is left out of the sum.
	if x > 21 {
		return Exp(x) / 2
	}
	return (Exp(x) + Exp(-x)) / 2
}
diff --git a/src/math/sqrt.go b/src/math/sqrt.go new file mode 100644 index 000000000..fdc869992 --- /dev/null +++ b/src/math/sqrt.go @@ -0,0 +1,143 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package math

// The original C code and the long comment below are
// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
// came with this notice. The go code is a simplified
// version of the original C.
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
// __ieee754_sqrt(x)
// Return correctly rounded sqrt.
//           -----------------------------------------
//           | Use the hardware sqrt if you have one |
//           -----------------------------------------
// Method:
//   Bit by bit method using integer arithmetic. (Slow, but portable)
//   1. Normalization
//      Scale x to y in [1,4) with even powers of 2:
//      find an integer k such that 1 <= (y=x*2**(2k)) < 4, then
//              sqrt(x) = 2**k * sqrt(y)
//   2. Bit by bit computation
//      Let q_i = sqrt(y) truncated to i bits after the binary point
//      (q_0 = 1), and let
//
//          s_i = 2*q_i,    y_i = 2**(i+1) * (y - q_i**2).               (1)
//
//      To compute q_(i+1) from q_i, one checks whether
//
//          (q_i + 2**-(i+1))**2 <= y.                                   (2)
//
//      If (2) is false, then q_(i+1) = q_i; otherwise
//      q_(i+1) = q_i + 2**-(i+1).
//
//      With some algebraic manipulation, it is not difficult to see
//      that (2) is equivalent to
//
//          s_i + 2**-(i+1) <= y_i.                                      (3)
//
//      The advantage of (3) is that s_i and y_i can be computed by
//      the following recurrence formula:
//          if (3) is false
//
//          s_(i+1) = s_i,            y_(i+1) = y_i;                     (4)
//
//          otherwise,
//
//          s_(i+1) = s_i + 2**-i,    y_(i+1) = y_i - s_i - 2**-(i+1).   (5)
//
//      One may easily use induction to prove (4) and (5).
//      Note. Since the left hand side of (3) contains only i+2 bits,
//            it is not necessary to do a full (53-bit) comparison
//            in (3).
//   3. Final rounding
//      After generating the 53 bits result, we compute one more bit.
//      Together with the remainder, we can decide whether the
//      result is exact, bigger than 1/2ulp, or less than 1/2ulp
//      (it will never be equal to 1/2ulp).
//      The rounding mode can be detected by checking whether
//      huge + tiny is equal to huge, and whether huge - tiny is
//      equal to huge for some floating point number "huge" and "tiny".
//
//
// Notes:  Rounding mode detection omitted. The constants "mask", "shift",
// and "bias" are found in src/math/bits.go

// Sqrt returns the square root of x.
+// +// Special cases are: +// Sqrt(+Inf) = +Inf +// Sqrt(±0) = ±0 +// Sqrt(x < 0) = NaN +// Sqrt(NaN) = NaN +func Sqrt(x float64) float64 + +func sqrt(x float64) float64 { + // special cases + switch { + case x == 0 || IsNaN(x) || IsInf(x, 1): + return x + case x < 0: + return NaN() + } + ix := Float64bits(x) + // normalize x + exp := int((ix >> shift) & mask) + if exp == 0 { // subnormal x + for ix&1<<shift == 0 { + ix <<= 1 + exp-- + } + exp++ + } + exp -= bias // unbias exponent + ix &^= mask << shift + ix |= 1 << shift + if exp&1 == 1 { // odd exp, double x to make it even + ix <<= 1 + } + exp >>= 1 // exp = exp/2, exponent of square root + // generate sqrt(x) bit by bit + ix <<= 1 + var q, s uint64 // q = sqrt(x) + r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB + for r != 0 { + t := s + r + if t <= ix { + s = t + r + ix -= t + q += r + } + ix <<= 1 + r >>= 1 + } + // final rounding + if ix != 0 { // remainder, result not exact + q += q & 1 // round according to extra bit + } + ix = q>>1 + uint64(exp-1+bias)<<shift // significand + biased exponent + return Float64frombits(ix) +} + +func sqrtC(f float64, r *float64) { + *r = sqrt(f) +} diff --git a/src/math/sqrt_386.s b/src/math/sqrt_386.s new file mode 100644 index 000000000..5234a1e88 --- /dev/null +++ b/src/math/sqrt_386.s @@ -0,0 +1,12 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Sqrt(x float64) float64 +TEXT ·Sqrt(SB),NOSPLIT,$0 + FMOVD x+0(FP),F0 + FSQRT + FMOVDP F0,ret+8(FP) + RET diff --git a/src/math/sqrt_amd64.s b/src/math/sqrt_amd64.s new file mode 100644 index 000000000..443d83fe3 --- /dev/null +++ b/src/math/sqrt_amd64.s @@ -0,0 +1,11 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func Sqrt(x float64) float64 +TEXT ·Sqrt(SB),NOSPLIT,$0 + SQRTSD x+0(FP), X0 + MOVSD X0, ret+8(FP) + RET diff --git a/src/math/sqrt_amd64p32.s b/src/math/sqrt_amd64p32.s new file mode 100644 index 000000000..d83a286c2 --- /dev/null +++ b/src/math/sqrt_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "sqrt_amd64.s" diff --git a/src/math/sqrt_arm.s b/src/math/sqrt_arm.s new file mode 100644 index 000000000..4f9dc2e03 --- /dev/null +++ b/src/math/sqrt_arm.s @@ -0,0 +1,12 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func Sqrt(x float64) float64 +TEXT ·Sqrt(SB),NOSPLIT,$0 + MOVD x+0(FP),F0 + SQRTD F0,F0 + MOVD F0,ret+8(FP) + RET diff --git a/src/math/tan.go b/src/math/tan.go new file mode 100644 index 000000000..285eff1ab --- /dev/null +++ b/src/math/tan.go @@ -0,0 +1,130 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +/* + Floating-point tangent. +*/ + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/sin.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The go code is a simplified version of the original C. +// +// tan.c +// +// Circular tangent +// +// SYNOPSIS: +// +// double x, y, tan(); +// y = tan( x ); +// +// DESCRIPTION: +// +// Returns the circular tangent of the radian argument x. +// +// Range reduction is modulo pi/4. A rational function +// x + x**3 P(x**2)/Q(x**2) +// is employed in the basic interval [0, pi/4]. 
//
// ACCURACY:
//                      Relative error:
// arithmetic   domain     # trials      peak         rms
//    DEC      +-1.07e9      44000      4.1e-17     1.0e-17
//    IEEE     +-1.07e9      30000      2.9e-16     8.1e-17
//
// Partial loss of accuracy begins to occur at x = 2**30 = 1.074e9. The loss
// is not gradual, but jumps suddenly to about 1 part in 10e7. Results may
// be meaningless for x > 2**49 = 5.6e14.
// [Accuracy loss statement from sin.go comments.]
//
// Cephes Math Library Release 2.8: June, 2000
// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier
//
// The readme file at http://netlib.sandia.gov/cephes/ says:
//    Some software in this archive may be from the book _Methods and
// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
// International, 1989) or from the Cephes Mathematical Library, a
// commercial product. In either event, it is copyrighted by the author.
// What you see here may be used freely but it comes with no support or
// guarantee.
//
//   The two known misprints in the book are repaired here in the
// source listings for the gamma function and the incomplete beta
// integral.
//
//   Stephen L. Moshier
//   moshier@na-net.ornl.gov

// tan coefficients
//
// _tanP and _tanQ hold the numerator and denominator polynomials of the
// rational approximation tan(z) = z + z**3 * P(z**2)/Q(z**2), listed from
// the highest-degree term down to the constant term.
var _tanP = [...]float64{
	-1.30936939181383777646E4, // 0xc0c992d8d24f3f38
	1.15351664838587416140E6,  // 0x413199eca5fc9ddd
	-1.79565251976484877988E7, // 0xc1711fead3299176
}

// _tanQ[0] is the leading coefficient 1; tan does not read it because its
// Horner evaluation starts directly from zz + _tanQ[1].
var _tanQ = [...]float64{
	1.00000000000000000000E0,
	1.36812963470692954678E4,  //0x40cab8a5eeb36572
	-1.32089234440210967447E6, //0xc13427bc582abc96
	2.50083801823357915839E7,  //0x4177d98fc2ead8ef
	-5.38695755929454629881E7, //0xc189afe03cbe5a31
}

// Tan returns the tangent of the radian argument x.
+// +// Special cases are: +// Tan(±0) = ±0 +// Tan(±Inf) = NaN +// Tan(NaN) = NaN +func Tan(x float64) float64 + +func tan(x float64) float64 { + const ( + PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts + PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000, + PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170, + M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi + ) + // special cases + switch { + case x == 0 || IsNaN(x): + return x // return ±0 || NaN() + case IsInf(x, 0): + return NaN() + } + + // make argument positive but save the sign + sign := false + if x < 0 { + x = -x + sign = true + } + + j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle + y := float64(j) // integer part of x/(Pi/4), as float + + /* map zeros and singularities to origin */ + if j&1 == 1 { + j += 1 + y += 1 + } + + z := ((x - y*PI4A) - y*PI4B) - y*PI4C + zz := z * z + + if zz > 1e-14 { + y = z + z*(zz*(((_tanP[0]*zz)+_tanP[1])*zz+_tanP[2])/((((zz+_tanQ[1])*zz+_tanQ[2])*zz+_tanQ[3])*zz+_tanQ[4])) + } else { + y = z + } + if j&2 == 2 { + y = -1 / y + } + if sign { + y = -y + } + return y +} diff --git a/src/math/tan_386.s b/src/math/tan_386.s new file mode 100644 index 000000000..f1bdae153 --- /dev/null +++ b/src/math/tan_386.s @@ -0,0 +1,28 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "textflag.h" + +// func Tan(x float64) float64 +TEXT ·Tan(SB),NOSPLIT,$0 + FMOVD x+0(FP), F0 // F0=x + FPTAN // F0=1, F1=tan(x) if -2**63 < x < 2**63 + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE 4(PC) // jump if x outside range + FMOVDP F0, F0 // F0=tan(x) + FMOVDP F0, ret+8(FP) + RET + FLDPI // F0=Pi, F1=x + FADDD F0, F0 // F0=2*Pi, F1=x + FXCHD F0, F1 // F0=x, F1=2*Pi + FPREM1 // F0=reduced_x, F1=2*Pi + FSTSW AX // AX=status word + ANDW $0x0400, AX + JNE -3(PC) // jump if reduction incomplete + FMOVDP F0, F1 // F0=reduced_x + FPTAN // F0=1, F1=tan(reduced_x) + FMOVDP F0, F0 // F0=tan(reduced_x) + FMOVDP F0, ret+8(FP) + RET diff --git a/src/math/tan_amd64.s b/src/math/tan_amd64.s new file mode 100644 index 000000000..39aa08061 --- /dev/null +++ b/src/math/tan_amd64.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Tan(SB),NOSPLIT,$0 + JMP ·tan(SB) diff --git a/src/math/tan_amd64p32.s b/src/math/tan_amd64p32.s new file mode 100644 index 000000000..9b3f70de7 --- /dev/null +++ b/src/math/tan_amd64p32.s @@ -0,0 +1,5 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "tan_amd64.s" diff --git a/src/math/tan_arm.s b/src/math/tan_arm.s new file mode 100644 index 000000000..36c7c128f --- /dev/null +++ b/src/math/tan_arm.s @@ -0,0 +1,8 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +TEXT ·Tan(SB),NOSPLIT,$0 + B ·tan(SB) diff --git a/src/math/tanh.go b/src/math/tanh.go new file mode 100644 index 000000000..cf0ffa192 --- /dev/null +++ b/src/math/tanh.go @@ -0,0 +1,97 @@ +// Copyright 2009 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +// The original C code, the long comment, and the constants +// below were from http://netlib.sandia.gov/cephes/cmath/tanh.c, +// available from http://www.netlib.org/cephes/cmath.tgz. +// The Go code is a simplified version of the original C. +// tanh.c +// +// Hyperbolic tangent +// +// SYNOPSIS: +// +// double x, y, tanh(); +// +// y = tanh( x ); +// +// DESCRIPTION: +// +// Returns hyperbolic tangent of argument in the range MINLOG to MAXLOG. +// MAXLOG = 8.8029691931113054295988e+01 = log(2**127) +// MINLOG = -8.872283911167299960540e+01 = log(2**-128) +// +// A rational function is used for |x| < 0.625. The form +// x + x**3 P(x)/Q(x) of Cody & Waite is employed. +// Otherwise, +// tanh(x) = sinh(x)/cosh(x) = 1 - 2/(exp(2x) + 1). +// +// ACCURACY: +// +// Relative error: +// arithmetic domain # trials peak rms +// IEEE -2,2 30000 2.5e-16 5.8e-17 +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1989, 1992, 2000 by Stephen L. Moshier +// +// The readme file at http://netlib.sandia.gov/cephes/ says: +// Some software in this archive may be from the book _Methods and +// Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster +// International, 1989) or from the Cephes Mathematical Library, a +// commercial product. In either event, it is copyrighted by the author. +// What you see here may be used freely but it comes with no support or +// guarantee. +// +// The two known misprints in the book are repaired here in the +// source listings for the gamma function and the incomplete beta +// integral. +// +// Stephen L. 
Moshier +// moshier@na-net.ornl.gov +// + +// tanhP holds the numerator coefficients of the rational approximation +// that Tanh uses for small arguments. +var tanhP = [...]float64{ + -9.64399179425052238628E-1, + -9.92877231001918586564E1, + -1.61468768441708447952E3, +} +// tanhQ holds the denominator coefficients of the same approximation; +// the leading coefficient 1 is implicit. +var tanhQ = [...]float64{ + 1.12811678491632931402E2, + 2.23548839060100448583E3, + 4.84406305325125486048E3, +} + +// Tanh returns the hyperbolic tangent of x. +// +// Special cases are: +// Tanh(±0) = ±0 +// Tanh(±Inf) = ±1 +// Tanh(NaN) = NaN +func Tanh(x float64) float64 { + const MAXLOG = 8.8029691931113054295988e+01 // log(2**127) + z := Abs(x) + switch { + case z > 0.5*MAXLOG: // |x| this large: return exactly ±1 + if x < 0 { + return -1 + } + return 1 + case z >= 0.625: // tanh(|x|) = 1 - 2/(exp(2|x|) + 1), then the sign of x is applied + s := Exp(2 * z) + z = 1 - 2/(s+1) + if x < 0 { + z = -z + } + default: // |x| < 0.625 (NaN also lands here): rational approximation in s = x*x + if x == 0 { + return x // returns ±0 unchanged + } + s := x * x + z = x + x*s*((tanhP[0]*s+tanhP[1])*s+tanhP[2])/(((s+tanhQ[0])*s+tanhQ[1])*s+tanhQ[2]) + } + return z +} diff --git a/src/math/unsafe.go b/src/math/unsafe.go new file mode 100644 index 000000000..5ae67420f --- /dev/null +++ b/src/math/unsafe.go @@ -0,0 +1,21 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +import "unsafe" + +// Float32bits returns the IEEE 754 binary representation of f. +func Float32bits(f float32) uint32 { return *(*uint32)(unsafe.Pointer(&f)) } + +// Float32frombits returns the floating point number corresponding +// to the IEEE 754 binary representation b. +func Float32frombits(b uint32) float32 { return *(*float32)(unsafe.Pointer(&b)) } + +// Float64bits returns the IEEE 754 binary representation of f. +func Float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) } + +// Float64frombits returns the floating point number corresponding +// to the IEEE 754 binary representation b. +func Float64frombits(b uint64) float64 { return *(*float64)(unsafe.Pointer(&b)) } |