Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 1 | // Copyright 2010 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package math |
| 6 | |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 7 | // The original C code, the long comment, and the constants |
| 8 | // below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c |
Brad Fitzpatrick | 5fea2cc | 2016-03-01 23:21:55 +0000 | [diff] [blame] | 9 | // and came with this notice. The go code is a simplified |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 10 | // version of the original C. |
| 11 | // |
| 12 | // ==================================================== |
| 13 | // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |
| 14 | // |
| 15 | // Developed at SunPro, a Sun Microsystems, Inc. business. |
| 16 | // Permission to use, copy, modify, and distribute this |
| 17 | // software is freely granted, provided that this notice |
| 18 | // is preserved. |
| 19 | // ==================================================== |
| 20 | // |
| 21 | // expm1(x) |
| 22 | // Returns exp(x)-1, the exponential of x minus 1. |
| 23 | // |
| 24 | // Method |
| 25 | // 1. Argument reduction: |
| 26 | // Given x, find r and integer k such that |
| 27 | // |
| 28 | // x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 |
| 29 | // |
| 30 | // Here a correction term c will be computed to compensate |
| 31 | // the error in r when rounded to a floating-point number. |
| 32 | // |
| 33 | // 2. Approximating expm1(r) by a special rational function on |
| 34 | // the interval [0,0.34658]: |
| 35 | // Since |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 36 | // r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ... |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 37 | // we define R1(r*r) by |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 38 | // r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r) |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 39 | // That is, |
| 40 | // R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) |
| 41 | // = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 42 | // = 1 - r**2/60 + r**4/2520 - r**6/100800 + ... |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 43 | // We use a special Reme algorithm on [0,0.347] to generate |
| 44 | // a polynomial of degree 5 in r*r to approximate R1. The |
| 45 | // maximum error of this polynomial approximation is bounded |
| 46 | // by 2**-61. In other words, |
| 47 | // R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 |
| 48 | // where Q1 = -1.6666666666666567384E-2, |
| 49 | // Q2 = 3.9682539681370365873E-4, |
| 50 | // Q3 = -9.9206344733435987357E-6, |
| 51 | // Q4 = 2.5051361420808517002E-7, |
| 52 | // Q5 = -6.2843505682382617102E-9; |
| 53 | // (where z=r*r, and the values of Q1 to Q5 are listed below) |
| 54 | // with error bounded by |
| 55 | // | 5 | -61 |
| 56 | // | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 |
| 57 | // | | |
| 58 | // |
| 59 | // expm1(r) = exp(r)-1 is then computed by the following |
| 60 | // specific way which minimize the accumulation rounding error: |
| 61 | // 2 3 |
| 62 | // r r [ 3 - (R1 + R1*r/2) ] |
| 63 | // expm1(r) = r + --- + --- * [--------------------] |
| 64 | // 2 2 [ 6 - r*(3 - R1*r/2) ] |
| 65 | // |
| 66 | // To compensate the error in the argument reduction, we use |
| 67 | // expm1(r+c) = expm1(r) + c + expm1(r)*c |
| 68 | // ~ expm1(r) + c + r*c |
| 69 | // Thus c+r*c will be added in as the correction terms for |
| 70 | // expm1(r+c). Now rearrange the term to avoid optimization |
| 71 | // screw up: |
| 72 | // ( 2 2 ) |
| 73 | // ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) |
| 74 | // expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) |
| 75 | // ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) |
| 76 | // ( ) |
| 77 | // |
| 78 | // = r - E |
| 79 | // 3. Scale back to obtain expm1(x): |
| 80 | // From step 1, we have |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 81 | // expm1(x) = either 2**k*[expm1(r)+1] - 1 |
| 82 | // = or 2**k*[expm1(r) + (1-2**-k)] |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 83 | // 4. Implementation notes: |
| 84 | // (A). To save one multiplication, we scale the coefficient Qi |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 85 | // to Qi*2**i, and replace z by (x**2)/2. |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 86 | // (B). To achieve maximum accuracy, we compute expm1(x) by |
| 87 | // (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) |
| 88 | // (ii) if k=0, return r-E |
| 89 | // (iii) if k=-1, return 0.5*(r-E)-0.5 |
| 90 | // (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) |
| 91 | // else return 1.0+2.0*(r-E); |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 92 | // (v) if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1) |
| 93 | // (vi) if k <= 20, return 2**k((1-2**-k)-(E-r)), else |
| 94 | // (vii) return 2**k(1-((E+2**-k)-r)) |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 95 | // |
| 96 | // Special cases: |
| 97 | // expm1(INF) is INF, expm1(NaN) is NaN; |
| 98 | // expm1(-INF) is -1, and |
| 99 | // for finite argument, only expm1(0)=0 is exact. |
| 100 | // |
| 101 | // Accuracy: |
| 102 | // according to an error analysis, the error is always less than |
| 103 | // 1 ulp (unit in the last place). |
| 104 | // |
| 105 | // Misc. info. |
| 106 | // For IEEE double |
| 107 | // if x > 7.09782712893383973096e+02 then expm1(x) overflow |
| 108 | // |
| 109 | // Constants: |
| 110 | // The hexadecimal values are the intended ones for the following |
| 111 | // constants. The decimal values may be used, provided that the |
| 112 | // compiler will convert from decimal to binary accurately enough |
| 113 | // to produce the hexadecimal values shown. |
| 114 | // |
| 115 | |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 116 | // Expm1 returns e**x - 1, the base-e exponential of x minus 1. |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 117 | // It is more accurate than Exp(x) - 1 when x is near zero. |
| 118 | // |
| 119 | // Special cases are: |
Charles L. Dorian | c465331 | 2010-02-09 13:33:12 -0800 | [diff] [blame] | 120 | // Expm1(+Inf) = +Inf |
| 121 | // Expm1(-Inf) = -1 |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 122 | // Expm1(NaN) = NaN |
| 123 | // Very large values overflow to -1 or +Inf. |
Russ Cox | dd8dc6f | 2011-12-13 15:20:12 -0500 | [diff] [blame] | 124 | func Expm1(x float64) float64 |
| 125 | |
| 126 | func expm1(x float64) float64 { |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 127 | const ( |
| 128 | Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF |
| 129 | Ln2X56 = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1 |
| 130 | Ln2HalfX3 = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73 |
| 131 | Ln2Half = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef |
| 132 | Ln2Hi = 6.93147180369123816490e-01 // 0x3fe62e42fee00000 |
| 133 | Ln2Lo = 1.90821492927058770002e-10 // 0x3dea39ef35793c76 |
| 134 | InvLn2 = 1.44269504088896338700e+00 // 0x3ff71547652b82fe |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 135 | Tiny = 1.0 / (1 << 54) // 2**-54 = 0x3c90000000000000 |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 136 | // scaled coefficients related to expm1 |
| 137 | Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4 |
| 138 | Q2 = 1.58730158725481460165e-03 // 0x3F5A01A019FE5585 |
| 139 | Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7 |
| 140 | Q4 = 4.00821782732936239552e-06 // 0x3ED0CFCA86E65239 |
| 141 | Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D |
| 142 | ) |
| 143 | |
| 144 | // special cases |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 145 | switch { |
Luuk van Dijk | 8dd3de4 | 2012-02-01 16:08:31 +0100 | [diff] [blame] | 146 | case IsInf(x, 1) || IsNaN(x): |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 147 | return x |
Luuk van Dijk | 8dd3de4 | 2012-02-01 16:08:31 +0100 | [diff] [blame] | 148 | case IsInf(x, -1): |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 149 | return -1 |
| 150 | } |
| 151 | |
| 152 | absx := x |
| 153 | sign := false |
| 154 | if x < 0 { |
| 155 | absx = -absx |
| 156 | sign = true |
| 157 | } |
| 158 | |
| 159 | // filter out huge argument |
| 160 | if absx >= Ln2X56 { // if |x| >= 56 * ln2 |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 161 | if sign { |
Charlie Dorian | 8b22109 | 2015-06-30 20:14:30 -0400 | [diff] [blame] | 162 | return -1 // x < -56*ln2, return -1 |
| 163 | } |
| 164 | if absx >= Othreshold { // if |x| >= 709.78... |
| 165 | return Inf(1) |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 166 | } |
| 167 | } |
| 168 | |
| 169 | // argument reduction |
| 170 | var c float64 |
| 171 | var k int |
| 172 | if absx > Ln2Half { // if |x| > 0.5 * ln2 |
| 173 | var hi, lo float64 |
| 174 | if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2 |
| 175 | if !sign { |
| 176 | hi = x - Ln2Hi |
| 177 | lo = Ln2Lo |
| 178 | k = 1 |
| 179 | } else { |
| 180 | hi = x + Ln2Hi |
| 181 | lo = -Ln2Lo |
| 182 | k = -1 |
| 183 | } |
| 184 | } else { |
| 185 | if !sign { |
| 186 | k = int(InvLn2*x + 0.5) |
| 187 | } else { |
| 188 | k = int(InvLn2*x - 0.5) |
| 189 | } |
| 190 | t := float64(k) |
| 191 | hi = x - t*Ln2Hi // t * Ln2Hi is exact here |
| 192 | lo = t * Ln2Lo |
| 193 | } |
| 194 | x = hi - lo |
| 195 | c = (hi - x) - lo |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 196 | } else if absx < Tiny { // when |x| < 2**-54, return x |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 197 | return x |
| 198 | } else { |
| 199 | k = 0 |
| 200 | } |
| 201 | |
| 202 | // x is now in primary range |
| 203 | hfx := 0.5 * x |
| 204 | hxs := x * hfx |
| 205 | r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))) |
| 206 | t := 3 - r1*hfx |
| 207 | e := hxs * ((r1 - t) / (6.0 - x*t)) |
| 208 | if k != 0 { |
| 209 | e = (x*(e-c) - c) |
| 210 | e -= hxs |
| 211 | switch { |
| 212 | case k == -1: |
| 213 | return 0.5*(x-e) - 0.5 |
| 214 | case k == 1: |
| 215 | if x < -0.25 { |
| 216 | return -2 * (e - (x + 0.5)) |
| 217 | } |
| 218 | return 1 + 2*(x-e) |
| 219 | case k <= -2 || k > 56: // suffice to return exp(x)-1 |
| 220 | y := 1 - (e - x) |
| 221 | y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent |
| 222 | return y - 1 |
| 223 | } |
| 224 | if k < 20 { |
Charles L. Dorian | 3c3e68b | 2010-04-09 14:37:33 -0700 | [diff] [blame] | 225 | t := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // t=1-2**-k |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 226 | y := t - (e - x) |
| 227 | y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent |
| 228 | return y |
| 229 | } |
Matthew Dempsky | 7832c82 | 2015-10-29 19:16:20 -0700 | [diff] [blame] | 230 | t := Float64frombits(uint64(0x3ff-k) << 52) // 2**-k |
Charles L. Dorian | a0690b6 | 2010-02-01 22:21:40 -0800 | [diff] [blame] | 231 | y := x - (e + t) |
| 232 | y += 1 |
| 233 | y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent |
| 234 | return y |
| 235 | } |
| 236 | return x - (x*e - hxs) // c is 0 |
| 237 | } |