// run

// Check conversion of constant to float32/float64 near min/max boundaries.

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 8 | |
| 9 | package main |
| 10 | |
| 11 | import ( |
| 12 | "fmt" |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 13 | "math" |
Russ Cox | 60be4a2 | 2014-05-19 22:57:59 -0400 | [diff] [blame] | 14 | ) |
| 15 | |
// The largest exact float32 is f₁ = (1+(1-2⁻²³))×2¹²⁷ = (1-2⁻²⁴)×2¹²⁸ = 2¹²⁸ - 2¹⁰⁴.
// The next float32 would be f₂ = (1+1)×2¹²⁷ = 1×2¹²⁸, except that exponent is out of range.
// Float32 conversion rounds to the nearest float32, rounding to even mantissa:
// between f₁ and f₂, values closer to f₁ round to f₁ and values closer to f₂ are rejected as out of range.
// f₁ is an odd mantissa, so the halfway point (f₁+f₂)/2 rounds to f₂ and is rejected.
// The halfway point is (f₁+f₂)/2 = 2¹²⁸ - 2¹⁰³.
//
// The same is true of float64, with different constants: s/24/53/ and s/128/1024/.
| 24 | |
// Exact powers of two and the float32/float64 boundary values derived from
// them.
//
// Every constant here is an untyped floating-point constant: the leading
// "1.0 *" forces the floating-point kind, so later expressions such as
// ulp32/2 or 1/two128 are exact fractional values rather than truncating
// integer divisions. The values are evaluated by the compiler with its
// arbitrary-precision constant arithmetic, not with float32/float64.
const (
	// two24 is 2²⁴; a float32 mantissa has 24 bits (including the implicit one).
	two24 = 1.0 * (1 << 24)
	// two53 is 2⁵³; a float64 mantissa has 53 bits (including the implicit one).
	two53 = 1.0 * (1 << 53)
	// two64 is 2⁶⁴, the building block for the larger powers below.
	two64   = 1.0 * (1 << 64)
	two128  = two64 * two64   // 2¹²⁸
	two256  = two128 * two128 // 2²⁵⁶
	two512  = two256 * two256 // 2⁵¹²
	two768  = two512 * two256 // 2⁷⁶⁸
	two1024 = two512 * two512 // 2¹⁰²⁴

	// ulp32 is 2¹⁰⁴, the spacing between adjacent float32 values just below
	// 2¹²⁸, and max32 = 2¹²⁸ - 2¹⁰⁴ is the largest finite float32.
	ulp32 = two128 / two24
	max32 = two128 - ulp32

	// ulp64 is 2⁹⁷¹, the spacing between adjacent float64 values just below
	// 2¹⁰²⁴, and max64 = 2¹⁰²⁴ - 2⁹⁷¹ is the largest finite float64.
	ulp64 = two1024 / two53
	max64 = two1024 - ulp64
)
| 41 | |
Russ Cox | 60be4a2 | 2014-05-19 22:57:59 -0400 | [diff] [blame] | 42 | var cvt = []struct { |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 43 | bits uint64 // keep us honest |
| 44 | exact interface{} |
| 45 | approx interface{} |
| 46 | text string |
Russ Cox | 60be4a2 | 2014-05-19 22:57:59 -0400 | [diff] [blame] | 47 | }{ |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 48 | // 0 |
| 49 | {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 - ulp32/2), "max32 - ulp32 - ulp32/2"}, |
| 50 | {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32), "max32 - ulp32"}, |
| 51 | {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32/2), "max32 - ulp32/2"}, |
| 52 | {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 + ulp32/2), "max32 - ulp32 + ulp32/2"}, |
| 53 | {0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + ulp32/two64), "max32 - ulp32 + ulp32/2 + ulp32/two64"}, |
| 54 | {0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + ulp32/two64), "max32 - ulp32/2 + ulp32/two64"}, |
| 55 | {0x7f7fffff, float32(max32), float32(max32), "max32"}, |
| 56 | {0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - ulp32/two64), "max32 + ulp32/2 - ulp32/two64"}, |
Robert Griesemer | 765b4a3 | 2014-05-21 08:53:47 -0700 | [diff] [blame] | 57 | |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 58 | {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 - ulp32/2)), "-(max32 - ulp32 - ulp32/2)"}, |
| 59 | {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32)), "-(max32 - ulp32)"}, |
| 60 | {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32/2)), "-(max32 - ulp32/2)"}, |
| 61 | {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 + ulp32/2)), "-(max32 - ulp32 + ulp32/2)"}, |
| 62 | {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + ulp32/two64)), "-(max32 - ulp32 + ulp32/2 + ulp32/two64)"}, |
| 63 | {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + ulp32/two64)), "-(max32 - ulp32/2 + ulp32/two64)"}, |
| 64 | {0xff7fffff, float32(-(max32)), float32(-(max32)), "-(max32)"}, |
| 65 | {0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - ulp32/two64)), "-(max32 + ulp32/2 - ulp32/two64)"}, |
Robert Griesemer | 765b4a3 | 2014-05-21 08:53:47 -0700 | [diff] [blame] | 66 | |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 67 | // These are required to work: according to the Go spec, the internal float mantissa must be at least 256 bits, |
| 68 | // and these expressions can be represented exactly with a 256-bit mantissa. |
| 69 | {0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1), "max32 - ulp32 + ulp32/2 + 1"}, |
| 70 | {0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1), "max32 - ulp32/2 + 1"}, |
| 71 | {0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1), "max32 + ulp32/2 - 1"}, |
| 72 | {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1)), "-(max32 - ulp32 + ulp32/2 + 1)"}, |
| 73 | {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1)), "-(max32 - ulp32/2 + 1)"}, |
| 74 | {0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1)), "-(max32 + ulp32/2 - 1)"}, |
Robert Griesemer | 765b4a3 | 2014-05-21 08:53:47 -0700 | [diff] [blame] | 75 | |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 76 | {0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1/two128), "max32 - ulp32 + ulp32/2 + 1/two128"}, |
| 77 | {0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1/two128), "max32 - ulp32/2 + 1/two128"}, |
| 78 | {0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1/two128), "max32 + ulp32/2 - 1/two128"}, |
| 79 | {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1/two128)), "-(max32 - ulp32 + ulp32/2 + 1/two128)"}, |
| 80 | {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1/two128)), "-(max32 - ulp32/2 + 1/two128)"}, |
| 81 | {0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1/two128)), "-(max32 + ulp32/2 - 1/two128)"}, |
Robert Griesemer | 765b4a3 | 2014-05-21 08:53:47 -0700 | [diff] [blame] | 82 | |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 83 | {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 - ulp64/2), "max64 - ulp64 - ulp64/2"}, |
| 84 | {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64), "max64 - ulp64"}, |
| 85 | {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64/2), "max64 - ulp64/2"}, |
| 86 | {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 + ulp64/2), "max64 - ulp64 + ulp64/2"}, |
| 87 | {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + ulp64/two64), "max64 - ulp64 + ulp64/2 + ulp64/two64"}, |
| 88 | {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + ulp64/two64), "max64 - ulp64/2 + ulp64/two64"}, |
| 89 | {0x7fefffffffffffff, float64(max64), float64(max64), "max64"}, |
| 90 | {0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - ulp64/two64), "max64 + ulp64/2 - ulp64/two64"}, |
| 91 | |
| 92 | {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 - ulp64/2)), "-(max64 - ulp64 - ulp64/2)"}, |
| 93 | {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64)), "-(max64 - ulp64)"}, |
| 94 | {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64/2)), "-(max64 - ulp64/2)"}, |
| 95 | {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 + ulp64/2)), "-(max64 - ulp64 + ulp64/2)"}, |
| 96 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + ulp64/two64)), "-(max64 - ulp64 + ulp64/2 + ulp64/two64)"}, |
| 97 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + ulp64/two64)), "-(max64 - ulp64/2 + ulp64/two64)"}, |
| 98 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64)), "-(max64)"}, |
| 99 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - ulp64/two64)), "-(max64 + ulp64/2 - ulp64/two64)"}, |
| 100 | |
| 101 | // These are required to work. |
| 102 | // The mantissas are exactly 256 bits. |
| 103 | // max64 is just below 2¹⁰²⁴ so the bottom bit we can use is 2⁷⁶⁸. |
| 104 | {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + two768), "max64 - ulp64 + ulp64/2 + two768"}, |
| 105 | {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + two768), "max64 - ulp64/2 + two768"}, |
| 106 | {0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - two768), "max64 + ulp64/2 - two768"}, |
| 107 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + two768)), "-(max64 - ulp64 + ulp64/2 + two768)"}, |
| 108 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + two768)), "-(max64 - ulp64/2 + two768)"}, |
| 109 | {0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - two768)), "-(max64 + ulp64/2 - two768)"}, |
| 110 | } |
| 111 | |
// bugged records whether the "BUG" banner has already been printed.
var bugged = false

// bug prints "BUG" on its own line the first time it is called and does
// nothing on later calls, so the banner appears once ahead of however many
// failure details follow.
func bug() {
	if !bugged {
		bugged = true
		fmt.Println("BUG")
	}
}
| 120 | |
| 121 | func main() { |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 122 | u64 := math.Float64frombits(0x7fefffffffffffff) - math.Float64frombits(0x7feffffffffffffe) |
| 123 | if ulp64 != u64 { |
| 124 | bug() |
| 125 | fmt.Printf("ulp64=%g, want %g", ulp64, u64) |
| 126 | } |
| 127 | |
| 128 | u32 := math.Float32frombits(0x7f7fffff) - math.Float32frombits(0x7f7ffffe) |
| 129 | if ulp32 != u32 { |
| 130 | bug() |
| 131 | fmt.Printf("ulp32=%g, want %g", ulp32, u32) |
| 132 | } |
| 133 | |
| 134 | for _, c := range cvt { |
| 135 | if bits(c.exact) != c.bits { |
| 136 | bug() |
| 137 | fmt.Printf("%s: inconsistent table: bits=%#x (%g) but exact=%g (%#x)\n", c.text, c.bits, fromBits(c.bits, c.exact), c.exact, bits(c.exact)) |
| 138 | } |
| 139 | if c.approx != c.exact || bits(c.approx) != c.bits { |
| 140 | bug() |
| 141 | fmt.Printf("%s: have %g (%#x) want %g (%#x)\n", c.text, c.approx, bits(c.approx), c.exact, c.bits) |
Russ Cox | 60be4a2 | 2014-05-19 22:57:59 -0400 | [diff] [blame] | 142 | } |
| 143 | } |
| 144 | } |
Russ Cox | 2de449e | 2014-05-21 17:12:06 -0400 | [diff] [blame] | 145 | |
// bits returns the IEEE 754 bit pattern of x, boxed in an interface value.
//
// For a float32 the 32-bit pattern is widened to uint64; for a float64 the
// 64-bit pattern is returned directly, so both results have dynamic type
// uint64 and can be compared against a uint64 bit pattern.
//
// For any other dynamic type the result is the constant 0, which is boxed as
// an int. An int never compares equal to a uint64 inside an interface, so a
// comparison against a uint64 pattern fails for unsupported types.
func bits(x interface{}) interface{} {
	switch x := x.(type) {
	case float32:
		return uint64(math.Float32bits(x))
	case float64:
		return math.Float64bits(x)
	}
	return 0
}
| 155 | |
// fromBits interprets the bit pattern b as a floating-point value of the same
// type as x; only the dynamic type of x is used, not its value.
//
// When x is a float32, b is truncated to its low 32 bits and decoded as a
// float32; when x is a float64, all 64 bits are decoded as a float64. For any
// other type it returns the string "?".
func fromBits(b uint64, x interface{}) interface{} {
	switch x.(type) {
	case float32:
		return math.Float32frombits(uint32(b))
	case float64:
		return math.Float64frombits(b)
	}
	return "?"
}