blob: 96d23f38d1f9d7099c12c8784c0f55d7b18d1958 [file] [log] [blame]
Russ Cox60be4a22014-05-19 22:57:59 -04001// run
2
3// Check conversion of constant to float32/float64 near min/max boundaries.
4
5// Copyright 2014 The Go Authors. All rights reserved.
6// Use of this source code is governed by a BSD-style
7// license that can be found in the LICENSE file.
8
9package main
10
11import (
12 "fmt"
Russ Cox2de449e2014-05-21 17:12:06 -040013 "math"
Russ Cox60be4a22014-05-19 22:57:59 -040014)
15
// The largest exact float32 is f₁ = (1+(1-2⁻²³))×2¹²⁷ = (1-2⁻²⁴)×2¹²⁸ = 2¹²⁸ - 2¹⁰⁴.
// The next float32 would be f₂ = (1+1)×2¹²⁷ = 1×2¹²⁸, except that exponent is out of range.
// Float32 conversion rounds to the nearest float32, rounding to even mantissa:
// between f₁ and f₂, values closer to f₁ round to f₁ and values closer to f₂ are rejected as out of range.
// f₁ is an odd mantissa, so the halfway point (f₁+f₂)/2 rounds to f₂ and is rejected.
// The halfway point is (f₁+f₂)/2 = 2¹²⁸ - 2¹⁰³.
//
// The same is true of float64, with different constants: s/24/53/ and s/128/1024/
// (so f₁ = 2¹⁰²⁴ - 2⁹⁷¹ and the halfway point is 2¹⁰²⁴ - 2⁹⁷⁰).
24
// Untyped floating-point constants from which the test values are built.
// None of them is given a type, so expressions over them are evaluated by
// the compiler as constant expressions; a value only becomes a float32 or
// float64 at an explicit conversion.
const (
	// Powers of two. The 1.0 factor makes each an untyped float constant
	// rather than an untyped integer constant.
	two24   = 1.0 * (1 << 24)
	two53   = 1.0 * (1 << 53)
	two64   = 1.0 * (1 << 64)
	two128  = two64 * two64
	two256  = two128 * two128
	two512  = two256 * two256
	two768  = two512 * two256
	two1024 = two512 * two512

	// ulp32 is 2¹²⁸/2²⁴ = 2¹⁰⁴; main checks that it equals the gap between
	// the float32 values with bit patterns 0x7f7fffff and 0x7f7ffffe.
	// max32 is 2¹²⁸ - 2¹⁰⁴.
	ulp32 = two128 / two24
	max32 = two128 - ulp32

	// ulp64 is 2¹⁰²⁴/2⁵³ = 2⁹⁷¹; main checks that it equals the gap between
	// the float64 values with bit patterns 0x7fefffffffffffff and
	// 0x7feffffffffffffe. max64 is 2¹⁰²⁴ - 2⁹⁷¹.
	ulp64 = two1024 / two53
	max64 = two1024 - ulp64
)
41
// cvt is the table of conversion checks. For every entry, main requires that
// exact has the bit pattern given by bits and that approx is equal to exact.
var cvt = []struct {
	bits   uint64      // expected bit pattern of exact (float32 patterns are held zero-extended); keeps us honest
	exact  interface{} // reference value: conversion of a constant written without a sub-ulp perturbation
	approx interface{} // conversion of a nearby constant that must yield the same value as exact
	text   string      // source text of the approx constant expression, printed in failure messages
}{
	// float32, positive: constants around max32 - ulp32 and max32.
	// The first four must convert to max32 - ulp32 (0x7f7ffffe), the rest to max32 (0x7f7fffff).
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 - ulp32/2), "max32 - ulp32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32), "max32 - ulp32"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32/2), "max32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 + ulp32/2), "max32 - ulp32 + ulp32/2"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + ulp32/two64), "max32 - ulp32 + ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + ulp32/two64), "max32 - ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32), "max32"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - ulp32/two64), "max32 + ulp32/2 - ulp32/two64"},

	// float32, negative: the same eight constants negated (sign bit set in bits).
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 - ulp32/2)), "-(max32 - ulp32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32)), "-(max32 - ulp32)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32/2)), "-(max32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 + ulp32/2)), "-(max32 - ulp32 + ulp32/2)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + ulp32/two64)), "-(max32 - ulp32 + ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + ulp32/two64)), "-(max32 - ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32)), "-(max32)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - ulp32/two64)), "-(max32 + ulp32/2 - ulp32/two64)"},

	// These are required to work: according to the Go spec, the internal float mantissa must be at least 256 bits,
	// and these expressions can be represented exactly with a 256-bit mantissa.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1), "max32 - ulp32 + ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1), "max32 - ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1), "max32 + ulp32/2 - 1"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1)), "-(max32 - ulp32 + ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1)), "-(max32 - ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1)), "-(max32 + ulp32/2 - 1)"},

	// The same float32 halfway points, perturbed by 1/two128 instead of 1.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1/two128), "max32 - ulp32 + ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1/two128), "max32 - ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1/two128), "max32 + ulp32/2 - 1/two128"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1/two128)), "-(max32 - ulp32 + ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1/two128)), "-(max32 - ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1/two128)), "-(max32 + ulp32/2 - 1/two128)"},

	// float64, positive: constants around max64 - ulp64 and max64.
	// The first four must convert to max64 - ulp64 (0x7feffffffffffffe), the rest to max64 (0x7fefffffffffffff).
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 - ulp64/2), "max64 - ulp64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64), "max64 - ulp64"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64/2), "max64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 + ulp64/2), "max64 - ulp64 + ulp64/2"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + ulp64/two64), "max64 - ulp64 + ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + ulp64/two64), "max64 - ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64), "max64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - ulp64/two64), "max64 + ulp64/2 - ulp64/two64"},

	// float64, negative: the same eight constants negated (sign bit set in bits).
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 - ulp64/2)), "-(max64 - ulp64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64)), "-(max64 - ulp64)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64/2)), "-(max64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 + ulp64/2)), "-(max64 - ulp64 + ulp64/2)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + ulp64/two64)), "-(max64 - ulp64 + ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + ulp64/two64)), "-(max64 - ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64)), "-(max64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - ulp64/two64)), "-(max64 + ulp64/2 - ulp64/two64)"},

	// These are required to work.
	// The mantissas are exactly 256 bits.
	// max64 is just below 2¹⁰²⁴ so the bottom bit we can use is 2⁷⁶⁸.
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + two768), "max64 - ulp64 + ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + two768), "max64 - ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - two768), "max64 + ulp64/2 - two768"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + two768)), "-(max64 - ulp64 + ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + two768)), "-(max64 - ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - two768)), "-(max64 + ulp64/2 - two768)"},
}
111
// bugged reports whether the "BUG" banner has already been printed.
var bugged bool

// bug prints the line "BUG" the first time it is called and does nothing
// on later calls, so the banner appears at most once per run.
func bug() {
	if bugged {
		return
	}
	bugged = true
	fmt.Println("BUG")
}
120
121func main() {
Russ Cox2de449e2014-05-21 17:12:06 -0400122 u64 := math.Float64frombits(0x7fefffffffffffff) - math.Float64frombits(0x7feffffffffffffe)
123 if ulp64 != u64 {
124 bug()
125 fmt.Printf("ulp64=%g, want %g", ulp64, u64)
126 }
127
128 u32 := math.Float32frombits(0x7f7fffff) - math.Float32frombits(0x7f7ffffe)
129 if ulp32 != u32 {
130 bug()
131 fmt.Printf("ulp32=%g, want %g", ulp32, u32)
132 }
133
134 for _, c := range cvt {
135 if bits(c.exact) != c.bits {
136 bug()
137 fmt.Printf("%s: inconsistent table: bits=%#x (%g) but exact=%g (%#x)\n", c.text, c.bits, fromBits(c.bits, c.exact), c.exact, bits(c.exact))
138 }
139 if c.approx != c.exact || bits(c.approx) != c.bits {
140 bug()
141 fmt.Printf("%s: have %g (%#x) want %g (%#x)\n", c.text, c.approx, bits(c.approx), c.exact, c.bits)
Russ Cox60be4a22014-05-19 22:57:59 -0400142 }
143 }
144}
Russ Cox2de449e2014-05-21 17:12:06 -0400145
// bits returns the IEEE 754 bit pattern of x as a uint64 when x holds a
// float32 (pattern zero-extended from 32 bits) or a float64. For any other
// dynamic type it returns the int 0.
func bits(x interface{}) interface{} {
	if f32, ok := x.(float32); ok {
		return uint64(math.Float32bits(f32))
	}
	if f64, ok := x.(float64); ok {
		return math.Float64bits(f64)
	}
	return 0
}
155
// fromBits reinterprets the bit pattern b as a floating-point value of the
// same dynamic type as x: a float32 (using the low 32 bits of b) when x holds
// a float32, a float64 when x holds a float64. Only the type of x is used,
// not its value. For any other type it returns the string "?".
func fromBits(b uint64, x interface{}) interface{} {
	if _, ok := x.(float32); ok {
		return math.Float32frombits(uint32(b))
	}
	if _, ok := x.(float64); ok {
		return math.Float64frombits(b)
	}
	return "?"
}