Blame - test/float_lit2.go - go

blob: 96d23f38d1f9d7099c12c8784c0f55d7b18d1958 [file] [log] [blame]

Russ Cox	60be4a2	2014-05-19 22:57:59 -0400	[diff] [blame]	1	// run
				2
				3	// Check conversion of constant to float32/float64 near min/max boundaries.
				4
				5	// Copyright 2014 The Go Authors. All rights reserved.
				6	// Use of this source code is governed by a BSD-style
				7	// license that can be found in the LICENSE file.
				8
				9	package main
				10
				11	import (
				12	"fmt"
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	13	"math"
Russ Cox	60be4a2	2014-05-19 22:57:59 -0400	[diff] [blame]	14	)
				15
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	16	// The largest exact float32 is f₁ = (1+(1-2⁻²³))×2¹²⁷ = (1-2⁻²⁴)×2¹²⁸ = 2¹²⁸ - 2¹⁰⁴.
				17	// The next float32 would be f₂ = (1+1)×2¹²⁷ = 1×2¹²⁸, except that exponent is out of range.
				18	// Float32 conversion rounds to the nearest float32, rounding to even mantissa:
Russ Cox	cab5440	2014-05-21 17:19:12 -0400	[diff] [blame]	19	// between f₁ and f₂, values closer to f₁ round to f₁ and values closer to f₂ are rejected as out of range.
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	20	// f₁ is an odd mantissa, so the halfway point (f₁+f₂)/2 rounds to f₂ and is rejected.
Russ Cox	cab5440	2014-05-21 17:19:12 -0400	[diff] [blame]	21	// The halfway point is (f₁+f₂)/2 = 2¹²⁸ - 2¹⁰³.
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	22	//
				23	// The same is true of float64, with different constants: s/24/53/ and s/128/1024/.
				24
Robert Griesemer	765b4a3	2014-05-21 08:53:47 -0700	[diff] [blame]	25	const (
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	26	two24 = 1.0 * (1 << 24)
				27	two53 = 1.0 * (1 << 53)
				28	two64 = 1.0 * (1 << 64)
				29	two128 = two64 * two64
				30	two256 = two128 * two128
				31	two512 = two256 * two256
				32	two768 = two512 * two256
				33	two1024 = two512 * two512
Robert Griesemer	765b4a3	2014-05-21 08:53:47 -0700	[diff] [blame]	34
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	35	ulp32 = two128 / two24
				36	max32 = two128 - ulp32
				37
				38	ulp64 = two1024 / two53
				39	max64 = two1024 - ulp64
Robert Griesemer	765b4a3	2014-05-21 08:53:47 -0700	[diff] [blame]	40	)
				41
// cvt is the table of conversion cases checked by main. Each entry pairs a
// constant expression near the float32/float64 maximum with the value it is
// expected to round to when converted.
var cvt = []struct {
	bits   uint64      // keep us honest: expected bit pattern of exact (and of approx)
	exact  interface{} // the expected result, written as a conversion of the target value itself
	approx interface{} // conversion of a nearby constant that must produce the same value as exact
	text   string      // the expression used for approx, printed in failure messages
}{
	// float32, positive: constants from max32 - ulp32 - ulp32/2 up to just
	// below max32 + ulp32/2. The first four are expected to give
	// max32 - ulp32, the remaining four max32.
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 - ulp32/2), "max32 - ulp32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32), "max32 - ulp32"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32/2), "max32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 + ulp32/2), "max32 - ulp32 + ulp32/2"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + ulp32/two64), "max32 - ulp32 + ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + ulp32/two64), "max32 - ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32), "max32"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - ulp32/two64), "max32 + ulp32/2 - ulp32/two64"},

	// float32, negative: the same cases with the sign flipped.
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 - ulp32/2)), "-(max32 - ulp32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32)), "-(max32 - ulp32)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32/2)), "-(max32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 + ulp32/2)), "-(max32 - ulp32 + ulp32/2)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + ulp32/two64)), "-(max32 - ulp32 + ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + ulp32/two64)), "-(max32 - ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32)), "-(max32)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - ulp32/two64)), "-(max32 + ulp32/2 - ulp32/two64)"},

	// These are required to work: according to the Go spec, the internal float mantissa must be at least 256 bits,
	// and these expressions can be represented exactly with a 256-bit mantissa.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1), "max32 - ulp32 + ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1), "max32 - ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1), "max32 + ulp32/2 - 1"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1)), "-(max32 - ulp32 + ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1)), "-(max32 - ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1)), "-(max32 + ulp32/2 - 1)"},

	// The same float32 boundary cases with an even smaller offset, 1/two128.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1/two128), "max32 - ulp32 + ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1/two128), "max32 - ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1/two128), "max32 + ulp32/2 - 1/two128"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1/two128)), "-(max32 - ulp32 + ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1/two128)), "-(max32 - ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1/two128)), "-(max32 + ulp32/2 - 1/two128)"},

	// float64, positive: the analogue of the first float32 group, using
	// max64 and ulp64.
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 - ulp64/2), "max64 - ulp64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64), "max64 - ulp64"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64/2), "max64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 + ulp64/2), "max64 - ulp64 + ulp64/2"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + ulp64/two64), "max64 - ulp64 + ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + ulp64/two64), "max64 - ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64), "max64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - ulp64/two64), "max64 + ulp64/2 - ulp64/two64"},

	// float64, negative: the same cases with the sign flipped.
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 - ulp64/2)), "-(max64 - ulp64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64)), "-(max64 - ulp64)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64/2)), "-(max64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 + ulp64/2)), "-(max64 - ulp64 + ulp64/2)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + ulp64/two64)), "-(max64 - ulp64 + ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + ulp64/two64)), "-(max64 - ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64)), "-(max64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - ulp64/two64)), "-(max64 + ulp64/2 - ulp64/two64)"},

	// These are required to work.
	// The mantissas are exactly 256 bits.
	// max64 is just below 2¹⁰²⁴ so the bottom bit we can use is 2⁷⁶⁸.
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + two768), "max64 - ulp64 + ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + two768), "max64 - ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - two768), "max64 + ulp64/2 - two768"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + two768)), "-(max64 - ulp64 + ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + two768)), "-(max64 - ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - two768)), "-(max64 + ulp64/2 - two768)"},
}
				111
				112	var bugged = false
				113
				114	func bug() {
				115	if !bugged {
				116	bugged = true
				117	fmt.Println("BUG")
				118	}
Russ Cox	60be4a2	2014-05-19 22:57:59 -0400	[diff] [blame]	119	}
				120
				121	func main() {
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	122	u64 := math.Float64frombits(0x7fefffffffffffff) - math.Float64frombits(0x7feffffffffffffe)
				123	if ulp64 != u64 {
				124	bug()
				125	fmt.Printf("ulp64=%g, want %g", ulp64, u64)
				126	}
				127
				128	u32 := math.Float32frombits(0x7f7fffff) - math.Float32frombits(0x7f7ffffe)
				129	if ulp32 != u32 {
				130	bug()
				131	fmt.Printf("ulp32=%g, want %g", ulp32, u32)
				132	}
				133
				134	for _, c := range cvt {
				135	if bits(c.exact) != c.bits {
				136	bug()
				137	fmt.Printf("%s: inconsistent table: bits=%#x (%g) but exact=%g (%#x)\n", c.text, c.bits, fromBits(c.bits, c.exact), c.exact, bits(c.exact))
				138	}
				139	if c.approx != c.exact \|\| bits(c.approx) != c.bits {
				140	bug()
				141	fmt.Printf("%s: have %g (%#x) want %g (%#x)\n", c.text, c.approx, bits(c.approx), c.exact, c.bits)
Russ Cox	60be4a2	2014-05-19 22:57:59 -0400	[diff] [blame]	142	}
				143	}
				144	}
Russ Cox	2de449e	2014-05-21 17:12:06 -0400	[diff] [blame]	145
				146	func bits(x interface{}) interface{} {
				147	switch x := x.(type) {
				148	case float32:
				149	return uint64(math.Float32bits(x))
				150	case float64:
				151	return math.Float64bits(x)
				152	}
				153	return 0
				154	}
				155
				156	func fromBits(b uint64, x interface{}) interface{} {
				157	switch x.(type) {
				158	case float32:
				159	return math.Float32frombits(uint32(b))
				160	case float64:
				161	return math.Float64frombits(b)
				162	}
				163	return "?"
				164	}