blob: 0457e90b53c935b085f67ee3854b16d4a98167c7 [file] [log] [blame]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import "math/big"
// So you want to compute x / c for some constant c?
// Machine division instructions are slow, so we try to
// compute this division with a multiplication + a few
// other cheap instructions instead.
// (We assume here that c != 0, +/- 1, or +/- 2^i. Those
// cases are easy to handle in different ways).
// Technique from https://gmplib.org/~tege/divcnst-pldi94.pdf
// First consider unsigned division.
// Our strategy is to precompute 1/c then do
// ⎣x / c⎦ = ⎣x * (1/c)⎦.
// 1/c is less than 1, so we can't compute it directly in
// integer arithmetic. Let's instead compute 2^e/c
// for a value of e TBD (^ = exponentiation). Then
// ⎣x / c⎦ = ⎣x * (2^e/c) / 2^e⎦.
// Dividing by 2^e is easy. 2^e/c isn't an integer, unfortunately.
// So we must approximate it. Let's call its approximation m.
// We'll then compute
// ⎣x * m / 2^e⎦
// Which we want to be equal to ⎣x / c⎦ for 0 <= x < 2^n-1
// where n is the word size.
// Setting x = c gives us c * m >= 2^e.
// We'll chose m = ⎡2^e/c⎤ to satisfy that equation.
// What remains is to choose e.
// Let m = 2^e/c + delta, 0 <= delta < 1
// ⎣x * (2^e/c + delta) / 2^e⎦
// ⎣x / c + x * delta / 2^e⎦
// We must have x * delta / 2^e < 1/c so that this
// additional term never rounds differently than ⎣x / c⎦ does.
// Rearranging,
// 2^e > x * delta * c
// x can be at most 2^n-1 and delta can be at most 1.
// So it is sufficient to have 2^e >= 2^n*c.
// So we'll choose e = n + s, with s = ⎡log2(c)⎤.
//
// An additional complication arises because m has n+1 bits in it.
// Hardware restricts us to n bit by n bit multiplies.
// We divide into 3 cases:
//
// Case 1: m is even.
// ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
// ⎣x / c⎦ = ⎣x * (m/2) / 2^(n+s-1)⎦
// ⎣x / c⎦ = ⎣x * (m/2) / 2^n / 2^(s-1)⎦
// ⎣x / c⎦ = ⎣⎣x * (m/2) / 2^n⎦ / 2^(s-1)⎦
// multiply + shift
//
// Case 2: c is even.
// ⎣x / c⎦ = ⎣(x/2) / (c/2)⎦
// ⎣x / c⎦ = ⎣⎣x/2⎦ / (c/2)⎦
// This is just the original problem, with x' = ⎣x/2⎦, c' = c/2, n' = n-1.
// s' = s-1
// m' = ⎡2^(n'+s')/c'⎤
// = ⎡2^(n+s-1)/c⎤
// = ⎡m/2⎤
// ⎣x / c⎦ = ⎣x' * m' / 2^(n'+s')⎦
// ⎣x / c⎦ = ⎣⎣x/2⎦ * ⎡m/2⎤ / 2^(n+s-2)⎦
// ⎣x / c⎦ = ⎣⎣⎣x/2⎦ * ⎡m/2⎤ / 2^n⎦ / 2^(s-2)⎦
// shift + multiply + shift
//
// Case 3: everything else
// let k = m - 2^n. k fits in n bits.
// ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
// ⎣x / c⎦ = ⎣x * (2^n + k) / 2^(n+s)⎦
// ⎣x / c⎦ = ⎣(x + x * k / 2^n) / 2^s⎦
// ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
// ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
// ⎣x / c⎦ = ⎣⎣(x + ⎣x * k / 2^n⎦) / 2⎦ / 2^(s-1)⎦
// multiply + avg + shift
//
// These can be implemented in hardware using:
// ⎣a * b / 2^n⎦ - aka high n bits of an n-bit by n-bit multiply.
// ⎣(a+b) / 2⎦ - aka "average" of two n-bit numbers.
// (Not just a regular add & shift because the intermediate result
// a+b has n+1 bits in it. Nevertheless, can be done
// in 2 instructions on x86.)
// umagicOK returns whether we should strength reduce a n-bit divide by c.
func umagicOK(n uint, c int64) bool {
// Convert from ConstX auxint values to the real uint64 constant they represent.
d := uint64(c) << (64 - n) >> (64 - n)
// Doesn't work for 0.
// Don't use for powers of 2.
return d&(d-1) != 0
}
type umagicData struct {
s int64 // ⎡log2(c)⎤
m uint64 // ⎡2^(n+s)/c⎤ - 2^n
}
// umagic computes the constants needed to strength reduce unsigned n-bit divides by the constant uint64(c).
// The return values satisfy for all 0 <= x < 2^n
// floor(x / uint64(c)) = x * (m + 2^n) >> (n+s)
func umagic(n uint, c int64) umagicData {
// Convert from ConstX auxint values to the real uint64 constant they represent.
d := uint64(c) << (64 - n) >> (64 - n)
C := new(big.Int).SetUint64(d)
s := C.BitLen()
M := big.NewInt(1)
M.Lsh(M, n+uint(s)) // 2^(n+s)
M.Add(M, C) // 2^(n+s)+c
M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
M.Div(M, C) // ⎡2^(n+s)/c⎤
if M.Bit(int(n)) != 1 {
panic("n+1st bit isn't set")
}
M.SetBit(M, int(n), 0)
m := M.Uint64()
return umagicData{s: int64(s), m: m}
}
// For signed division, we use a similar strategy.
// First, we enforce a positive c.
// x / c = -(x / (-c))
// This will require an additional Neg op for c<0.
//
// If x is positive we're in a very similar state
// to the unsigned case above. We define:
// s = ⎡log2(c)⎤-1
// m = ⎡2^(n+s)/c⎤
// Then
// ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
// If x is negative we have
// ⎡x / c⎤ = ⎣x * m / 2^(n+s)⎦ + 1
// (TODO: derivation?)
//
// The multiply is a bit odd, as it is a signed n-bit value
// times an unsigned n-bit value. For n smaller than the
// word size, we can extend x and m appropriately and use the
// signed multiply instruction. For n == word size,
// we must use the signed multiply high and correct
// the result by adding x*2^n.
//
// Adding 1 if x<0 is done by subtracting x>>(n-1).
func smagicOK(n uint, c int64) bool {
if c < 0 {
// Doesn't work for negative c.
return false
}
// Doesn't work for 0.
// Don't use it for powers of 2.
return c&(c-1) != 0
}
type smagicData struct {
s int64 // ⎡log2(c)⎤-1
m uint64 // ⎡2^(n+s)/c⎤
}
// magic computes the constants needed to strength reduce signed n-bit divides by the constant c.
// Must have c>0.
// The return values satisfy for all -2^(n-1) <= x < 2^(n-1)
// trunc(x / c) = x * m >> (n+s) + (x < 0 ? 1 : 0)
func smagic(n uint, c int64) smagicData {
C := new(big.Int).SetInt64(c)
s := C.BitLen() - 1
M := big.NewInt(1)
M.Lsh(M, n+uint(s)) // 2^(n+s)
M.Add(M, C) // 2^(n+s)+c
M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
M.Div(M, C) // ⎡2^(n+s)/c⎤
if M.Bit(int(n)) != 0 {
panic("n+1st bit is set")
}
if M.Bit(int(n-1)) == 0 {
panic("nth bit is not set")
}
m := M.Uint64()
return smagicData{s: int64(s), m: m}
}