blob: 6ece599a4b54efa952d1a20c24c8afc8b21ecbdb [file] [log] [blame]
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
// shiftVU emits the routine called name, which is either
// lshVU (z, c = x << s) or rshVU (z, c = x >> s), for 0 < s < _W.
func shiftVU(a *Asm, name string) {
	// Callers such as z.Lsh(z, N) and z.Rsh(z, N) may hand us input and
	// output slices that overlap at different offsets. On a 64-bit system,
	// for instance, z.Lsh(z, 65) has &z[0] == &x[1], while z.Rsh(z, 65)
	// has &z[1] == &x[0].
	// So that no word is overwritten before it has been read, a left shift
	// walks the slices from len(z)-1 down to 0 and a right shift walks them
	// from 0 up to len(z)-1. The opposite directions have the pleasant side
	// effect of making both cases look alike: the output always trails
	// the input by one word.

	fn := a.Func("func " + name + "(z, x []Word, s uint) (c Word)")

	// Everything below starts by reading one word, so deal with empty input first.
	count := fn.Arg("z_len")
	a.JmpZero(count, "ret0")

	// Set up the pipeline and fetch the first input word.
	amount := fn.ArgHint("s", HintShiftCount)
	pipe := fn.Pipe()
	if name == "lshVU" {
		pipe.SetBackward()
	}
	var factors []int
	if a.Arch == Arch386 {
		// Too few registers to unroll any further.
		factors = []int{1}
		pipe.SetUseIndexCounter()
	} else {
		factors = []int{1, 4}
	}
	pipe.LoadPtrs(count)
	a.Comment("shift first word into carry")
	prev := pipe.LoadN(1)[0][0]

	// Choose the shift primitives. Architectures with a wide shift (x86)
	// use it directly; the others pair a shift by s with a shift in the
	// opposite direction by 64-s or 32-s.
	shift, shiftWide, negShift, negShiftReg := a.Lsh, a.LshWide, a.Rsh, a.RshReg
	if name == "rshVU" {
		shift, shiftWide, negShift, negShiftReg = a.Rsh, a.RshWide, a.Lsh, a.LshReg
	}

	switch {
	case a.Arch.HasShiftWide():
		// The wide shift removes the need for any opposite-direction shift.
		// Invariant: prev is the previous input word, completely unshifted,
		// ready to be fed into the next wide shift.
		// Once the loop is over, prev becomes the last output word.
		carryOut := a.Reg()
		shiftWide(amount, prev, a.Imm(0), carryOut)
		fn.StoreArg(carryOut, "c")
		a.Free(carryOut)
		a.Comment("shift remaining words")
		pipe.Start(count, factors...)
		pipe.Loop(func(in, out [][]Reg) {
			// Input registers double as output registers one step later,
			// with prev serving as the first output. Only after the outputs
			// are stored is the last input word moved into prev, ready for
			// the next iteration.
			last := prev
			for i, word := range in[0] {
				shiftWide(amount, word, last, last)
				out[0][i] = last
				last = word
			}
			pipe.StoreN(out)
			a.Mov(last, prev)
		})
		a.Comment("store final shifted bits")
		shift(amount, prev, prev)

	default:
		// Assemble each output word from x << s and x >> (64-s).
		// Invariant (after the first word): prev is x << s, the high bits
		// of the next output word; its low bits come from the next input
		// word. Once the loop is over, prev is the last output word.
		amountNeg := a.Reg()
		a.Mov(a.Imm(a.Arch.WordBits), amountNeg)
		a.Sub(amount, amountNeg, amountNeg, SmashCarry)
		carryOut := a.Reg()
		negShift(amountNeg, prev, carryOut)
		shift(amount, prev, prev)
		fn.StoreArg(carryOut, "c")
		a.Free(carryOut)
		a.Comment("shift remaining words")
		pipe.Start(count, factors...)
		pipe.Loop(func(in, out [][]Reg) {
			if !a.HasRegShift() {
				// Input registers double as output registers one step
				// later; a fresh register holds the first output.
				dst := a.Reg()
				for i, word := range in[0] {
					negShift(amountNeg, word, dst)
					a.Or(prev, dst, dst)
					shift(amount, word, prev)
					out[0][i] = dst
					dst = word
				}
				pipe.StoreN(out)
				return
			}

			// 32-bit ARM accepts a shifted operand in most arithmetic
			// instructions, OR included, so the opposite-direction shift
			// folds into the a.Or. Register bookkeeping is simplest when
			// each output gets its own StoreN; ARM does not use
			// multi-register stores here, so that costs nothing.
			out[0] = out[0][:1]
			for _, word := range in[0] {
				a.Or(negShiftReg(amountNeg, word), prev, prev)
				out[0][0] = prev
				pipe.StoreN(out)
				shift(amount, word, prev)
			}
		})
		a.Comment("store final shifted bits")
	}

	pipe.StoreN([][]Reg{{prev}})
	pipe.Done()
	a.Free(amount)
	a.Ret()

	// Target of the empty-input jump above: return c = 0.
	a.Label("ret0")
	fn.StoreArg(a.Imm(0), "c")
	a.Ret()
}