blob: 03b35155f536a71353a007f7be44bc4d6a46a46c [file] [edit]
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package arm64
import (
"cmd/internal/obj"
"fmt"
"iter"
"math"
"math/bits"
)
// instEncoder represents an instruction encoder: one candidate encoding of a
// Go opcode, described by the bits that are fixed for that encoding and by the
// operands it expects. tryEncode matches a Prog against it.
type instEncoder struct {
	goOp      obj.As    // Go opcode mnemonic
	fixedBits uint32    // Known bits; tryEncode starts from this value and ORs the operand bits into it
	args      []operand // Operands, in Go order
}
// varBits describes a variable bit field of a binary encoding.
// NOTE(review): not referenced in the visible part of this file — confirm it is still in use.
type varBits struct {
	// The low and high bit index in the binary encoding, exclusive on hi
	lo, hi  int
	encoded bool   // If true then its value is already encoded
	bits    uint32 // presumably the value of the field — TODO confirm against the users of varBits
}
// component identifies a component of a binary encoding.
// e.g. for operand <Zda>.<T>, <T>'s encoding function might be described as:
//
//	For the "Byte and halfword" variant: is the size specifier,
//	sz <T>
//	0  B
//	1  H
//	bit range mappings:
//	sz: [22:23)
//
// Then sz is the component of the binary encoding.
//
// tryEncode uses the component as a key: two elements that are encoded into
// the same component must produce the same bits.
type component uint16
// elemEncoder encodes a single element of an operand.
type elemEncoder struct {
	// fn maps the raw element value (as extracted by addrComponent) to its
	// encoded bits. The second result is the ok signal.
	// When ok is false and the first result is non-zero, tryEncode treats the
	// first result as one of the code* sentinels and dispatches to the
	// matching hand-written encoding function; (0, false) means "no match".
	fn func(uint32) (uint32, bool)
	// comp is the component of the binary encoding.
	comp component
}
// operand is the operand type of an instruction.
type operand struct {
	class AClass // Operand class, register, constant, memory operation etc.
	// The elements that this operand includes; only the encoding-related parts are listed.
	// Each entry carries the encoding function of one element. The position of an
	// entry in the slice is the element index that tryEncode passes to addrComponent.
	// The first returned value of an encoding function is the encoded binary, the
	// second is the ok signal.
	// The encoding functions return the ok signal for deduplication purposes.
	// For example:
	//	SDOT <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
	//	SDOT <Zda>.H, <Zn>.B, <Zm>.B
	//	SDOT <Zda>.S, <Zn>.H, <Zm>.H
	//
	// <T> and <Tb> are specified in the encoding text, which states the constraint "T = 4*Tb".
	// We don't know this fact by looking at the encoding format solely; without this information
	// the first encoding domain entails the other 2, and at the instruction matching phase we
	// simply cannot deduplicate them. So we defer this deduplication to the encoding phase.
	// We need the ok signal together with the [elemEncoder.comp] field to deduplicate them.
	elemEncoders []elemEncoder
}
// opsInProg returns an iterator over the operands ([obj.Addr]) of p.
//
// Operands are produced in Go order: From, RestArgs..., To. From and To are
// skipped when their Type is obj.TYPE_NONE. p.Reg is never produced: for SVE
// it is unused, because registers so commonly carry arrangements.
func opsInProg(p *obj.Prog) iter.Seq[*obj.Addr] {
	return func(yield func(*obj.Addr) bool) {
		if from := &p.From; from.Type != obj.TYPE_NONE && !yield(from) {
			return
		}
		for idx := range p.RestArgs {
			if rest := &p.RestArgs[idx].Addr; !yield(rest) {
				return
			}
		}
		// To is the last operand, so the result of yield no longer matters.
		if to := &p.To; to.Type != obj.TYPE_NONE {
			yield(to)
		}
	}
}
// aclass returns the AClass of an Addr.
//
// The class is derived from a.Type and, depending on the type, from a.Reg,
// a.Offset, a.Scale and a.Index. The register ranges are tested in a fixed
// order and the first match wins. aclass panics when a fits no known class.
func aclass(a *obj.Addr) AClass {
	switch a.Type {
	case obj.TYPE_REG:
		reg := a.Reg
		switch {
		case a.Offset&(int64(1)<<62) != 0:
			// Bit 62 of Offset is the sentinel of the packed AC_PREGSEL form.
			return AC_PREGSEL
		case reg >= REG_Z0 && reg <= REG_Z31:
			return AC_ZREG
		case reg >= REG_P0 && reg <= REG_PN15:
			return AC_PREG
		case reg >= REG_ARNG && reg < REG_ELEM:
			return AC_ARNG
		case reg >= REG_ZARNG && reg < REG_ZARNGELEM:
			return AC_ARNG
		case reg >= REG_ZARNGELEM && reg < REG_PZELEM:
			return AC_ARNGIDX
		case reg >= REG_PZELEM && reg < REG_PARNGZM:
			// Bit 5 tells the predicate form from the vector form.
			if reg&(1<<5) != 0 {
				return AC_PREGIDX
			}
			return AC_ZREGIDX
		case reg >= REG_PARNGZM && reg < REG_PARNGZM_END:
			if sel := (reg >> 5) & 15; sel == PRED_M || sel == PRED_Z {
				return AC_PREGZM
			}
			return AC_ARNG
		case reg >= REG_V0 && reg <= REG_V31:
			return AC_VREG
		case reg >= REG_R0 && reg <= REG_R31 || reg == REG_RSP:
			return AC_SPZGREG
		}
	case obj.TYPE_CONST, obj.TYPE_FCONST:
		return AC_IMM
	case obj.TYPE_REGLIST:
		if a.Scale > 0 {
			return AC_REGLIST_RANGE
		}
		// Bits [12:16) of Offset select the number of registers in the list.
		switch (a.Offset >> 12) & 0xf {
		case 0x7:
			return AC_REGLIST1
		case 0xa:
			return AC_REGLIST2
		case 0x6:
			return AC_REGLIST3
		case 0x2:
			return AC_REGLIST4
		}
	case obj.TYPE_MEM:
		if a.Index != 0 {
			return AC_MEMEXT
		}
		if a.Scale&-32768 != 0 {
			return AC_MEMOFFMULVL
		}
		return AC_MEMOFF
	case obj.TYPE_SPECIAL:
		return AC_SPECIAL
	}
	panic(fmt.Errorf("unknown AClass, addr = %v\n", a))
}
// addrComponent returns the binary (component) of the stored element in a at index, for operand
// of type acl.
//
// For example, for operand of type AC_ARNG, it has 2 permissible components (identified by index)
// 0. register: <reg>
// 1. arrangement: <T>
//
// They are stored in a.Reg as:
//
//	reg | (arrangement << 5)
//
// The returned value is the raw, not yet encoded, value of the element; it is
// what tryEncode hands to the element's encoding function.
// addrComponent panics when index is not valid for acl, or when acl is not
// handled here.
//
// More details are in the comments in the switch cases of this function.
func addrComponent(a *obj.Addr, acl AClass, index int) uint32 {
	switch acl {
	// AClass: AC_PREGSEL
	// GNU mnemonic: <preg>.<T>[<selreg>, <imm>]
	// Go mnemonic:
	//   [selreg, $idximm](preg.T)
	// Encoding:
	//   Type = TYPE_REG
	//   Offset = packed bits: preg (5 bits) | T (4 bits) | selreg (5 bits) | idximm (6 bits) | sentinel (bit 62)
	// Indices: 0 = preg, 1 = T, 2 = selreg, 3 = width check of selreg, 4 = idximm.
	case AC_PREGSEL:
		switch index {
		case 0:
			return uint32(a.Offset & 31)
		case 1:
			return uint32((a.Offset >> 5) & 15)
		case 2:
			return uint32((a.Offset>>9)&31 + REG_R0) // encoding functions assume an ARM64 register
		case 3:
			// This is to check the width of selreg, which is guaranteed to be W in AC_PREGSEL.
			// W check always returns true as it's resolved in the generator already.
			// So we just return a default value 0 here.
			return 0
		case 4:
			return uint32((a.Offset >> 14) & 63)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_ARNG, AC_PREG, AC_PREGZM, AC_ZREG
	// GNU mnemonic: <reg>.<T> Or <reg>/<T> (T is M or Z)
	// Go mnemonic:
	//   reg.<T>
	// Encoding:
	//   Type = TYPE_REG
	//   Reg = reg | (arrangement or predication << 5)
	// Indices: 0 = reg, 1 = arrangement or predication.
	case AC_ARNG, AC_PREG, AC_PREGZM, AC_ZREG:
		switch index {
		case 0:
			return uint32(a.Reg & 31)
		case 1:
			return uint32((a.Reg >> 5) & 15)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_ARNGIDX, AC_PREGIDX, AC_ZREGIDX
	// GNU mnemonic: <reg>.<T>[<index>]
	// Go mnemonic:
	//   reg.T[index]
	// Encoding:
	//   Type = TYPE_REG
	//   Reg = reg | (arrangement << 5)
	//   Index = index
	case AC_ARNGIDX, AC_PREGIDX, AC_ZREGIDX:
		switch index {
		case 0:
			// Register
			return uint32(a.Reg & 31)
		case 1:
			// Arrangement
			return uint32((a.Reg >> 5) & 15)
		case 2:
			// Index
			return uint32(a.Index)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_SPZGREG, AC_VREG
	// GNU mnemonic: <width><reg>
	// Go mnemonic:
	//   reg (the width is already represented in the opcode)
	// Encoding:
	//   Type = TYPE_REG
	//   Reg = reg
	// Note that, unlike the classes above, the register is at index 1 here and
	// it is returned unmasked.
	case AC_SPZGREG, AC_VREG:
		switch index {
		case 0:
			// These are all width checks, they should map to no-op checks altogether.
			return 0
		case 1:
			return uint32(a.Reg)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_IMM
	// GNU mnemonic: <imm>, <shift>
	// Go mnemonic:
	//   $imm<<shift
	// Encoding:
	//   Type = TYPE_CONST or TYPE_FCONST
	//   Offset = imm (shift already applied)
	case AC_IMM:
		switch index {
		case 0:
			if a.Type == obj.TYPE_FCONST {
				// Float immediates are returned as the IEEE 754 single-precision
				// bit pattern of the value.
				switch v := a.Val.(type) {
				case float64:
					return math.Float32bits(float32(v))
				default:
					panic(fmt.Errorf("unknown float immediate value %v", a.Val))
				}
			}
			// Only the low 32 bits of Offset survive this conversion.
			return uint32(a.Offset)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_REGLIST1, AC_REGLIST2, AC_REGLIST3, AC_REGLIST4, AC_REGLIST_RANGE
	// GNU mnemonic: {reg1.T, reg2.T, ...}
	// Go mnemonic:
	//   [reg1.T, reg2.T, ...]
	// Encoding:
	//   Type = TYPE_REGLIST
	//   Offset = register prefix | register count | arrangement (opcode) | first register
	//   Scale = range size - 1 (if REGLIST_RANGE)
	// Even indices select a register of the list, odd indices its arrangement.
	case AC_REGLIST1, AC_REGLIST2, AC_REGLIST3, AC_REGLIST4, AC_REGLIST_RANGE:
		firstReg := int(a.Offset & 31)
		prefix := a.Offset >> 32 & 0b11
		// sum is the modulus at which register numbers wrap around:
		// 16 when the register prefix is 2, 32 otherwise.
		sum := 32
		if prefix == 2 {
			sum = 16
		}
		// Reject indices beyond what the list class can hold.
		// NOTE(review): each bound admits one even index past the last
		// register/arrangement pair (e.g. index 2 for AC_REGLIST1) — confirm
		// this is intended.
		switch acl {
		case AC_REGLIST1:
			if index > 2 {
				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
			}
		case AC_REGLIST2:
			if index > 4 {
				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
			}
		case AC_REGLIST3:
			if index > 6 {
				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
			}
		case AC_REGLIST4:
			if index > 8 {
				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
			}
		case AC_REGLIST_RANGE:
			// It behaves just like a AC_REGLIST2
			if index > 4 {
				panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
			}
		}
		switch index % 2 {
		case 0:
			// register
			if a.Scale > 0 {
				// For register ranges in SVE we allow discontiguous registers.
				// Scale is used as the distance between consecutive registers.
				return uint32((firstReg + (index/2)*int(a.Scale)) % sum)
			}
			return uint32((firstReg + index/2) % sum)
		case 1:
			// arrangement, reconstructed from bits [30:32) (curQ) and
			// bits [10:12) (curSize) of Offset.
			curQ := a.Offset >> 30 & 0b11
			curSize := a.Offset >> 10 & 0b11
			switch curQ {
			case 0:
				switch curSize {
				case 0:
					return ARNG_8B
				case 1:
					return ARNG_4H
				case 2:
					return ARNG_2S
				case 3:
					return ARNG_1D
				default:
					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
				}
			case 1:
				switch curSize {
				case 0:
					return ARNG_16B
				case 1:
					return ARNG_8H
				case 2:
					return ARNG_4S
				case 3:
					return ARNG_2D
				default:
					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
				}
			case 2:
				switch curSize {
				case 1:
					return ARNG_B
				case 2:
					return ARNG_H
				case 3:
					return ARNG_S
				default:
					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
				}
			case 3:
				switch curSize {
				case 1:
					return ARNG_D
				case 2:
					return ARNG_Q
				default:
					panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
				}
			default:
				panic(fmt.Errorf("unknown Q value at %d in AClass %d", index, acl))
			}
		default:
			// Reached for negative odd indices, where index % 2 is -1.
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_SPECIAL
	// GNU mnemonic: <special>
	// Go mnemonic:
	//   special
	// Encoding:
	//   Type = TYPE_SPECIAL
	//   Offset = SpecialOperand enum value
	case AC_SPECIAL:
		switch index {
		case 0:
			return uint32(a.Offset)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_MEMOFF, AC_MEMOFFMULVL
	// GNU mnemonic: [<reg>.<T>, #<imm>]
	// Go mnemonic:
	//   imm(reg.T)
	// Encoding:
	//   Type = TYPE_MEM
	//   Reg = Base register (with arrangement if applicable)
	//   Offset = Immediate offset
	// Indices: 0 = base register, 1 = its arrangement, 2 = immediate offset.
	case AC_MEMOFF, AC_MEMOFFMULVL:
		switch index {
		case 0:
			return uint32(a.Reg & 31)
		case 1:
			return uint32((a.Reg >> 5) & 15)
		case 2:
			return uint32(a.Offset)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	// AClass: AC_MEMEXT
	// GNU mnemonic: [<reg1>.<T1>, <reg2>.<T2>, <mod> <amount>]
	// Go mnemonic:
	//   (reg2.T2.mod<<amount)(reg1.T1)
	// Encoding:
	//   Type = TYPE_MEM
	//   Reg = Index register (with arrangement if applicable)
	//   Index = Base register (with arrangement if applicable)
	//   Scale = Packed mod and amount
	// Indices: 0/1 = a.Index and its arrangement, 2/3 = a.Reg and its
	// arrangement, 4 = mod, 5 = amount.
	// The registers at indices 0 and 2 are returned unmasked, i.e. still
	// including the arrangement bits.
	case AC_MEMEXT:
		switch index {
		case 0:
			return uint32(a.Index)
		case 1:
			return uint32((a.Index >> 5) & 15)
		case 2:
			return uint32(a.Reg)
		case 3:
			return uint32((a.Reg >> 5) & 15)
		case 4:
			// mod is either 1 (UXTW), 2 (SXTW), or 4 (LSL)
			mod := uint32((a.Scale >> 9) & 0x7)
			amount := uint32((a.Scale >> 12) & 0x7)
			if mod == 0 && amount > 0 {
				// LSL is implied when no extension is specified but amount > 0
				mod |= 1 << 2
			}
			return mod
		case 5:
			return uint32((a.Scale >> 12) & 0x7)
		default:
			panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
		}
	}
	// TODO: handle more AClasses.
	panic(fmt.Errorf("unknown AClass %d", acl))
}
// Sentinel codes for encodings that cannot be expressed as a plain
// value-to-bits function. An elemEncoder's fn returns one of them together
// with ok == false, and tryEncode then dispatches to the matching hand-written
// encoding function (or, for codeNoOp, encodes nothing).
var (
	codeI1Tsz                 uint32 = 0xffffffff
	codeImm2Tsz               uint32 = 0xfffffffe
	codeShift161919212223     uint32 = 0xfffffffd
	codeShift161919212224     uint32 = 0xfffffffc
	codeShift588102224        uint32 = 0xfffffffb
	codeLogicalImmArrEncoding uint32 = 0xfffffffa
	codeImm3Tsize1621         uint32 = 0xfffffff9
	codeShiftI1TszhTszl       uint32 = 0xfffffff8
	codeNoOp                  uint32 = 0xfffffff7
)
// encodeI1Tsz is the implementation of the following encoding logic:
// Is the immediate index, in the range 0 to one less than the number of elements in 128 bits, encoded in "i1:tsz".
// bit range mappings:
//
//	i1: [20:21)
//	tsz: [16:20)
//
// v is the index and arr is the arrangement of the indexed register.
// The index is aligned to the high bit of the "i1:tsz" box, according to the
// spec, so the narrower the element the lower the index starts. Only the
// index bits are returned. ok is false when arr is not one of B, H, S, D, Q or
// when v is out of range for arr.
func encodeI1Tsz(v, arr uint32) (uint32, bool) {
	// shift is the bit position of the index's lowest bit, limit the largest
	// index that exists for the arrangement.
	var shift, limit uint32
	switch arr {
	case ARNG_B:
		shift, limit = 17, 15
	case ARNG_H:
		shift, limit = 18, 7
	case ARNG_S:
		shift, limit = 19, 3
	case ARNG_D:
		shift, limit = 20, 1
	case ARNG_Q:
		// Only index 0 exists and it contributes no bits.
		shift, limit = 0, 0
	default:
		return 0, false
	}
	if v > limit {
		return 0, false
	}
	return v << shift, true
}
// encodeImm2Tsz is the implementation of the following encoding logic:
// Is the immediate index, in the range 0 to one less than the number of elements in 512 bits, encoded in "imm2:tsz".
// bit range mappings:
//
//	imm2: [22:24)
//	tsz: [16:21)
//
// v is the index and arr is the arrangement of the indexed register.
// The index is aligned to the high bit of the "imm2:tsz" box, according to
// the spec: it is first shifted left past the size marker of the arrangement,
// then its low 5 bits go to tsz and the remaining bits to imm2. ok is false
// when arr is not one of B, H, S, D, Q or when v is out of range for arr.
func encodeImm2Tsz(v, arr uint32) (uint32, bool) {
	// shift is the number of low "tsz" bits the index must skip, limit the
	// largest index that exists for the arrangement.
	var shift, limit uint32
	switch arr {
	case ARNG_B:
		shift, limit = 1, 63
	case ARNG_H:
		shift, limit = 2, 31
	case ARNG_S:
		shift, limit = 3, 15
	case ARNG_D:
		shift, limit = 4, 7
	case ARNG_Q:
		shift, limit = 5, 3
	default:
		return 0, false
	}
	if v > limit {
		return 0, false
	}
	packed := v << shift
	tsz := packed & 31
	imm2 := packed >> 5
	return tsz<<16 | imm2<<22, true
}
// arrAlignType enumerates arrangement alignment kinds.
// NOTE(review): neither the type nor its values are referenced in the visible
// part of this file. The value names suggest the set of element sizes
// (B, H, S, D) each kind covers — confirm against the users.
type arrAlignType int

const (
	arrAlignBHSD arrAlignType = iota
	arrAlignHSD
	arrAlignBHS
)
// encodeShiftTriple encodes a shift immediate value in "tszh:tszl:imm3".
//
// v is the shift amount. r holds the bit ranges of the three fields as
// {lo, hi} pairs (hi exclusive), sorted by bit position: imm3, tszl, tszh.
// prevAddr is the operand the element size is taken from; it must be a
// scalable vector with a B, H, S or D arrangement. op selects between the
// right-shift and the left-shift form of the encoding.
//
// The accepted range of v follows from the ASL quoted below and depends on the
// form:
//   - right shifts: 1 to esize inclusive
//   - left shifts: 0 to esize-1 inclusive
//
// where esize is the number of bits per element the instruction decodes.
// ok is false when prevAddr is unsuitable or v is out of range.
func encodeShiftTriple(v uint32, r [6]int, prevAddr *obj.Addr, op obj.As) (uint32, bool) {
	// The previous op must be a scalable vector, and we need its arrangement.
	acl := aclass(prevAddr)
	if acl != AC_ARNG {
		return 0, false
	}
	var elemBits uint32
	switch addrComponent(prevAddr, acl, 1) { // index 1 is the arrangement
	case ARNG_B:
		elemBits = 8
	case ARNG_H:
		elemBits = 16
	case ARNG_S:
		elemBits = 32
	case ARNG_D:
		elemBits = 64
	default:
		return 0, false
	}
	// Unfortunately this information is only in the decoding ASL.
	// For the narrowing instructions the esize (see the comments in the switch
	// below) is derived from the destination arrangement, whereas this
	// function is handed one of the sources, whose elements are twice as wide.
	esize := elemBits
	switch op {
	case AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT,
		AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT:
		esize = elemBits / 2
	}
	// field is the value of "tsize:imm3" (tsize being tszh:tszl).
	var field uint32
	switch op {
	case AZASR, AZLSR, AZURSHR, AZASRD,
		AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT,
		AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZSRSHR, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT,
		AZURSRA, AZUSRA, AZXAR, AZSRI, AZSRSRA, AZSSRA:
		// ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3);
		// The field must stay within [esize, 2*esize) for tsize to select
		// esize, so the shift is limited to 1..esize. The bound is the
		// decoded esize, not the width of the source element: for the
		// narrowing forms anything larger would select a smaller, or no,
		// element size.
		if v < 1 || v > esize {
			return 0, false
		}
		field = 2*esize - v
	default:
		// ASL: let shift : integer = UInt(tsize::imm3) - esize;
		// Here the field stays within [esize, 2*esize) for shifts 0..esize-1.
		if v >= esize {
			return 0, false
		}
		field = esize + v
	}
	// Distribute the field over the three bit ranges, least significant first.
	var bin uint32
	for k := 0; k < 6; k += 2 {
		width := r[k+1] - r[k]
		bin |= (field & (1<<width - 1)) << r[k]
		field >>= width
	}
	return bin, true
}
// encodeLogicalImmArrEncoding is the implementation of the following encoding logic:
// Is the size specifier,
//
//	imm13          <T>
//	0xxxxxx0xxxxx  S
//	0xxxxxx10xxxx  H
//	0xxxxxx110xxx  B
//	0xxxxxx1110xx  B
//	0xxxxxx11110x  B
//	0xxxxxx11111x  RESERVED
//	1xxxxxxxxxxxx  D
//
// At the same time:
// Is a 64, 32, 16 or 8-bit bitmask consisting of replicated 2, 4, 8, 16, 32 or 64 bit fields,
// each field containing a rotated run of non-zero bits, encoded in the "imm13" field.
//
// bit range mappings:
//
//	imm13: [5:18)
//
// ARM created a "clever" recipe that can generate useful repeating 8-64 bit bitmasks.
// Instead of storing the literal binary number, the processor reads a 13-bit recipe
// using three fields (bits from high to low):
// N (1 bit), immr (6 bits), and imms (6 bits).
//
// How the recipe works:
// Every logical immediate represents a repeating pattern (like repeating tiles). The processor
// uses the three fields to figure out the size of the tile, how many 1s are in the tile, and
// how far to rotate it.
// The N bit combined with the upper bits of imms determines the width of the repeating block.
// Depending on these bits, the fundamental block can be 2, 4, 8, 16, 32, or 64 bits wide.
// The lower bits of imms dictate exactly how many contiguous 1s exist inside that block.
// The immr value tells the processor how many bits to rotate that block to the right.
// Finally, the resulting block is duplicated to fill a standard 64-bit lane.
//
// v is the immediate and adjacentAddr the operand that supplies the
// arrangement; it must be a scalable vector with a B, H, S or D arrangement.
// Only the low lane-width bits of v are used. ok is false when the arrangement
// is unsuitable or the immediate cannot be expressed as a bitmask.
func encodeLogicalImmArrEncoding(v uint64, adjacentAddr *obj.Addr) (uint32, bool) {
	acl := aclass(adjacentAddr)
	if acl != AC_ARNG {
		return 0, false
	}
	// Replicate the given immediate to fill a full 64-bit lane.
	// This ensures our pattern-shrinking logic naturally respects the vector lane bounds.
	var val uint64
	switch addrComponent(adjacentAddr, acl, 1) { // index 1 is the arrangement
	case ARNG_B: // 8-bit lane
		val = (v & 0xFF) * 0x0101010101010101
	case ARNG_H: // 16-bit lane
		val = (v & 0xFFFF) * 0x0001000100010001
	case ARNG_S: // 32-bit lane
		// Mask like the narrower lanes do. Without it, bits above the lane
		// (e.g. the sign extension of a negative immediate) leak into the
		// upper half and the value is encoded as a 64-bit pattern with N = 1,
		// which the table above assigns to D, not S.
		v32 := v & 0xFFFFFFFF
		val = v32 | v32<<32
	case ARNG_D: // 64-bit lane
		val = v
	default:
		return 0, false
	}
	// Reject all zeros or all ones (handled by MOV/EOR, invalid for AND/ORR immediates)
	if val == 0 || val == ^uint64(0) {
		return 0, false
	}
	// Find the absolute smallest repeating pattern size (64 down to 2)
	size := uint64(64)
	for size > 2 {
		half := size / 2
		halfMask := (uint64(1) << half) - 1
		lower := val & halfMask
		upper := (val >> half) & halfMask
		if lower != upper {
			break
		}
		// The top half matches the bottom half, shrink our window
		size = half
		val = lower
	}
	// Count the ones in this minimal pattern
	mask := (uint64(1) << size) - 1
	val &= mask
	ones := bits.OnesCount64(val)
	// Find the right-rotation (rot) needed to align the 1s at the bottom.
	// No rotation matches unless the ones form one contiguous (possibly
	// wrapped-around) run, in which case the immediate is not encodable.
	expected := (uint64(1) << ones) - 1
	rot := -1
	for r := 0; r < int(size); r++ {
		// Right rotate 'val' by 'r' bits within a 'size'-bit window
		rotated := ((val >> r) | (val << (int(size) - r))) & mask
		if rotated == expected {
			rot = r
			break
		}
	}
	if rot == -1 {
		return 0, false
	}
	// immr is the amount the hardware must right-rotate the base pattern.
	// Since 'rot' is how much we right-rotated the target to find the base,
	// the hardware needs the inverse rotation.
	immr := uint32((int(size) - rot) % int(size))
	// Encode N, immr, and imms
	n := uint32(0)
	if size == 64 {
		n = 1
	}
	// The imms prefix is mathematically generated by (~(size*2 - 1) & 0x3F).
	// We then OR it with the number of ones (minus 1).
	imms := (uint32(^(size*2 - 1)) & 0x3F) | uint32(ones-1)
	// Construct the final 13-bit field: N (1) | immr (6) | imms (6)
	imm13 := (n << 12) | (immr << 6) | imms
	// Shift by 5 to place imm13 into instruction bits [5:18)
	return imm13 << 5, true
}
// encodeImm3Tsize1621 is the implementation of the following encoding logic:
// Is the immediate shift amount, in the range 1 to number of bits per element, encoded in "tsize:imm3".
// bit range mappings:
//
//	imm3: [16:19)
//	tsize: [19:21)
//
// v is the shift amount. srcArr is the <Tb> in the source reglist (ranged);
// only H and S are accepted. ok is false for any other arrangement and for a
// shift amount outside the range.
func encodeImm3Tsize1621(v uint32, srcArr uint32) (uint32, bool) {
	// esize is the destination element size, which is half the source's.
	var esize uint32
	switch srcArr {
	case ARNG_H:
		esize = 8
	case ARNG_S:
		esize = 16
	default:
		return 0, false
	}
	if v == 0 || v > esize {
		return 0, false
	}
	// From ARM ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3);
	// Then tsize::imm3 = (2 * esize) - shift.
	field := (2*esize - v) & 0x1f
	return field << 16, true
}
// encodeShiftI1TszhTszl is the implementation of the following encoding logic:
// Is the element index, in the range 0 to one less than the number of vector elements in a 128-bit vector register, encoded in "i1:tszh:tszl".
// bit range mappings:
//
//	i1: [23:24)
//	tszh: [22:23)
//	tszl: [18:21)
//
// v is the element index and arr is the arrangement; only B, H, S and D are
// accepted. ok is false for any other arrangement and for an index that is
// out of range.
func encodeShiftI1TszhTszl(v uint32, arr uint32) (uint32, bool) {
	// shift is the number of low bits of "i1:tszh:tszl" that the index skips.
	var shift uint32
	switch arr {
	case ARNG_B:
		shift = 1
	case ARNG_H:
		shift = 2
	case ARNG_S:
		shift = 3
	case ARNG_D:
		shift = 4
	default:
		return 0, false
	}
	// Number of elements: 16, 8, 4 and 2 for B, H, S and D respectively.
	if count := uint32(32) >> shift; v >= count {
		return 0, false
	}
	packed := v << shift
	// The low three bits land in tszl, everything above in tszh and i1.
	return (packed>>3)<<22 | (packed&0x7)<<18, true
}
// tryEncode tries to encode p with i, it returns the encoded binary and ok signal.
//
// p matches when it has as many operands as i expects, every operand is of the
// expected class, every element of every operand can be encoded, and elements
// that share a component encode to the same bits. Otherwise ok is false and
// the returned binary is 0.
//
// It panics if an encoding function returns an unknown sentinel code.
func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) {
	bin := i.fixedBits
	// Some elements are encoded in the same component, they need to be equal.
	// For example { <Zn1>.<Tb>-<Zn2>.<Tb> }.
	// The 2 instances of <Tb> must encode to the same value.
	encoded := map[component]uint32{}
	var addrs []*obj.Addr
	for addr := range opsInProg(p) {
		addrs = append(addrs, addr)
	}
	if len(addrs) != len(i.args) {
		return 0, false
	}
	for opIdx, addr := range addrs {
		op := i.args[opIdx]
		acl := aclass(addr)
		if acl != op.class {
			return 0, false
		}
		// neighbor returns the operand that follows the current one in Go
		// order, which some encodings need for its arrangement. That operand
		// has not been through the class check above yet, so it is validated
		// here: addrComponent panics when asked for an element the operand's
		// class does not have (e.g. element 1 of an immediate). A missing or
		// mismatching neighbor means that p does not match this encoder.
		neighbor := func() (*obj.Addr, bool) {
			n := opIdx + 1
			if n >= len(addrs) || aclass(addrs[n]) != i.args[n].class {
				return nil, false
			}
			return addrs[n], true
		}
		for elemIdx, enc := range op.elemEncoders {
			val := addrComponent(addr, acl, elemIdx)
			if (p.As == AZFCPY || p.As == AZFDUP) && acl == AC_IMM {
				// These instructions expect ARM's 8-bit float encoding.
				// Reinterpret the uint32 bits back as a float32, then convert to float64 for chipfloat7
				fval := float64(math.Float32frombits(val))
				encode := (&ctxt7{}).chipfloat7(fval)
				if encode == -1 {
					// The value has no 8-bit float representation.
					return 0, false
				}
				val = uint32(encode)
			}
			b, ok := enc.fn(val)
			if !ok && b == 0 {
				return 0, false
			}
			// code remembers what fn returned; when ok is false it is a
			// sentinel asking for one of the hand-written encodings below.
			code := b
			if !ok {
				switch code {
				case codeI1Tsz:
					// The arrangement is the element right before the index.
					b, ok = encodeI1Tsz(val, addrComponent(addr, acl, elemIdx-1))
				case codeImm2Tsz:
					b, ok = encodeImm2Tsz(val, addrComponent(addr, acl, elemIdx-1))
				case codeShift161919212223:
					if next, found := neighbor(); found {
						b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 23}, next, p.As)
					}
				case codeShift161919212224:
					if next, found := neighbor(); found {
						b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 24}, next, p.As)
					}
				case codeShift588102224:
					if next, found := neighbor(); found {
						b, ok = encodeShiftTriple(val, [6]int{5, 8, 8, 10, 22, 24}, next, p.As)
					}
				case codeLogicalImmArrEncoding:
					// Now that we know this is an immediate.
					// ARM64 allows imm13 to encode up to 64 bits of immediates.
					// addrComponent is not the right fit here, we need to extract [Offset] fields manually.
					if next, found := neighbor(); found {
						b, ok = encodeLogicalImmArrEncoding(uint64(addr.Offset), next)
					}
				case codeImm3Tsize1621:
					if next, found := neighbor(); found {
						b, ok = encodeImm3Tsize1621(val, addrComponent(next, aclass(next), 1))
					}
				case codeShiftI1TszhTszl:
					b, ok = encodeShiftI1TszhTszl(val, addrComponent(addr, AC_PREGSEL, 1))
				case codeNoOp:
					b, ok = 0, true
				default:
					panic(fmt.Errorf("unknown encoding function code %d", b))
				}
			}
			if !ok {
				return 0, false
			}
			bin |= b
			if code == codeNoOp {
				// NoOp encodings need neither checks nor bookkeeping.
				continue
			}
			if prev, seen := encoded[enc.comp]; seen && prev != b {
				return 0, false
			}
			if enc.comp != enc_NIL {
				encoded[enc.comp] = b
			}
		}
	}
	return bin, true
}