// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package arm64

import (
	"cmd/internal/obj"
	"fmt"
	"iter"
	"math"
	"math/bits"
)

// instEncoder describes one candidate machine encoding of a Go opcode.
// tryEncode starts from fixedBits and ORs in the bits produced by the
// element encoders of every operand listed in args.
type instEncoder struct {
	goOp      obj.As    // Go opcode mnemonic
	fixedBits uint32    // Known bits
	args      []operand // Operands, in Go order
}

// varBits describes a bit field of the binary encoding.
// NOTE(review): this type is not referenced in the visible part of the file;
// the field descriptions below are taken from the existing comments only.
type varBits struct {
	// The low and high bit index in the binary encoding, exclusive on hi
	// (i.e. the field occupies the half-open range [lo:hi)).
	lo, hi  int
	encoded bool // If true then its value is already encoded
	// NOTE(review): presumably the value of the field - confirm against users.
	bits    uint32
}

// component identifies a component of a binary encoding.
// e.g. for operand <Zda>.<T>, <T>'s encoding function might be described as:
//
//	For the "Byte and halfword" variant: is the size specifier,
//	sz	<T>
//	0	B
//	1	H
//	bit range mappings:
//	sz: [22:23)
//
// Then sz is the component of the binary encoding.
//
// tryEncode uses the component as a key to check that every element encoded
// into the same component produces the same bits.
type component uint16

// elemEncoder encodes one element of an operand (for example a register
// number or an arrangement) into instruction bits.
type elemEncoder struct {
	// fn maps the element value to its encoded bits; the bool is the ok signal.
	// As consumed by tryEncode, a result of (code, false) with a non-zero code
	// is not a failure: the code selects one of the special encoders
	// identified by the code* variables. (0, false) means "does not match".
	fn func(uint32) (uint32, bool)
	// comp is the component of the binary encoding.
	// Elements that share a comp must encode to the same bits.
	comp component
}

// operand is the operand type of an instruction.
type operand struct {
	class AClass // Operand class, register, constant, memory operation etc.
	// elemEncoders lists the elements that this operand includes; only the
	// encoding-related parts are listed. Each element is represented by its
	// encoding function.
	// The first value returned by an encoding function is the encoded binary,
	// the second is the ok signal.
	// The encoding functions return the ok signal for deduplication purposes.
	// For example:
	//	SDOT  <Zda>.<T>, <Zn>.<Tb>, <Zm>.<Tb>
	//	SDOT  <Zda>.H, <Zn>.B, <Zm>.B
	//	SDOT  <Zda>.S, <Zn>.H, <Zm>.H
	//
	// <T> and <Tb> are specified in the encoding text, which states the
	// constraint "T = 4*Tb". This fact cannot be learned from the encoding
	// format alone; without it the first encoding domain entails the other two,
	// and the instruction matching phase cannot deduplicate them. The
	// deduplication is therefore deferred to the encoding phase, where the ok
	// signal together with the [elemEncoder.comp] field makes it possible.
	elemEncoders []elemEncoder
}

// opsInProg returns an iterator over the operands ([obj.Addr]) of p.
//
// Operands are produced in Go order: From, RestArgs..., To. From and To are
// skipped when their type is obj.TYPE_NONE. p.Reg is never produced: for SVE
// it is unused, because registers so commonly carry arrangements.
func opsInProg(p *obj.Prog) iter.Seq[*obj.Addr] {
	return func(yield func(*obj.Addr) bool) {
		if p.From.Type != obj.TYPE_NONE && !yield(&p.From) {
			return
		}
		for idx := range p.RestArgs {
			if more := yield(&p.RestArgs[idx].Addr); !more {
				return
			}
		}
		if p.To.Type == obj.TYPE_NONE {
			return
		}
		// Last operand: nothing follows, so the consumer's answer is irrelevant.
		yield(&p.To)
	}
}

// aclass classifies a into the AClass used for operand matching.
//
// The classification is driven by a.Type and, for registers, by the range
// a.Reg falls into. The register checks are order sensitive and are tried
// top to bottom. It panics when a does not belong to any known class.
func aclass(a *obj.Addr) AClass {
	switch a.Type {
	case obj.TYPE_REG:
		r := a.Reg
		switch {
		case a.Offset&(int64(1)<<62) != 0:
			// Bit 62 of Offset is the sentinel of a packed AC_PREGSEL operand.
			return AC_PREGSEL
		case REG_Z0 <= r && r <= REG_Z31:
			return AC_ZREG
		case REG_P0 <= r && r <= REG_PN15:
			return AC_PREG
		case REG_ARNG <= r && r < REG_ELEM:
			return AC_ARNG
		case REG_ZARNG <= r && r < REG_ZARNGELEM:
			return AC_ARNG
		case REG_ZARNGELEM <= r && r < REG_PZELEM:
			return AC_ARNGIDX
		case REG_PZELEM <= r && r < REG_PARNGZM:
			// Bit 5 tells an indexed P register from an indexed Z register.
			if r&(1<<5) != 0 {
				return AC_PREGIDX
			}
			return AC_ZREGIDX
		case REG_PARNGZM <= r && r < REG_PARNGZM_END:
			// The 4 bits above the register number hold either a
			// predication (M or Z) or an arrangement.
			if pred := (r >> 5) & 15; pred == PRED_M || pred == PRED_Z {
				return AC_PREGZM
			}
			return AC_ARNG
		case REG_V0 <= r && r <= REG_V31:
			return AC_VREG
		case REG_R0 <= r && r <= REG_R31, r == REG_RSP:
			return AC_SPZGREG
		}
	case obj.TYPE_CONST, obj.TYPE_FCONST:
		return AC_IMM
	case obj.TYPE_REGLIST:
		if a.Scale > 0 {
			return AC_REGLIST_RANGE
		}
		// Bits [12:16) of Offset select the number of registers in the list.
		switch (a.Offset >> 12) & 0xf {
		case 0x7:
			return AC_REGLIST1
		case 0xa:
			return AC_REGLIST2
		case 0x6:
			return AC_REGLIST3
		case 0x2:
			return AC_REGLIST4
		}
	case obj.TYPE_MEM:
		switch {
		case a.Index != 0:
			return AC_MEMEXT
		case a.Scale&-32768 != 0:
			// The top bit of Scale selects the MUL VL form of the offset.
			return AC_MEMOFFMULVL
		default:
			return AC_MEMOFF
		}
	case obj.TYPE_SPECIAL:
		return AC_SPECIAL
	}
	panic(fmt.Errorf("unknown AClass, addr = %v\n", a))
}

// addrComponent extracts from a the raw value of the element at position
// index, where a is an operand of class acl.
//
// For example, an AC_ARNG operand has 2 permissible elements:
//  0. register: <reg>
//  1. arrangement: <T>
//
// which are stored in a.Reg as:
//
//	reg | (arrangement << 5)
//
// The layout of every supported class is described next to its case below.
// The function panics on an index the class does not define, and on a class
// it does not handle.
func addrComponent(a *obj.Addr, acl AClass, index int) uint32 {
	// badIndex aborts on an element index that acl does not define.
	badIndex := func() uint32 {
		panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl))
	}

	switch acl {
	// AC_PREGSEL
	//	GNU syntax: <preg>.<T>[<selreg>, <imm>]
	//	Go syntax:  [selreg, $idximm](preg.T)
	//	Storage:    Type = TYPE_REG
	//	            Offset = preg (5 bits) | T (4 bits) | selreg (5 bits) |
	//	                     idximm (6 bits) | sentinel (bit 62)
	case AC_PREGSEL:
		packed := a.Offset
		switch index {
		case 0:
			return uint32(packed & 31)
		case 1:
			return uint32((packed >> 5) & 15)
		case 2:
			// The encoding functions expect an ARM64 register number.
			return uint32((packed>>9)&31 + REG_R0)
		case 3:
			// Width check of selreg. It is always W for AC_PREGSEL and was
			// already resolved by the generator, so a default of 0 is returned.
			return 0
		case 4:
			return uint32((packed >> 14) & 63)
		}
		return badIndex()

	// AC_ARNG, AC_PREG, AC_PREGZM, AC_ZREG
	//	GNU syntax: <reg>.<T>, or <reg>/<T> where T is M or Z
	//	Go syntax:  reg.<T>
	//	Storage:    Type = TYPE_REG
	//	            Reg = reg | (arrangement or predication << 5)
	case AC_ARNG, AC_PREG, AC_PREGZM, AC_ZREG:
		if index == 0 {
			return uint32(a.Reg & 31)
		}
		if index == 1 {
			return uint32((a.Reg >> 5) & 15)
		}
		return badIndex()

	// AC_ARNGIDX, AC_PREGIDX, AC_ZREGIDX
	//	GNU syntax: <reg>.<T>[<index>]
	//	Go syntax:  reg.T[index]
	//	Storage:    Type = TYPE_REG
	//	            Reg = reg | (arrangement << 5)
	//	            Index = index
	case AC_ARNGIDX, AC_PREGIDX, AC_ZREGIDX:
		switch index {
		case 0: // register
			return uint32(a.Reg & 31)
		case 1: // arrangement
			return uint32((a.Reg >> 5) & 15)
		case 2: // element index
			return uint32(a.Index)
		}
		return badIndex()

	// AC_SPZGREG, AC_VREG
	//	GNU syntax: <width><reg>
	//	Go syntax:  reg (the width is part of the opcode)
	//	Storage:    Type = TYPE_REG
	//	            Reg = reg
	case AC_SPZGREG, AC_VREG:
		if index == 0 {
			// Width checks only; they should all map to no-op checks.
			return 0
		}
		if index == 1 {
			return uint32(a.Reg)
		}
		return badIndex()

	// AC_IMM
	//	GNU syntax: <imm>, <shift>
	//	Go syntax:  $imm<<shift
	//	Storage:    Type = TYPE_CONST or TYPE_FCONST
	//	            Offset = imm (shift already applied)
	case AC_IMM:
		if index != 0 {
			return badIndex()
		}
		if a.Type != obj.TYPE_FCONST {
			return uint32(a.Offset)
		}
		// Floating-point immediates travel as the bits of a float32.
		if f, isFloat := a.Val.(float64); isFloat {
			return math.Float32bits(float32(f))
		}
		panic(fmt.Errorf("unknown float immediate value %v", a.Val))

	// AC_REGLIST1, AC_REGLIST2, AC_REGLIST3, AC_REGLIST4, AC_REGLIST_RANGE
	//	GNU syntax: {reg1.T, reg2.T, ...}
	//	Go syntax:  [reg1.T, reg2.T, ...]
	//	Storage:    Type = TYPE_REGLIST
	//	            Offset = register prefix | register count |
	//	                     arrangement (opcode) | first register
	//	            Scale = range size - 1 (if REGLIST_RANGE)
	// Even indexes select a register, odd indexes its arrangement.
	case AC_REGLIST1, AC_REGLIST2, AC_REGLIST3, AC_REGLIST4, AC_REGLIST_RANGE:
		var maxIndex int
		switch acl {
		case AC_REGLIST1:
			maxIndex = 2
		case AC_REGLIST2, AC_REGLIST_RANGE:
			// A range behaves just like a two-register list.
			maxIndex = 4
		case AC_REGLIST3:
			maxIndex = 6
		case AC_REGLIST4:
			maxIndex = 8
		}
		if index > maxIndex {
			return badIndex()
		}

		switch index % 2 {
		case 0:
			// Register numbers wrap around; the wrap point is 16 when the
			// register prefix is 2 and 32 otherwise.
			wrap := 32
			if a.Offset>>32&0b11 == 2 {
				wrap = 16
			}
			// SVE register ranges may be discontiguous: a positive Scale is
			// the distance between consecutive registers.
			stride := 1
			if a.Scale > 0 {
				stride = int(a.Scale)
			}
			first := int(a.Offset & 31)
			return uint32((first + (index/2)*stride) % wrap)
		case 1:
			// The arrangement is determined by the Q field (bits [30:32)) and
			// the size field (bits [10:12)) of Offset.
			q := a.Offset >> 30 & 0b11
			size := a.Offset >> 10 & 0b11
			switch q<<2 | size {
			case 0b00_00:
				return ARNG_8B
			case 0b00_01:
				return ARNG_4H
			case 0b00_10:
				return ARNG_2S
			case 0b00_11:
				return ARNG_1D
			case 0b01_00:
				return ARNG_16B
			case 0b01_01:
				return ARNG_8H
			case 0b01_10:
				return ARNG_4S
			case 0b01_11:
				return ARNG_2D
			case 0b10_01:
				return ARNG_B
			case 0b10_10:
				return ARNG_H
			case 0b10_11:
				return ARNG_S
			case 0b11_01:
				return ARNG_D
			case 0b11_10:
				return ARNG_Q
			}
			panic(fmt.Errorf("unknown size value at %d in AClass %d", index, acl))
		}
		return badIndex()

	// AC_SPECIAL
	//	GNU syntax: <special>
	//	Go syntax:  special
	//	Storage:    Type = TYPE_SPECIAL
	//	            Offset = SpecialOperand enum value
	case AC_SPECIAL:
		if index != 0 {
			return badIndex()
		}
		return uint32(a.Offset)

	// AC_MEMOFF, AC_MEMOFFMULVL
	//	GNU syntax: [<reg>.<T>, #<imm>]
	//	Go syntax:  imm(reg.T)
	//	Storage:    Type = TYPE_MEM
	//	            Reg = base register (with arrangement if applicable)
	//	            Offset = immediate offset
	case AC_MEMOFF, AC_MEMOFFMULVL:
		switch index {
		case 0:
			return uint32(a.Reg & 31)
		case 1:
			return uint32((a.Reg >> 5) & 15)
		case 2:
			return uint32(a.Offset)
		}
		return badIndex()

	// AC_MEMEXT
	//	GNU syntax: [<reg1>.<T1>, <reg2>.<T2>, <mod> <amount>]
	//	Go syntax:  (reg2.T2.mod<<amount)(reg1.T1)
	//	Storage:    Type = TYPE_MEM
	//	            Reg = index register (with arrangement if applicable)
	//	            Index = base register (with arrangement if applicable)
	//	            Scale = packed mod and amount
	case AC_MEMEXT:
		switch index {
		case 0:
			return uint32(a.Index)
		case 1:
			return uint32((a.Index >> 5) & 15)
		case 2:
			return uint32(a.Reg)
		case 3:
			return uint32((a.Reg >> 5) & 15)
		case 4:
			// mod is either 1 (UXTW), 2 (SXTW), or 4 (LSL).
			mod := uint32((a.Scale >> 9) & 0x7)
			amount := uint32((a.Scale >> 12) & 0x7)
			if mod == 0 && amount > 0 {
				// No extension was given but there is a shift amount:
				// LSL is implied.
				return 1 << 2
			}
			return mod
		case 5:
			return uint32((a.Scale >> 12) & 0x7)
		}
		return badIndex()
	}
	// TODO: handle more AClasses.
	panic(fmt.Errorf("unknown AClass %d", acl))
}

// Special codes an element encoding function may return together with a
// false ok signal. tryEncode recognizes them and dispatches to the encoder
// of the matching name (codeNoOp encodes nothing), because those encoders
// need more context than the single element value.
var (
	codeI1Tsz                 uint32 = 0xffffffff
	codeImm2Tsz               uint32 = 0xfffffffe
	codeShift161919212223     uint32 = 0xfffffffd
	codeShift161919212224     uint32 = 0xfffffffc
	codeShift588102224        uint32 = 0xfffffffb
	codeLogicalImmArrEncoding uint32 = 0xfffffffa
	codeImm3Tsize1621         uint32 = 0xfffffff9
	codeShiftI1TszhTszl       uint32 = 0xfffffff8
	codeNoOp                  uint32 = 0xfffffff7
)

// encodeI1Tsz implements the following encoding rule:
// Is the immediate index, in the range 0 to one less than the number of elements in 128 bits, encoded in "i1:tsz".
// bit range mappings:
// i1: [20:21)
// tsz: [16:20)
//
// v is the index and arr is the arrangement of the indexed register.
// Per the spec the index is aligned to the high bit of the "i1:tsz" box, so
// wider elements place it further left. The result is (0, false) when the
// arrangement is not supported or v is out of range for it.
func encodeI1Tsz(v, arr uint32) (uint32, bool) {
	// lsb is the bit position of the lowest index bit; maxIdx is the largest
	// index the arrangement permits.
	var lsb, maxIdx uint32
	switch arr {
	case ARNG_B:
		lsb, maxIdx = 17, 15
	case ARNG_H:
		lsb, maxIdx = 18, 7
	case ARNG_S:
		lsb, maxIdx = 19, 3
	case ARNG_D:
		lsb, maxIdx = 20, 1
	case ARNG_Q:
		// Only index 0 exists, so no index bits are ever set.
		lsb, maxIdx = 21, 0
	default:
		return 0, false
	}
	if v > maxIdx {
		return 0, false
	}
	return v << lsb, true
}

// encodeImm2Tsz implements the following encoding rule:
// Is the immediate index, in the range 0 to one less than the number of elements in 512 bits, encoded in "imm2:tsz".
// bit range mappings:
// imm2: [22:24)
// tsz: [16:21)
//
// v is the index and arr is the arrangement of the indexed register.
// Per the spec the index is aligned to the high bit of the "imm2:tsz" box:
// it is first shifted left inside the 7-bit value, then the low 5 bits go to
// tsz and the high 2 bits to imm2. The result is (0, false) when the
// arrangement is not supported or v is out of range for it.
func encodeImm2Tsz(v, arr uint32) (uint32, bool) {
	// align is how far the index is shifted within "imm2:tsz"; maxIdx is the
	// largest index the arrangement permits.
	var align, maxIdx uint32
	switch arr {
	case ARNG_B:
		align, maxIdx = 1, 63
	case ARNG_H:
		align, maxIdx = 2, 31
	case ARNG_S:
		align, maxIdx = 3, 15
	case ARNG_D:
		align, maxIdx = 4, 7
	case ARNG_Q:
		align, maxIdx = 5, 3
	default:
		return 0, false
	}
	if v > maxIdx {
		return 0, false
	}
	field := v << align
	tsz := field & 31
	imm2 := field >> 5
	return tsz<<16 | imm2<<22, true
}

// arrAlignType enumerates sets of arrangements.
// NOTE(review): neither the type nor its constants are referenced in the
// visible part of this file; judging by the names only, each constant
// presumably names the arrangements (B, H, S, D) an encoding accepts -
// confirm against the users.
type arrAlignType int

const (
	arrAlignBHSD arrAlignType = iota
	arrAlignHSD
	arrAlignBHS
)

// encodeShiftTriple encodes a shift immediate value in "tszh:tszl:imm3".
//
// v is the shift amount. r holds the three half-open bit ranges of the
// fields as [lo, hi) pairs sorted by bit position, i.e. imm3, tszl, tszh.
// prevAddr is the vector operand whose arrangement bounds the shift; it must
// be of class AC_ARNG with arrangement B, H, S or D. op selects the direction
// of the shift and whether the instruction narrows.
//
// It returns (0, false) when prevAddr is not a suitable operand, when v is
// outside the range the instruction permits, or when the encoded value does
// not fit in the given bit ranges.
func encodeShiftTriple(v uint32, r [6]int, prevAddr *obj.Addr, op obj.As) (uint32, bool) {
	// The previous op must be a scalable vector, and we need its arrangement.
	acl := aclass(prevAddr)
	if acl != AC_ARNG {
		return 0, false
	}
	var elemBits uint32
	switch addrComponent(prevAddr, acl, 1) { // element 1 is the arrangement
	case ARNG_B:
		elemBits = 8
	case ARNG_H:
		elemBits = 16
	case ARNG_S:
		elemBits = 32
	case ARNG_D:
		elemBits = 64
	default:
		return 0, false
	}

	// Unfortunately this information only lives in the decoding ASL.
	// For the narrowing instructions the esize used by the ASL is derived from
	// the destination arrangement, whereas this function is handed one of the
	// sources, whose elements are twice as wide. Compensate for that here.
	esize := elemBits
	switch op {
	case AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT,
		AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT:
		esize = elemBits / 2
	}

	// field is the value of tsize:imm3.
	var field uint32
	switch op {
	case AZASR, AZLSR, AZURSHR, AZASRD,
		AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT,
		AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZSRSHR, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT,
		AZURSRA, AZUSRA, AZXAR, AZSRI, AZSRSRA, AZSSRA:
		// Right shifts.
		// ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3);
		// The permitted range is 1 to esize inclusive. Anything larger would
		// yield a tsize that selects a different (or no) element size.
		if v < 1 || v > esize {
			return 0, false
		}
		field = 2*esize - v
	default:
		// Left shifts.
		// ASL: let shift : integer = UInt(tsize::imm3) - esize;
		// The permitted range is 0 to esize-1 inclusive.
		if v >= esize {
			return 0, false
		}
		field = esize + v
	}

	// Scatter field over the three bit ranges, lowest range first.
	var bin uint32
	for i := 0; i < len(r); i += 2 {
		width := r[i+1] - r[i]
		bin |= (field & (1<<width - 1)) << r[i]
		field >>= width
	}
	if field != 0 {
		// The value needs more bits than the ranges provide (for example a
		// 64-bit element with a one-bit tszh); truncating it would silently
		// encode a different element size.
		return 0, false
	}
	return bin, true
}

// encodeLogicalImmArrEncoding is the implementation of the following encoding logic:
// Is the size specifier,
// imm13	<T>
// 0xxxxxx0xxxxx	S
// 0xxxxxx10xxxx	H
// 0xxxxxx110xxx	B
// 0xxxxxx1110xx	B
// 0xxxxxx11110x	B
// 0xxxxxx11111x	RESERVED
// 1xxxxxxxxxxxx	D
// At the same time:
// Is a 64, 32, 16 or 8-bit bitmask consisting of replicated 2, 4, 8, 16, 32 or 64 bit fields,
// each field containing a rotated run of non-zero bits, encoded in the "imm13" field.
//
// bit range mappings:
// imm13: [5:18)
//
// ARM created a "clever" recipe that can generate useful repeating 8-64 bit bitmasks.
// Instead of storing the literal binary number, the processor reads a 13-bit recipe
// using three fields (bits from high to low):
// N (1 bit), immr (6 bits), and imms (6 bits).
//
// How the recipe works:
// Every logical immediate represents a repeating pattern (like repeating tiles). The processor
// uses the three fields to figure out the size of the tile, how many 1s are in the tile, and
// how far to rotate it.
// The N bit combined with the upper bits of imms determines the width of the repeating block.
// Depending on these bits, the fundamental block can be 2, 4, 8, 16, 32, or 64 bits wide.
// The lower bits of imms dictate exactly how many contiguous 1s exist inside that block.
// The immr value tells the processor how many bits to rotate that block to the right.
// Finally, the resulting block is duplicated to fill a standard 64-bit lane.
//
// v is the immediate and adjacentAddr is the vector operand that supplies the
// arrangement; it must be of class AC_ARNG with arrangement B, H, S or D.
// Only the low lane-width bits of v are used. The function returns (0, false)
// when the operand is unsuitable or the immediate cannot be expressed as a
// logical immediate.
func encodeLogicalImmArrEncoding(v uint64, adjacentAddr *obj.Addr) (uint32, bool) {
	acl := aclass(adjacentAddr)
	if acl != AC_ARNG {
		return 0, false
	}

	// Replicate the given immediate to fill a full 64-bit lane.
	// This ensures the pattern-shrinking logic below naturally respects the
	// vector lane bounds. Every lane narrower than 64 bits is masked first, so
	// that a sign-extended (negative) immediate is treated uniformly.
	var val uint64
	switch addrComponent(adjacentAddr, acl, 1) { // element 1 is the arrangement
	case ARNG_B: // 8-bit lane
		val = (v & 0xFF) * 0x0101010101010101
	case ARNG_H: // 16-bit lane
		val = (v & 0xFFFF) * 0x0001000100010001
	case ARNG_S: // 32-bit lane
		lane := v & 0xFFFFFFFF
		val = lane | lane<<32
	case ARNG_D: // 64-bit lane
		val = v
	default:
		return 0, false
	}

	// Reject all zeros or all ones (handled by MOV/EOR, invalid for AND/ORR immediates)
	if val == 0 || val == ^uint64(0) {
		return 0, false
	}

	// Find the smallest repeating pattern size (64 down to 2): as long as the
	// top half of the window equals the bottom half, halve the window.
	size := uint64(64)
	for size > 2 {
		half := size / 2
		halfMask := uint64(1)<<half - 1
		if val&halfMask != (val>>half)&halfMask {
			break
		}
		size = half
		val &= halfMask
	}

	// Count the ones in this minimal pattern.
	mask := uint64(1)<<size - 1
	val &= mask
	ones := bits.OnesCount64(val)

	// Find the right-rotation (rot) that aligns the 1s at the bottom. If no
	// rotation yields one contiguous run, the immediate is not encodable.
	run := uint64(1)<<ones - 1
	rot := -1
	for r := 0; r < int(size); r++ {
		// Right rotate val by r bits within a size-bit window.
		if ((val>>r)|(val<<(int(size)-r)))&mask == run {
			rot = r
			break
		}
	}
	if rot == -1 {
		return 0, false
	}

	// immr is the amount the hardware must right-rotate the base pattern.
	// Since rot is how much the target was right-rotated to find the base,
	// the hardware needs the inverse rotation.
	immr := uint32((int(size) - rot) % int(size))

	// N is set only for a 64-bit pattern.
	var n uint32
	if size == 64 {
		n = 1
	}

	// The imms prefix is generated by (~(size*2 - 1) & 0x3F).
	// It is then ORed with the number of ones (minus 1).
	imms := uint32(^(size*2-1))&0x3F | uint32(ones-1)

	// Construct the 13-bit field N (1) | immr (6) | imms (6) and place it at
	// instruction bits [5:18).
	return (n<<12 | immr<<6 | imms) << 5, true
}

// encodeImm3Tsize1621 implements the following encoding rule:
// Is the immediate shift amount, in the range 1 to number of bits per element, encoded in "tsize:imm3".
// bit range mappings:
// imm3: [16:19)
// tsize: [19:21)
//
// v is the shift amount and srcArr is the <Tb> in the source reglist (ranged).
// Only H and S sources are accepted; anything else, or a shift amount outside
// the permitted range, yields (0, false).
func encodeImm3Tsize1621(v uint32, srcArr uint32) (uint32, bool) {
	// destBits is the width of a destination element, which is half the
	// width of a source element. It stays 0 for unsupported arrangements.
	var destBits uint32
	switch srcArr {
	case ARNG_H:
		destBits = 8
	case ARNG_S:
		destBits = 16
	}
	if destBits == 0 || v == 0 || v > destBits {
		return 0, false
	}
	// From ARM ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3);
	// hence tsize::imm3 = (2 * esize) - shift.
	field := (2*destBits - v) & 0x1f
	return field << 16, true
}

// encodeShiftI1TszhTszl implements the following encoding rule:
// Is the element index, in the range 0 to one less than the number of vector elements in a 128-bit vector register, encoded in "i1:tszh:tszl".
// bit range mappings:
// i1: [23:24)
// tszh: [22:23)
// tszl: [18:21)
//
// v is the element index and arr is the arrangement. Only B, H, S and D are
// accepted; anything else, or an index that is out of range for the
// arrangement, yields (0, false).
func encodeShiftI1TszhTszl(v uint32, arr uint32) (uint32, bool) {
	// align is how far the index is shifted inside the 5-bit "i1:tszh:tszl"
	// value; wider elements leave fewer bits for the index.
	var align uint32
	switch arr {
	case ARNG_B:
		align = 1
	case ARNG_H:
		align = 2
	case ARNG_S:
		align = 3
	case ARNG_D:
		align = 4
	default:
		return 0, false
	}
	// A 128-bit vector holds 16, 8, 4 or 2 elements of B, H, S or D.
	elems := uint32(32) >> align
	if v >= elems {
		return 0, false
	}
	field := v << align
	// The low 3 bits form tszl, the remaining 2 bits form i1:tszh.
	tszl := field & 0x7
	i1tszh := field >> 3
	return tszl<<18 | i1tszh<<22, true
}

// tryEncode tries to encode p with i. It returns the encoded binary and the
// ok signal; ok is false when p does not match this encoder.
//
// p matches when it has as many operands as i.args, every operand has the
// class its slot expects, every element encoder accepts its element, and all
// elements that share a component encode to the same bits.
// It panics when an element encoder returns an unknown special code.
func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) {
	var addrs []*obj.Addr
	for a := range opsInProg(p) {
		addrs = append(addrs, a)
	}
	if len(addrs) != len(i.args) {
		return 0, false
	}

	bin := i.fixedBits
	// Some elements are encoded in the same component, they need to be equal.
	// For example { <Zn1>.<Tb>-<Zn2>.<Tb> }.
	// The 2 instances of <Tb> must encode to the same value.
	// seen remembers the bits already produced for each component.
	seen := map[component]uint32{}

	for opIdx, addr := range addrs {
		want := i.args[opIdx]
		acl := aclass(addr)
		if acl != want.class {
			return 0, false
		}
		for elemIdx, enc := range want.elemEncoders {
			val := addrComponent(addr, acl, elemIdx)
			if acl == AC_IMM && (p.As == AZFCPY || p.As == AZFDUP) {
				// These instructions expect ARM's 8-bit float encoding.
				// Reinterpret the uint32 bits as a float32, widen it to
				// float64 and let chipfloat7 produce the 8-bit form;
				// -1 signals a value that cannot be represented.
				imm8 := (&ctxt7{}).chipfloat7(float64(math.Float32frombits(val)))
				if imm8 == -1 {
					return 0, false
				}
				val = uint32(imm8)
			}

			b, ok := enc.fn(val)
			if !ok && b == 0 {
				return 0, false
			}
			// code is what the encoding function itself returned. When ok is
			// false it is a special code asking for an encoder that needs
			// more context than the element value alone.
			code := b
			if !ok {
				switch code {
				case codeI1Tsz:
					b, ok = encodeI1Tsz(val, addrComponent(addr, acl, elemIdx-1))
				case codeImm2Tsz:
					b, ok = encodeImm2Tsz(val, addrComponent(addr, acl, elemIdx-1))
				case codeShift161919212223:
					b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 23}, addrs[opIdx+1], p.As)
				case codeShift161919212224:
					b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 24}, addrs[opIdx+1], p.As)
				case codeShift588102224:
					b, ok = encodeShiftTriple(val, [6]int{5, 8, 8, 10, 22, 24}, addrs[opIdx+1], p.As)
				case codeLogicalImmArrEncoding:
					// The operand is known to be an immediate here.
					// ARM64 allows imm13 to encode up to 64 bits of immediate,
					// which addrComponent would truncate, so Offset is read
					// directly.
					b, ok = encodeLogicalImmArrEncoding(uint64(addr.Offset), addrs[opIdx+1])
				case codeImm3Tsize1621:
					src := addrs[opIdx+1]
					b, ok = encodeImm3Tsize1621(val, addrComponent(src, aclass(src), 1))
				case codeShiftI1TszhTszl:
					b, ok = encodeShiftI1TszhTszl(val, addrComponent(addr, AC_PREGSEL, 1))
				case codeNoOp:
					b, ok = 0, true
				default:
					panic(fmt.Errorf("unknown encoding function code %d", b))
				}
				if !ok {
					return 0, false
				}
			}
			bin |= b

			// NoOp encodings need neither the consistency check nor the
			// bookkeeping.
			noOp := code == codeNoOp
			if prev, dup := seen[enc.comp]; dup && prev != b {
				if noOp {
					continue
				}
				return 0, false
			}
			if enc.comp != enc_NIL && !noOp {
				seen[enc.comp] = b
			}
		}
	}
	return bin, true
}
