blob: 71cae158f770fc40bf8b8ceecfbe297ef03f19e4 [file] [log] [blame]
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"log"
"regexp"
"slices"
"strconv"
"strings"
"unicode"
"simd/archsimd/_gen/unify"
)
// Operation describes one SIMD API operation. It embeds the raw
// unifier form and adds fields derived from it during decoding
// (see DecodeUnified).
type Operation struct {
	rawOperation

	// Go is the Go method name of this operation.
	//
	// It is derived from the raw Go method name by adding optional suffixes.
	// Currently, "Masked" is the only suffix.
	Go string

	// Documentation is the doc string for this API.
	//
	// It is computed from the raw documentation:
	//
	//   - "NAME" is replaced by the Go method name.
	//
	//   - For masked operation, a sentence about masking is added.
	Documentation string

	// In is the sequence of parameters to the Go method.
	//
	// For masked operations, this will have the mask operand appended.
	In []Operand
}
// rawOperation is the unifier representation of an [Operation]. It is
// translated into a more parsed form after unifier decoding.
type rawOperation struct {
	Go           string  // Base Go method name
	GoArch       string  // GOARCH for this definition
	Asm          string  // Assembly mnemonic
	OperandOrder *string // optional Operand order for better Go declarations
	// SpecialLower is an optional tag to indicate this operation is paired
	// with special generic->machine ssa lowering rules.
	// Should be paired with special templates in gen_simdrules.go.
	SpecialLower    *string
	In              []Operand // Parameters
	InVariant       []Operand // Optional parameters
	Out             []Operand // Results
	MemFeatures     *string   // The memory operand feature this operation supports
	MemFeaturesData *string   // Additional data associated with MemFeatures
	Commutative     bool      // Commutativity
	CPUFeature      string    // CPUID/Has* feature name
	Zeroing         *bool     // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"
	Documentation   *string   // Documentation will be appended to the stubs comments.
	AddDoc          *string   // Additional doc to be appended.
	// ConstImm is a hack to reduce the size of defs the user writes for
	// const-immediate operations.
	// If present, it will be copied to [In[0].Const].
	ConstImm *string
	// NameAndSizeCheck is used to check [BWDQ] maps to (8|16|32|64) elemBits.
	NameAndSizeCheck *bool
	// If non-nil, all generation in gen_simdTypes.go and gen_intrinsics will be skipped.
	NoTypes *string
	// If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped.
	NoGenericOps *string
	// If non-nil, this string will be attached to the machine ssa op name. E.g. "const".
	SSAVariant *string
	// If true, do not emit method declarations, generic ops, or intrinsics for masked variants.
	// DO emit the architecture-specific opcodes and optimizations.
	HideMaskMethods *bool
}
// IsMasked reports whether this operation takes a write-mask operand.
// The only recognized variant-parameter shape is a single "mask"
// operand; anything else panics.
func (o *Operation) IsMasked() bool {
	switch len(o.InVariant) {
	case 0:
		return false
	case 1:
		if o.InVariant[0].Class == "mask" {
			return true
		}
	}
	panic(fmt.Errorf("unknown inVariant"))
}
// SkipMaskedMethod reports whether the masked-method declaration for
// this operation should be suppressed: true only when HideMaskMethods
// is set and the operation is in fact masked.
func (o *Operation) SkipMaskedMethod() bool {
	return o.HideMaskMethods != nil && *o.HideMaskMethods && o.IsMasked()
}
// reForName matches the placeholder word "NAME" in raw documentation
// strings; occurrences are replaced with the computed Go method name.
var reForName = regexp.MustCompile(`\bNAME\b`)
// DecodeUnified decodes v into the raw operation and then computes the
// derived fields: the full Go method name, the final doc string, and
// the complete input operand list (parameters plus variant parameters).
//
// For masked operations the method name gains a "Masked" suffix and a
// sentence about write masks is appended to the documentation; for
// exported masked methods, generic-op and method-stub generation is
// additionally suppressed.
func (o *Operation) DecodeUnified(v *unify.Value) error {
	if err := v.Decode(&o.rawOperation); err != nil {
		return err
	}
	isMasked := o.IsMasked()

	// Compute full Go method name.
	o.Go = o.rawOperation.Go
	if isMasked {
		o.Go += "Masked"
	}

	// Compute doc string.
	if o.rawOperation.Documentation != nil {
		o.Documentation = *o.rawOperation.Documentation
	} else {
		o.Documentation = "// UNDOCUMENTED"
	}
	o.Documentation = reForName.ReplaceAllString(o.Documentation, o.Go)
	if isMasked {
		o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
		// Suppress generic op and method declaration for exported methods, if a mask is present.
		if unicode.IsUpper([]rune(o.Go)[0]) {
			trueVal := "true"
			o.NoGenericOps = &trueVal
			o.NoTypes = &trueVal
		}
	}
	if o.rawOperation.AddDoc != nil {
		o.Documentation += "\n" + reForName.ReplaceAllString(*o.rawOperation.AddDoc, o.Go)
	}

	// Build o.In in a freshly allocated slice. The previous
	// append(o.rawOperation.In, o.rawOperation.InVariant...) could share
	// (or even write into) the backing array of rawOperation.In when it
	// had spare capacity, silently coupling o.In to the raw data.
	o.In = slices.Concat(o.rawOperation.In, o.rawOperation.InVariant)

	// For down conversions, the high elements are zeroed if the result has more elements.
	// TODO: we should encode this logic in the YAML file, instead of hardcoding it here.
	if len(o.In) > 0 && len(o.Out) > 0 {
		inLanes := o.In[0].Lanes
		outLanes := o.Out[0].Lanes
		if inLanes != nil && outLanes != nil && *inLanes < *outLanes {
			if (strings.Contains(o.Go, "Saturate") || strings.Contains(o.Go, "Truncate")) &&
				!strings.Contains(o.Go, "Concat") {
				o.Documentation += "\n// Results are packed to low elements in the returned vector, its upper elements are zeroed."
			}
		}
	}
	return nil
}
// VectorWidth returns the vector bit width of this operation: the
// width of the first result when it is a vector register, otherwise
// (scalar or mask result) the width of the first vector-register
// input. It panics if no width can be determined.
func (o *Operation) VectorWidth() int {
	switch out := o.Out[0]; out.Class {
	case "vreg":
		return *out.Bits
	case "greg", "mask":
		for _, in := range o.In {
			if in.Class == "vreg" {
				return *in.Bits
			}
		}
	}
	panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o))
}
// Right now simdgen computes the machine op name for most instructions
// as $Name$OutputSize; by this denotation, these instructions are "overloaded".
// For example:
//
//	(Uint16x8) ConvertToInt8
//	(Uint16x16) ConvertToInt8
//
// are both VPMOVWB128. To make them distinguishable we need to append
// the input size to them as well (see machineOpName).
// TODO: document them well in the generated code.
var demotingConvertOps = map[string]bool{
	"VPMOVQD128": true, "VPMOVSQD128": true, "VPMOVUSQD128": true, "VPMOVQW128": true, "VPMOVSQW128": true,
	"VPMOVUSQW128": true, "VPMOVDW128": true, "VPMOVSDW128": true, "VPMOVUSDW128": true, "VPMOVQB128": true,
	"VPMOVSQB128": true, "VPMOVUSQB128": true, "VPMOVDB128": true, "VPMOVSDB128": true, "VPMOVUSDB128": true,
	"VPMOVWB128": true, "VPMOVSWB128": true, "VPMOVUSWB128": true,
	"VPMOVQDMasked128": true, "VPMOVSQDMasked128": true, "VPMOVUSQDMasked128": true, "VPMOVQWMasked128": true, "VPMOVSQWMasked128": true,
	"VPMOVUSQWMasked128": true, "VPMOVDWMasked128": true, "VPMOVSDWMasked128": true, "VPMOVUSDWMasked128": true, "VPMOVQBMasked128": true,
	"VPMOVSQBMasked128": true, "VPMOVUSQBMasked128": true, "VPMOVDBMasked128": true, "VPMOVSDBMasked128": true, "VPMOVUSDBMasked128": true,
	"VPMOVWBMasked128": true, "VPMOVSWBMasked128": true, "VPMOVUSWBMasked128": true,
}
// machineOpName computes the machine SSA op name for gOp: the assembly
// mnemonic, an optional "Masked" tag, the vector width, an optional
// SSA-variant suffix, and — for the "overloaded" demoting conversions —
// the source element width as a disambiguating suffix.
func machineOpName(maskType maskShape, gOp Operation) string {
	name := gOp.Asm
	if maskType == OneMask {
		name += "Masked"
	}
	name += strconv.Itoa(gOp.VectorWidth())
	if gOp.SSAVariant != nil {
		name += *gOp.SSAVariant
	}
	if demotingConvertOps[name] {
		// Need to append the size of the source as well.
		// TODO: should be "%sto%d".
		name = fmt.Sprintf("%s_%d", name, *gOp.In[0].Bits)
	}
	return name
}
// compareStringPointers orders two optional strings: nil sorts before
// non-nil, and two non-nil values are ordered by compareNatural.
func compareStringPointers(x, y *string) int {
	switch {
	case x == nil && y == nil:
		return 0
	case x == nil:
		return -1
	case y == nil:
		return 1
	}
	return compareNatural(*x, *y)
}
// compareIntPointers orders two optional ints: nil sorts before
// non-nil, and two non-nil values are ordered numerically.
//
// It returns -1, 0, or +1. (The previous *x - *y could overflow for
// operands of opposite sign with large magnitude; a comparator only
// needs the sign, so return it directly.)
func compareIntPointers(x, y *int) int {
	switch {
	case x == nil && y == nil:
		return 0
	case x == nil:
		return -1
	case y == nil:
		return 1
	case *x < *y:
		return -1
	case *x > *y:
		return 1
	}
	return 0
}
// compareOperations is a total order on operations used for sorting and
// deduplication: first by Go method name (natural order), then by
// parameter count, result count, and finally the operands themselves.
// A trailing mask operand on the longer parameter list is ignored so a
// masked variant sorts next to its unmasked counterpart.
func compareOperations(x, y Operation) int {
	if c := compareNatural(x.Go, y.Go); c != 0 {
		return c
	}
	xIn, yIn := x.In, y.In
	switch {
	case len(xIn) > len(yIn) && xIn[len(xIn)-1].Class == "mask":
		xIn = xIn[:len(xIn)-1]
	case len(xIn) < len(yIn) && yIn[len(yIn)-1].Class == "mask":
		yIn = yIn[:len(yIn)-1]
	}
	if len(xIn) != len(yIn) {
		if len(xIn) < len(yIn) {
			return -1
		}
		return 1
	}
	if len(x.Out) != len(y.Out) {
		if len(x.Out) < len(y.Out) {
			return -1
		}
		return 1
	}
	for i := range xIn {
		if c := compareOperands(&xIn[i], &yIn[i]); c != 0 {
			return c
		}
	}
	return 0
}
// compareOperands is a total order on operands: by class first, then —
// for immediates — by immediate offset only, and otherwise by base
// type, element width, and total width.
func compareOperands(x, y *Operand) int {
	if c := compareNatural(x.Class, y.Class); c != 0 {
		return c
	}
	if x.Class == "immediate" {
		return compareStringPointers(x.ImmOffset, y.ImmOffset)
	}
	if c := compareStringPointers(x.Base, y.Base); c != 0 {
		return c
	}
	if c := compareIntPointers(x.ElemBits, y.ElemBits); c != 0 {
		return c
	}
	return compareIntPointers(x.Bits, y.Bits)
}
// Operand describes a single input or output of an operation.
type Operand struct {
	Class  string  // One of "mask", "immediate", "vreg", "greg", and "mem"
	Go     *string // Go type of this operand
	AsmPos int     // Position of this operand in the assembly instruction
	Base   *string // Base Go type ("int", "uint", "float")

	ElemBits *int // Element bit width
	Bits     *int // Total vector bit width

	Const *string // Optional constant value for immediates.

	// Optional immediate arg offsets. If this field is non-nil,
	// this operand will be an immediate operand:
	// the compiler will right-shift the user-passed value by ImmOffset and set it as the AuxInt
	// field of the operation.
	ImmOffset *string

	Name  *string // optional name in the Go intrinsic declaration
	Lanes *int    // *Lanes equals Bits/ElemBits except for scalars, when *Lanes == 1

	// TreatLikeAScalarOfSize means only the lower $TreatLikeAScalarOfSize bits of the vector
	// is used, so at the API level we can make it just a scalar value of this size; then we
	// can overwrite it to a vector of the right size during intrinsics stage.
	TreatLikeAScalarOfSize *int

	// If non-nil, it means the [Class] field is overwritten here; right now this is used to
	// overwrite the results of AVX2 compares to masks.
	OverwriteClass *string

	// If non-nil, it means the [Base] field is overwritten here. This field exists solely
	// because Intel's XED data is inconsistent. e.g. VANDNP[SD] marks its operand int.
	OverwriteBase *string

	// If non-nil, it means the [ElementBits] field is overwritten. This field exists solely
	// because Intel's XED data is inconsistent. e.g. AVX512 VPMADDUBSW marks its operand
	// elemBits 16, which should be 8.
	OverwriteElementBits *int

	// FixedReg is the name of the fixed registers
	FixedReg *string
}
// isDigit reports whether b is an ASCII digit ('0' through '9').
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}

// compareNatural performs a "natural sort" comparison of two strings.
// It compares non-digit sections lexicographically and digit sections
// numerically. In the case of string-unequal "equal" strings like
// "a01b" and "a1b", strings.Compare breaks the tie.
//
// It returns:
//
//	-1 if s1 < s2
//	 0 if s1 == s2
//	+1 if s1 > s2
func compareNatural(s1, s2 string) int {
	i, j := 0, 0
	for i < len(s1) && j < len(s2) {
		if isDigit(s1[i]) && isDigit(s2[j]) {
			// Both positions start a digit run: compare the runs
			// numerically.
			start1 := i
			for i < len(s1) && isDigit(s1[i]) {
				i++
			}
			start2 := j
			for j < len(s2) && isDigit(s2[j]) {
				j++
			}
			n1, _ := strconv.Atoi(s1[start1:i])
			n2, _ := strconv.Atoi(s2[start2:j])
			switch {
			case n1 < n2:
				return -1
			case n1 > n2:
				return 1
			}
			// Equal numbers: keep scanning the next segment.
			continue
		}
		// At least one side is a non-digit: plain byte comparison.
		if s1[i] != s2[j] {
			if s1[i] < s2[j] {
				return -1
			}
			return 1
		}
		i++
		j++
	}
	// One string is a prefix of the other, or the strings differ only in
	// leading zeros (a01b vs a1b); there needs to be an order.
	return strings.Compare(s1, s2)
}
// generatedHeader is the "Code generated ... DO NOT EDIT." marker
// written at the top of every file this tool emits; Go tooling
// recognizes this exact comment format.
const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
`
// writeGoDefs decodes every exact definition in cl into an [Operation],
// normalizes, sorts, and deduplicates the result, and writes all
// generated Go sources (types, stubs, intrinsics, SSA ops, and rewrite
// rules) under the GOROOT tree rooted at path.
func writeGoDefs(path string, cl unify.Closure) error {
	// TODO: Merge operations with the same signature but multiple
	// implementations (e.g., SSE vs AVX)
	var ops []Operation
	for def := range cl.All() {
		var op Operation
		if !def.Exact() {
			continue
		}
		if err := def.Decode(&op); err != nil {
			log.Println(err.Error())
			log.Println(def)
			continue
		}
		// TODO: verify that this is safe.
		op.sortOperand()
		op.adjustAsm()
		ops = append(ops, op)
	}
	slices.SortFunc(ops, compareOperations)
	// The parsed XED data might contain duplicates, like
	// 512 bits VPADDP.
	deduped := dedup(ops)
	slices.SortFunc(deduped, compareOperations)
	// Verbose progress logging. Each stage gets a distinct label;
	// previously every stage printed the identical "dedup len" message
	// (and the first printed the pre-dedup count under that label).
	if *Verbose {
		log.Printf("pre-dedup len: %d\n", len(ops))
	}
	var err error
	if err = overwrite(deduped); err != nil {
		return err
	}
	if *Verbose {
		log.Printf("after dedup len: %d\n", len(deduped))
	}
	if !*FlagNoDedup {
		// TODO: This can hide mistakes in the API definitions, especially when
		// multiple patterns result in the same API unintentionally. Make it stricter.
		if deduped, err = dedupGodef(deduped); err != nil {
			return err
		}
	}
	if *Verbose {
		log.Printf("after dedupGodef len: %d\n", len(deduped))
	}
	if !*FlagNoConstImmPorting {
		if err = copyConstImm(deduped); err != nil {
			return err
		}
	}
	if *Verbose {
		log.Printf("after copyConstImm len: %d\n", len(deduped))
	}
	reportXEDInconsistency(deduped)

	// Emit all generated files.
	typeMap := parseSIMDTypes(deduped)
	formatWriteAndClose(writeSIMDTypes(typeMap), path, "src/"+simdPackage+"/types_amd64.go")
	formatWriteAndClose(writeSIMDFeatures(deduped), path, "src/"+simdPackage+"/cpu.go")
	f, fI := writeSIMDStubs(deduped, typeMap)
	formatWriteAndClose(f, path, "src/"+simdPackage+"/ops_amd64.go")
	formatWriteAndClose(fI, path, "src/"+simdPackage+"/ops_internal_amd64.go")
	formatWriteAndClose(writeSIMDIntrinsics(deduped, typeMap), path, "src/cmd/compile/internal/ssagen/simdintrinsics.go")
	formatWriteAndClose(writeSIMDGenericOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdgenericOps.go")
	formatWriteAndClose(writeSIMDMachineOps(deduped), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go")
	formatWriteAndClose(writeSIMDSSA(deduped), path, "src/cmd/compile/internal/amd64/simdssa.go")
	writeAndClose(writeSIMDRules(deduped).Bytes(), path, "src/cmd/compile/internal/ssa/_gen/simdAMD64.rules")
	return nil
}