// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"cmp"
	"fmt"
	"log"
	"maps"
	"reflect"
	"regexp"
	"slices"
	"strconv"
	"strings"

	"simd/_gen/unify"

	"golang.org/x/arch/x86/xeddata"
	"gopkg.in/yaml.v3"
)

const (
	NOT_REG_CLASS = iota // not a register
	VREG_CLASS           // classify as a vector register
	GREG_CLASS           // classify as a general register
)

// instVariant is a bitmap indicating a variant of an instruction that has
// optional parameters.
type instVariant uint8

const (
	instVariantNone instVariant = 0

	// instVariantMasked indicates that this is the masked variant of an
	// optionally-masked instruction.
	instVariantMasked instVariant = 1 << iota
)

// operandRemarks counts instructions whose operands we could not decode.
var operandRemarks int

// TODO: Doc. Returns Values with Def domains.
func loadXED(xedPath string) []*unify.Value {
	// TODO: Obviously a bunch more to do here.

	db, err := xeddata.NewDatabase(xedPath)
	if err != nil {
		log.Fatalf("open database: %v", err)
	}

	var defs []*unify.Value
	type opData struct {
		inst *xeddata.Inst
		ops  []operand
		mem  string
	}
	// Maps from opcode to opData(s).
	memOps := make(map[string][]opData)
	otherOps := make(map[string][]opData)
	appendDefs := func(inst *xeddata.Inst, ops []operand, addFields map[string]string) {
		applyQuirks(inst, ops)
		defsPos := len(defs)
		defs = append(defs, instToUVal(inst, ops, addFields)...)
		if *flagDebugXED {
			for i := defsPos; i < len(defs); i++ {
				y, _ := yaml.Marshal(defs[i])
				fmt.Printf("==>\n%s\n", y)
			}
		}
	}
	err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
		inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)

		switch {
		case inst.RealOpcode == "N":
			return // Skip unstable instructions.
		case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA")):
			// We're only interested in AVX and SHA instructions.
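			// For example (illustrative), this keeps records whose EXTENSION
			// field is AVX, AVX2, AVX512EVEX, AVX_VNNI, or SHA, and drops
			// legacy SSE and other non-vector extensions.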
			return
		}

		if *flagDebugXED {
			fmt.Printf("%s:\n%+v\n", inst.Pos, inst)
		}

		ops, err := decodeOperands(db, strings.Fields(inst.Operands))
		if err != nil {
			operandRemarks++
			if *Verbose {
				log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err)
			}
			return
		}

		var data map[string][]opData
		mem := checkMem(ops)
		if mem == "vbcst" {
			// A pure vreg variant might exist; wait until later to see if we
			// can merge them.
			data = memOps
		} else {
			data = otherOps
		}
		opcode := inst.Opcode()
		data[opcode] = append(data[opcode], opData{inst, ops, mem})
	})
	for _, s := range otherOps {
		for _, o := range s {
			addFields := map[string]string{}
			if o.mem == "noMem" {
				opcode := o.inst.Opcode()
				// Check whether a vbcst variant of this operation exists.
				// First check the opcode.
				// Keep this logic in sync with [decodeOperands].
				if ms, ok := memOps[opcode]; ok {
					feat1, ok1 := decodeCPUFeature(o.inst)
					// Then check whether one of these operations has vreg
					// shapes that all match ours at the same indexes.
					var feat1Match, feat2Match string
					matchIdx := -1
					var featMismatchCnt int
				outer:
					for i, m := range ms {
						// Their CPU features should match first.
						var featMismatch bool
						feat2, ok2 := decodeCPUFeature(m.inst)
						if !ok1 || !ok2 {
							continue
						}
						if feat1 != feat2 {
							featMismatch = true
							featMismatchCnt++
						}
						if len(o.ops) == len(m.ops) {
							for j := range o.ops {
								if reflect.TypeOf(o.ops[j]) == reflect.TypeOf(m.ops[j]) {
									v1, ok3 := o.ops[j].(operandVReg)
									v2, _ := m.ops[j].(operandVReg)
									if !ok3 {
										continue
									}
									if v1.vecShape != v2.vecShape {
										// A mismatch, skip this memOp.
										continue outer
									}
								} else {
									_, ok3 := o.ops[j].(operandVReg)
									_, ok4 := m.ops[j].(operandMem)
									// The only difference must be vreg vs.
									// mem; there are no other cases.
									if !ok3 || !ok4 {
										// A mismatch, skip this memOp.
										continue outer
									}
								}
							}
							// Found a match, break early.
							matchIdx = i
							feat1Match = feat1
							feat2Match = feat2
							if featMismatchCnt > 1 {
								panic("multiple feature-mismatched vbcst memops detected, simdgen failed to distinguish")
							}
							if !featMismatch {
								// A feature mismatch is OK, but we should
								// prioritize matching features.
								break
							}
						}
					}
					// Remove the match from memOps; it's now merged into
					// this pure-vreg operation.
					if matchIdx != -1 {
						memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
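						// For example (illustrative), EVEX VADDPS has a
						// register form
						//
						//	VADDPS zmm1 {k1}{z}, zmm2, zmm3
						//
						// and a broadcast-memory form
						//
						//	VADDPS zmm1 {k1}{z}, zmm2, m32bcst
						//
						// which differ only in the vreg/mem operand, so the
						// two records merge here.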
						// Merge is done by adding a new field. Right now we
						// only have vbcst.
						addFields["memFeatures"] = "vbcst"
						if feat1Match != feat2Match {
							addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
						}
					}
				}
			}
			appendDefs(o.inst, o.ops, addFields)
		}
	}
	for _, ms := range memOps {
		for _, m := range ms {
			if *Verbose {
				log.Printf("mem op not merged: %s, %v\n", m.inst.Opcode(), m)
			}
			appendDefs(m.inst, m.ops, nil)
		}
	}
	if err != nil {
		log.Fatalf("walk insts: %v", err)
	}

	if len(unknownFeatures) > 0 {
		if !*Verbose {
			nInst := 0
			for _, insts := range unknownFeatures {
				nInst += len(insts)
			}
			log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
		} else {
			keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int {
				return cmp.Or(cmp.Compare(a.Extension, b.Extension), cmp.Compare(a.ISASet, b.ISASet))
			})
			for _, key := range keys {
				if key.ISASet == "" || key.ISASet == key.Extension {
					log.Printf("unhandled Extension %s", key.Extension)
				} else {
					log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet)
				}
				log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
			}
		}
	}

	return defs
}

var (
	maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`)
	maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`)
)

func applyQuirks(inst *xeddata.Inst, ops []operand) {
	opc := inst.Opcode()
	switch {
	case maskRequiredRe.MatchString(opc):
		// The mask on these instructions is marked optional, but the
		// instruction is pointless without the mask.
		for i, op := range ops {
			if op, ok := op.(operandMask); ok {
				op.optional = false
				ops[i] = op
			}
		}
	case maskOptionalRe.MatchString(opc):
		// Conversely, these masks should be marked optional and aren't.
		for i, op := range ops {
			if op, ok := op.(operandMask); ok && op.action.r {
				op.optional = true
				ops[i] = op
			}
		}
	}
}

type operandCommon struct {
	action operandAction
}

// operandAction defines whether this operand is read and/or written.
//
// TODO: Should this live in [xeddata.Operand]?
type operandAction struct {
	r  bool // Read
	w  bool // Written
	cr bool // Read is conditional (implies r==true)
	cw bool // Write is conditional (implies w==true)
}

type operandMem struct {
	operandCommon
	vecShape
	elemBaseType scalarBaseType

	// The following fields are not flushed to the final output.

	// vbcst indicates that the operand supports full-vector broadcasting;
	// this implies the operand has a "vv" (vector-vector) width and the
	// instruction has the attribute TXT=BCASTSTR.
	vbcst   bool
	unknown bool // unknown kind
}

type vecShape struct {
	elemBits  int    // Element size in bits
	bits      int    // Register width in bits (total vector bits)
	fixedName string // The fixed register name, if any
}

type operandVReg struct { // Vector register
	operandCommon
	vecShape
	elemBaseType scalarBaseType
}

type operandGReg struct { // General register
	operandCommon
	vecShape
	elemBaseType scalarBaseType
}

// operandMask is a vector mask.
//
// Regardless of the actual mask representation, the [vecShape] of this operand
// corresponds to the "bit for bit" type of mask. That is, elemBits gives the
// element width covered by each mask element, and bits/elemBits gives the total
// number of mask elements. (bits gives the total number of bits as if this were
// a bit-for-bit mask, which may be meaningless on its own.)
type operandMask struct {
	operandCommon
	vecShape // The number of mask elements is bits/elemBits.

	allMasks bool // If set, size cannot be inferred because all operands are masks.
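
	// For example (illustrative), a mask covering the 16 float32 lanes of a
	// 512-bit vector is recorded as vecShape{elemBits: 32, bits: 512}:
	// bits/elemBits gives the 16 mask elements, even though the K register
	// itself holds only 16 bits here.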

	// Mask can be omitted, in which case it defaults to K0, "no mask".
	optional bool
}

type operandImm struct {
	operandCommon
	bits int // Immediate size in bits
}

type operand interface {
	common() operandCommon
	addToDef(b *unify.DefBuilder)
}

func strVal(s any) *unify.Value {
	return unify.NewValue(unify.NewStringExact(fmt.Sprint(s)))
}

func (o operandCommon) common() operandCommon {
	return o
}

func (o operandMem) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("memory"))
	if o.unknown {
		return
	}
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("base", unify.NewValue(baseDomain))
	b.Add("bits", strVal(o.bits))
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
}

func (o operandVReg) addToDef(b *unify.DefBuilder) {
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("class", strVal("vreg"))
	b.Add("bits", strVal(o.bits))
	b.Add("base", unify.NewValue(baseDomain))
	// If elemBits == bits, then the vector can be ANY shape. This happens
	// with, for example, logical ops.
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}

func (o operandGReg) addToDef(b *unify.DefBuilder) {
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("class", strVal("greg"))
	b.Add("bits", strVal(o.bits))
	b.Add("base", unify.NewValue(baseDomain))
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}

func (o operandMask) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("mask"))
	if o.allMasks {
		// If all operands are masks, omit sizes and let unification
		// determine mask sizes.
		return
	}
	b.Add("elemBits", strVal(o.elemBits))
	b.Add("bits", strVal(o.bits))
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}

func (o operandImm) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("immediate"))
	b.Add("bits", strVal(o.bits))
}

var actionEncoding = map[string]operandAction{
	"r":   {r: true},
	"cr":  {r: true, cr: true},
	"w":   {w: true},
	"cw":  {w: true, cw: true},
	"rw":  {r: true, w: true},
	"crw": {r: true, w: true, cr: true},
	"rcw": {r: true, w: true, cw: true},
}

func decodeOperand(db *xeddata.Database, operand string) (operand, error) {
	op, err := xeddata.NewOperand(db, operand)
	if err != nil {
		log.Fatalf("parsing operand %q: %v", operand, err)
	}
	if *flagDebugXED {
		fmt.Printf(" %+v\n", op)
	}

	if strings.HasPrefix(op.Name, "EMX_BROADCAST") {
		// This refers to a set of macros defined in all-state.txt that set a
		// BCAST operand to various fixed values. But the BCAST operand is
		// itself suppressed and "internal", so I think we can just ignore
		// this operand.
		return nil, nil
	}

	// TODO: See xed_decoded_inst_operand_action. This might need to be more
	// complicated.
	action, ok := actionEncoding[op.Action]
	if !ok {
		return nil, fmt.Errorf("unknown action %q", op.Action)
	}
	common := operandCommon{action: action}

	lhs := op.NameLHS()
	if strings.HasPrefix(lhs, "MEM") {
		// The XED data looks inconsistent for VPADDD: it marks the attribute
		// VPBROADCASTD instead of the canonical BCASTSTR.
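		// For illustration, a broadcastable memory operand in the XED data
		// looks roughly like (colon-separated name:action:width:xtype)
		//
		//	MEM0:r:vv:f32
		//
		// on an instruction carrying the TXT=BCASTSTR attribute.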
if op.Width == "vv" && (op.Attributes["TXT=BCASTSTR"] || op.Attributes["TXT=VPBROADCASTD"]) { baseType, elemBits, ok := decodeType(op) if !ok { return nil, fmt.Errorf("failed to decode memory width %q", operand) } // This operand has two possible width([bits]): // 1. the same as the other operands // 2. the element width as the other operands (broaccasting) // left it default to 2, later we will set a new field in the operation // to indicate this dual-width property. shape := vecShape{elemBits: elemBits, bits: elemBits} return operandMem{ operandCommon: common, vecShape: shape, elemBaseType: baseType, vbcst: true, unknown: false, }, nil } // TODO: parse op.Width better to handle all cases // Right now this will at least miss VPBROADCAST. return operandMem{ operandCommon: common, unknown: true, }, nil } else if strings.HasPrefix(lhs, "REG") { if op.Width == "mskw" { // The mask operand doesn't specify a width. We have to infer it. // // XED uses the marker ZEROSTR to indicate that a mask operand is // optional and, if omitted, implies K0, aka "no mask". return operandMask{ operandCommon: common, optional: op.Attributes["TXT=ZEROSTR"], }, nil } else { class, regBits, fixedReg := decodeReg(op) if class == NOT_REG_CLASS { return nil, fmt.Errorf("failed to decode register %q", operand) } baseType, elemBits, ok := decodeType(op) if !ok { return nil, fmt.Errorf("failed to decode register width %q", operand) } shape := vecShape{elemBits: elemBits, bits: regBits, fixedName: fixedReg} if class == VREG_CLASS { return operandVReg{ operandCommon: common, vecShape: shape, elemBaseType: baseType, }, nil } // general register m := min(shape.bits, shape.elemBits) shape.bits, shape.elemBits = m, m return operandGReg{ operandCommon: common, vecShape: shape, elemBaseType: baseType, }, nil } } else if strings.HasPrefix(lhs, "IMM") { _, bits, ok := decodeType(op) if !ok { return nil, fmt.Errorf("failed to decode register width %q", operand) } return operandImm{ operandCommon: common, bits: bits, }, nil } // TODO: BASE and SEG return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand) } func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) { // Decode the XED operand descriptions. for _, o := range operands { op, err := decodeOperand(db, o) if err != nil { return nil, err } if op != nil { ops = append(ops, op) } } // XED doesn't encode the size of mask operands. If there are mask operands, // try to infer their sizes from other operands. if err := inferMaskSizes(ops); err != nil { return nil, fmt.Errorf("%w in operands %+v", err, operands) } return ops, nil } func inferMaskSizes(ops []operand) error { // This is a heuristic and it falls apart in some cases: // // - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate // mask size. // // - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have // mixed input sizes and the XED doesn't indicate which operands the mask // applies to. // // - VPDP* and VP4DP* have really complex mixed operand patterns. // // I think for these we may just have to hand-write a table of which // operands each mask applies to. 
	inferMask := func(r, w bool) error {
		var masks []int
		var rSizes, wSizes, sizes []vecShape
		allMasks := true
		hasWMask := false
		for i, op := range ops {
			action := op.common().action
			if _, ok := op.(operandMask); ok {
				if action.r && action.w {
					return fmt.Errorf("unexpected rw mask")
				}
				if action.r == r || action.w == w {
					masks = append(masks, i)
				}
				if action.w {
					hasWMask = true
				}
			} else {
				allMasks = false
				if reg, ok := op.(operandVReg); ok {
					if action.r {
						rSizes = append(rSizes, reg.vecShape)
					}
					if action.w {
						wSizes = append(wSizes, reg.vecShape)
					}
				}
			}
		}
		if len(masks) == 0 {
			return nil
		}
		if r {
			sizes = rSizes
			if len(sizes) == 0 {
				sizes = wSizes
			}
		}
		if w {
			sizes = wSizes
			if len(sizes) == 0 {
				sizes = rSizes
			}
		}
		if len(sizes) == 0 {
			// If all operands are masks, leave the mask inference to the
			// users.
			if allMasks {
				for _, i := range masks {
					m := ops[i].(operandMask)
					m.allMasks = true
					ops[i] = m
				}
				return nil
			}
			return fmt.Errorf("cannot infer mask size: no register operands")
		}
		shape, ok := singular(sizes)
		if !ok {
			if !hasWMask && len(wSizes) == 1 && len(masks) == 1 {
				// This pattern looks like a predicate mask, so its shape
				// should align with the output. TODO: Verify this is a safe
				// assumption.
				shape = wSizes[0]
			} else {
				return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes)
			}
		}
		for _, i := range masks {
			m := ops[i].(operandMask)
			m.vecShape = shape
			ops[i] = m
		}
		return nil
	}
	if err := inferMask(true, false); err != nil {
		return err
	}
	if err := inferMask(false, true); err != nil {
		return err
	}
	return nil
}

// addOperandsToDef adds "in", "inVariant", and "out" to an instruction Def.
//
// Optional mask input operands are added to the inVariant field if
// variant&instVariantMasked is set, and omitted otherwise.
func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) {
	var inVals, inVar, outVals []*unify.Value
	asmPos := 0
	for _, op := range ops {
		var db unify.DefBuilder
		op.addToDef(&db)
		db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos))))

		action := op.common().action
		asmCount := 1 // Number of assembly operands; 0 or 1.
		if action.r {
			inVal := unify.NewValue(db.Build())
			// If this is an optional mask, put it in the input variant tuple.
			if mask, ok := op.(operandMask); ok && mask.optional {
				if variant&instVariantMasked != 0 {
					inVar = append(inVar, inVal)
				} else {
					// This operand doesn't appear in the assembly at all.
					asmCount = 0
				}
			} else {
				// Just a regular input operand.
				inVals = append(inVals, inVal)
			}
		}
		if action.w {
			outVal := unify.NewValue(db.Build())
			outVals = append(outVals, outVal)
		}
		asmPos += asmCount
	}
	instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...)))
	instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...)))
	instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...)))
	memFeatures := checkMem(ops)
	if memFeatures != "noMem" {
		instDB.Add("memFeatures", unify.NewValue(unify.NewStringExact(memFeatures)))
	}
}

// checkMem checks the shape of the memory operands in ops and returns a
// string describing it. Keep this function in sync with [decodeOperand].
func checkMem(ops []operand) string {
	memState := "noMem"
	var mem *operandMem
	memCnt := 0
	for _, op := range ops {
		if m, ok := op.(operandMem); ok {
			mem = &m
			memCnt++
		}
	}
	if mem != nil {
		if mem.unknown {
			memState = "unknown"
		} else if memCnt > 1 {
			memState = "tooManyMem"
		} else {
			// We only have the vbcst case as of now. This shape indicates
			// that the [bits] field has two possible values:
			//
			//  1. The element broadcast width, which is its peer vreg
			//     operand's [elemBits] (the default value in the parsed XED
			//     data).
			//  2. The full vector width, which is its peer vreg operand's
			//     [bits] (godefs should be aware of this).
			memState = "vbcst"
		}
	}
	return memState
}

func instToUVal(inst *xeddata.Inst, ops []operand, addFields map[string]string) []*unify.Value {
	feature, ok := decodeCPUFeature(inst)
	if !ok {
		return nil
	}
	var vals []*unify.Value
	vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone, addFields))
	if hasOptionalMask(ops) {
		vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked, addFields))
	}
	return vals
}

func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant, addFields map[string]string) *unify.Value {
	var db unify.DefBuilder
	db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64")))
	db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode())))
	addOperandsToDef(ops, &db, variant)
	db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature)))
	for k, v := range addFields {
		db.Add(k, unify.NewValue(unify.NewStringExact(v)))
	}
	if strings.Contains(inst.Pattern, "ZEROING=0") {
		// This is an EVEX instruction, but the ".Z" (zero-merging)
		// instruction flag is NOT valid. EVEX.z must be zero.
		//
		// This can mean a few things:
		//
		// - The output of an instruction is a mask, so merging modes don't
		//   make any sense. E.g., VCMPPS.
		//
		// - There are no masks involved anywhere. (Maybe MASK=0 is also set
		//   in this case?) E.g., VINSERTPS.
		//
		// - The operation inherently performs merging. E.g., VCOMPRESSPS
		//   with a mem operand.
		//
		// There may be other reasons.
		db.Add("zeroing", unify.NewValue(unify.NewStringExact("false")))
	}
	pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line}
	return unify.NewValuePos(db.Build(), pos)
}

// decodeCPUFeature returns the CPU feature name required by inst. These match
// the names of the "Has*" feature checks in the simd package.
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
	key := cpuFeatureKey{
		Extension: inst.Extension,
		ISASet:    isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""),
	}
	feat, ok := cpuFeatureMap[key]
	if !ok {
		imap := unknownFeatures[key]
		if imap == nil {
			imap = make(map[string]struct{})
			unknownFeatures[key] = imap
		}
		imap[inst.Opcode()] = struct{}{}
		return "", false
	}
	if feat == "ignore" {
		return "", false
	}
	return feat, true
}

var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$")

type cpuFeatureKey struct {
	Extension, ISASet string
}

// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature
// name that can be used in the SIMD API.
var cpuFeatureMap = map[cpuFeatureKey]string{
	{"SHA", "SHA"}:           "SHA",
	{"AVX", ""}:              "AVX",
	{"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
	{"AVX2", ""}:             "AVX2",
	{"AVXAES", ""}:           "AVX, AES",

	// AVX-512 foundational features. We combine all of these into one
	// "AVX512" feature.
	{"AVX512EVEX", "AVX512F"}:  "AVX512",
	{"AVX512EVEX", "AVX512CD"}: "AVX512",
	{"AVX512EVEX", "AVX512BW"}: "AVX512",
	{"AVX512EVEX", "AVX512DQ"}: "AVX512",
	// AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied
	// by the vector length suffix.
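	// For example, an instruction with EXTENSION "AVX512EVEX" and ISA_SET
	// "AVX512F_128" is looked up here as {"AVX512EVEX", "AVX512F"}:
	// isaSetStrip removes the vector-length suffix before the lookup.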

	// AVX-512 extension features.
	{"AVX512EVEX", "AVX512_BITALG"}:    "AVX512BITALG",
	{"AVX512EVEX", "AVX512_GFNI"}:      "AVX512GFNI",
	{"AVX512EVEX", "AVX512_VBMI2"}:     "AVX512VBMI2",
	{"AVX512EVEX", "AVX512_VBMI"}:      "AVX512VBMI",
	{"AVX512EVEX", "AVX512_VNNI"}:      "AVX512VNNI",
	{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
	{"AVX512EVEX", "AVX512_VAES"}:      "AVX512VAES",

	// AVX 10.2 (not yet supported).
	{"AVX512EVEX", "AVX10_2_RC"}: "ignore",
}

var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{}

// hasOptionalMask returns whether there is an optional mask operand in ops.
func hasOptionalMask(ops []operand) bool {
	for _, op := range ops {
		if op, ok := op.(operandMask); ok && op.optional {
			return true
		}
	}
	return false
}

// singular returns the single value appearing in xs, or false if xs is empty
// or contains differing values.
func singular[T comparable](xs []T) (T, bool) {
	if len(xs) == 0 {
		return *new(T), false
	}
	for _, x := range xs[1:] {
		if x != xs[0] {
			return *new(T), false
		}
	}
	return xs[0], true
}

type fixedReg struct {
	class int
	name  string
	width int
}

var fixedRegMap = map[string]fixedReg{
	"XED_REG_XMM0": {VREG_CLASS, "x0", 128},
}

// decodeReg returns the register class (NOT_REG_CLASS, VREG_CLASS, or
// GREG_CLASS), the register width in bits, and the register name if it is a
// fixed register. If the operand cannot be decoded as a register, the class
// is NOT_REG_CLASS.
func decodeReg(op *xeddata.Operand) (class, width int, name string) {
	// op.Width tells us the total width, e.g.:
	//
	//	dq => 128 bits (XMM)
	//	qq => 256 bits (YMM)
	//	mskw => K
	//	z[iuf?](8|16|32|...) => 512 bits (ZMM)
	//
	// But the encoding is really weird and it's not clear if these *always*
	// mean XMM/YMM/ZMM or if other irregular things can use these large
	// widths. Hence, we dig into the register sets themselves.
	if !strings.HasPrefix(op.NameLHS(), "REG") {
		return NOT_REG_CLASS, 0, ""
	}
	// TODO: We shouldn't be relying on the macro naming conventions. We
	// should use all-dec-patterns.txt, but xeddata doesn't support that
	// table right now.
	rhs := op.NameRHS()
	if !strings.HasSuffix(rhs, "()") {
		if fixedReg, ok := fixedRegMap[rhs]; ok {
			return fixedReg.class, fixedReg.width, fixedReg.name
		}
		return NOT_REG_CLASS, 0, ""
	}
	switch {
	case strings.HasPrefix(rhs, "XMM_"):
		return VREG_CLASS, 128, ""
	case strings.HasPrefix(rhs, "YMM_"):
		return VREG_CLASS, 256, ""
	case strings.HasPrefix(rhs, "ZMM_"):
		return VREG_CLASS, 512, ""
	case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"):
		return GREG_CLASS, 64, ""
	case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"):
		return GREG_CLASS, 32, ""
	}
	return NOT_REG_CLASS, 0, ""
}

var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`)

// scalarBaseType describes the base type of a scalar element. This is a Go
// type, but without the bit width suffix (with the exception of
// scalarBaseIntOrUint).
type scalarBaseType int

const (
	scalarBaseInt scalarBaseType = iota
	scalarBaseUint
	scalarBaseIntOrUint // Signed or unsigned is unspecified
	scalarBaseFloat
	scalarBaseComplex
	scalarBaseBFloat
	scalarBaseHFloat
)

func (s scalarBaseType) regex() string {
	switch s {
	case scalarBaseInt:
		return "int"
	case scalarBaseUint:
		return "uint"
	case scalarBaseIntOrUint:
		return "int|uint"
	case scalarBaseFloat:
		return "float"
	case scalarBaseComplex:
		return "complex"
	case scalarBaseBFloat:
		return "BFloat"
	case scalarBaseHFloat:
		return "HFloat"
	}
	panic(fmt.Sprintf("unknown scalar base type %d", s))
}

func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) {
	// The xtype tells you the element type: i8, i16, i32, i64, f32, etc.
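	// For example, an operand whose Xtype is "f32" decodes to
	// (scalarBaseFloat, 32, true), and "u8" decodes to
	// (scalarBaseIntOrUint, 8, true); see the fallback pattern below.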
	//
	// TODO: Things like AVX2 VPAND have an xtype of u256 because they're
	// element-width agnostic. Do I map that to all widths, or just omit the
	// element width and let unification flesh it out? There's no u512
	// (presumably those are all masked, so elem width matters). These are
	// all Category: LOGICAL, so maybe we could use that info?

	// Handle some weird ones.
	switch op.Xtype {
	// 8-bit float formats as defined by the Open Compute Project "OCP 8-bit
	// Floating Point Specification (OFP8)".
	case "bf8": // E5M2 float
		return scalarBaseBFloat, 8, true
	case "hf8": // E4M3 float
		return scalarBaseHFloat, 8, true
	case "bf16": // bfloat16 float
		return scalarBaseBFloat, 16, true
	case "2f16":
		// Complex consisting of 2 float16s. Doesn't exist in Go, but we can
		// say what it would be.
		return scalarBaseComplex, 32, true
	case "2i8", "2I8":
		// These just use the lower INT8 in each 16-bit field. As far as I
		// can tell, "2I8" is a typo.
		return scalarBaseInt, 8, true
	case "2u16", "2U16": // Some VPDP* instructions have it.
		// TODO: Does "z" mean it has zeroing?
		return scalarBaseUint, 16, true
	case "2i16", "2I16": // Some VPDP* instructions have it.
		return scalarBaseInt, 16, true
	case "4u8", "4U8": // Some VPDP* instructions have it.
		return scalarBaseUint, 8, true
	case "4i8", "4I8": // Some VPDP* instructions have it.
		return scalarBaseInt, 8, true
	}

	// The rest follow a simple pattern.
	m := xtypeRe.FindStringSubmatch(op.Xtype)
	if m == nil {
		// TODO: Report unrecognized xtypes.
		return 0, 0, false
	}
	bits, _ = strconv.Atoi(m[2])
	switch m[1] {
	case "i", "u":
		// XED is rather inconsistent about what's signed, unsigned, or
		// doesn't matter, so merge them together and let the Go definitions
		// narrow as appropriate. Maybe there's a better way to do this.
		return scalarBaseIntOrUint, bits, true
	case "f":
		return scalarBaseFloat, bits, true
	default:
		panic("unreachable")
	}
}