diff options
Diffstat (limited to 'src/cmd/internal/obj/arm64/inst.go')
| -rw-r--r-- | src/cmd/internal/obj/arm64/inst.go | 260 |
1 file changed, 260 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/arm64/inst.go b/src/cmd/internal/obj/arm64/inst.go index dd9c802aab..131d0211af 100644 --- a/src/cmd/internal/obj/arm64/inst.go +++ b/src/cmd/internal/obj/arm64/inst.go @@ -8,6 +8,8 @@ import ( "cmd/internal/obj" "fmt" "iter" + "math" + "math/bits" ) // instEncoder represents an instruction encoder. @@ -126,6 +128,9 @@ func aclass(a *obj.Addr) AClass { return AC_SPZGREG } } + if a.Type == obj.TYPE_CONST || a.Type == obj.TYPE_FCONST { + return AC_IMM + } panic("unknown AClass") } @@ -197,6 +202,28 @@ func addrComponent(a *obj.Addr, acl AClass, index int) uint32 { default: panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl)) } + // AClass: AC_IMM + // GNU mnemonic: <imm>, <shift> + // Go mnemonic: + // $imm<<shift + // Encoding: + // Type = TYPE_CONST or TYPE_FCONST + // Offset = imm (shift already applied) + case AC_IMM: + switch index { + case 0: + if a.Type == obj.TYPE_FCONST { + switch v := a.Val.(type) { + case float64: + return math.Float32bits(float32(v)) + default: + panic(fmt.Errorf("unknown float immediate value %v", a.Val)) + } + } + return uint32(a.Offset) + default: + panic(fmt.Errorf("unknown elm index at %d in AClass %d", index, acl)) + } } // TODO: handle more AClasses. panic(fmt.Errorf("unknown AClass %d", acl)) @@ -204,6 +231,11 @@ func addrComponent(a *obj.Addr, acl AClass, index int) uint32 { var codeI1Tsz uint32 = 0xffffffff var codeImm2Tsz uint32 = 0xfffffffe +var codeShift161919212223 uint32 = 0xfffffffd +var codeShift161919212224 uint32 = 0xfffffffc +var codeShift588102224 uint32 = 0xfffffffb +var codeLogicalImmArrEncoding uint32 = 0xfffffffa +var codeNoOp uint32 = 0xfffffff9 // encodeI1Tsz is the implementation of the following encoding logic: // Is the immediate index, in the range 0 to one less than the number of elements in 128 bits, encoded in "i1:tsz". 
@@ -292,6 +324,203 @@ func encodeImm2Tsz(v, arr uint32) (uint32, bool) { } } +type arrAlignType int + +const ( + arrAlignBHSD arrAlignType = iota + arrAlignHSD + arrAlignBHS +) + +// encodeShiftTriple encodes an shift immediate value in "tszh:tszl:imm3". +// tszh, tszl, imm3 are in ranges, sorted by bit position. +// These shifts are also bounded by arrangement element size. +func encodeShiftTriple(v uint32, r [6]int, prevAddr *obj.Addr, op obj.As) (uint32, bool) { + // The previous op must be a scalable vector, and we need its arrangement. + acl := aclass(prevAddr) + if acl != AC_ARNG { + return 0, false + } + arr := addrComponent(prevAddr, acl, 1) // Get arrangement + elemBits := uint32(0) + switch arr { + case ARNG_B: + elemBits = 8 + case ARNG_H: + elemBits = 16 + case ARNG_S: + elemBits = 32 + case ARNG_D: + elemBits = 64 + default: + return 0, false + } + if v >= elemBits { + return 0, false + } + var C uint32 + // Unfortunately these information are in the decoding ASL. + // For these instructions, the esize (see comment in the switch below) + // is derived from the destination arrangement, however how this function is called is deriving + // the esize from one of the source. + // We need to address this discrepancy. 
+ effectiveEsize := elemBits + switch op { + case AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT, + AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT: + effectiveEsize = elemBits / 2 + } + switch op { + case AZASR, AZLSR, AZURSHR, AZASRD, + AZRSHRNB, AZRSHRNT, AZSHRNB, AZSHRNT, AZSQRSHRNB, AZSQRSHRNT, AZSQRSHRUNB, AZSQRSHRUNT, + AZSQSHRNB, AZSQSHRNT, AZSQSHRUNB, AZSQSHRUNT, AZSRSHR, AZUQRSHRNB, AZUQRSHRNT, AZUQSHRNB, AZUQSHRNT, + AZURSRA, AZUSRA, AZXAR, AZSRI, AZSRSRA, AZSSRA: + // ASL: let shift : integer = (2 * esize) - UInt(tsize::imm3); + if v == 0 { + return 0, false + } + C = (2 * effectiveEsize) - v + default: + // ASL: let shift : integer = UInt(tsize::imm3) - esize; + C = effectiveEsize + v + } + var chunks [3]uint32 + for i := 0; i < 6; i += 2 { + chunks[i/2] = C & ((1 << (r[i+1] - r[i])) - 1) + C >>= (r[i+1] - r[i]) + } + return uint32((chunks[0] << r[0]) | + (chunks[1] << r[2]) | + (chunks[2] << r[4])), true +} + +// encodeLogicalImmEncoding is the implementation of the following encoding logic: +// Is the size specifier, +// imm13 <T> +// 0xxxxxx0xxxxx S +// 0xxxxxx10xxxx H +// 0xxxxxx110xxx B +// 0xxxxxx1110xx B +// 0xxxxxx11110x B +// 0xxxxxx11111x RESERVED +// 1xxxxxxxxxxxx D +// At the meantime: +// Is a 64, 32, 16 or 8-bit bitmask consisting of replicated 2, 4, 8, 16, 32 or 64 bit fields, +// each field containing a rotated run of non-zero bits, encoded in the "imm13" field. +// +// bit range mappings: +// imm13: [5:18) +// +// ARM created a "clever" recipe that can generate useful repeating 8-64 bit bitmasks. +// Instead of storing the literal binary number, the processor reads a 13-bit recipe +// using three fields (bits from high to low): +// N (1 bit), immr (6 bits), and imms (6 bits). +// +// How the recipe works: +// Every logical immediate represents a repeating pattern (like repeating tiles). 
The processor +// uses the three fields to figure out the size of the tile, how many 1s are in the tile, and +// how far to rotate it. +// The N bit combined with the upper bits of imms determines the width of the repeating block. +// Depending on these bits, the fundamental block can be 2, 4, 8, 16, 32, or 64 bits wide. +// The lower bits of imms dictate exactly how many contiguous 1s exist inside that block. +// The immr value tells the processor how many bits to rotate that block to the right. +// Finally, the resulting block is duplicated to fill a standard 64-bit lane. +func encodeLogicalImmArrEncoding(v uint32, adjacentAddr *obj.Addr) (uint32, bool) { + acl := aclass(adjacentAddr) + if acl != AC_ARNG { + return 0, false + } + arr := addrComponent(adjacentAddr, acl, 1) + + // Replicate the given immediate to fill a full 64-bit lane. + // This ensures our pattern-shrinking logic naturally respects the vector lane bounds. + var val uint64 + switch arr { + case ARNG_B: // 8-bit lane + v8 := uint64(v & 0xFF) + val = v8 * 0x0101010101010101 + case ARNG_H: // 16-bit lane + v16 := uint64(v & 0xFFFF) + val = v16 * 0x0001000100010001 + case ARNG_S: // 32-bit lane + v32 := uint64(v) + val = v32 | (v32 << 32) + case ARNG_D: // 64-bit lane + val = uint64(v) // Top 32 bits are implicitly 0 + default: + return 0, false + } + + // Reject all zeros or all ones (handled by MOV/EOR, invalid for AND/ORR immediates) + if val == 0 || val == ^uint64(0) { + return 0, false + } + + // Find the absolute smallest repeating pattern size (64 down to 2) + size := uint64(64) + for size > 2 { + half := size / 2 + mask := (uint64(1) << half) - 1 + lower := val & mask + upper := (val >> half) & mask + + // If the top half matches the bottom half, shrink our window + if lower == upper { + size = half + val = lower + } else { + break + } + } + + // Count the contiguous ones in this minimal pattern + mask := (uint64(1) << size) - 1 + val &= mask + ones := bits.OnesCount64(val) + + // Find the 
right-rotation (rot) needed to align the 1s at the bottom + expected := (uint64(1) << ones) - 1 + rot := -1 + for r := 0; r < int(size); r++ { + // Right rotate 'val' by 'r' bits within a 'size'-bit window + rotated := ((val >> r) | (val << (int(size) - r))) & mask + if rotated == expected { + rot = r + break + } + } + + if rot == -1 { + return 0, false + } + + // immr is the amount the hardware must right-rotate the base pattern. + // Since 'rot' is how much we right-rotated the target to find the base, + // the hardware needs the inverse rotation. + immr := uint32((int(size) - rot) % int(size)) + + // If we couldn't find a rotation that forms a perfect contiguous block of 1s, it's invalid. + if rot == -1 { + return 0, false + } + + // Encode N, immr, and imms + n := uint32(0) + if size == 64 { + n = 1 + } + + // The imms prefix is mathematically generated by (~(size*2 - 1) & 0x3F). + // We then OR it with the number of ones (minus 1). + imms := (uint32(^(size*2 - 1)) & 0x3F) | uint32(ones-1) + + // Construct the final 13-bit field: N (1) | immr (6) | imms (6) + imm13 := (n << 12) | (immr << 6) | imms + + // Shift by 5 to place imm13 into instruction bits [5:17] + return imm13 << 5, true +} + // tryEncode tries to encode p with i, it returns the encoded binary and ok signal. func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) { bin := i.fixedBits @@ -300,7 +529,11 @@ func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) { // The 2 instances of <Tb> must encode to the same value. 
encoded := map[component]uint32{} opIdx := 0 + var addrs []*obj.Addr for addr := range opsInProg(p) { + addrs = append(addrs, addr) + } + for ii, addr := range addrs { if opIdx >= len(i.args) { return 0, false } @@ -312,13 +545,36 @@ func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) { } for i, enc := range op.elemEncoders { val := addrComponent(addr, acl, i) + if (p.As == AZFCPY || p.As == AZFDUP) && acl == AC_IMM { + // These instructions expects ARM's 8-bit float encoding. + // Reinterpret the uint32 bits back as a float32, then convert to float64 for chipfloat7 + fval := float64(math.Float32frombits(val)) + encode := (&ctxt7{}).chipfloat7(fval) + if encode == -1 { + // Handle error or return false to indicate mismatch + return 0, false + } + val = uint32(encode) + } if b, ok := enc.fn(val); ok || b != 0 { + specialB := uint32(b) if !ok { + specialB = b switch b { case codeI1Tsz: b, ok = encodeI1Tsz(val, addrComponent(addr, acl, i-1)) case codeImm2Tsz: b, ok = encodeImm2Tsz(val, addrComponent(addr, acl, i-1)) + case codeShift161919212223: + b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 23}, addrs[ii+1], p.As) + case codeShift161919212224: + b, ok = encodeShiftTriple(val, [6]int{16, 19, 19, 21, 22, 24}, addrs[ii+1], p.As) + case codeShift588102224: + b, ok = encodeShiftTriple(val, [6]int{5, 8, 8, 10, 22, 24}, addrs[ii+1], p.As) + case codeLogicalImmArrEncoding: + b, ok = encodeLogicalImmArrEncoding(val, addrs[ii+1]) + case codeNoOp: + b, ok = 0, true default: panic(fmt.Errorf("unknown encoding function code %d", b)) } @@ -328,6 +584,10 @@ func (i *instEncoder) tryEncode(p *obj.Prog) (uint32, bool) { } bin |= b if _, ok := encoded[enc.comp]; ok && b != encoded[enc.comp] { + if specialB == codeNoOp { + // NoOp encodings don't need checks. + continue + } return 0, false } if enc.comp != enc_NIL { |
