From b249703e3c53cd7f1e5f808fb2f03714fec44b43 Mon Sep 17 00:00:00 2001 From: Jeremy Faller Date: Wed, 12 Aug 2020 12:54:03 -0400 Subject: [dev.link] cmd/compile, cmd/asm: add length to hashed symbols While working on deduplicating pcdata, I found that the following hashed symbols would result in the same: [] == [0,0,0,0....] This makes using content addressable symbols untenable for pcdata. Adding the length to the hash keeps the dream alive. No difference in binary size (darwin, cmd/compile), spurious improvements in DWARF phase memory. Change-Id: I21101f7754a3d870922b0dea39c947cc8509432f Reviewed-on: https://go-review.googlesource.com/c/go/+/247903 Run-TryBot: Jeremy Faller TryBot-Result: Gobot Gobot Reviewed-by: Than McIntosh Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/objfile.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 7bc4f4992e..8234697d72 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -372,10 +372,22 @@ func contentHash64(s *LSym) goobj.Hash64Type { // hashed symbols. func (w *writer) contentHash(s *LSym) goobj.HashType { h := sha1.New() + var tmp [14]byte + + // Include the size of the symbol in the hash. + // This preserves the length of symbols, preventing the following two symbols + // from hashing the same: + // + // [2]int{1,2} ≠ [10]int{1,2,0,0,0...} + // + // In this case, if the smaller symbol is alive, the larger is not kept unless + // needed. + binary.LittleEndian.PutUint64(tmp[:8], uint64(s.Size)) + h.Write(tmp[:8]) + // The compiler trims trailing zeros _sometimes_. We just do // it always. h.Write(bytes.TrimRight(s.P, "\x00")) - var tmp [14]byte for i := range s.R { r := &s.R[i] binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off)) -- cgit v1.3 From 5387cdcb24a07f5d0d49d5105ced2b69e6aafde9 Mon Sep 17 00:00:00 2001 From: Jeremy Faller Date: Fri, 7 Aug 2020 11:31:20 -0400 Subject: [dev.link] cmd/link, cmd/compile: create content addressable pcdata syms Switch pcdata over to content addressable symbols. This is the last step before removing these from pclntab_old. No meaningful benchmarks changes come from this work. Change-Id: I3f74f3d6026a278babe437c8010e22992c92bd89 Reviewed-on: https://go-review.googlesource.com/c/go/+/247399 Reviewed-by: Austin Clements Reviewed-by: Than McIntosh --- src/cmd/internal/goobj/funcinfo.go | 82 ++++++++++++++++++---------------- src/cmd/internal/goobj/objfile.go | 12 +++-- src/cmd/internal/obj/link.go | 15 +++---- src/cmd/internal/obj/objfile.go | 71 ++++++++++++++++++++++------- src/cmd/internal/obj/pcln.go | 55 ++++++++++++++--------- src/cmd/internal/objfile/goobj.go | 16 ++++--- src/cmd/link/internal/ld/dwarf.go | 2 +- src/cmd/link/internal/ld/lib.go | 2 +- src/cmd/link/internal/ld/pcln.go | 13 +++--- src/cmd/link/internal/loader/loader.go | 46 +++++++++---------- 10 files changed, 182 insertions(+), 132 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/goobj/funcinfo.go b/src/cmd/internal/goobj/funcinfo.go index e0e6068b4b..2cca8f6c4e 100644 --- a/src/cmd/internal/goobj/funcinfo.go +++ b/src/cmd/internal/goobj/funcinfo.go @@ -23,12 +23,11 @@ type FuncInfo struct { Locals uint32 FuncID objabi.FuncID - Pcsp uint32 - Pcfile uint32 - Pcline uint32 - Pcinline uint32 - Pcdata []uint32 - PcdataEnd uint32 + Pcsp SymRef + Pcfile SymRef + Pcline SymRef + Pcinline SymRef + Pcdata []SymRef Funcdataoff []uint32 File []CUFileIndex @@ -41,20 +40,24 @@ func (a *FuncInfo) Write(w *bytes.Buffer) { binary.LittleEndian.PutUint32(b[:], x) w.Write(b[:]) } + writeSymRef := func(s SymRef) { + writeUint32(s.PkgIdx) + writeUint32(s.SymIdx) + } writeUint32(a.Args) writeUint32(a.Locals) writeUint32(uint32(a.FuncID)) - writeUint32(a.Pcsp) - writeUint32(a.Pcfile) - writeUint32(a.Pcline) - writeUint32(a.Pcinline) + writeSymRef(a.Pcsp) + writeSymRef(a.Pcfile) + writeSymRef(a.Pcline) + writeSymRef(a.Pcinline) writeUint32(uint32(len(a.Pcdata))) - for _, x := range a.Pcdata { - writeUint32(x) + for _, sym := range a.Pcdata { + writeSymRef(sym) } - writeUint32(a.PcdataEnd) + writeUint32(uint32(len(a.Funcdataoff))) for _, x := range a.Funcdataoff { writeUint32(x) @@ -75,21 +78,23 @@ func (a *FuncInfo) Read(b []byte) { b = b[4:] return x } + readSymIdx := func() SymRef { + return SymRef{readUint32(), readUint32()} + } a.Args = readUint32() a.Locals = readUint32() a.FuncID = objabi.FuncID(readUint32()) - a.Pcsp = readUint32() - a.Pcfile = readUint32() - a.Pcline = readUint32() - a.Pcinline = readUint32() - pcdatalen := readUint32() - a.Pcdata = make([]uint32, pcdatalen) + a.Pcsp = readSymIdx() + a.Pcfile = readSymIdx() + a.Pcline = readSymIdx() + a.Pcinline = readSymIdx() + a.Pcdata = make([]SymRef, readUint32()) for i := range a.Pcdata { - a.Pcdata[i] = readUint32() + a.Pcdata[i] = readSymIdx() } - a.PcdataEnd = readUint32() + funcdataofflen := readUint32() a.Funcdataoff = make([]uint32, funcdataofflen) for i := range a.Funcdataoff { @@ -127,11 +132,13 @@ type FuncInfoLengths struct { func (*FuncInfo) ReadFuncInfoLengths(b []byte) FuncInfoLengths { var result FuncInfoLengths - const numpcdataOff = 28 + // Offset to the number of pcdata values. This value is determined by counting + // the number of bytes until we write pcdata to the file. + const numpcdataOff = 44 result.NumPcdata = binary.LittleEndian.Uint32(b[numpcdataOff:]) result.PcdataOff = numpcdataOff + 4 - numfuncdataoffOff := result.PcdataOff + 4*(result.NumPcdata+1) + numfuncdataoffOff := result.PcdataOff + 8*result.NumPcdata result.NumFuncdataoff = binary.LittleEndian.Uint32(b[numfuncdataoffOff:]) result.FuncdataoffOff = numfuncdataoffOff + 4 @@ -154,29 +161,28 @@ func (*FuncInfo) ReadLocals(b []byte) uint32 { return binary.LittleEndian.Uint32 func (*FuncInfo) ReadFuncID(b []byte) uint32 { return binary.LittleEndian.Uint32(b[8:]) } -// return start and end offsets. -func (*FuncInfo) ReadPcsp(b []byte) (uint32, uint32) { - return binary.LittleEndian.Uint32(b[12:]), binary.LittleEndian.Uint32(b[16:]) +func (*FuncInfo) ReadPcsp(b []byte) SymRef { + return SymRef{binary.LittleEndian.Uint32(b[12:]), binary.LittleEndian.Uint32(b[16:])} } -// return start and end offsets. -func (*FuncInfo) ReadPcfile(b []byte) (uint32, uint32) { - return binary.LittleEndian.Uint32(b[16:]), binary.LittleEndian.Uint32(b[20:]) +func (*FuncInfo) ReadPcfile(b []byte) SymRef { + return SymRef{binary.LittleEndian.Uint32(b[20:]), binary.LittleEndian.Uint32(b[24:])} } -// return start and end offsets. -func (*FuncInfo) ReadPcline(b []byte) (uint32, uint32) { - return binary.LittleEndian.Uint32(b[20:]), binary.LittleEndian.Uint32(b[24:]) +func (*FuncInfo) ReadPcline(b []byte) SymRef { + return SymRef{binary.LittleEndian.Uint32(b[28:]), binary.LittleEndian.Uint32(b[32:])} } -// return start and end offsets. -func (*FuncInfo) ReadPcinline(b []byte, pcdataoffset uint32) (uint32, uint32) { - return binary.LittleEndian.Uint32(b[24:]), binary.LittleEndian.Uint32(b[pcdataoffset:]) +func (*FuncInfo) ReadPcinline(b []byte) SymRef { + return SymRef{binary.LittleEndian.Uint32(b[36:]), binary.LittleEndian.Uint32(b[40:])} } -// return start and end offsets. -func (*FuncInfo) ReadPcdata(b []byte, pcdataoffset uint32, k uint32) (uint32, uint32) { - return binary.LittleEndian.Uint32(b[pcdataoffset+4*k:]), binary.LittleEndian.Uint32(b[pcdataoffset+4+4*k:]) +func (*FuncInfo) ReadPcdata(b []byte) []SymRef { + syms := make([]SymRef, binary.LittleEndian.Uint32(b[44:])) + for i := range syms { + syms[i] = SymRef{binary.LittleEndian.Uint32(b[48+i*8:]), binary.LittleEndian.Uint32(b[52+i*8:])} + } + return syms } func (*FuncInfo) ReadFuncdataoff(b []byte, funcdataofffoff uint32, k uint32) int64 { diff --git a/src/cmd/internal/goobj/objfile.go b/src/cmd/internal/goobj/objfile.go index 5d4a253024..9a64f96cd6 100644 --- a/src/cmd/internal/goobj/objfile.go +++ b/src/cmd/internal/goobj/objfile.go @@ -421,8 +421,11 @@ const ( AuxDwarfLoc AuxDwarfRanges AuxDwarfLines - - // TODO: more. Pcdata? + AuxPcsp + AuxPcfile + AuxPcline + AuxPcinline + AuxPcdata ) func (a *Aux) Type() uint8 { return a[0] } @@ -827,11 +830,6 @@ func (r *Reader) Data(i uint32) []byte { return r.BytesAt(base+off, int(end-off)) } -// AuxDataBase returns the base offset of the aux data block. -func (r *Reader) PcdataBase() uint32 { - return r.h.Offsets[BlkPcdata] -} - // NRefName returns the number of referenced symbol names. func (r *Reader) NRefName() int { return int(r.h.Offsets[BlkRefName+1]-r.h.Offsets[BlkRefName]) / RefNameSize diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index dc47e51be9..11fab63065 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -624,11 +624,12 @@ func (s *LSym) CanBeAnSSASym() { } type Pcln struct { - Pcsp Pcdata - Pcfile Pcdata - Pcline Pcdata - Pcinline Pcdata - Pcdata []Pcdata + // Aux symbols for pcln + Pcsp *LSym + Pcfile *LSym + Pcline *LSym + Pcinline *LSym + Pcdata []*LSym Funcdata []*LSym Funcdataoff []int64 UsedFiles map[goobj.CUFileIndex]struct{} // file indices used while generating pcfile @@ -650,10 +651,6 @@ type Auto struct { Gotype *LSym } -type Pcdata struct { - P []byte -} - // Link holds the context for writing object code from a compiler // to be linker input or for reading that input into the linker. type Link struct { diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 8234697d72..a2bbdff24e 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -185,7 +185,11 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { // Pcdata h.Offsets[goobj.BlkPcdata] = w.Offset() for _, s := range ctxt.Text { // iteration order must match genFuncInfoSyms - if s.Func != nil { + // Because of the phase order, it's possible that we try to write an invalid + // object file, and the Pcln variables haven't been filled in. As such, we + // need to check that Pcsp exists, and assume the other pcln variables exist + // as well. Tests like test/fixedbugs/issue22200.go demonstrate this issue. + if s.Func != nil && s.Func.Pcln.Pcsp != nil { pc := &s.Func.Pcln w.Bytes(pc.Pcsp.P) w.Bytes(pc.Pcfile.P) @@ -478,6 +482,22 @@ func (w *writer) Aux(s *LSym) { if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { w.aux1(goobj.AuxDwarfLines, s.Func.dwarfDebugLinesSym) } + if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { + w.aux1(goobj.AuxPcsp, s.Func.Pcln.Pcsp) + } + if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { + w.aux1(goobj.AuxPcfile, s.Func.Pcln.Pcfile) + } + if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { + w.aux1(goobj.AuxPcline, s.Func.Pcln.Pcline) + } + if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { + w.aux1(goobj.AuxPcinline, s.Func.Pcln.Pcinline) + } + for _, pcSym := range s.Func.Pcln.Pcdata { + w.aux1(goobj.AuxPcdata, pcSym) + } + } } @@ -559,6 +579,19 @@ func nAuxSym(s *LSym) int { if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { n++ } + if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { + n++ + } + if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { + n++ + } + if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { + n++ + } + if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { + n++ + } + n += len(s.Func.Pcln.Pcdata) } return n } @@ -566,7 +599,17 @@ func nAuxSym(s *LSym) int { // generate symbols for FuncInfo. func genFuncInfoSyms(ctxt *Link) { infosyms := make([]*LSym, 0, len(ctxt.Text)) - var pcdataoff uint32 + hashedsyms := make([]*LSym, 0, 4*len(ctxt.Text)) + preparePcSym := func(s *LSym) *LSym { + if s == nil { + return s + } + s.PkgIdx = goobj.PkgIdxHashed + s.SymIdx = int32(len(hashedsyms) + len(ctxt.hasheddefs)) + s.Set(AttrIndexed, true) + hashedsyms = append(hashedsyms, s) + return s + } var b bytes.Buffer symidx := int32(len(ctxt.defs)) for _, s := range ctxt.Text { @@ -579,20 +622,14 @@ func genFuncInfoSyms(ctxt *Link) { FuncID: objabi.FuncID(s.Func.FuncID), } pc := &s.Func.Pcln - o.Pcsp = pcdataoff - pcdataoff += uint32(len(pc.Pcsp.P)) - o.Pcfile = pcdataoff - pcdataoff += uint32(len(pc.Pcfile.P)) - o.Pcline = pcdataoff - pcdataoff += uint32(len(pc.Pcline.P)) - o.Pcinline = pcdataoff - pcdataoff += uint32(len(pc.Pcinline.P)) - o.Pcdata = make([]uint32, len(pc.Pcdata)) - for i, pcd := range pc.Pcdata { - o.Pcdata[i] = pcdataoff - pcdataoff += uint32(len(pcd.P)) - } - o.PcdataEnd = pcdataoff + o.Pcsp = makeSymRef(preparePcSym(pc.Pcsp)) + o.Pcfile = makeSymRef(preparePcSym(pc.Pcfile)) + o.Pcline = makeSymRef(preparePcSym(pc.Pcline)) + o.Pcinline = makeSymRef(preparePcSym(pc.Pcinline)) + o.Pcdata = make([]goobj.SymRef, len(pc.Pcdata)) + for i, pcSym := range pc.Pcdata { + o.Pcdata[i] = makeSymRef(preparePcSym(pcSym)) + } o.Funcdataoff = make([]uint32, len(pc.Funcdataoff)) for i, x := range pc.Funcdataoff { o.Funcdataoff[i] = uint32(x) @@ -642,9 +679,9 @@ func genFuncInfoSyms(ctxt *Link) { } } ctxt.defs = append(ctxt.defs, infosyms...) + ctxt.hasheddefs = append(ctxt.hasheddefs, hashedsyms...) } -// debugDumpAux is a dumper for selected aux symbols. func writeAuxSymDebug(ctxt *Link, par *LSym, aux *LSym) { // Most aux symbols (ex: funcdata) are not interesting-- // pick out just the DWARF ones for now. diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go index 1f7ccf47ef..7750637796 100644 --- a/src/cmd/internal/obj/pcln.go +++ b/src/cmd/internal/obj/pcln.go @@ -6,6 +6,7 @@ package obj import ( "cmd/internal/goobj" + "cmd/internal/objabi" "encoding/binary" "log" ) @@ -14,16 +15,19 @@ import ( // returned by valfunc parameterized by arg. The invocation of valfunc to update the // current value is, for each p, // -// val = valfunc(func, val, p, 0, arg); -// record val as value at p->pc; -// val = valfunc(func, val, p, 1, arg); +// sym = valfunc(func, p, 0, arg); +// record sym.P as value at p->pc; +// sym = valfunc(func, p, 1, arg); // // where func is the function, val is the current value, p is the instruction being // considered, and arg can be used to further parameterize valfunc. -func funcpctab(ctxt *Link, dst *Pcdata, func_ *LSym, desc string, valfunc func(*Link, *LSym, int32, *Prog, int32, interface{}) int32, arg interface{}) { +func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, int32, *Prog, int32, interface{}) int32, arg interface{}) *LSym { dbg := desc == ctxt.Debugpcln - - dst.P = dst.P[:0] + dst := []byte{} + sym := &LSym{ + Type: objabi.SRODATA, + Attribute: AttrContentAddressable, + } if dbg { ctxt.Logf("funcpctab %s [valfunc=%s]\n", func_.Name, desc) @@ -32,7 +36,8 @@ func funcpctab(ctxt *Link, dst *Pcdata, func_ *LSym, desc string, valfunc func(* val := int32(-1) oldval := val if func_.Func.Text == nil { - return + // Return the emtpy symbol we've built so far. + return sym } pc := func_.Func.Text.Pc @@ -88,13 +93,13 @@ func funcpctab(ctxt *Link, dst *Pcdata, func_ *LSym, desc string, valfunc func(* if started { pcdelta := (p.Pc - pc) / int64(ctxt.Arch.MinLC) n := binary.PutUvarint(buf, uint64(pcdelta)) - dst.P = append(dst.P, buf[:n]...) + dst = append(dst, buf[:n]...) pc = p.Pc } delta := val - oldval n := binary.PutVarint(buf, int64(delta)) - dst.P = append(dst.P, buf[:n]...) + dst = append(dst, buf[:n]...) oldval = val started = true val = valfunc(ctxt, func_, val, p, 1, arg) @@ -109,18 +114,22 @@ func funcpctab(ctxt *Link, dst *Pcdata, func_ *LSym, desc string, valfunc func(* ctxt.Diag("negative pc offset: %v", v) } n := binary.PutUvarint(buf, uint64(v)) - dst.P = append(dst.P, buf[:n]...) + dst = append(dst, buf[:n]...) // add terminating varint-encoded 0, which is just 0 - dst.P = append(dst.P, 0) + dst = append(dst, 0) } if dbg { - ctxt.Logf("wrote %d bytes to %p\n", len(dst.P), dst) - for _, p := range dst.P { + ctxt.Logf("wrote %d bytes to %p\n", len(dst), dst) + for _, p := range dst { ctxt.Logf(" %02x", p) } ctxt.Logf("\n") } + + sym.Size = int64(len(dst)) + sym.P = dst + return sym } // pctofileline computes either the file number (arg == 0) @@ -268,18 +277,17 @@ func linkpcln(ctxt *Link, cursym *LSym) { } } - pcln.Pcdata = make([]Pcdata, npcdata) - pcln.Pcdata = pcln.Pcdata[:npcdata] + pcln.Pcdata = make([]*LSym, npcdata) pcln.Funcdata = make([]*LSym, nfuncdata) pcln.Funcdataoff = make([]int64, nfuncdata) pcln.Funcdataoff = pcln.Funcdataoff[:nfuncdata] - funcpctab(ctxt, &pcln.Pcsp, cursym, "pctospadj", pctospadj, nil) - funcpctab(ctxt, &pcln.Pcfile, cursym, "pctofile", pctofileline, pcln) - funcpctab(ctxt, &pcln.Pcline, cursym, "pctoline", pctofileline, nil) + pcln.Pcsp = funcpctab(ctxt, cursym, "pctospadj", pctospadj, nil) + pcln.Pcfile = funcpctab(ctxt, cursym, "pctofile", pctofileline, pcln) + pcln.Pcline = funcpctab(ctxt, cursym, "pctoline", pctofileline, nil) pcinlineState := new(pcinlineState) - funcpctab(ctxt, &pcln.Pcinline, cursym, "pctoinline", pcinlineState.pctoinline, nil) + pcln.Pcinline = funcpctab(ctxt, cursym, "pctoinline", pcinlineState.pctoinline, nil) for _, inlMark := range cursym.Func.InlMarks { pcinlineState.setParentPC(ctxt, int(inlMark.id), int32(inlMark.p.Pc)) } @@ -309,9 +317,14 @@ func linkpcln(ctxt *Link, cursym *LSym) { // pcdata. for i := 0; i < npcdata; i++ { if (havepc[i/32]>>uint(i%32))&1 == 0 { - continue + // use an empty symbol. + pcln.Pcdata[i] = &LSym{ + Type: objabi.SRODATA, + Attribute: AttrContentAddressable, + } + } else { + pcln.Pcdata[i] = funcpctab(ctxt, cursym, "pctopcdata", pctopcdata, interface{}(uint32(i))) } - funcpctab(ctxt, &pcln.Pcdata[i], cursym, "pctopcdata", pctopcdata, interface{}(uint32(i))) } // funcdata diff --git a/src/cmd/internal/objfile/goobj.go b/src/cmd/internal/objfile/goobj.go index e838f58aed..8eecebb1df 100644 --- a/src/cmd/internal/objfile/goobj.go +++ b/src/cmd/internal/objfile/goobj.go @@ -236,7 +236,15 @@ func (f *goobjFile) PCToLine(pc uint64) (string, int, *gosym.Func) { if arch == nil { return "", 0, nil } - pcdataBase := r.PcdataBase() + getSymData := func(s goobj.SymRef) []byte { + if s.PkgIdx != goobj.PkgIdxHashed { + // We don't need the data for non-hashed symbols, yet. + panic("not supported") + } + i := uint32(s.SymIdx + uint32(r.NSym()+r.NHashed64def())) + return r.BytesAt(r.DataOff(i), r.DataSize(i)) + } + ndef := uint32(r.NSym() + r.NHashed64def() + r.NHasheddef() + r.NNonpkgdef()) for i := uint32(0); i < ndef; i++ { osym := r.Sym(i) @@ -262,11 +270,9 @@ func (f *goobjFile) PCToLine(pc uint64) (string, int, *gosym.Func) { b := r.BytesAt(r.DataOff(isym), r.DataSize(isym)) var info *goobj.FuncInfo lengths := info.ReadFuncInfoLengths(b) - off, end := info.ReadPcline(b) - pcline := r.BytesAt(pcdataBase+off, int(end-off)) + pcline := getSymData(info.ReadPcline(b)) line := int(pcValue(pcline, pc-addr, arch)) - off, end = info.ReadPcfile(b) - pcfile := r.BytesAt(pcdataBase+off, int(end-off)) + pcfile := getSymData(info.ReadPcfile(b)) fileID := pcValue(pcfile, pc-addr, arch) globalFileID := info.ReadFile(b, lengths.FileOff, uint32(fileID)) fileName := r.File(int(globalFileID)) diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go index d1f2ac583d..2b95ad5a67 100644 --- a/src/cmd/link/internal/ld/dwarf.go +++ b/src/cmd/link/internal/ld/dwarf.go @@ -1421,7 +1421,7 @@ func (d *dwctxt) writeframes(fs loader.Sym) dwarfSecInfo { deltaBuf = dwarf.AppendUleb128(deltaBuf, uint64(thearch.Dwarfreglr)) } - for pcsp.Init(fpcsp); !pcsp.Done; pcsp.Next() { + for pcsp.Init(d.linkctxt.loader.Data(fpcsp)); !pcsp.Done; pcsp.Next() { nextpc := pcsp.NextPC // pciterinit goes up to the end of the function, diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index 09c7bbfb53..caa4566190 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -2252,7 +2252,7 @@ func (sc *stkChk) check(up *chain, depth int) int { var ch1 chain pcsp := obj.NewPCIter(uint32(ctxt.Arch.MinLC)) ri := 0 - for pcsp.Init(info.Pcsp()); !pcsp.Done; pcsp.Next() { + for pcsp.Init(ldr.Data(info.Pcsp())); !pcsp.Done; pcsp.Next() { // pcsp.value is in effect for [pcsp.pc, pcsp.nextpc). // Check stack size in effect for this span. diff --git a/src/cmd/link/internal/ld/pcln.go b/src/cmd/link/internal/ld/pcln.go index c7535f6a61..e9fd5937e7 100644 --- a/src/cmd/link/internal/ld/pcln.go +++ b/src/cmd/link/internal/ld/pcln.go @@ -592,9 +592,8 @@ func (ctxt *Link) pclntab(container loader.Bitmap) *pclntab { fi := ldr.FuncInfo(s) if fi.Valid() { fi.Preload() - npc := fi.NumPcdata() - for i := uint32(0); i < npc; i++ { - pcdata = append(pcdata, sym.Pcdata{P: fi.Pcdata(int(i))}) + for _, dataSym := range fi.Pcdata() { + pcdata = append(pcdata, sym.Pcdata{P: ldr.Data(dataSym)}) } nfd := fi.NumFuncdataoff() for i := uint32(0); i < nfd; i++ { @@ -666,15 +665,15 @@ func (ctxt *Link) pclntab(container loader.Bitmap) *pclntab { cu := ldr.SymUnit(s) if fi.Valid() { - pcsp = sym.Pcdata{P: fi.Pcsp()} - pcfile = sym.Pcdata{P: fi.Pcfile()} - pcline = sym.Pcdata{P: fi.Pcline()} + pcsp = sym.Pcdata{P: ldr.Data(fi.Pcsp())} + pcfile = sym.Pcdata{P: ldr.Data(fi.Pcfile())} + pcline = sym.Pcdata{P: ldr.Data(fi.Pcline())} } if fi.Valid() && fi.NumInlTree() > 0 { its := oldState.genInlTreeSym(cu, fi, ctxt.Arch, state) funcdata[objabi.FUNCDATA_InlTree] = its - pcdata[objabi.PCDATA_InlTreeIndex] = sym.Pcdata{P: fi.Pcinline()} + pcdata[objabi.PCDATA_InlTreeIndex] = sym.Pcdata{P: ldr.Data(fi.Pcinline())} } // pcdata diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index 8fd10b0848..f149e3c831 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -1878,19 +1878,24 @@ func (fi *FuncInfo) FuncID() objabi.FuncID { return objabi.FuncID((*goobj.FuncInfo)(nil).ReadFuncID(fi.data)) } -func (fi *FuncInfo) Pcsp() []byte { - pcsp, end := (*goobj.FuncInfo)(nil).ReadPcsp(fi.data) - return fi.r.BytesAt(fi.r.PcdataBase()+pcsp, int(end-pcsp)) +func (fi *FuncInfo) Pcsp() Sym { + sym := (*goobj.FuncInfo)(nil).ReadPcsp(fi.data) + return fi.l.resolve(fi.r, sym) } -func (fi *FuncInfo) Pcfile() []byte { - pcf, end := (*goobj.FuncInfo)(nil).ReadPcfile(fi.data) - return fi.r.BytesAt(fi.r.PcdataBase()+pcf, int(end-pcf)) +func (fi *FuncInfo) Pcfile() Sym { + sym := (*goobj.FuncInfo)(nil).ReadPcfile(fi.data) + return fi.l.resolve(fi.r, sym) } -func (fi *FuncInfo) Pcline() []byte { - pcln, end := (*goobj.FuncInfo)(nil).ReadPcline(fi.data) - return fi.r.BytesAt(fi.r.PcdataBase()+pcln, int(end-pcln)) +func (fi *FuncInfo) Pcline() Sym { + sym := (*goobj.FuncInfo)(nil).ReadPcline(fi.data) + return fi.l.resolve(fi.r, sym) +} + +func (fi *FuncInfo) Pcinline() Sym { + sym := (*goobj.FuncInfo)(nil).ReadPcinline(fi.data) + return fi.l.resolve(fi.r, sym) } // Preload has to be called prior to invoking the various methods @@ -1899,27 +1904,16 @@ func (fi *FuncInfo) Preload() { fi.lengths = (*goobj.FuncInfo)(nil).ReadFuncInfoLengths(fi.data) } -func (fi *FuncInfo) Pcinline() []byte { +func (fi *FuncInfo) Pcdata() []Sym { if !fi.lengths.Initialized { panic("need to call Preload first") } - pcinl, end := (*goobj.FuncInfo)(nil).ReadPcinline(fi.data, fi.lengths.PcdataOff) - return fi.r.BytesAt(fi.r.PcdataBase()+pcinl, int(end-pcinl)) -} - -func (fi *FuncInfo) NumPcdata() uint32 { - if !fi.lengths.Initialized { - panic("need to call Preload first") - } - return fi.lengths.NumPcdata -} - -func (fi *FuncInfo) Pcdata(k int) []byte { - if !fi.lengths.Initialized { - panic("need to call Preload first") + syms := (*goobj.FuncInfo)(nil).ReadPcdata(fi.data) + ret := make([]Sym, len(syms)) + for i := range ret { + ret[i] = fi.l.resolve(fi.r, syms[i]) } - pcdat, end := (*goobj.FuncInfo)(nil).ReadPcdata(fi.data, fi.lengths.PcdataOff, uint32(k)) - return fi.r.BytesAt(fi.r.PcdataBase()+pcdat, int(end-pcdat)) + return ret } func (fi *FuncInfo) NumFuncdataoff() uint32 { -- cgit v1.3 From dfdc3880b01d46d1d8125ab9eea0606b2fa5b819 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Thu, 20 Aug 2020 17:02:18 +0800 Subject: cmd/internal/obj/arm64: enable some SIMD instructions Enable VBSL, VBIT, VCMTST, VUXTL VUXTL2 and FMOVQ SIMD instructions required by the issue #40725. And FMOVQ instrucion is used to move a large constant to a Vn register. Add test cases. Fixes #40725 Change-Id: I1cac1922a0a0165d698a4b73a41f7a5f0a0ad549 Reviewed-on: https://go-review.googlesource.com/c/go/+/249758 Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 15 +++ src/cmd/asm/internal/asm/testdata/arm64error.s | 5 + src/cmd/internal/obj/arm64/a.out.go | 6 ++ src/cmd/internal/obj/arm64/anames.go | 6 ++ src/cmd/internal/obj/arm64/asm7.go | 121 ++++++++++++++++++++++--- 5 files changed, 139 insertions(+), 14 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index f0c716a2b5..451ca749ba 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -145,6 +145,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VZIP2 V10.D2, V13.D2, V3.D2 // a379ca4e VZIP1 V17.S2, V4.S2, V26.S2 // 9a38910e VZIP2 V25.S2, V14.S2, V25.S2 // d979990e + VUXTL V30.B8, V30.H8 // dea7082f + VUXTL V30.H4, V29.S4 // dda7102f + VUXTL V29.S2, V2.D2 // a2a7202f + VUXTL2 V30.H8, V30.S4 // dea7106f + VUXTL2 V29.S4, V2.D2 // a2a7206f + VUXTL2 V30.B16, V2.H8 // c2a7086f + VBIT V21.B16, V25.B16, V4.B16 // 241fb56e + VBSL V23.B16, V3.B16, V7.B16 // 671c776e + VCMTST V2.B8, V29.B8, V2.B8 // a28f220e + VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e + VSUB V2.B8, V30.B8, V30.B8 // de87222e MOVD (R2)(R6.SXTW), R4 // 44c866f8 MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 @@ -186,6 +197,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVS $(0.96875), F3 // 03f02d1e FMOVD $(28.0), F4 // 0490671e +// move a large constant to a Vd. + FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 + FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 9f377817a9..2a911b4cce 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -340,4 +340,9 @@ TEXT errors(SB),$0 MRS PMSWINC_EL0, R3 // ERROR "system register is not readable" MRS OSLAR_EL1, R3 // ERROR "system register is not readable" VLD3R.P 24(R15), [V15.H4,V16.H4,V17.H4] // ERROR "invalid post-increment offset" + VBIT V1.H4, V12.H4, V3.H4 // ERROR "invalid arrangement" + VBSL V1.D2, V12.D2, V3.D2 // ERROR "invalid arrangement" + VUXTL V30.D2, V30.H8 // ERROR "operand mismatch" + VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch" + VUXTL V3.D2, V4.B8 // ERROR "operand mismatch" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 03e0278a33..ab065e07e5 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -874,6 +874,7 @@ const ( AFLDPS AFMOVD AFMOVS + AFMOVQ AFMULD AFMULS AFNEGD @@ -987,9 +988,14 @@ const ( AVUSHR AVSHL AVSRI + AVBSL + AVBIT AVTBL AVZIP1 AVZIP2 + AVCMTST + AVUXTL + AVUXTL2 ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 65ecd007ea..8961f04b0c 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,6 +381,7 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", + "FMOVQ", "FMULD", "FMULS", "FNEGD", @@ -494,8 +495,13 @@ var Anames = []string{ "VUSHR", "VSHL", "VSRI", + "VBSL", + "VBIT", "VTBL", "VZIP1", "VZIP2", + "VCMTST", + "VUXTL", + "VUXTL2", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 0b90e31392..7ce18d0f13 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -393,6 +393,11 @@ var optab = []Optab{ {AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0}, {AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, + // Move a large constant to a Vn. + {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, {ABL, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -403,12 +408,14 @@ var optab = []Optab{ {obj.ARET, C_NONE, C_NONE, C_NONE, C_REG, 6, 4, 0, 0, 0}, {obj.ARET, C_NONE, C_NONE, C_NONE, C_ZOREG, 6, 4, 0, 0, 0}, {ABEQ, C_NONE, C_NONE, C_NONE, C_SBRA, 7, 4, 0, 0, 0}, - {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0}, - {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0}, {ACBZ, C_REG, C_NONE, C_NONE, C_SBRA, 39, 4, 0, 0, 0}, {ATBZ, C_VCON, C_REG, C_NONE, C_SBRA, 40, 4, 0, 0, 0}, {AERET, C_NONE, C_NONE, C_NONE, C_NONE, 41, 4, 0, 0, 0}, + // get a PC-relative address + {AADRP, C_SBRA, C_NONE, C_NONE, C_REG, 60, 4, 0, 0, 0}, + {AADR, C_SBRA, C_NONE, C_NONE, C_REG, 61, 4, 0, 0, 0}, + {ACLREX, C_NONE, C_NONE, C_NONE, C_VCON, 38, 4, 0, 0, 0}, {ACLREX, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0, 0}, {ABFM, C_VCON, C_REG, C_VCON, C_REG, 42, 4, 0, 0, 0}, @@ -473,6 +480,7 @@ var optab = []Optab{ {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0}, @@ -2657,7 +2665,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD: + case AFMOVS, AFMOVD, AFMOVQ: break case AFCVTZSD: @@ -2740,6 +2748,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVCMEQ, t) oprangeset(AVORR, t) oprangeset(AVEOR, t) + oprangeset(AVBSL, t) + oprangeset(AVBIT, t) + oprangeset(AVCMTST, t) case AVADD: oprangeset(AVSUB, t) @@ -2787,6 +2798,9 @@ func buildop(ctxt *obj.Link) { case AVZIP1: oprangeset(AVZIP2, t) + case AVUXTL: + oprangeset(AVUXTL2, t) + case AVLD1R: oprangeset(AVLD2, t) oprangeset(AVLD2R, t) @@ -4163,7 +4177,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4204,17 +4218,24 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("invalid arrangement: %v", p) } - if (p.As == AVORR || p.As == AVAND || p.As == AVEOR) && - (af != ARNG_16B && af != ARNG_8B) { - c.ctxt.Diag("invalid arrangement: %v", p) - } else if (p.As == AVFMLA || p.As == AVFMLS) && - (af != ARNG_2D && af != ARNG_2S && af != ARNG_4S) { - c.ctxt.Diag("invalid arrangement: %v", p) - } else if p.As == AVORR { - size = 2 - } else if p.As == AVAND || p.As == AVEOR { + switch p.As { + case AVORR, AVAND, AVEOR, AVBIT, AVBSL: + if af != ARNG_16B && af != ARNG_8B { + c.ctxt.Diag("invalid arrangement: %v", p) + } + case AVFMLA, AVFMLS: + if af != ARNG_2D && af != ARNG_2S && af != ARNG_4S { + c.ctxt.Diag("invalid arrangement: %v", p) + } + } + switch p.As { + case AVAND, AVEOR: size = 0 - } else if p.As == AVFMLA || p.As == AVFMLS { + case AVBSL: + size = 1 + case AVORR, AVBIT: + size = 2 + case AVFMLA, AVFMLS: if af == ARNG_2D { size = 1 } else { @@ -5096,6 +5117,59 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) + case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) + + case 102: // VUXTL{2} Vn., Vd. + af := int((p.From.Reg >> 5) & 15) + at := int((p.To.Reg >> 5) & 15) + var Q, immh uint32 + switch at { + case ARNG_8H: + if af == ARNG_8B { + immh = 1 + Q = 0 + } else if af == ARNG_16B { + immh = 1 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + case ARNG_4S: + if af == ARNG_4H { + immh = 2 + Q = 0 + } else if af == ARNG_8H { + immh = 2 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + case ARNG_2D: + if af == ARNG_2S { + immh = 4 + Q = 0 + } else if af == ARNG_4S { + immh = 4 + Q = 1 + } else { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + if p.As == AVUXTL && Q == 1 { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + if p.As == AVUXTL2 && Q == 0 { + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -5662,6 +5736,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVADD: return 7<<25 | 1<<21 | 1<<15 | 1<<10 + case AVSUB: + return 0x17<<25 | 1<<21 | 1<<15 | 1<<10 + case AVADDP: return 7<<25 | 1<<21 | 1<<15 | 15<<10 @@ -5724,6 +5801,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVLD2R, AVLD4R: return 0xD<<24 | 3<<21 + + case AVBIT: + return 1<<29 | 0x75<<21 | 7<<10 + + case AVBSL: + return 1<<29 | 0x73<<21 | 7<<10 + + case AVCMTST: + return 0xE<<24 | 1<<21 | 0x23<<10 + + case AVUXTL, AVUXTL2: + return 0x5e<<23 | 0x29<<10 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6566,6 +6655,10 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { fp = 1 w = 1 /* 64-bit SIMD/FP */ + case AFMOVQ: + fp = 1 + w = 2 /* 128-bit SIMD/FP */ + case AMOVD: if p.Pool.As == ADWORD { w = 1 /* 64-bit */ -- cgit v1.3 From d7ab277eed4d2e5ede4f3361adf42d4ad76ced8f Mon Sep 17 00:00:00 2001 From: Junchen Li Date: Mon, 31 Aug 2020 13:32:33 +0800 Subject: cmd/asm: add more SIMD instructions on arm64 This CL adds USHLL, USHLL2, UZP1, UZP2, and BIF instructions requested by #40725. And since UXTL* are aliases of USHLL*, this CL also merges them into one case. Updates #40725 Change-Id: I404a4fdaf953319f72eea548175bec1097a2a816 Reviewed-on: https://go-review.googlesource.com/c/go/+/253659 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/asm/internal/asm/testdata/arm64.s | 20 +++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 8 ++ src/cmd/internal/obj/arm64/a.out.go | 9 +- src/cmd/internal/obj/arm64/anames.go | 9 +- src/cmd/internal/obj/arm64/asm7.go | 109 +++++++++++++------------ 5 files changed, 100 insertions(+), 55 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 451ca749ba..e106ff2ae1 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -156,6 +156,26 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VCMTST V2.B8, V29.B8, V2.B8 // a28f220e VCMTST V2.D2, V23.D2, V3.D2 // e38ee24e VSUB V2.B8, V30.B8, V30.B8 // de87222e + VUZP1 V0.B8, V30.B8, V1.B8 // c11b000e + VUZP1 V1.B16, V29.B16, V2.B16 // a21b014e + VUZP1 V2.H4, V28.H4, V3.H4 // 831b420e + VUZP1 V3.H8, V27.H8, V4.H8 // 641b434e + VUZP1 V28.S2, V2.S2, V5.S2 // 45189c0e + VUZP1 V29.S4, V1.S4, V6.S4 // 26189d4e + VUZP1 V30.D2, V0.D2, V7.D2 // 0718de4e + VUZP2 V0.D2, V30.D2, V1.D2 // c15bc04e + VUZP2 V30.D2, V0.D2, V29.D2 // 1d58de4e + VUSHLL $0, V30.B8, V30.H8 // dea7082f + VUSHLL $0, V30.H4, V29.S4 // dda7102f + VUSHLL $0, V29.S2, V2.D2 // a2a7202f + VUSHLL2 $0, V30.B16, V2.H8 // c2a7086f + VUSHLL2 $0, V30.H8, V30.S4 // dea7106f + VUSHLL2 $0, V29.S4, V2.D2 // a2a7206f + VUSHLL $7, V30.B8, V30.H8 // dea70f2f + VUSHLL $15, V30.H4, V29.S4 // dda71f2f + VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f + VBIF V0.B8, V30.B8, V1.B8 // c11fe02e + VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e MOVD (R2)(R6.SXTW), R4 // 44c866f8 MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8 MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 2a911b4cce..20b1f3e9f0 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -345,4 +345,12 @@ TEXT errors(SB),$0 VUXTL V30.D2, V30.H8 // ERROR "operand mismatch" VUXTL2 V20.B8, V21.H8 // ERROR "operand mismatch" VUXTL V3.D2, V4.B8 // ERROR "operand mismatch" + VUZP1 V0.B8, V30.B8, V1.B16 // ERROR "operand mismatch" + VUZP2 V0.Q1, V30.Q1, V1.Q1 // ERROR "invalid arrangement" + VUSHLL $0, V30.D2, V30.H8 // ERROR "operand mismatch" + VUSHLL2 $0, V20.B8, V21.H8 // ERROR "operand mismatch" + VUSHLL $8, V30.B8, V30.H8 // ERROR "shift amount out of range" + VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range" + VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch" + VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index ab065e07e5..2839da1437 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -954,6 +954,7 @@ const ( AVADD AVADDP AVAND + AVBIF AVCMEQ AVCNT AVEOR @@ -986,6 +987,12 @@ const ( AVEXT AVRBIT AVUSHR + AVUSHLL + AVUSHLL2 + AVUXTL + AVUXTL2 + AVUZP1 + AVUZP2 AVSHL AVSRI AVBSL @@ -994,8 +1001,6 @@ const ( AVZIP1 AVZIP2 AVCMTST - AVUXTL - AVUXTL2 ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 8961f04b0c..48c066abfd 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -461,6 +461,7 @@ var Anames = []string{ "VADD", "VADDP", "VAND", + "VBIF", "VCMEQ", "VCNT", "VEOR", @@ -493,6 +494,12 @@ var Anames = []string{ "VEXT", "VRBIT", "VUSHR", + "VUSHLL", + "VUSHLL2", + "VUXTL", + "VUXTL2", + "VUZP1", + "VUZP2", "VSHL", "VSRI", "VBSL", @@ -501,7 +508,5 @@ var Anames = []string{ "VZIP1", "VZIP2", "VCMTST", - "VUXTL", - "VUXTL2", "LAST", } diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7ce18d0f13..df4bbbbd35 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -480,6 +480,7 @@ var optab = []Optab{ {AVTBL, C_ARNG, C_NONE, C_LIST, C_ARNG, 100, 4, 0, 0, 0}, {AVUSHR, C_VCON, C_ARNG, C_NONE, C_ARNG, 95, 4, 0, 0, 0}, {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, + {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, /* conditional operations */ @@ -2751,6 +2752,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVBSL, t) oprangeset(AVBIT, t) oprangeset(AVCMTST, t) + oprangeset(AVUZP1, t) + oprangeset(AVUZP2, t) + oprangeset(AVBIF, t) case AVADD: oprangeset(AVSUB, t) @@ -2801,6 +2805,9 @@ func buildop(ctxt *obj.Link) { case AVUXTL: oprangeset(AVUXTL2, t) + case AVUSHLL: + oprangeset(AVUSHLL2, t) + case AVLD1R: oprangeset(AVLD2, t) oprangeset(AVLD2R, t) @@ -4177,7 +4184,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4219,7 +4226,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } switch p.As { - case AVORR, AVAND, AVEOR, AVBIT, AVBSL: + case AVORR, AVAND, AVEOR, AVBIT, AVBSL, AVBIF: if af != ARNG_16B && af != ARNG_8B { c.ctxt.Diag("invalid arrangement: %v", p) } @@ -4233,7 +4240,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { size = 0 case AVBSL: size = 1 - case AVORR, AVBIT: + case AVORR, AVBIT, AVBIF: size = 2 case AVFMLA, AVFMLS: if af == ARNG_2D { @@ -5120,56 +5127,44 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) - case 102: // VUXTL{2} Vn., Vd. - af := int((p.From.Reg >> 5) & 15) - at := int((p.To.Reg >> 5) & 15) - var Q, immh uint32 - switch at { - case ARNG_8H: - if af == ARNG_8B { - immh = 1 - Q = 0 - } else if af == ARNG_16B { - immh = 1 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - case ARNG_4S: - if af == ARNG_4H { - immh = 2 - Q = 0 - } else if af == ARNG_8H { - immh = 2 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - case ARNG_2D: - if af == ARNG_2S { - immh = 4 - Q = 0 - } else if af == ARNG_4S { - immh = 4 - Q = 1 - } else { - c.ctxt.Diag("operand mismatch: %v\n", p) - } + case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ + o1 = c.opirr(p, p.As) + rf := p.Reg + af := uint8((p.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + shift := int(p.From.Offset) + if p.As == AVUXTL || p.As == AVUXTL2 { + rf = p.From.Reg + af = uint8((p.From.Reg >> 5) & 15) + shift = 0 + } + + pack := func(q, x, y uint8) uint32 { + return uint32(q)<<16 | uint32(x)<<8 | uint32(y) + } + + var Q uint8 = uint8(o1>>30) & 1 + var immh, width uint8 + switch pack(Q, af, at) { + case pack(0, ARNG_8B, ARNG_8H): + immh, width = 1, 8 + case pack(1, ARNG_16B, ARNG_8H): + immh, width = 1, 8 + case pack(0, ARNG_4H, ARNG_4S): + immh, width = 2, 16 + case pack(1, ARNG_8H, ARNG_4S): + immh, width = 2, 16 + case pack(0, ARNG_2S, ARNG_2D): + immh, width = 4, 32 + case pack(1, ARNG_4S, ARNG_2D): + immh, width = 4, 32 default: c.ctxt.Diag("operand mismatch: %v\n", p) } - - if p.As == AVUXTL && Q == 1 { - c.ctxt.Diag("operand mismatch: %v\n", p) + if !(0 <= shift && shift <= int(width-1)) { + c.ctxt.Diag("shift amount out of range: %v\n", p) } - if p.As == AVUXTL2 && Q == 0 { - c.ctxt.Diag("operand mismatch: %v\n", p) - } - - o1 = c.oprrr(p, p.As) - rf := int((p.From.Reg) & 31) - rt := int((p.To.Reg) & 31) - o1 |= Q<<30 | immh<<19 | uint32((rf&31)<<5) | uint32(rt&31) + o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) } out[0] = o1 out[1] = o2 @@ -5802,6 +5797,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVLD2R, AVLD4R: return 0xD<<24 | 3<<21 + case AVBIF: + return 1<<29 | 7<<25 | 7<<21 | 7<<10 + case AVBIT: return 1<<29 | 0x75<<21 | 7<<10 @@ -5811,8 +5809,11 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVCMTST: return 0xE<<24 | 1<<21 | 0x23<<10 - case AVUXTL, AVUXTL2: - return 0x5e<<23 | 0x29<<10 + case AVUZP1: + return 7<<25 | 3<<11 + + case AVUZP2: + return 7<<25 | 1<<14 | 3<<11 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6011,6 +6012,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVSRI: return 0x5E<<23 | 17<<10 + + case AVUSHLL, AVUXTL: + return 1<<29 | 15<<24 | 0x29<<10 + + case AVUSHLL2, AVUXTL2: + return 3<<29 | 15<<24 | 0x29<<10 } c.ctxt.Diag("%v: bad irr %v", p, a) -- cgit v1.3 From a86b6f23f08fd42154bc5bbfa417da6ee5ef48fb Mon Sep 17 00:00:00 2001 From: diaxu01 Date: Thu, 2 Apr 2020 02:39:28 +0000 Subject: cmd/internal/obj/arm64: optimize the instruction of moving long effective stack address Currently, when the offset of "MOVD $offset(Rn), Rd" is a large positive constant or a negative constant, the assembler will load this offset from the constant pool.This patch gets rid of the constant pool by encoding the offset into two ADD instructions if it's a large positive constant or one SUB instruction if negative. For very large negative offset, it is rarely used, here we don't optimize this case. Optimized case 1: MOVD $-0x100000(R7), R0 Before: LDR 0x67670(constant pool), R27; ADD R27.UXTX, R0, R7 After: SUB $0x100000, R7, R0 Optimized case 2: MOVD $0x123468(R7), R0 Before: LDR 0x67670(constant pool), R27; ADD R27.UXTX, R0, R7 After: ADD $0x123000, R7, R27; ADD $0x000468, R27, R0 1. Binary size before/after. binary size change pkg/linux_arm64 +4KB pkg/tool/linux_arm64 no change go no change gofmt no change 2. go1 benckmark. name old time/op new time/op delta pkg:test/bench/go1 goos:linux goarch:arm64 BinaryTree17-64 7335721401.800000ns +-40% 6264542009.800000ns +-14% ~ (p=0.421 n=5+5) Fannkuch11-64 3886551822.600000ns +- 0% 3875870590.200000ns +- 0% ~ (p=0.151 n=5+5) FmtFprintfEmpty-64 82.960000ns +- 1% 83.900000ns +- 2% +1.13% (p=0.048 n=5+5) FmtFprintfString-64 149.200000ns +- 1% 148.000000ns +- 0% -0.80% (p=0.016 n=5+4) FmtFprintfInt-64 177.000000ns +- 0% 178.400000ns +- 2% ~ (p=0.794 n=4+5) FmtFprintfIntInt-64 240.200000ns +- 2% 239.400000ns +- 4% ~ (p=0.302 n=5+5) FmtFprintfPrefixedInt-64 300.400000ns +- 0% 299.200000ns +- 1% ~ (p=0.119 n=5+5) FmtFprintfFloat-64 360.000000ns +- 0% 361.600000ns +- 3% ~ (p=0.349 n=4+5) FmtManyArgs-64 1064.400000ns +- 1% 1061.400000ns +- 0% ~ (p=0.087 n=5+5) GobDecode-64 12080404.400000ns +- 2% 11637601.000000ns +- 1% -3.67% (p=0.008 n=5+5) GobEncode-64 8474973.800000ns +- 2% 7977801.600000ns +- 2% -5.87% (p=0.008 n=5+5) Gzip-64 416501238.400000ns +- 0% 410463405.400000ns +- 0% -1.45% (p=0.008 n=5+5) Gunzip-64 58088415.200000ns +- 0% 58826209.600000ns +- 0% +1.27% (p=0.008 n=5+5) HTTPClientServer-64 128660.200000ns +-23% 117840.800000ns +- 8% ~ (p=0.222 n=5+5) JSONEncode-64 17547746.800000ns +- 4% 17216180.000000ns +- 1% ~ (p=0.222 n=5+5) JSONDecode-64 80879896.000000ns +- 1% 80063737.200000ns +- 0% -1.01% (p=0.008 n=5+5) Mandelbrot200-64 5484901.600000ns +- 0% 5483614.400000ns +- 0% ~ (p=0.310 n=5+5) GoParse-64 6201166.800000ns +- 6% 6150920.600000ns +- 1% ~ (p=0.548 n=5+5) RegexpMatchEasy0_32-64 135.000000ns +- 0% 139.200000ns +- 7% ~ (p=0.643 n=5+5) RegexpMatchEasy0_1K-64 484.600000ns +- 2% 483.800000ns +- 2% ~ (p=0.984 n=5+5) RegexpMatchEasy1_32-64 128.000000ns +- 1% 124.600000ns +- 1% -2.66% (p=0.008 n=5+5) RegexpMatchEasy1_1K-64 769.400000ns +- 2% 761.400000ns +- 1% ~ (p=0.460 n=5+5) RegexpMatchMedium_32-64 12.900000ns +- 0% 12.500000ns +- 0% -3.10% (p=0.008 n=5+5) RegexpMatchMedium_1K-64 57879.200000ns +- 1% 56512.200000ns +- 0% -2.36% (p=0.008 n=5+5) RegexpMatchHard_32-64 3091.600000ns +- 1% 3071.000000ns +- 0% -0.67% (p=0.048 n=5+5) RegexpMatchHard_1K-64 92941.200000ns +- 1% 92794.000000ns +- 0% ~ (p=1.000 n=5+5) Revcomp-64 1695605187.000000ns +-54% 1821697637.400000ns +-47% ~ (p=1.000 n=5+5) Template-64 112839686.800000ns +- 1% 109964069.200000ns +- 3% ~ (p=0.095 n=5+5) TimeParse-64 587.000000ns +- 0% 587.000000ns +- 0% ~ (all equal) TimeFormat-64 586.000000ns +- 1% 584.200000ns +- 1% ~ (p=0.659 n=5+5) [Geo mean] 81804.262218ns 80694.712973ns -1.36% name old speed new speed delta pkg:test/bench/go1 goos:linux goarch:arm64 GobDecode-64 63.6MB/s +- 2% 66.0MB/s +- 1% +3.78% (p=0.008 n=5+5) GobEncode-64 90.6MB/s +- 2% 96.2MB/s +- 2% +6.23% (p=0.008 n=5+5) Gzip-64 46.6MB/s +- 0% 47.3MB/s +- 0% +1.47% (p=0.008 n=5+5) Gunzip-64 334MB/s +- 0% 330MB/s +- 0% -1.25% (p=0.008 n=5+5) JSONEncode-64 111MB/s +- 4% 113MB/s +- 1% ~ (p=0.222 n=5+5) JSONDecode-64 24.0MB/s +- 1% 24.2MB/s +- 0% +1.02% (p=0.008 n=5+5) GoParse-64 9.35MB/s +- 6% 9.42MB/s +- 1% ~ (p=0.571 n=5+5) RegexpMatchEasy0_32-64 237MB/s +- 0% 231MB/s +- 7% ~ (p=0.690 n=5+5) RegexpMatchEasy0_1K-64 2.11GB/s +- 2% 2.12GB/s +- 2% ~ (p=1.000 n=5+5) RegexpMatchEasy1_32-64 250MB/s +- 1% 257MB/s +- 1% +2.63% (p=0.008 n=5+5) RegexpMatchEasy1_1K-64 1.33GB/s +- 2% 1.35GB/s +- 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32-64 77.6MB/s +- 0% 79.8MB/s +- 0% +2.80% (p=0.008 n=5+5) RegexpMatchMedium_1K-64 17.7MB/s +- 1% 18.1MB/s +- 0% +2.41% (p=0.008 n=5+5) RegexpMatchHard_32-64 10.4MB/s +- 1% 10.4MB/s +- 0% ~ (p=0.056 n=5+5) RegexpMatchHard_1K-64 11.0MB/s +- 1% 11.0MB/s +- 0% ~ (p=0.984 n=5+5) Revcomp-64 188MB/s +-71% 155MB/s +-71% ~ (p=1.000 n=5+5) Template-64 17.2MB/s +- 1% 17.7MB/s +- 3% ~ (p=0.095 n=5+5) [Geo mean] 79.2MB/s 79.3MB/s +0.24% Change-Id: I593ac3e7037afafc3605ad4b0cfb51d5dd88015d Reviewed-on: https://go-review.googlesource.com/c/go/+/232438 Trust: Alberto Donizetti Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 15 ++++++++-- src/cmd/internal/obj/arm64/a.out.go | 7 +++-- src/cmd/internal/obj/arm64/anames7.go | 1 + src/cmd/internal/obj/arm64/asm7.go | 46 +++++++++++++++++++++---------- 4 files changed, 50 insertions(+), 19 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e106ff2ae1..acfb16b096 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -340,8 +340,19 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2 MOVD $0, R1 // 010080d2 MOVD $-1, R1 // 01008092 - MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 - MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2 + MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92 + + MOVD $0x1002(RSP), R1 // MOVD $4098(RSP), R1 // fb074091610b0091 + MOVD $0x1708(RSP), RSP // MOVD $5896(RSP), RSP // fb0740917f231c91 + MOVD $0x2001(R7), R1 // MOVD $8193(R7), R1 // fb08409161070091 + MOVD $0xffffff(R7), R1 // MOVD $16777215(R7), R1 // fbfc7f9161ff3f91 + + MOVD $-0x1(R7), R1 // MOVD $-1(R7), R1 // e10400d1 + MOVD $-0x30(R7), R1 // MOVD $-48(R7), R1 // e1c000d1 + MOVD $-0x708(R7), R1 // MOVD $-1800(R7), R1 // e1201cd1 + MOVD $-0x2000(RSP), R1 // MOVD $-8192(RSP), R1 // e10b40d1 + MOVD $-0x10000(RSP), RSP // MOVD $-65536(RSP), RSP // ff4340d1 // // CLS diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 2839da1437..b3c9e9a18e 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -410,9 +410,10 @@ const ( C_FCON // floating-point constant C_VCONADDR // 64-bit memory address - C_AACON // ADDCON offset in auto constant $a(FP) - C_LACON // 32-bit offset in auto constant $a(FP) - C_AECON // ADDCON offset in extern constant $e(SB) + C_AACON // ADDCON offset in auto constant $a(FP) + C_AACON2 // 24-bit offset in auto constant $a(FP) + C_LACON // 32-bit offset in auto constant $a(FP) + C_AECON // ADDCON offset in extern constant $e(SB) // TODO(aram): only one branch class should be enough C_SBRA // for TYPE_BRANCH diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index e1703fc4ab..96c9f788d9 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -36,6 +36,7 @@ var cnames7 = []string{ "FCON", "VCONADDR", "AACON", + "AACON2", "LACON", "AECON", "SBRA", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index df4bbbbd35..fc2033d689 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -391,7 +391,11 @@ var optab = []Optab{ {AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 16, 0, NOTUSETMP, 0}, {AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0}, - {AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON, C_NONE, C_NONE, C_RSP, 4, 4, REGFROM, 0, 0}, + {AMOVD, C_AACON2, C_NONE, C_NONE, C_RSP, 4, 8, REGFROM, 0, 0}, + + /* load long effective stack address (load int32 offset and add) */ + {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, // Move a large constant to a Vn. {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, @@ -594,9 +598,6 @@ var optab = []Optab{ {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, - /* load long effective stack address (load int32 offset and add) */ - {AMOVD, C_LACON, C_NONE, C_NONE, C_REG, 34, 8, REGSP, LFROM, 0}, - /* pre/post-indexed load (unscaled, signed 9-bit offset) */ {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, @@ -1361,6 +1362,10 @@ func isaddcon(v int64) bool { return v <= 0xFFF } +func isaddcon2(v int64) bool { + return 0 <= v && v <= 0xFFFFFF +} + // isbitcon reports whether a constant can be encoded into a logical instruction. // bitcon has a binary form of repetition of a bit sequence of length 2, 4, 8, 16, 32, or 64, // which itself is a rotate (w.r.t. the length of the unit) of a sequence of ones. @@ -1889,10 +1894,14 @@ func (c *ctxt7) aclass(a *obj.Addr) int { default: return C_GOK } - - if isaddcon(c.instoffset) { + cf := c.instoffset + if isaddcon(cf) || isaddcon(-cf) { return C_AACON } + if isaddcon2(cf) { + return C_AACON2 + } + return C_LACON case obj.TYPE_BRANCH: @@ -2046,7 +2055,7 @@ func cmp(a int, b int) bool { return cmp(C_LCON, b) case C_LACON: - if b == C_AACON { + if b == C_AACON || b == C_AACON2 { return true } @@ -3062,11 +3071,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(r&31) << 5) | uint32(rt&31) - case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R */ - o1 = c.opirr(p, p.As) - + case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R; mov $addcon2, R */ rt := int(p.To.Reg) r := int(o.param) + if r == 0 { r = REGZERO } else if r == REGFROM { @@ -3075,13 +3083,23 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if r == 0 { r = REGSP } + v := int32(c.regoff(&p.From)) - if (v & 0xFFF000) != 0 { - v >>= 12 - o1 |= 1 << 22 /* shift, by 12 */ + var op int32 + if v < 0 { + v = -v + op = int32(c.opirr(p, ASUB)) + } else { + op = int32(c.opirr(p, AADD)) + } + + if int(o.size) == 8 { + o1 = c.oaddi(p, op, v&0xfff000, r, REGTMP) + o2 = c.oaddi(p, op, v&0x000fff, REGTMP, rt) + break } - o1 |= ((uint32(v) & 0xFFF) << 10) | (uint32(r&31) << 5) | uint32(rt&31) + o1 = c.oaddi(p, op, v, r, rt) case 5: /* b s; bl s */ o1 = c.opbra(p, p.As) -- cgit v1.3 From 0dde60a5fefcb1447c97efa5c7bb4dbcf3575736 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Wed, 19 Aug 2020 03:07:26 +1000 Subject: cmd/internal/obj/riscv: clean up lowerJALR This cleans up the last of the direct obj.Prog rewriting, removing lowerJALR and replacing it with correct handling for AJALR during instruction encoding. Change-Id: Ieea125bde30d4c0edd2d9ed1e50160543aa8f330 Reviewed-on: https://go-review.googlesource.com/c/go/+/249077 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Trust: Joel Sing --- src/cmd/internal/obj/riscv/obj.go | 43 ++++++++------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 77d383b290..841b30d85c 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -58,30 +58,14 @@ func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *ob p.As = AJALR p.From.Type = obj.TYPE_REG p.From.Reg = lr - p.From.Sym = to.Sym p.Reg = 0 p.To.Type = obj.TYPE_REG p.To.Reg = REG_TMP - lowerJALR(p) + p.To.Sym = to.Sym return p } -// lowerJALR normalizes a JALR instruction. -func lowerJALR(p *obj.Prog) { - if p.As != AJALR { - panic("lowerJALR: not a JALR") - } - - // JALR gets parsed like JAL - the linkage pointer goes in From, - // and the target is in To. However, we need to assemble it as an - // I-type instruction, so place the linkage pointer in To, the - // target register in Reg, and the offset in From. - p.Reg = p.To.Reg - p.From, p.To = p.To, p.From - p.From.Type, p.From.Reg = obj.TYPE_CONST, obj.REG_NONE -} - // progedit is called individually for each *obj.Prog. It normalizes instruction // formats and eliminates as many pseudo-instructions as possible. func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { @@ -125,7 +109,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { switch p.As { case obj.AJMP: // Turn JMP into JAL ZERO or JALR ZERO. - // p.From is actually an _output_ for this instruction. p.From.Type = obj.TYPE_REG p.From.Reg = REG_ZERO @@ -136,7 +119,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { switch p.To.Name { case obj.NAME_NONE: p.As = AJALR - lowerJALR(p) case obj.NAME_EXTERN: // Handled in preprocess. default: @@ -154,14 +136,10 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p.As = AJALR p.From.Type = obj.TYPE_REG p.From.Reg = REG_LR - lowerJALR(p) default: ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p) } - case AJALR: - lowerJALR(p) - case obj.AUNDEF: p.As = AEBREAK @@ -454,7 +432,7 @@ func containsCall(sym *obj.LSym) bool { case obj.ACALL: return true case AJAL, AJALR: - if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_LR { + if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR { return true } } @@ -731,11 +709,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p = jalrToSym(ctxt, p, newprog, REG_ZERO) } else { p.As = AJALR - p.From.Type = obj.TYPE_CONST - p.From.Offset = 0 - p.Reg = REG_LR - p.To.Type = obj.TYPE_REG - p.To.Reg = REG_ZERO + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} } // "Add back" the stack removed in the previous instruction. @@ -917,9 +893,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // it is reserved by SSA. jmp := obj.Appendp(p, newprog) jmp.As = AJALR - jmp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} - jmp.To = p.From - jmp.Reg = REG_TMP + jmp.From = p.From + jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} // p.From is not generally valid, however will be // fixed up in the next loop. @@ -1801,8 +1776,8 @@ func instructionsForProg(p *obj.Prog) []*instruction { inss := []*instruction{ins} switch ins.as { - case AJAL: - ins.rd, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE + case AJAL, AJALR: + ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE ins.imm = p.To.Offset case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: -- cgit v1.3 From 967465da2975fe4322080703ce5a77ea90752829 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 31 Aug 2020 09:43:40 -0400 Subject: cmd/compile: use combined shifts to improve array addressing on ppc64x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds rules to find pairs of instructions that can be combined into a single shifts. These instruction sequences are common in array addressing within loops. Improvements can be seen in many crypto packages and the hash packages. These are based on the extended mnemonics found in the ISA sections C.8.1 and C.8.2. Some rules in PPC64.rules were moved because the ordering prevented some matching. The following results were generated on power9. hash/crc32: CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41% CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50% CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46% CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80% CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27% CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15% CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22% CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79% CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56% CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53% crypto/rc4: RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1) RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1) RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1) crypto/sha1: Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1) Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1) Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1) Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1) There are other improvements found in golang.org/x/crypto which are all in the range of 5-15%. Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 + src/cmd/compile/internal/ppc64/ssa.go | 36 ++ src/cmd/compile/internal/ssa/gen/PPC64.rules | 63 ++- src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 5 + src/cmd/compile/internal/ssa/opGen.go | 45 ++ src/cmd/compile/internal/ssa/rewrite.go | 38 ++ src/cmd/compile/internal/ssa/rewritePPC64.go | 787 +++++++++++++++++++++++++++ src/cmd/internal/obj/ppc64/a.out.go | 4 + src/cmd/internal/obj/ppc64/anames.go | 4 + src/cmd/internal/obj/ppc64/asm9.go | 74 ++- test/codegen/shift.go | 55 ++ 11 files changed, 1087 insertions(+), 28 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 10a05ec402..e26f6f8933 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -284,6 +284,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLDICLCC $0, R4, $15, R6 // 788603c1 RLDICR $0, R4, $15, R6 // 788603c4 RLDICRCC $0, R4, $15, R6 // 788603c5 + RLDIC $0, R4, $15, R6 // 788603c8 + RLDICCC $0, R4, $15, R6 // 788603c9 + CLRLSLWI $16, R5, $8, R4 // 54a4861e + CLRLSLDI $2, R4, $24, R3 // 78831588 BEQ 0(PC) // 41820000 BGE 0(PC) // 40800000 diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index f8d9ac2379..4a83a0bdd7 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -565,6 +565,42 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p = s.Prog(obj.ANOP) gc.Patch(pbover, p) + case ssa.OpPPC64CLRLSLWI: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + // clrlslwi ra,rs,sh,mb will become rlwinm ra,rs,sh,mb-sh,31-n as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + case ssa.OpPPC64CLRLSLDI: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + // clrlsldi ra,rs,sh,mb will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + + // Mask has been set as sh + case ssa.OpPPC64RLDICL: + r := v.Reg() + r1 := v.Args[0].Reg() + shifts := v.AuxInt + p := s.Prog(v.Op.Asm()) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r + case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index e5fb1e98c2..774d5096de 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -79,6 +79,23 @@ (Abs ...) => (FABS ...) (FMA ...) => (FMADD ...) +// Lowering extension +// Note: we always extend to 64 bits even though some ops don't need that many result bits. +(SignExt8to(16|32|64) ...) => (MOVBreg ...) +(SignExt16to(32|64) ...) => (MOVHreg ...) +(SignExt32to64 ...) => (MOVWreg ...) + +(ZeroExt8to(16|32|64) ...) => (MOVBZreg ...) +(ZeroExt16to(32|64) ...) => (MOVHZreg ...) +(ZeroExt32to64 ...) => (MOVWZreg ...) + +(Trunc(16|32|64)to8 x) && isSigned(t) => (MOVBreg x) +(Trunc(16|32|64)to8 x) => (MOVBZreg x) +(Trunc(32|64)to16 x) && isSigned(t) => (MOVHreg x) +(Trunc(32|64)to16 x) => (MOVHZreg x) +(Trunc64to32 x) && isSigned(t) => (MOVWreg x) +(Trunc64to32 x) => (MOVWZreg x) + // Lowering constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) (Const(32|64)F ...) => (FMOV(S|D)const ...) @@ -780,6 +797,21 @@ (MOVWreg y:(MOVWZreg x)) => (MOVWreg x) (MOVWZreg y:(MOVWreg x)) => (MOVWZreg x) +// Truncate then logical then truncate: omit first, lesser or equal truncate +(MOVWZreg ((OR|XOR|AND) x (MOVWZreg y))) => (MOVWZreg ((OR|XOR|AND) x y)) +(MOVHZreg ((OR|XOR|AND) x (MOVWZreg y))) => (MOVHZreg ((OR|XOR|AND) x y)) +(MOVHZreg ((OR|XOR|AND) x (MOVHZreg y))) => (MOVHZreg ((OR|XOR|AND) x y)) +(MOVBZreg ((OR|XOR|AND) x (MOVWZreg y))) => (MOVBZreg ((OR|XOR|AND) x y)) +(MOVBZreg ((OR|XOR|AND) x (MOVHZreg y))) => (MOVBZreg ((OR|XOR|AND) x y)) +(MOVBZreg ((OR|XOR|AND) x (MOVBZreg y))) => (MOVBZreg ((OR|XOR|AND) x y)) + +(MOV(B|H|W)Zreg z:(ANDconst [c] (MOVBZload ptr x))) => z +(MOVBZreg z:(AND y (MOVBZload ptr x))) => z +(MOV(H|W)Zreg z:(ANDconst [c] (MOVHZload ptr x))) => z +(MOVHZreg z:(AND y (MOVHZload ptr x))) => z +(MOVWZreg z:(ANDconst [c] (MOVWZload ptr x))) => z +(MOVWZreg z:(AND y (MOVWZload ptr x))) => z + // Arithmetic constant ops (ADD x (MOVDconst [c])) && is32Bit(c) => (ADDconst [c] x) @@ -949,23 +981,6 @@ (AtomicAnd8 ...) => (LoweredAtomicAnd8 ...) (AtomicOr8 ...) => (LoweredAtomicOr8 ...) -// Lowering extension -// Note: we always extend to 64 bits even though some ops don't need that many result bits. -(SignExt8to(16|32|64) ...) => (MOVBreg ...) -(SignExt16to(32|64) ...) => (MOVHreg ...) -(SignExt32to64 ...) => (MOVWreg ...) - -(ZeroExt8to(16|32|64) ...) => (MOVBZreg ...) -(ZeroExt16to(32|64) ...) => (MOVHZreg ...) -(ZeroExt32to64 ...) => (MOVWZreg ...) - -(Trunc(16|32|64)to8 x) && isSigned(t) => (MOVBreg x) -(Trunc(16|32|64)to8 x) => (MOVBZreg x) -(Trunc(32|64)to16 x) && isSigned(t) => (MOVHreg x) -(Trunc(32|64)to16 x) => (MOVHZreg x) -(Trunc64to32 x) && isSigned(t) => (MOVWreg x) -(Trunc64to32 x) => (MOVWZreg x) - (Slicemask x) => (SRADconst (NEG x) [63]) // Note that MOV??reg returns a 64-bit int, x is not necessarily that wide @@ -996,6 +1011,20 @@ (MOVWreg (MOVDconst [c])) => (MOVDconst [int64(int32(c))]) (MOVWZreg (MOVDconst [c])) => (MOVDconst [int64(uint32(c))]) +// Implement clrsldi and clrslwi extended mnemonics as described in +// ISA 3.0 section C.8. AuxInt field contains values needed for +// the instructions, packed together since there is only one available. +(SLDconst [c] z:(MOVBZreg x)) && c < 8 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) +(SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) +(SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) + +(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) +(SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) +(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) +(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index 37706b2dd9..ed99c40cd2 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -206,6 +206,11 @@ func init() { {name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64 {name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32 + // The following are ops to implement the extended mnemonics for shifts as described in section C.8 of the ISA. + // The constant shift values are packed into the aux int32. + {name: "RLDICL", argLength: 1, reg: gp11, asm: "RLDICL", aux: "Int32"}, // arg0 extract bits identified by shift params" + {name: "CLRLSLWI", argLength: 1, reg: gp11, asm: "CLRLSLWI", aux: "Int32"}, // + {name: "CLRLSLDI", argLength: 1, reg: gp11, asm: "CLRLSLDI", aux: "Int32"}, // {name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d95943231a..f00dc3f7f5 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1853,6 +1853,9 @@ const ( OpPPC64SLW OpPPC64ROTL OpPPC64ROTLW + OpPPC64RLDICL + OpPPC64CLRLSLWI + OpPPC64CLRLSLDI OpPPC64LoweredAdd64Carry OpPPC64SRADconst OpPPC64SRAWconst @@ -24672,6 +24675,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "RLDICL", + auxType: auxInt32, + argLen: 1, + asm: ppc64.ARLDICL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "CLRLSLWI", + auxType: auxInt32, + argLen: 1, + asm: ppc64.ACLRLSLWI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "CLRLSLDI", + auxType: auxInt32, + argLen: 1, + asm: ppc64.ACLRLSLDI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "LoweredAdd64Carry", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 2ab310ad85..d9c3e455a0 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1325,6 +1325,44 @@ func hasSmallRotate(c *Config) bool { } } +func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 { + if sh < 0 || sh >= sz { + panic("PPC64 shift arg sh out of range") + } + if mb < 0 || mb >= sz { + panic("PPC64 shift arg mb out of range") + } + if me < 0 || me >= sz { + panic("PPC64 shift arg me out of range") + } + return int32(sh<<16 | mb<<8 | me) +} + +func GetPPC64Shiftsh(auxint int64) int64 { + return int64(int8(auxint >> 16)) +} + +func GetPPC64Shiftmb(auxint int64) int64 { + return int64(int8(auxint >> 8)) +} + +func GetPPC64Shiftme(auxint int64) int64 { + return int64(int8(auxint)) +} + +// Catch the simple ones first +// TODO: Later catch more cases +func isPPC64ValidShiftMask(v int64) bool { + if ((v + 1) & v) == 0 { + return true + } + return false +} + +func getPPC64ShiftMaskLength(v int64) int64 { + return int64(bits.Len64(uint64(v))) +} + // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format. func armBFAuxInt(lsb, width int64) arm64BitField { if lsb < 0 || lsb > 63 { diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 152cdfdf4d..12b08824b5 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -586,8 +586,12 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ROTLW(v) case OpPPC64SLD: return rewriteValuePPC64_OpPPC64SLD(v) + case OpPPC64SLDconst: + return rewriteValuePPC64_OpPPC64SLDconst(v) case OpPPC64SLW: return rewriteValuePPC64_OpPPC64SLW(v) + case OpPPC64SLWconst: + return rewriteValuePPC64_OpPPC64SLWconst(v) case OpPPC64SRAD: return rewriteValuePPC64_OpPPC64SRAD(v) case OpPPC64SRAW: @@ -6565,6 +6569,255 @@ func rewriteValuePPC64_OpPPC64MOVBZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVBZreg (OR x (MOVWZreg y))) + // result: (MOVBZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR x (MOVWZreg y))) + // result: (MOVBZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND x (MOVWZreg y))) + // result: (MOVBZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (OR x (MOVHZreg y))) + // result: (MOVBZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR x (MOVHZreg y))) + // result: (MOVBZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND x (MOVHZreg y))) + // result: (MOVBZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (OR x (MOVBZreg y))) + // result: (MOVBZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR x (MOVBZreg y))) + // result: (MOVBZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND x (MOVBZreg y))) + // result: (MOVBZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVBZreg z:(AND y (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVBZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVBZreg x:(MOVBZload _ _)) // result: x for { @@ -8507,6 +8760,197 @@ func rewriteValuePPC64_OpPPC64MOVHZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVHZreg (OR x (MOVWZreg y))) + // result: (MOVHZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (XOR x (MOVWZreg y))) + // result: (MOVHZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (AND x (MOVWZreg y))) + // result: (MOVHZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (OR x (MOVHZreg y))) + // result: (MOVHZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (XOR x (MOVHZreg y))) + // result: (MOVHZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (AND x (MOVHZreg y))) + // result: (MOVHZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVHZreg z:(ANDconst [c] (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVHZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVHZreg z:(AND y (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVHZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVHZreg x:(MOVBZload _ _)) // result: x for { @@ -9657,6 +10101,139 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVWZreg (OR x (MOVWZreg y))) + // result: (MOVWZreg (OR x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg (XOR x (MOVWZreg y))) + // result: (MOVWZreg (XOR x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg (AND x (MOVWZreg y))) + // result: (MOVWZreg (AND x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(ANDconst [c] (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVHZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(ANDconst [c] (MOVWZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVWZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(AND y (MOVWZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVWZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVWZreg x:(MOVBZload _ _)) // result: x for { @@ -12197,6 +12774,111 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLDconst [c] z:(MOVBZreg x)) + // cond: c < 8 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVBZreg { + break + } + x := z.Args[0] + if !(c < 8 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 56, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(MOVHZreg x)) + // cond: c < 16 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVHZreg { + break + } + x := z.Args[0] + if !(c < 16 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 48, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(MOVWZreg x)) + // cond: c < 32 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWZreg { + break + } + x := z.Args[0] + if !(c < 32 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(ANDconst [d] x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + d := auxIntToInt64(z.AuxInt) + x := z.Args[0] + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_0.Op != OpPPC64MOVDconst { + continue + } + d := auxIntToInt64(z_0.AuxInt) + x := z_1 + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + continue + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12215,6 +12897,111 @@ func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLWconst [c] z:(MOVBZreg x)) + // cond: z.Uses == 1 && c < 8 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVBZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 8) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 24, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(MOVHZreg x)) + // cond: z.Uses == 1 && c < 16 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVHZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 16) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 16, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(MOVWZreg x)) + // cond: z.Uses == 1 && c < 24 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 24) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(ANDconst [d] x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + d := auxIntToInt64(z.AuxInt) + x := z.Args[0] + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_0.Op != OpPPC64MOVDconst { + continue + } + d := auxIntToInt64(z_0.AuxInt) + x := z_1 + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + continue + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index 8b32692778..f438803fb5 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -575,6 +575,7 @@ const ( ARLWMICC ARLWNM ARLWNMCC + ACLRLSLWI ASLW ASLWCC ASRW @@ -716,6 +717,9 @@ const ( ARLDCLCC ARLDICL ARLDICLCC + ARLDIC + ARLDICCC + ACLRLSLDI AROTL AROTLW ASLBIA diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index 287011877c..accd87fe00 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -180,6 +180,7 @@ var Anames = []string{ "RLWMICC", "RLWNM", "RLWNMCC", + "CLRLSLWI", "SLW", "SLWCC", "SRW", @@ -312,6 +313,9 @@ var Anames = []string{ "RLDCLCC", "RLDICL", "RLDICLCC", + "RLDIC", + "RLDICCC", + "CLRLSLDI", "ROTL", "ROTLW", "SLBIA", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 98b453de6c..60dda72507 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1904,6 +1904,7 @@ func buildop(ctxt *obj.Link) { opset(ARLWMICC, r0) opset(ARLWNM, r0) opset(ARLWNMCC, r0) + opset(ACLRLSLWI, r0) case ARLDMI: opset(ARLDMICC, r0) @@ -1922,6 +1923,9 @@ func buildop(ctxt *obj.Link) { opset(ARLDICLCC, r0) opset(ARLDICR, r0) opset(ARLDICRCC, r0) + opset(ARLDIC, r0) + opset(ARLDICCC, r0) + opset(ACLRLSLDI, r0) case AFMOVD: opset(AFMOVDCC, r0) @@ -2734,13 +2738,31 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case ARLDICR, ARLDICRCC: me := int(d) sh := c.regoff(&p.From) + if me < 0 || me > 63 || sh > 63 { + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) - case ARLDICL, ARLDICLCC: + case ARLDICL, ARLDICLCC, ARLDIC, ARLDICCC: mb := int(d) sh := c.regoff(&p.From) + if mb < 0 || mb > 63 || sh > 63 { + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) + case ACLRLSLDI: + // This is an extended mnemonic defined in the ISA section C.8.1 + // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // It maps onto RLDIC so is directly generated here based on the operands from + // the clrlsldi. + b := int(d) + n := c.regoff(&p.From) + if n > int32(b) || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + } + o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) + default: c.ctxt.Diag("unexpected op in rldc case\n%v", p) a = 0 @@ -3354,18 +3376,43 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case 62: /* rlwmi $sh,s,$mask,a */ v := c.regoff(&p.From) - - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + switch p.As { + case ACLRLSLWI: + b := c.regoff(p.GetFrom3()) + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // It maps onto rlwinm which is directly generated here. + if v < 0 || v > 32 || b > 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + } + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } case 63: /* rlwmi b,s,$mask,a */ - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + v := c.regoff(&p.From) + switch p.As { + case ACLRLSLWI: + b := c.regoff(p.GetFrom3()) + if v > b || b > 32 { + // Message will match operands from the ISA even though in the + // code it uses 'v' + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + } + // This is an extended mnemonic described in the ISA C.8.2 + // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // It generates the rlwinm directly here. + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + default: + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 + } case 64: /* mtfsf fr[, $m] {,fpcsr} */ var v int32 @@ -4277,6 +4324,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ARLDICRCC: return OPVCC(30, 0, 0, 1) | 2<<1 // rldicr. + case ARLDIC: + return OPVCC(30, 0, 0, 0) | 4<<1 // rldic + case ARLDICCC: + return OPVCC(30, 0, 0, 1) | 4<<1 // rldic. + case ASYSCALL: return OPVCC(17, 1, 0, 0) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 5e50ea6bff..32214851b5 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -150,6 +150,61 @@ func lshGuarded64(v int64, s uint) int64 { panic("shift too large") } +func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) { + + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f := tab[byte(v)^b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)&b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)|b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)&h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)^h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)|h] + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + f += tab[v&0xff] + // ppc64le:-".*AND",".*CLRLSLWI" + // ppc64:-".*AND",".*CLRLSLWI" + f += 2*uint32(uint16(d)) + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + g := 2*uint64(uint32(d)) + return f, g +} + +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { + + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f := (v8 &0xF) << 2 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f += byte(v16)<<3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + g := (v16 & 0xFF) << 3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h := (v32 & 0xFFFFF) << 2 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h += uint32(v64)<<4 + // ppc64le:-"AND","CLRLSLDI" + // ppc64:-"AND","CLRLSLDI" + i := (v64 & 0xFFFFFFFF) << 5 + return f, g, h, i +} + func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // ppc64le:-".*MOVW" -- cgit v1.3 From 982ac06f3df4ea08ae0506e6f9fb53eb27ddb140 Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Thu, 17 Sep 2020 15:35:31 -0400 Subject: cmd/compile,cmd/asm: dump sym ABI versions for -S=2 When -S=2 is in effect for the compiler/assembler, include symbol ABI values for defined symbols and relocations. This is intended to help make it easier to distinguish between a symbol and its ABI wrapper. Change-Id: Ifbf71372392075f15363b40e882b2132406b7d6d Reviewed-on: https://go-review.googlesource.com/c/go/+/255718 Trust: Than McIntosh Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: David Chase --- src/cmd/internal/obj/objfile.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index 7bc4f4992e..aede5fe71c 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -663,7 +663,11 @@ func (ctxt *Link) writeSymDebug(s *LSym) { } func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { - fmt.Fprintf(ctxt.Bso, "%s ", name) + ver := "" + if ctxt.Debugasm > 1 { + ver = fmt.Sprintf("<%d>", s.ABI()) + } + fmt.Fprintf(ctxt.Bso, "%s%s ", name, ver) if s.Type != 0 { fmt.Fprintf(ctxt.Bso, "%v ", s.Type) } @@ -726,15 +730,19 @@ func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { sort.Sort(relocByOff(s.R)) // generate stable output for _, r := range s.R { name := "" + ver := "" if r.Sym != nil { name = r.Sym.Name + if ctxt.Debugasm > 1 { + ver = fmt.Sprintf("<%d>", s.ABI()) + } } else if r.Type == objabi.R_TLS_LE { name = "TLS" } if ctxt.Arch.InFamily(sys.ARM, sys.PPC64) { - fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s+%x\n", int(r.Off), r.Siz, r.Type, name, uint64(r.Add)) + fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s%s+%x\n", int(r.Off), r.Siz, r.Type, name, ver, uint64(r.Add)) } else { - fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s+%d\n", int(r.Off), r.Siz, r.Type, name, r.Add) + fmt.Fprintf(ctxt.Bso, "\trel %d+%d t=%d %s%s+%d\n", int(r.Off), r.Siz, r.Type, name, ver, r.Add) } } } -- cgit v1.3 From f765dcbd5c8205a0d222257b4514b1194cad26f8 Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Thu, 24 Sep 2020 16:11:43 -0400 Subject: cmd/compile,cmd/asm: fix buglet in -S=2 output In CL 255718 the -S=2 assembly output was enhanced to dump symbol ABIs. This patch fixes a bug in that CL: when dumping the relocations on a symbol, we were dumping the symbol's ABI as opposed to the relocation target symbol's ABI. Change-Id: I134128687757f549fa37b998cff1290765889140 Reviewed-on: https://go-review.googlesource.com/c/go/+/257202 Trust: Than McIntosh Run-TryBot: Than McIntosh Reviewed-by: David Chase Reviewed-by: Cherry Zhang TryBot-Result: Go Bot --- src/cmd/internal/obj/objfile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index aede5fe71c..e4b9620568 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -734,7 +734,7 @@ func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { if r.Sym != nil { name = r.Sym.Name if ctxt.Debugasm > 1 { - ver = fmt.Sprintf("<%d>", s.ABI()) + ver = fmt.Sprintf("<%d>", r.Sym.ABI()) } } else if r.Type == objabi.R_TLS_LE { name = "TLS" -- cgit v1.3 From fa04d488bd54b8fdd78cc9bcc6d90de4bf5f8efb Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Wed, 16 Sep 2020 14:05:18 +0800 Subject: cmd/asm: fix the issue of moving 128-bit integers to vector registers on arm64 The CL 249758 added `FMOVQ $vcon, Vd` instruction and assembler used 128-bit simd literal-loading to load `$vcon` from pool into 128-bit vector register `Vd`. Because Go does not have 128-bit integers for now, the assembler will report an error of `immediate out of range` when assembleing `FMOVQ $0x123456789abcdef0123456789abcdef, V0` instruction. This patch lets 128-bit integers take two 64-bit operands, for the high and low parts separately and adds `VMOVQ $hi, $lo, Vd` instruction to move `$hi<<64+$lo' into 128-bit register `Vd`. In addition, this patch renames `FMOVQ/FMOVD/FMOVS` ops to 'VMOVQ/VMOVD/VMOVS' and uses them to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively Update the go doc. Fixes #40725 Change-Id: Ia3c83bb6463f104d2bee960905053a97299e0a3a Reviewed-on: https://go-review.googlesource.com/c/go/+/255900 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/arch/arm64.go | 18 +++++++++------ src/cmd/asm/internal/asm/asm.go | 21 ++++++++--------- src/cmd/asm/internal/asm/testdata/arm64.s | 6 +++-- src/cmd/internal/obj/arm64/a.out.go | 4 +++- src/cmd/internal/obj/arm64/anames.go | 4 +++- src/cmd/internal/obj/arm64/asm7.go | 38 ++++++++++++++++--------------- src/cmd/internal/obj/arm64/doc.go | 10 ++++++++ 7 files changed, 60 insertions(+), 41 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index 3817fcd5c2..e643889aef 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -82,6 +82,17 @@ func IsARM64STLXR(op obj.As) bool { return false } +// IsARM64TBL reports whether the op (as defined by an arm64.A* +// constant) is one of the TBL-like instructions and one of its +// inputs does not fit into prog.Reg, so require special handling. +func IsARM64TBL(op obj.As) bool { + switch op { + case arm64.AVTBL, arm64.AVMOVQ: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. @@ -125,13 +136,6 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) { return 0, false } -// IsARM64TBL reports whether the op (as defined by an arm64.A* -// constant) is one of the table lookup instructions that require special -// handling. -func IsARM64TBL(op obj.As) bool { - return op == arm64.AVTBL -} - // ARM64RegisterExtension parses an ARM64 register with extension or arrangement. func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error { Rnum := (reg & 31) + int16(num<<5) diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 42e217dc23..7878d74549 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -622,8 +622,9 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.SetFrom3(a[1]) prog.To = a[2] case sys.ARM64: - // ARM64 instructions with one input and two outputs. - if arch.IsARM64STLXR(op) { + switch { + case arch.IsARM64STLXR(op): + // ARM64 instructions with one input and two outputs. prog.From = a[0] prog.To = a[1] if a[2].Type != obj.TYPE_REG { @@ -631,20 +632,16 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { return } prog.RegTo2 = a[2].Reg - break - } - if arch.IsARM64TBL(op) { + case arch.IsARM64TBL(op): + // one of its inputs does not fit into prog.Reg. prog.From = a[0] - if a[1].Type != obj.TYPE_REGLIST { - p.errorf("%s: expected list; found %s", op, obj.Dconv(prog, &a[1])) - } prog.SetFrom3(a[1]) prog.To = a[2] - break + default: + prog.From = a[0] + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.To = a[2] } - prog.From = a[0] - prog.Reg = p.getRegister(prog, op, &a[1]) - prog.To = a[2] case sys.I386: prog.From = a[0] prog.SetFrom3(a[1]) diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index acfb16b096..e277c04b7c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -218,8 +218,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD $(28.0), F4 // 0490671e // move a large constant to a Vd. - FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20 - FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29 + VMOVS $0x80402010, V11 // VMOVS $2151686160, V11 + VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20 + VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10 + VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20 FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc FMOVS (R2)(R6<<2), F4 // 447866bc diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index b3c9e9a18e..1ca41c15ba 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -875,7 +875,9 @@ const ( AFLDPS AFMOVD AFMOVS - AFMOVQ + AVMOVQ + AVMOVD + AVMOVS AFMULD AFMULS AFNEGD diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 48c066abfd..900cdba817 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -381,7 +381,9 @@ var Anames = []string{ "FLDPS", "FMOVD", "FMOVS", - "FMOVQ", + "VMOVQ", + "VMOVD", + "VMOVS", "FMULD", "FMULS", "FNEGD", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index fc2033d689..ee4a33eef4 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -260,8 +260,9 @@ func MOVCONST(d int64, s int, rt int) uint32 { const ( // Optab.flag LFROM = 1 << 0 // p.From uses constant pool - LTO = 1 << 1 // p.To uses constant pool - NOTUSETMP = 1 << 2 // p expands to multiple instructions, but does NOT use REGTMP + LFROM3 = 1 << 1 // p.From3 uses constant pool + LTO = 1 << 2 // p.To uses constant pool + NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP ) var optab = []Optab{ @@ -397,10 +398,10 @@ var optab = []Optab{ /* load long effective stack address (load int32 offset and add) */ {AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0}, - // Move a large constant to a Vn. - {AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, - {AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + // Move a large constant to a vector register. + {AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0}, + {AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, + {AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0}, /* jump operations */ {AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0}, @@ -950,13 +951,14 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("zero-width instruction\n%v", p) } } - switch o.flag & (LFROM | LTO) { - case LFROM: + if o.flag&LFROM != 0 { c.addpool(p, &p.From) - - case LTO: + } + if o.flag&LFROM3 != 0 { + c.addpool(p, p.GetFrom3()) + } + if o.flag<O != 0 { c.addpool(p, &p.To) - break } if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */ @@ -1174,8 +1176,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { sz := 4 if a.Type == obj.TYPE_CONST { - if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) { - // out of range -0x80000000 ~ 0xffffffff, must store 64-bit + if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD { + // out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit. t.As = ADWORD sz = 8 } // else store 32-bit @@ -2675,7 +2677,7 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AFMOVQ: + case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -5142,7 +5144,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = q<<30 | 0xe<<24 | len<<13 o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31) - case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool. + case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool. o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg)) case 102: /* vushll, vushll2, vuxtl, vuxtl2 */ @@ -6672,15 +6674,15 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 { } else { fp, w := 0, 0 switch as { - case AFMOVS: + case AFMOVS, AVMOVS: fp = 1 w = 0 /* 32-bit SIMD/FP */ - case AFMOVD: + case AFMOVD, AVMOVD: fp = 1 w = 1 /* 64-bit SIMD/FP */ - case AFMOVQ: + case AVMOVQ: fp = 1 w = 2 /* 128-bit SIMD/FP */ diff --git a/src/cmd/internal/obj/arm64/doc.go b/src/cmd/internal/obj/arm64/doc.go index 7515217544..efd4577f56 100644 --- a/src/cmd/internal/obj/arm64/doc.go +++ b/src/cmd/internal/obj/arm64/doc.go @@ -86,6 +86,16 @@ In the following example, PCALIGN at the entry of the function Add will align it MOVD $1, R1 RET +7. Move large constants to vector registers. + +Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively. +And for a 128-bit interger, it take two 64-bit operands, for the high and low parts separately. + + Examples: + VMOVS $0x11223344, V0 + VMOVD $0x1122334455667788, V1 + VMOVQ $0x1122334455667788, $8877665544332211, V2 // V2=0x11223344556677888877665544332211 + Special Cases. (1) umov is written as VMOV. -- cgit v1.3 From a424f6e45e29960c933a7ccc1cd8fc9bb2766f15 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Wed, 23 Sep 2020 11:06:39 -0400 Subject: cmd/asm,cmd/compile,cmd/internal/obj/ppc64: add extswsli support on power9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the extswsli instruction which combines extsw followed by a shift. New benchmark demonstrates the improvement: name old time/op new time/op delta ExtShift 1.34µs ± 0% 1.30µs ± 0% -3.15% (p=0.057 n=4+3) Change-Id: I21b410676fdf15d20e0cbbaa75d7c6dcd3bbb7b0 Reviewed-on: https://go-review.googlesource.com/c/go/+/257017 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 1 + src/cmd/compile/internal/gc/bench_test.go | 12 ++++ src/cmd/compile/internal/ppc64/ssa.go | 2 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 + src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 1 + src/cmd/compile/internal/ssa/opGen.go | 15 ++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 36 ++++++++++ src/cmd/internal/obj/ppc64/a.out.go | 2 + src/cmd/internal/obj/ppc64/anames.go | 2 + src/cmd/internal/obj/ppc64/asm9.go | 104 +++++++++++++++++---------- test/codegen/shift.go | 7 +- 11 files changed, 142 insertions(+), 42 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index e26f6f8933..88a7609ba8 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -266,6 +266,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 SRDCC R3, R4 // 7c841c37 ROTLW $16, R3, R4 // 5464803e ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index 09aaf428c3..a2887f2f7b 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -20,6 +20,18 @@ func BenchmarkLoadAdd(b *testing.B) { } } +// Added for ppc64 extswsli on power9 +func BenchmarkExtShift(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int64 + for i := range x { + s ^= int64(x[i]+32) * 8 + } + globl = s + } +} + func BenchmarkModify(b *testing.B) { a := make([]int64, 1024) v := globl diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 4a83a0bdd7..a5fbdaffba 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 774d5096de..de30d003e6 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1025,6 +1025,8 @@ (SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) (SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) (SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +// special case for power9 +(SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) // Lose widening ops fed to stores (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) => (MOVBstore [off] {sym} ptr x mem) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index ed99c40cd2..28317928a8 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -223,6 +223,7 @@ func init() { {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"}, // arg0 rotate left by auxInt bits {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits + {name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"}, {name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros {name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 1fc0f7ea79..1fe00c7026 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1865,6 +1865,7 @@ const ( OpPPC64SLWconst OpPPC64ROTLconst OpPPC64ROTLWconst + OpPPC64EXTSWSLconst OpPPC64CNTLZD OpPPC64CNTLZW OpPPC64CNTTZD @@ -24849,6 +24850,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "EXTSWSLconst", + auxType: auxInt64, + argLen: 1, + asm: ppc64.AEXTSWSLI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "CNTLZD", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 12b08824b5..29ec3992f2 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12877,6 +12877,24 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } break } + // match: (SLDconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { @@ -13000,6 +13018,24 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } break } + // match: (SLWconst [c] z:(MOVWreg x)) + // cond: c < 32 && objabi.GOPPC64 >= 9 + // result: (EXTSWSLconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWreg { + break + } + x := z.Args[0] + if !(c < 32 && objabi.GOPPC64 >= 9) { + break + } + v.reset(OpPPC64EXTSWSLconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index f438803fb5..4c97302f83 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -733,6 +733,8 @@ const ( ASRAD ASRADCC ASRDCC + AEXTSWSLI + AEXTSWSLICC ASTDCCC ATD diff --git a/src/cmd/internal/obj/ppc64/anames.go b/src/cmd/internal/obj/ppc64/anames.go index accd87fe00..fca4b3e355 100644 --- a/src/cmd/internal/obj/ppc64/anames.go +++ b/src/cmd/internal/obj/ppc64/anames.go @@ -329,6 +329,8 @@ var Anames = []string{ "SRAD", "SRADCC", "SRDCC", + "EXTSWSLI", + "EXTSWSLICC", "STDCCC", "TD", "DWORD", diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 60dda72507..9f06bdf8b3 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -160,6 +160,8 @@ var optab = []Optab{ {ASLD, C_REG, C_REG, C_NONE, C_REG, 6, 4, 0}, {ASLD, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLD, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_NONE, C_NONE, C_REG, 25, 4, 0}, + {AEXTSWSLI, C_SCON, C_REG, C_NONE, C_REG, 25, 4, 0}, {ASLW, C_SCON, C_REG, C_NONE, C_REG, 57, 4, 0}, {ASLW, C_SCON, C_NONE, C_NONE, C_REG, 57, 4, 0}, {ASRAW, C_REG, C_NONE, C_NONE, C_REG, 6, 4, 0}, @@ -1877,6 +1879,9 @@ func buildop(ctxt *obj.Link) { case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRAWCC, r0) + case AEXTSWSLI: + opset(AEXTSWSLICC, r0) + case ASRAD: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */ opset(ASRADCC, r0) @@ -2189,49 +2194,54 @@ func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 { return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 | (m&31)<<6 | ((m&32)>>5)<<5 } +func AOP_EXTSWSLI(op uint32, a uint32, s uint32, sh uint32) uint32 { + return op | (a&31)<<21 | (s&31)<<16 | (sh&31)<<11 | ((sh&32)>>5)<<1 +} + func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 } const ( /* each rhs is OPVCC(_, _, _, _) */ - OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 - OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 - OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 - OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 - OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 - OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 - OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 - OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 - OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 - OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 - OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 - OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 - OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 - OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 - OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 - OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 - OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 - OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 - OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 - OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 - OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 - OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 - OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 - OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 - OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 - OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 - OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 - OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 - OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 - OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 - OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 - OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 - OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 - OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 - OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 - OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 - OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 + OP_ADDI = 14<<26 | 0<<1 | 0<<10 | 0 + OP_ADDIS = 15<<26 | 0<<1 | 0<<10 | 0 + OP_ANDI = 28<<26 | 0<<1 | 0<<10 | 0 + OP_EXTSB = 31<<26 | 954<<1 | 0<<10 | 0 + OP_EXTSH = 31<<26 | 922<<1 | 0<<10 | 0 + OP_EXTSW = 31<<26 | 986<<1 | 0<<10 | 0 + OP_ISEL = 31<<26 | 15<<1 | 0<<10 | 0 + OP_MCRF = 19<<26 | 0<<1 | 0<<10 | 0 + OP_MCRFS = 63<<26 | 64<<1 | 0<<10 | 0 + OP_MCRXR = 31<<26 | 512<<1 | 0<<10 | 0 + OP_MFCR = 31<<26 | 19<<1 | 0<<10 | 0 + OP_MFFS = 63<<26 | 583<<1 | 0<<10 | 0 + OP_MFMSR = 31<<26 | 83<<1 | 0<<10 | 0 + OP_MFSPR = 31<<26 | 339<<1 | 0<<10 | 0 + OP_MFSR = 31<<26 | 595<<1 | 0<<10 | 0 + OP_MFSRIN = 31<<26 | 659<<1 | 0<<10 | 0 + OP_MTCRF = 31<<26 | 144<<1 | 0<<10 | 0 + OP_MTFSF = 63<<26 | 711<<1 | 0<<10 | 0 + OP_MTFSFI = 63<<26 | 134<<1 | 0<<10 | 0 + OP_MTMSR = 31<<26 | 146<<1 | 0<<10 | 0 + OP_MTMSRD = 31<<26 | 178<<1 | 0<<10 | 0 + OP_MTSPR = 31<<26 | 467<<1 | 0<<10 | 0 + OP_MTSR = 31<<26 | 210<<1 | 0<<10 | 0 + OP_MTSRIN = 31<<26 | 242<<1 | 0<<10 | 0 + OP_MULLW = 31<<26 | 235<<1 | 0<<10 | 0 + OP_MULLD = 31<<26 | 233<<1 | 0<<10 | 0 + OP_OR = 31<<26 | 444<<1 | 0<<10 | 0 + OP_ORI = 24<<26 | 0<<1 | 0<<10 | 0 + OP_ORIS = 25<<26 | 0<<1 | 0<<10 | 0 + OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0 + OP_RLWNM = 23<<26 | 0<<1 | 0<<10 | 0 + OP_SUBF = 31<<26 | 40<<1 | 0<<10 | 0 + OP_RLDIC = 30<<26 | 4<<1 | 0<<10 | 0 + OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0 + OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0 + OP_RLDCL = 30<<26 | 8<<1 | 0<<10 | 0 + OP_EXTSWSLI = 31<<26 | 445<<2 ) func oclass(a *obj.Addr) int { @@ -2965,14 +2975,21 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case AROTL: a = int(0) op = OP_RLDICL + case AEXTSWSLI: + a = int(v) default: c.ctxt.Diag("unexpected op in sldi case\n%v", p) a = 0 o1 = 0 } - o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) - if p.As == ASLDCC || p.As == ASRDCC { + if p.As == AEXTSWSLI || p.As == AEXTSWSLICC { + o1 = AOP_EXTSWSLI(OP_EXTSWSLI, uint32(r), uint32(p.To.Reg), uint32(v)) + + } else { + o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a)) + } + if p.As == ASLDCC || p.As == ASRDCC || p.As == AEXTSWSLICC { o1 |= 1 // Set the condition code bit } @@ -4350,6 +4367,11 @@ func (c *ctxt9) oprrr(a obj.As) uint32 { case ASRADCC: return OPVCC(31, 794, 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) + case ASRW: return OPVCC(31, 536, 0, 0) case ASRWCC: @@ -5013,6 +5035,10 @@ func (c *ctxt9) opirr(a obj.As) uint32 { return OPVCC(31, (413 << 1), 0, 0) case ASRADCC: return OPVCC(31, (413 << 1), 0, 1) + case AEXTSWSLI: + return OPVCC(31, 445, 0, 0) + case AEXTSWSLICC: + return OPVCC(31, 445, 0, 1) case ASTSW: return OPVCC(31, 725, 0, 0) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index 32214851b5..abc4b091c9 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -182,7 +182,7 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt return f, g } -func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) { +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -202,7 +202,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, u // ppc64le:-"AND","CLRLSLDI" // ppc64:-"AND","CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 - return f, g, h, i + // ppc64le/power9:-"SLD","EXTSWSLI" + // ppc64/power9:-"SLD","EXTSWSLI" + j := int64(x32+32)*8 + return f, g, h, i, j } func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { -- cgit v1.3 From af18bce87cc7ee1ffc68f91abefa241ab209539e Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sun, 20 Sep 2020 23:29:20 -0400 Subject: cmd/link: consider interface conversions only in reachable code The linker prunes methods that are not directly reachable if the receiver type is never converted to interface. Currently, this "never" is too strong: it is invalidated even if the interface conversion is in an unreachable function. This CL improves it by only considering interface conversions in reachable code. To do that, we introduce a marker relocation R_USEIFACE, which marks the target symbol as UsedInIface if the source symbol is reached. binary size before after cmd/compile 18897528 18887400 cmd/go 13607372 13470652 Change-Id: I66c6b69eeff9ae02d84d2e6f2bc7f1b29dd53910 Reviewed-on: https://go-review.googlesource.com/c/go/+/256797 Trust: Cherry Zhang Reviewed-by: Jeremy Faller Reviewed-by: Than McIntosh --- src/cmd/compile/internal/gc/pgen.go | 8 ++- src/cmd/compile/internal/gc/sinit.go | 2 +- src/cmd/compile/internal/gc/walk.go | 15 +++-- src/cmd/internal/obj/s390x/asmz.go | 6 +- src/cmd/internal/obj/wasm/wasmobj.go | 2 + src/cmd/internal/obj/x86/asm6.go | 7 ++- src/cmd/internal/objabi/reloctype.go | 5 ++ src/cmd/internal/objabi/reloctype_string.go | 66 +++++++++++++++++++++- src/cmd/link/internal/ld/data.go | 6 ++ src/cmd/link/internal/ld/deadcode.go | 14 +++++ .../internal/ld/testdata/deadcode/ifacemethod.go | 9 ++- src/cmd/link/internal/loader/loader.go | 1 + src/cmd/link/internal/wasm/asm.go | 3 + 13 files changed, 129 insertions(+), 15 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 74262595b0..52b1ed351d 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -231,6 +231,11 @@ func compile(fn *Node) { return } + // Set up the function's LSym early to avoid data races with the assemblers. + // Do this before walk, as walk needs the LSym to set attributes/relocations + // (e.g. in markTypeUsedInInterface). + fn.Func.initLSym(true) + walk(fn) if nerrors != 0 { return @@ -250,9 +255,6 @@ func compile(fn *Node) { return } - // Set up the function's LSym early to avoid data races with the assemblers. - fn.Func.initLSym(true) - // Make sure type syms are declared for all types that might // be types of stack objects. We need to do this here // because symbols must be allocated before the parallel diff --git a/src/cmd/compile/internal/gc/sinit.go b/src/cmd/compile/internal/gc/sinit.go index 71ed558461..af19a96bbc 100644 --- a/src/cmd/compile/internal/gc/sinit.go +++ b/src/cmd/compile/internal/gc/sinit.go @@ -278,7 +278,7 @@ func (s *InitSchedule) staticassign(l *Node, r *Node) bool { return Isconst(val, CTNIL) } - markTypeUsedInInterface(val.Type) + markTypeUsedInInterface(val.Type, l.Sym.Linksym()) var itab *Node if l.Type.IsEmptyInterface() { diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go index 933f16d9a0..d238cc2f45 100644 --- a/src/cmd/compile/internal/gc/walk.go +++ b/src/cmd/compile/internal/gc/walk.go @@ -805,8 +805,8 @@ opswitch: fromType := n.Left.Type toType := n.Type - if !fromType.IsInterface() { - markTypeUsedInInterface(fromType) + if !fromType.IsInterface() && !Curfn.Func.Nname.isBlank() { // skip unnamed functions (func _()) + markTypeUsedInInterface(fromType, Curfn.Func.lsym) } // typeword generates the type word of the interface value. @@ -1621,8 +1621,13 @@ opswitch: // markTypeUsedInInterface marks that type t is converted to an interface. // This information is used in the linker in dead method elimination. -func markTypeUsedInInterface(t *types.Type) { - typenamesym(t).Linksym().Set(obj.AttrUsedInIface, true) +func markTypeUsedInInterface(t *types.Type, from *obj.LSym) { + tsym := typenamesym(t).Linksym() + // Emit a marker relocation. The linker will know the type is converted + // to an interface if "from" is reachable. + r := obj.Addrel(from) + r.Sym = tsym + r.Type = objabi.R_USEIFACE } // rtconvfn returns the parameter and result types that will be used by a @@ -3687,6 +3692,8 @@ func usemethod(n *Node) { // Also need to check for reflect package itself (see Issue #38515). if s := res0.Type.Sym; s != nil && s.Name == "Method" && isReflectPkg(s.Pkg) { Curfn.Func.SetReflectMethod(true) + // The LSym is initialized at this point. We need to set the attribute on the LSym. + Curfn.Func.lsym.Set(obj.AttrReflectMethod, true) } } diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index 68f01f1c5d..cb3a2c3196 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -461,6 +461,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { buffer := make([]byte, 0) changed := true loop := 0 + nrelocs0 := len(c.cursym.R) for changed { if loop > 100 { c.ctxt.Diag("stuck in spanz loop") @@ -468,7 +469,10 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } changed = false buffer = buffer[:0] - c.cursym.R = make([]obj.Reloc, 0) + for i := range c.cursym.R[nrelocs0:] { + c.cursym.R[nrelocs0+i] = obj.Reloc{} + } + c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler for p := c.cursym.Func.Text; p != nil; p = p.Link { pc := int64(len(buffer)) if pc != p.Pc { diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index 70e8e51e65..a9e093a8ad 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -1007,6 +1007,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { panic("bad name for Call") } r := obj.Addrel(s) + r.Siz = 1 // actually variable sized r.Off = int32(w.Len()) r.Type = objabi.R_CALL if p.Mark&WasmImport != 0 { @@ -1033,6 +1034,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { case AI32Const, AI64Const: if p.From.Name == obj.NAME_EXTERN { r := obj.Addrel(s) + r.Siz = 1 // actually variable sized r.Off = int32(w.Len()) r.Type = objabi.R_ADDR r.Sym = p.From.Sym diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index fb99c620ad..4940c79eaa 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2100,14 +2100,15 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { var c int32 errors := ctxt.Errors var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) + nrelocs0 := len(s.R) for { // This loop continues while there are reasons to re-assemble // whole block, like the presence of long forward jumps. reAssemble := false - for i := range s.R { - s.R[i] = obj.Reloc{} + for i := range s.R[nrelocs0:] { + s.R[nrelocs0+i] = obj.Reloc{} } - s.R = s.R[:0] + s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler s.P = s.P[:0] c = 0 var pPrev *obj.Prog diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go index f029a3c396..1e328d659f 100644 --- a/src/cmd/internal/objabi/reloctype.go +++ b/src/cmd/internal/objabi/reloctype.go @@ -89,6 +89,11 @@ const ( // should be linked into the final binary, even if there are no other // direct references. (This is used for types reachable by reflection.) R_USETYPE + // R_USEIFACE marks a type is converted to an interface in the function this + // relocation is applied to. The target is a type descriptor. + // This is a marker relocation (0-sized), for the linker's reachabililty + // analysis. + R_USEIFACE // R_METHODOFF resolves to a 32-bit offset from the beginning of the section // holding the data being relocated to the referenced symbol. // It is a variant of R_ADDROFF used when linking from the uncommonType of a diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go index 83dfe71e07..caf24eea58 100644 --- a/src/cmd/internal/objabi/reloctype_string.go +++ b/src/cmd/internal/objabi/reloctype_string.go @@ -4,9 +4,71 @@ package objabi import "strconv" -const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_WEAKADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CALLRISCVR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[R_ADDR-1] + _ = x[R_ADDRPOWER-2] + _ = x[R_ADDRARM64-3] + _ = x[R_ADDRMIPS-4] + _ = x[R_ADDROFF-5] + _ = x[R_WEAKADDROFF-6] + _ = x[R_SIZE-7] + _ = x[R_CALL-8] + _ = x[R_CALLARM-9] + _ = x[R_CALLARM64-10] + _ = x[R_CALLIND-11] + _ = x[R_CALLPOWER-12] + _ = x[R_CALLMIPS-13] + _ = x[R_CALLRISCV-14] + _ = x[R_CONST-15] + _ = x[R_PCREL-16] + _ = x[R_TLS_LE-17] + _ = x[R_TLS_IE-18] + _ = x[R_GOTOFF-19] + _ = x[R_PLT0-20] + _ = x[R_PLT1-21] + _ = x[R_PLT2-22] + _ = x[R_USEFIELD-23] + _ = x[R_USETYPE-24] + _ = x[R_USEIFACE-25] + _ = x[R_METHODOFF-26] + _ = x[R_POWER_TOC-27] + _ = x[R_GOTPCREL-28] + _ = x[R_JMPMIPS-29] + _ = x[R_DWARFSECREF-30] + _ = x[R_DWARFFILEREF-31] + _ = x[R_ARM64_TLS_LE-32] + _ = x[R_ARM64_TLS_IE-33] + _ = x[R_ARM64_GOTPCREL-34] + _ = x[R_ARM64_GOT-35] + _ = x[R_ARM64_PCREL-36] + _ = x[R_ARM64_LDST8-37] + _ = x[R_ARM64_LDST32-38] + _ = x[R_ARM64_LDST64-39] + _ = x[R_ARM64_LDST128-40] + _ = x[R_POWER_TLS_LE-41] + _ = x[R_POWER_TLS_IE-42] + _ = x[R_POWER_TLS-43] + _ = x[R_ADDRPOWER_DS-44] + _ = x[R_ADDRPOWER_GOT-45] + _ = x[R_ADDRPOWER_PCREL-46] + _ = x[R_ADDRPOWER_TOCREL-47] + _ = x[R_ADDRPOWER_TOCREL_DS-48] + _ = x[R_RISCV_PCREL_ITYPE-49] + _ = x[R_RISCV_PCREL_STYPE-50] + _ = x[R_PCRELDBL-51] + _ = x[R_ADDRMIPSU-52] + _ = x[R_ADDRMIPSTLS-53] + _ = x[R_ADDRCUOFF-54] + _ = x[R_WASMIMPORT-55] + _ = x[R_XCOFFREF-56] +} + +const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_WEAKADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CALLRISCVR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" -var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 60, 66, 72, 81, 92, 101, 112, 122, 133, 140, 147, 155, 163, 171, 177, 183, 189, 199, 208, 219, 230, 240, 249, 262, 276, 290, 304, 320, 331, 344, 357, 371, 385, 400, 414, 428, 439, 453, 468, 485, 503, 524, 543, 562, 572, 583, 596, 607, 619, 629} +var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 60, 66, 72, 81, 92, 101, 112, 122, 133, 140, 147, 155, 163, 171, 177, 183, 189, 199, 208, 218, 229, 240, 250, 259, 272, 286, 300, 314, 330, 341, 354, 367, 381, 395, 410, 424, 438, 449, 463, 478, 495, 513, 534, 553, 572, 582, 593, 606, 617, 629, 639} func (i RelocType) String() string { i -= 1 diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index a730125cf2..0a3418bfc9 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -698,6 +698,9 @@ func windynrelocsym(ctxt *Link, rel *loader.SymbolBuilder, s loader.Sym) { relocs := ctxt.loader.Relocs(s) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) + if r.IsMarker() { + continue // skip marker relocations + } targ := r.Sym() if targ == 0 { continue @@ -775,6 +778,9 @@ func dynrelocsym(ctxt *Link, s loader.Sym) { relocs := ldr.Relocs(s) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) + if r.IsMarker() { + continue // skip marker relocations + } if ctxt.BuildMode == BuildModePIE && ctxt.LinkMode == LinkInternal { // It's expected that some relocations will be done // later by relocsym (R_TLS_LE, R_ADDROFF), so diff --git a/src/cmd/link/internal/ld/deadcode.go b/src/cmd/link/internal/ld/deadcode.go index 7f14aa3d27..816a23b9a7 100644 --- a/src/cmd/link/internal/ld/deadcode.go +++ b/src/cmd/link/internal/ld/deadcode.go @@ -153,6 +153,20 @@ func (d *deadcodePass) flood() { // do nothing for now as we still load all type symbols. continue } + if t == objabi.R_USEIFACE { + // R_USEIFACE is a marker relocation that tells the linker the type is + // converted to an interface, i.e. should have UsedInIface set. See the + // comment below for why we need to unset the Reachable bit and re-mark it. + rs := r.Sym() + if !d.ldr.AttrUsedInIface(rs) { + d.ldr.SetAttrUsedInIface(rs, true) + if d.ldr.AttrReachable(rs) { + d.ldr.SetAttrReachable(rs, false) + d.mark(rs, symIdx) + } + } + continue + } rs := r.Sym() if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) { // If a type is converted to an interface, it is possible to obtain an diff --git a/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go b/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go index b62f18c342..32a24cf6f0 100644 --- a/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go +++ b/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go @@ -18,6 +18,13 @@ var p *T var e interface{} func main() { - p = new(T) // used T, but never converted to interface + p = new(T) // used T, but never converted to interface in any reachable code e.(I).M() // used I and I.M } + +func Unused() { // convert T to interface, but this function is not reachable + var i I = T(0) + i.M() +} + +var Unused2 interface{} = T(1) // convert T to interface, in an unreachable global initializer diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index 43a0352e0b..ea99233f67 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -63,6 +63,7 @@ type Reloc struct { func (rel Reloc) Type() objabi.RelocType { return objabi.RelocType(rel.Reloc.Type()) + rel.typ } func (rel Reloc) Sym() Sym { return rel.l.resolve(rel.r, rel.Reloc.Sym()) } func (rel Reloc) SetSym(s Sym) { rel.Reloc.SetSym(goobj.SymRef{PkgIdx: 0, SymIdx: uint32(s)}) } +func (rel Reloc) IsMarker() bool { return rel.Siz() == 0 } func (rel Reloc) SetType(t objabi.RelocType) { if t != objabi.RelocType(uint8(t)) { diff --git a/src/cmd/link/internal/wasm/asm.go b/src/cmd/link/internal/wasm/asm.go index 3bd56a6e3a..31851fbb56 100644 --- a/src/cmd/link/internal/wasm/asm.go +++ b/src/cmd/link/internal/wasm/asm.go @@ -167,6 +167,9 @@ func asmb2(ctxt *ld.Link, ldr *loader.Loader) { off := int32(0) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) + if r.Siz() == 0 { + continue // skip marker relocations + } wfn.Write(P[off:r.Off()]) off = r.Off() rs := ldr.ResolveABIAlias(r.Sym()) -- cgit v1.3 From 66770f4b1de37d9c5c962edb2980a70102e09ec3 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 28 Sep 2020 13:10:30 -0400 Subject: cmd/compile: mark type namedata symbols content-addressable Type namedata symbols are for type/field/method names and package paths. We can use content-addressable symbol mechanism for them. Change-Id: I923fda17b7094c7a0e46aad7c450622eb3826294 Reviewed-on: https://go-review.googlesource.com/c/go/+/257960 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Than McIntosh Reviewed-by: Jeremy Faller --- src/cmd/compile/internal/gc/reflect.go | 2 ++ src/cmd/internal/obj/objfile.go | 7 +++++++ src/cmd/internal/obj/sym.go | 5 ++++- src/cmd/link/internal/loader/loader.go | 4 ++-- 4 files changed, 15 insertions(+), 3 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go index ae3e2f8e65..21429af782 100644 --- a/src/cmd/compile/internal/gc/reflect.go +++ b/src/cmd/compile/internal/gc/reflect.go @@ -511,6 +511,7 @@ func dimportpath(p *types.Pkg) { s := Ctxt.Lookup("type..importpath." + p.Prefix + ".") ot := dnameData(s, 0, str, "", nil, false) ggloblsym(s, int32(ot), obj.DUPOK|obj.RODATA) + s.Set(obj.AttrContentAddressable, true) p.Pathsym = s } @@ -638,6 +639,7 @@ func dname(name, tag string, pkg *types.Pkg, exported bool) *obj.LSym { } ot := dnameData(s, 0, name, tag, pkg, exported) ggloblsym(s, int32(ot), obj.DUPOK|obj.RODATA) + s.Set(obj.AttrContentAddressable, true) return s } diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index e4b9620568..186016c970 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -372,6 +372,13 @@ func contentHash64(s *LSym) goobj.Hash64Type { // hashed symbols. func (w *writer) contentHash(s *LSym) goobj.HashType { h := sha1.New() + // Don't dedup type symbols with others, as they are in a different + // section. + if strings.HasPrefix(s.Name, "type.") { + h.Write([]byte{'T'}) + } else { + h.Write([]byte{0}) + } // The compiler trims trailing zeros _sometimes_. We just do // it always. h.Write(bytes.TrimRight(s.P, "\x00")) diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index d58877ee15..e5d7b2cbfd 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -38,6 +38,7 @@ import ( "log" "math" "sort" + "strings" ) func Linknew(arch *LinkArch) *Link { @@ -204,7 +205,9 @@ func (ctxt *Link) NumberSyms() { // if Pkgpath is unknown, cannot hash symbols with relocations, as it // may reference named symbols whose names are not fully expanded. if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) { - if len(s.P) <= 8 && len(s.R) == 0 { // we can use short hash only for symbols without relocations + if len(s.P) <= 8 && len(s.R) == 0 && !strings.HasPrefix(s.Name, "type.") { + // We can use short hash only for symbols without relocations. + // Don't use short hash for type symbols, as they need special handling. s.PkgIdx = goobj.PkgIdxHashed64 s.SymIdx = hashed64idx if hashed64idx != int32(len(ctxt.hashed64defs)) { diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index ea99233f67..4025edc23f 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -2153,11 +2153,11 @@ func (l *Loader) LoadNonpkgSyms(arch *sys.Arch) { l.npkgsyms = l.NSym() // Preallocate some space (a few hundreds KB) for some symbols. // As of Go 1.15, linking cmd/compile has ~8000 hashed64 symbols and - // ~13000 hashed symbols. + // ~27000 hashed symbols. st := loadState{ l: l, hashed64Syms: make(map[uint64]symAndSize, 10000), - hashedSyms: make(map[goobj.HashType]symAndSize, 15000), + hashedSyms: make(map[goobj.HashType]symAndSize, 30000), } for _, o := range l.objs[goObjStart:] { st.preloadSyms(o.r, hashed64Def) -- cgit v1.3 From cc2a5cf4b8b0aeaccd3dd439f8d3d68f25eef358 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 28 Sep 2020 18:20:12 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: fix some shift rules due to a regression A recent change to improve shifts was generating some invalid cases when the rule was based on an AND. The extended mnemonics CLRLSLDI and CLRLSLWI only allow certain values for the operands and in the mask case those values were not being checked properly. This adds a check to those rules to verify that the 'b' and 'n' values used when an AND was part of the rule have correct values. There was a bug in some diag messages in asm9. The message expected 3 values but only provided 2. Those are corrected here also. The test/codegen/shift.go was updated to add a few more cases to check for the case mentioned here. Some of the comments that mention the order of operands in these extended mnemonics were wrong and those have been corrected. Fixes #41683. Change-Id: If5bb860acaa5051b9e0cd80784b2868b85898c31 Reviewed-on: https://go-review.googlesource.com/c/go/+/258138 Run-TryBot: Lynn Boger Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo TryBot-Result: Go Bot Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 ++-- src/cmd/compile/internal/ppc64/ssa.go | 12 +++++----- src/cmd/compile/internal/ssa/gen/PPC64.rules | 9 ++++--- src/cmd/compile/internal/ssa/rewrite.go | 4 ++-- src/cmd/compile/internal/ssa/rewritePPC64.go | 34 +++++++-------------------- src/cmd/internal/obj/ppc64/asm9.go | 35 ++++++++++++++-------------- test/codegen/shift.go | 17 ++++++++------ 7 files changed, 50 insertions(+), 65 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 88a7609ba8..869f8c2d4f 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -287,8 +287,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLDICRCC $0, R4, $15, R6 // 788603c5 RLDIC $0, R4, $15, R6 // 788603c8 RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $16, R5, $8, R4 // 54a4861e - CLRLSLDI $2, R4, $24, R3 // 78831588 + CLRLSLWI $8, R5, $6, R4 // 54a430b2 + CLRLSLDI $24, R4, $4, R3 // 78832508 BEQ 0(PC) // 41820000 BGE 0(PC) // 40800000 diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index a5fbdaffba..d83b2df379 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -570,9 +570,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlslwi ra,rs,sh,mb will become rlwinm ra,rs,sh,mb-sh,31-n as described in ISA - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlslwi ra,rs,mb,sh will become rlwinm ra,rs,sh,mb-sh,31-sh as described in ISA + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r @@ -582,9 +582,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { r1 := v.Args[0].Reg() shifts := v.AuxInt p := s.Prog(v.Op.Asm()) - // clrlsldi ra,rs,sh,mb will become rldic ra,rs,sh,mb-sh - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)} - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)}) + // clrlsldi ra,rs,mb,sh will become rldic ra,rs,sh,mb-sh + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftmb(shifts)} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: ssa.GetPPC64Shiftsh(shifts)}) p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index de30d003e6..83ee4c499b 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -1018,13 +1018,12 @@ (SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) (SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) -(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) -(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) +(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) (SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) (SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) -(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) -(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) -(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) +(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) // special case for power9 (SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9f4de83a77..5d8b3ddc4e 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1380,8 +1380,8 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } -// Catch the simple ones first -// TODO: Later catch more cases +// This verifies that the mask occupies the +// rightmost bits. func isPPC64ValidShiftMask(v int64) bool { if ((v + 1) & v) == 0 { return true diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 29ec3992f2..9822637b05 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -12831,7 +12831,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12841,7 +12841,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLDI) @@ -12850,7 +12850,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { return true } // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12867,7 +12867,7 @@ func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLDI) @@ -12953,26 +12953,8 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { v.AddArg(x) return true } - // match: (SLWconst [c] z:(MOVWZreg x)) - // cond: z.Uses == 1 && c < 24 - // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) - for { - c := auxIntToInt64(v.AuxInt) - z := v_0 - if z.Op != OpPPC64MOVWZreg { - break - } - x := z.Args[0] - if !(z.Uses == 1 && c < 24) { - break - } - v.reset(OpPPC64CLRLSLWI) - v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) - v.AddArg(x) - return true - } // match: (SLWconst [c] z:(ANDconst [d] x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -12982,7 +12964,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z.AuxInt) x := z.Args[0] - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { break } v.reset(OpPPC64CLRLSLWI) @@ -12991,7 +12973,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { return true } // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) - // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) for { c := auxIntToInt64(v.AuxInt) @@ -13008,7 +12990,7 @@ func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { } d := auxIntToInt64(z_0.AuxInt) x := z_1 - if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + if !(z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (32-getPPC64ShiftMaskLength(d))) { continue } v.reset(OpPPC64CLRLSLWI) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 9f06bdf8b3..928e299f43 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2749,7 +2749,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { me := int(d) sh := c.regoff(&p.From) if me < 0 || me > 63 || sh > 63 { - c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh) + c.ctxt.Diag("Invalid me or sh for RLDICR: %x %x\n%v", int(d), sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me)) @@ -2757,19 +2757,19 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { mb := int(d) sh := c.regoff(&p.From) if mb < 0 || mb > 63 || sh > 63 { - c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh) + c.ctxt.Diag("Invalid mb or sh for RLDIC, RLDICL: %x %x\n%v", mb, sh, p) } o1 = AOP_RLDIC(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb)) case ACLRLSLDI: // This is an extended mnemonic defined in the ISA section C.8.1 - // clrlsldi ra,rs,n,b --> rldic ra,rs,n,b-n + // clrlsldi ra,rs,b,n --> rldic ra,rs,n,b-n // It maps onto RLDIC so is directly generated here based on the operands from // the clrlsldi. - b := int(d) - n := c.regoff(&p.From) - if n > int32(b) || b > 63 { - c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b) + n := int32(d) + b := c.regoff(&p.From) + if n > b || b > 63 { + c.ctxt.Diag("Invalid n or b for CLRLSLDI: %x %x\n%v", n, b, p) } o1 = AOP_RLDIC(OP_RLDIC, uint32(p.To.Reg), uint32(r), uint32(n), uint32(b)-uint32(n)) @@ -3395,14 +3395,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) + n := c.regoff(p.GetFrom3()) // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It maps onto rlwinm which is directly generated here. - if v < 0 || v > 32 || b > 32 { - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + if n > v || v >= 32 { + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) @@ -3414,16 +3415,16 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v := c.regoff(&p.From) switch p.As { case ACLRLSLWI: - b := c.regoff(p.GetFrom3()) - if v > b || b > 32 { + n := c.regoff(p.GetFrom3()) + if n > v || v >= 32 { // Message will match operands from the ISA even though in the // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, b) + c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) } // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,n,b -> rlwinm ra,rs,n,b-n,31-n + // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n // It generates the rlwinm directly here. - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(v), uint32(b-v), uint32(31-v)) + o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) default: var mask [2]uint8 c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index abc4b091c9..bbfc85ffbb 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -187,8 +187,8 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" f := (v8 &0xF) << 2 - // ppc64le:-"AND","CLRLSLWI" - // ppc64:-"AND","CLRLSLWI" + // ppc64le:"CLRLSLWI" + // ppc64:"CLRLSLWI" f += byte(v16)<<3 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" @@ -196,12 +196,15 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" h := (v32 & 0xFFFFF) << 2 - // ppc64le:-"AND","CLRLSLWI" - // ppc64:-"AND","CLRLSLWI" - h += uint32(v64)<<4 - // ppc64le:-"AND","CLRLSLDI" - // ppc64:-"AND","CLRLSLDI" + // ppc64le:"CLRLSLDI" + // ppc64:"CLRLSLDI" i := (v64 & 0xFFFFFFFF) << 5 + // ppc64le:-"CLRLSLDI" + // ppc64:-"CLRLSLDI" + i += (v64 & 0xFFFFFFF) << 38 + // ppc64le/power9:-"CLRLSLDI" + // ppc64/power9:-"CLRLSLDI" + i += (v64 & 0xFFFF00) << 10 // ppc64le/power9:-"SLD","EXTSWSLI" // ppc64/power9:-"SLD","EXTSWSLI" j := int64(x32+32)*8 -- cgit v1.3 From bdab5df40f474c7768a945ef4fcf5aab634f7af5 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Fri, 2 Oct 2020 17:51:13 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: use mulli where possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support to allow the use of mulli when one of the multiply operands is a constant that fits in 16 bits. This especially helps in the case where this instruction appears in a loop since the load of the constant is not being moved out of the loop. Some improvements seen in compress/flate on power9: Decode/Digits/Huffman/1e4 259µs ± 0% 261µs ± 0% +0.57% (p=1.000 n=1+1) Decode/Digits/Huffman/1e5 2.43ms ± 0% 2.45ms ± 0% +0.79% (p=1.000 n=1+1) Decode/Digits/Huffman/1e6 23.9ms ± 0% 24.2ms ± 0% +0.86% (p=1.000 n=1+1) Decode/Digits/Speed/1e4 278µs ± 0% 279µs ± 0% +0.34% (p=1.000 n=1+1) Decode/Digits/Speed/1e5 2.80ms ± 0% 2.81ms ± 0% +0.29% (p=1.000 n=1+1) Decode/Digits/Speed/1e6 28.0ms ± 0% 28.1ms ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e4 278µs ± 0% 278µs ± 0% +0.28% (p=1.000 n=1+1) Decode/Digits/Default/1e5 2.68ms ± 0% 2.69ms ± 0% +0.19% (p=1.000 n=1+1) Decode/Digits/Default/1e6 26.6ms ± 0% 26.6ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e4 278µs ± 0% 278µs ± 0% +0.00% (p=1.000 n=1+1) Decode/Digits/Compression/1e5 2.68ms ± 0% 2.69ms ± 0% +0.21% (p=1.000 n=1+1) Decode/Digits/Compression/1e6 26.6ms ± 0% 26.6ms ± 0% +0.07% (p=1.000 n=1+1) Decode/Newton/Huffman/1e4 322µs ± 0% 312µs ± 0% -2.84% (p=1.000 n=1+1) Decode/Newton/Huffman/1e5 3.11ms ± 0% 2.91ms ± 0% -6.41% (p=1.000 n=1+1) Decode/Newton/Huffman/1e6 31.4ms ± 0% 29.3ms ± 0% -6.85% (p=1.000 n=1+1) Decode/Newton/Speed/1e4 282µs ± 0% 269µs ± 0% -4.69% (p=1.000 n=1+1) Decode/Newton/Speed/1e5 2.29ms ± 0% 2.20ms ± 0% -4.13% (p=1.000 n=1+1) Decode/Newton/Speed/1e6 22.7ms ± 0% 21.3ms ± 0% -6.06% (p=1.000 n=1+1) Decode/Newton/Default/1e4 254µs ± 0% 237µs ± 0% -6.60% (p=1.000 n=1+1) Decode/Newton/Default/1e5 1.86ms ± 0% 1.75ms ± 0% -5.99% (p=1.000 n=1+1) Decode/Newton/Default/1e6 18.1ms ± 0% 17.4ms ± 0% -4.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e4 254µs ± 0% 244µs ± 0% -3.91% (p=1.000 n=1+1) Decode/Newton/Compression/1e5 1.85ms ± 0% 1.79ms ± 0% -3.10% (p=1.000 n=1+1) Decode/Newton/Compression/1e6 18.0ms ± 0% 17.3ms ± 0% -3.88% (p=1.000 n=1+1) Change-Id: I840320fab1c4bf64c76b001c2651ab79f23df4eb Reviewed-on: https://go-review.googlesource.com/c/go/+/259444 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 4 +++ src/cmd/compile/internal/gc/bench_test.go | 12 +++++++ src/cmd/compile/internal/ppc64/ssa.go | 3 +- src/cmd/compile/internal/ssa/gen/PPC64.rules | 2 ++ src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 2 ++ src/cmd/compile/internal/ssa/opGen.go | 30 ++++++++++++++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 54 ++++++++++++++++++++++++++++ src/cmd/internal/obj/ppc64/asm9.go | 9 ++--- 8 files changed, 111 insertions(+), 5 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s index 869f8c2d4f..c6d7b59aad 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64enc.s @@ -204,12 +204,16 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MULLW R3, R4 // 7c8419d6 MULLW R3, R4, R5 // 7ca419d6 + MULLW $10, R3 // 1c63000a + MULLW $10000000, R3 // 641f009863ff96807c7f19d6 MULLWCC R3, R4, R5 // 7ca419d7 MULHW R3, R4, R5 // 7ca41896 MULHWU R3, R4, R5 // 7ca41816 MULLD R3, R4 // 7c8419d2 MULLD R4, R4, R5 // 7ca421d2 + MULLD $20, R4 // 1c840014 + MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 MULLDCC R3, R4, R5 // 7ca419d3 MULHD R3, R4, R5 // 7ca41892 MULHDCC R3, R4, R5 // 7ca41893 diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go index a2887f2f7b..8c4288128f 100644 --- a/src/cmd/compile/internal/gc/bench_test.go +++ b/src/cmd/compile/internal/gc/bench_test.go @@ -7,6 +7,7 @@ package gc import "testing" var globl int64 +var globl32 int32 func BenchmarkLoadAdd(b *testing.B) { x := make([]int64, 1024) @@ -42,6 +43,17 @@ func BenchmarkModify(b *testing.B) { } } +func BenchmarkMullImm(b *testing.B) { + x := make([]int32, 1024) + for i := 0; i < b.N; i++ { + var s int32 + for i := range x { + s += x[i] * 100 + } + globl32 = s + } +} + func BenchmarkConstModify(b *testing.B) { a := make([]int64, 1024) for i := 0; i < b.N; i++ { diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index d83b2df379..1ece4d999f 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -677,7 +677,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, - ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst: + ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, + ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst, ssa.OpPPC64EXTSWSLconst, ssa.OpPPC64MULLWconst, ssa.OpPPC64MULLDconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 83ee4c499b..a05cfee654 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -821,6 +821,8 @@ (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x) +(MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x) + // Subtract from (with carry, but ignored) constant. // Note, these clobber the carry bit. (SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index 28317928a8..5885660597 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -181,6 +181,8 @@ func init() { {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit) {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit) + {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) + {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit) {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d7d2b24a48..051550fb17 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1832,6 +1832,8 @@ const ( OpPPC64FSUBS OpPPC64MULLD OpPPC64MULLW + OpPPC64MULLDconst + OpPPC64MULLWconst OpPPC64MADDLD OpPPC64MULHD OpPPC64MULHW @@ -24377,6 +24379,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MULLDconst", + auxType: auxInt32, + argLen: 1, + asm: ppc64.AMULLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "MULLWconst", + auxType: auxInt32, + argLen: 1, + asm: ppc64.AMULLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "MADDLD", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 9822637b05..1b8a5a78ca 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -568,6 +568,10 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64MOVWstorezero(v) case OpPPC64MTVSRD: return rewriteValuePPC64_OpPPC64MTVSRD(v) + case OpPPC64MULLD: + return rewriteValuePPC64_OpPPC64MULLD(v) + case OpPPC64MULLW: + return rewriteValuePPC64_OpPPC64MULLW(v) case OpPPC64NEG: return rewriteValuePPC64_OpPPC64NEG(v) case OpPPC64NOR: @@ -11003,6 +11007,56 @@ func rewriteValuePPC64_OpPPC64MTVSRD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64MULLD(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULLD x (MOVDconst [c])) + // cond: is16Bit(c) + // result: (MULLDconst [int32(c)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is16Bit(c)) { + continue + } + v.reset(OpPPC64MULLDconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true + } + break + } + return false +} +func rewriteValuePPC64_OpPPC64MULLW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULLW x (MOVDconst [c])) + // cond: is16Bit(c) + // result: (MULLWconst [int32(c)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is16Bit(c)) { + continue + } + v.reset(OpPPC64MULLWconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64NEG(v *Value) bool { v_0 := v.Args[0] // match: (NEG (ADDconst [c] x)) diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 928e299f43..c2e8e9e9d0 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -1279,6 +1279,9 @@ func buildop(ctxt *obj.Link) { case AREMD: opset(AREMDU, r0) + case AMULLW: + opset(AMULLD, r0) + case ADIVW: /* op Rb[,Ra],Rd */ opset(AMULHW, r0) @@ -1312,7 +1315,6 @@ func buildop(ctxt *obj.Link) { opset(AMULHDCC, r0) opset(AMULHDU, r0) opset(AMULHDUCC, r0) - opset(AMULLD, r0) opset(AMULLDCC, r0) opset(AMULLDVCC, r0) opset(AMULLDV, r0) @@ -1996,7 +1998,6 @@ func buildop(ctxt *obj.Link) { AMOVB, /* macro: move byte with sign extension */ AMOVBU, /* macro: move byte with sign extension & update */ AMOVFL, - AMULLW, /* op $s[,r2],r3; op r1[,r2],r3; no cc/v */ ASUBC, /* op r1,$s,r3; op r1[,r2],r3 */ ASTSW, @@ -4990,8 +4991,8 @@ func (c *ctxt9) opirr(a obj.As) uint32 { case ADARN: return OPVCC(31, 755, 0, 0) /* darn - v3.00 */ - case AMULLW: - return OPVCC(7, 0, 0, 0) + case AMULLW, AMULLD: + return OPVCC(7, 0, 0, 0) /* mulli works with MULLW or MULLD */ case AOR: return OPVCC(24, 0, 0, 0) -- cgit v1.3 From 28e549dec3954b36d0c83442be913d8709d7e5ae Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 12 Sep 2020 12:33:24 -0400 Subject: runtime: use sigaltstack on macOS/ARM64 Currently we don't use sigaltstack on darwin/arm64, as is not supported on iOS. However, it is supported on macOS. Use it. (iOS remains unchanged.) Change-Id: Icc154c5e2edf2dbdc8ca68741ad9157fc15a72ee Reviewed-on: https://go-review.googlesource.com/c/go/+/256917 Trust: Cherry Zhang Reviewed-by: Ian Lance Taylor --- misc/cgo/test/sigaltstack.go | 2 +- src/cmd/internal/obj/arm64/obj7.go | 2 +- src/runtime/mkpreempt.go | 7 ++----- src/runtime/os_darwin.go | 8 ++++---- src/runtime/preempt_arm64.s | 3 --- src/runtime/stack.go | 2 +- src/runtime/sys_darwin_arm64.s | 22 ++++++++++++++++++---- 7 files changed, 27 insertions(+), 19 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/misc/cgo/test/sigaltstack.go b/misc/cgo/test/sigaltstack.go index 27b753a147..034cc4b371 100644 --- a/misc/cgo/test/sigaltstack.go +++ b/misc/cgo/test/sigaltstack.go @@ -62,7 +62,7 @@ import ( func testSigaltstack(t *testing.T) { switch { - case runtime.GOOS == "solaris", runtime.GOOS == "illumos", (runtime.GOOS == "darwin" || runtime.GOOS == "ios") && runtime.GOARCH == "arm64": + case runtime.GOOS == "solaris", runtime.GOOS == "illumos", runtime.GOOS == "ios" && runtime.GOARCH == "arm64": t.Skipf("switching signal stack not implemented on %s/%s", runtime.GOOS, runtime.GOARCH) } diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index 56da854f16..f1bc2583cb 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -589,7 +589,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.To.Reg = REGSP q1.Spadj = c.autosize - if c.ctxt.Headtype == objabi.Hdarwin { + if objabi.GOOS == "ios" { // iOS does not support SA_ONSTACK. We will run the signal handler // on the G stack. If we write below SP, it may be clobbered by // the signal handler. So we save LR after decrementing SP. diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index c5bfb0f207..40683bb9d9 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -340,12 +340,9 @@ func genARM64() { p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux) p("SUB $8, RSP, R29") // set up new frame pointer p("#endif") - // On darwin, save the LR again after decrementing SP. We run the - // signal handler on the G stack (as it doesn't support SA_ONSTACK), + // On iOS, save the LR again after decrementing SP. We run the + // signal handler on the G stack (as it doesn't support sigaltstack), // so any writes below SP may be clobbered. - p("#ifdef GOOS_darwin") - p("MOVD R30, (RSP)") - p("#endif") p("#ifdef GOOS_ios") p("MOVD R30, (RSP)") p("#endif") diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index 01c40b4813..394bd6fb0f 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -289,9 +289,9 @@ func mpreinit(mp *m) { // Called to initialize a new m (including the bootstrap m). // Called on the new thread, cannot allocate memory. func minit() { - // The alternate signal stack is buggy on arm64. + // iOS does not support alternate signal stack. // The signal handler handles it directly. - if GOARCH != "arm64" { + if !(GOOS == "ios" && GOARCH == "arm64") { minitSignalStack() } minitSignalMask() @@ -301,9 +301,9 @@ func minit() { // Called from dropm to undo the effect of an minit. //go:nosplit func unminit() { - // The alternate signal stack is buggy on arm64. + // iOS does not support alternate signal stack. // See minit. - if GOARCH != "arm64" { + if !(GOOS == "ios" && GOARCH == "arm64") { unminitSignals() } } diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index d0e77659c3..36ee13282c 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -10,9 +10,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R29, -8(RSP) SUB $8, RSP, R29 #endif - #ifdef GOOS_darwin - MOVD R30, (RSP) - #endif #ifdef GOOS_ios MOVD R30, (RSP) #endif diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 3802cd049e..2afc2635aa 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -66,7 +66,7 @@ const ( // to each stack below the usual guard area for OS-specific // purposes like signal handling. Used on Windows, Plan 9, // and iOS because they do not use a separate stack. - _StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + (sys.GoosDarwin+sys.GoosIos)*sys.GoarchArm64*1024 + _StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + sys.GoosIos*sys.GoarchArm64*1024 // The minimum size of stack used by Go code _StackMin = 2048 diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s index 585d4f2c64..427cb17781 100644 --- a/src/runtime/sys_darwin_arm64.s +++ b/src/runtime/sys_darwin_arm64.s @@ -202,6 +202,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$192 BEQ 2(PC) BL runtime·load_g(SB) +#ifdef GOOS_ios MOVD RSP, R6 CMP $0, g BEQ nog @@ -226,16 +227,21 @@ nog: // Switch to gsignal stack. MOVD R6, RSP - // Call sigtrampgo. + // Save arguments. MOVW R0, (8*1)(RSP) MOVD R1, (8*2)(RSP) MOVD R2, (8*3)(RSP) +#endif + + // Call sigtrampgo. MOVD $runtime·sigtrampgo(SB), R11 BL (R11) +#ifdef GOOS_ios // Switch to old stack. MOVD (8*4)(RSP), R5 MOVD R5, RSP +#endif // Restore callee-save registers. MOVD (8*4)(RSP), R19 @@ -329,12 +335,20 @@ TEXT runtime·fcntl_trampoline(SB),NOSPLIT,$0 ADD $16, RSP RET -// sigaltstack on iOS is not supported and will always -// run the signal handler on the main stack, so our sigtramp has -// to do the stack switch ourselves. TEXT runtime·sigaltstack_trampoline(SB),NOSPLIT,$0 +#ifdef GOOS_ios + // sigaltstack on iOS is not supported and will always + // run the signal handler on the main stack, so our sigtramp has + // to do the stack switch ourselves. MOVW $43, R0 BL libc_exit(SB) +#else + MOVD 8(R0), R1 // arg 2 old + MOVD 0(R0), R0 // arg 1 new + CALL libc_sigaltstack(SB) + CBZ R0, 2(PC) + BL notok<>(SB) +#endif RET // Thread related functions -- cgit v1.3 From 3f7b4d12075277f28427e6b57708258225841ecd Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 7 Oct 2020 14:39:47 -0400 Subject: cmd/internal/obj/arm64: only emit R_CALLIND relocations on calls Don't emit it for jumps. In particular, not for the return instruction, which is JMP (LR). Reduce some binary size and linker resources. Change-Id: Idb3242b86c5a137597fb8accb8aadfe0244c14cf Reviewed-on: https://go-review.googlesource.com/c/go/+/260341 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Than McIntosh --- src/cmd/internal/obj/arm64/asm7.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index ee4a33eef4..7c35fce106 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -3120,12 +3120,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 6: /* b ,O(R); bl ,O(R) */ o1 = c.opbrr(p, p.As) - o1 |= uint32(p.To.Reg&31) << 5 - rel := obj.Addrel(c.cursym) - rel.Off = int32(c.pc) - rel.Siz = 0 - rel.Type = objabi.R_CALLIND + if p.As == obj.ACALL { + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 0 + rel.Type = objabi.R_CALLIND + } case 7: /* beq s */ o1 = c.opbra(p, p.As) -- cgit v1.3 From 3036b76df0ae748856e3e0008b67241cc580e263 Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Sat, 13 Jun 2020 00:06:49 +0800 Subject: cmd/asm: Add SHA3 hardware instructions for ARM64 Armv8.2-SHA introduced four SHA3-related instructions EOR3 .16B, .16B, .16B, .16B RAX1 .2D, .2D, .2D XAR .2D, .2D, .2D, # BCAX .16B, .16B, .16B, .16B We convert them into Go asm style as: VEOR3 .B16, .B16, .B16, .B16 VRAX1 .D2, .D2, .D2 VXAR $imm6, .D2, .D2, .D2 VBCAX .B16, .B16, .B16, .B16 Armv8 Reference Manual: * EOR3 (Three-way Exclusive OR) on C7.2.42 * RAX1 (Rotate and Exclusive OR) on C7.2.217 * XAR (Exclusive OR and Rotate) on C7.2.401 * BCAX (Bit Clear and Exclusive OR) on C7.2.12 Change-Id: I9a5d1b5ad508ed8fd5289d535906c54d9a63ca5a Reviewed-on: https://go-review.googlesource.com/c/go/+/180757 Run-TryBot: Meng Zhuo TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Emmanuel Odeke --- src/cmd/asm/internal/asm/testdata/arm64.s | 4 ++ src/cmd/internal/obj/arm64/a.out.go | 4 ++ src/cmd/internal/obj/arm64/anames.go | 4 ++ src/cmd/internal/obj/arm64/asm7.go | 73 ++++++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 2 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index e277c04b7c..7f495b90bb 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -81,6 +81,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 SHA512H2 V4.D2, V3, V2 // 628464ce SHA512SU0 V9.D2, V8.D2 // 2881c0ce SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce + VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace + VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce VADDV V0.S4, V0 // 00b8b14e VMOVI $82, V0.B16 // 40e6024f VUADDLV V6.B16, V6 // c638306e @@ -139,6 +141,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e VTBL V3.B8, [V27.B16], V8.B8 // 6803030e + VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce + VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 1ca41c15ba..33319e48df 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -958,9 +958,11 @@ const ( AVADDP AVAND AVBIF + AVBCAX AVCMEQ AVCNT AVEOR + AVEOR3 AVMOV AVLD1 AVLD2 @@ -989,6 +991,7 @@ const ( AVPMULL2 AVEXT AVRBIT + AVRAX1 AVUSHR AVUSHLL AVUSHLL2 @@ -1001,6 +1004,7 @@ const ( AVBSL AVBIT AVTBL + AVXAR AVZIP1 AVZIP2 AVCMTST diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 900cdba817..e5534e26b9 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -464,9 +464,11 @@ var Anames = []string{ "VADDP", "VAND", "VBIF", + "VBCAX", "VCMEQ", "VCNT", "VEOR", + "VEOR3", "VMOV", "VLD1", "VLD2", @@ -495,6 +497,7 @@ var Anames = []string{ "VPMULL2", "VEXT", "VRBIT", + "VRAX1", "VUSHR", "VUSHLL", "VUSHLL2", @@ -507,6 +510,7 @@ var Anames = []string{ "VBSL", "VBIT", "VTBL", + "VXAR", "VZIP1", "VZIP2", "VCMTST", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 7c35fce106..c46066313e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -843,6 +843,8 @@ var optab = []Optab{ {ASHA256H, C_ARNG, C_VREG, C_NONE, C_VREG, 1, 4, 0, 0, 0}, {AVREV32, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0}, {AVPMULL, C_ARNG, C_ARNG, C_NONE, C_ARNG, 93, 4, 0, 0, 0}, + {AVEOR3, C_ARNG, C_ARNG, C_ARNG, C_ARNG, 103, 4, 0, 0, 0}, + {AVXAR, C_VCON, C_ARNG, C_ARNG, C_ARNG, 104, 4, 0, 0, 0}, {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 90, 4, 0, 0, 0}, {obj.APCDATA, C_VCON, C_NONE, C_NONE, C_VCON, 0, 0, 0, 0, 0}, @@ -2769,6 +2771,7 @@ func buildop(ctxt *obj.Link) { case AVADD: oprangeset(AVSUB, t) + oprangeset(AVRAX1, t) case AAESD: oprangeset(AAESE, t) @@ -2827,6 +2830,9 @@ func buildop(ctxt *obj.Link) { oprangeset(AVLD4, t) oprangeset(AVLD4R, t) + case AVEOR3: + oprangeset(AVBCAX, t) + case ASHA1H, AVCNT, AVMOV, @@ -2839,7 +2845,8 @@ func buildop(ctxt *obj.Link) { AVDUP, AVMOVI, APRFM, - AVEXT: + AVEXT, + AVXAR: break case obj.ANOP, @@ -4205,7 +4212,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Add = 0 rel.Type = objabi.R_ARM64_GOTPCREL - case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2 Vm., Vn., Vd. */ + case 72: /* vaddp/vand/vcmeq/vorr/vadd/veor/vfmla/vfmls/vbit/vbsl/vcmtst/vsub/vbif/vuzip1/vuzip2/vrax1 Vm., Vn., Vd. */ af := int((p.From.Reg >> 5) & 15) af3 := int((p.Reg >> 5) & 15) at := int((p.To.Reg >> 5) & 15) @@ -4269,6 +4276,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } else { size = 0 } + case AVRAX1: + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement: %v", p) + } + size = 0 + Q = 0 } o1 |= (uint32(Q&1) << 30) | (uint32(size&3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) @@ -5186,6 +5199,51 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ + ta := (p.From.Reg >> 5) & 15 + tm := (p.Reg >> 5) & 15 + td := (p.To.Reg >> 5) & 15 + tn := ((p.GetFrom3().Reg) >> 5) & 15 + + if ta != tm || ta != tn || ta != td || ta != ARNG_16B { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + o1 = c.oprrr(p, p.As) + ra := int(p.From.Reg) + rm := int(p.Reg) + rn := int(p.GetFrom3().Reg) + rd := int(p.To.Reg) + o1 |= uint32(rm&31)<<16 | uint32(ra&31)<<10 | uint32(rn&31)<<5 | uint32(rd)&31 + + case 104: /* vxar $imm4, Vm., Vn., Vd. */ + af := ((p.GetFrom3().Reg) >> 5) & 15 + at := (p.To.Reg >> 5) & 15 + a := (p.Reg >> 5) & 15 + index := int(p.From.Offset) + + if af != a || af != at { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + if af != ARNG_2D { + c.ctxt.Diag("invalid arrangement, should be D2: %v", p) + break + } + + if index < 0 || index > 63 { + c.ctxt.Diag("illegal offset: %v", p) + } + + o1 = c.opirr(p, p.As) + rf := (p.GetFrom3().Reg) & 31 + rt := (p.To.Reg) & 31 + r := (p.Reg) & 31 + + o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + } out[0] = o1 out[1] = o2 @@ -5761,6 +5819,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVAND: return 7<<25 | 1<<21 | 7<<10 + case AVBCAX: + return 0xCE<<24 | 1<<21 + case AVCMEQ: return 1<<29 | 0x71<<21 | 0x23<<10 @@ -5776,12 +5837,18 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVEOR: return 1<<29 | 0x71<<21 | 7<<10 + case AVEOR3: + return 0xCE << 24 + case AVORR: return 7<<25 | 5<<21 | 7<<10 case AVREV16: return 3<<26 | 2<<24 | 1<<21 | 3<<11 + case AVRAX1: + return 0xCE<<24 | 3<<21 | 1<<15 | 3<<10 + case AVREV32: return 11<<26 | 2<<24 | 1<<21 | 1<<11 @@ -6039,6 +6106,8 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHLL2, AVUXTL2: return 3<<29 | 15<<24 | 0x29<<10 + case AVXAR: + return 0xCE<<24 | 1<<23 } c.ctxt.Diag("%v: bad irr %v", p, a) -- cgit v1.3 From c8eea1633e160d3517ea44e50898893dad09fed0 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Tue, 13 Oct 2020 18:28:25 -0400 Subject: cmd/internal/obj: remove LSym.RefIdx LSym.RefIdx was for the old object files. I should have deleted it when I deleted old object file code. Change-Id: I8294f43a1e7ba45b1d75e84cc83cbaf2cb32f025 Reviewed-on: https://go-review.googlesource.com/c/go/+/262077 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Jeremy Faller --- src/cmd/internal/obj/link.go | 3 +-- src/cmd/internal/obj/sizeof_test.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 014c78dbfc..f14b691802 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -395,7 +395,6 @@ type LSym struct { Type objabi.SymKind Attribute - RefIdx int // Index of this symbol in the symbol reference list. Size int64 Gotype *LSym P []byte @@ -405,7 +404,7 @@ type LSym struct { Pkg string PkgIdx int32 - SymIdx int32 // TODO: replace RefIdx + SymIdx int32 } // A FuncInfo contains extra fields for STEXT symbols. diff --git a/src/cmd/internal/obj/sizeof_test.go b/src/cmd/internal/obj/sizeof_test.go index b5e170c694..69e60473f5 100644 --- a/src/cmd/internal/obj/sizeof_test.go +++ b/src/cmd/internal/obj/sizeof_test.go @@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) { _64bit uintptr // size on 64bit platforms }{ {Addr{}, 32, 48}, - {LSym{}, 76, 128}, + {LSym{}, 72, 120}, {Prog{}, 132, 200}, } -- cgit v1.3 From 912262b806a432a29302e0cee45e4f42ef7038a2 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 19 Jul 2020 00:30:12 -0400 Subject: cmd/internal/obj: move LSym.Func into LSym.Extra This creates space for a different kind of extension field in LSym without making the struct any larger. (There are many LSym, so we care about keeping the struct small.) Change-Id: Ib16edb9e15f54c2a7351c8b875e19684058711e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/243943 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/asm.go | 2 +- src/cmd/compile/internal/gc/dwinl.go | 4 +- src/cmd/compile/internal/gc/gsubr.go | 2 +- src/cmd/compile/internal/gc/obj.go | 9 ++-- src/cmd/compile/internal/gc/pgen.go | 18 ++++---- src/cmd/compile/internal/gc/plive.go | 9 ++-- src/cmd/compile/internal/gc/scope.go | 4 +- src/cmd/compile/internal/gc/ssa.go | 10 ++-- src/cmd/internal/obj/arm/asm5.go | 8 ++-- src/cmd/internal/obj/arm/obj5.go | 34 +++++++------- src/cmd/internal/obj/arm64/asm7.go | 12 ++--- src/cmd/internal/obj/arm64/obj7.go | 36 +++++++-------- src/cmd/internal/obj/dwarf.go | 31 +++++++------ src/cmd/internal/obj/ld.go | 2 +- src/cmd/internal/obj/link.go | 23 ++++++++- src/cmd/internal/obj/mips/asm0.go | 10 ++-- src/cmd/internal/obj/mips/obj0.go | 42 ++++++++--------- src/cmd/internal/obj/objfile.go | 90 ++++++++++++++++++------------------ src/cmd/internal/obj/pass.go | 6 +-- src/cmd/internal/obj/pcln.go | 28 +++++------ src/cmd/internal/obj/plist.go | 14 +++--- src/cmd/internal/obj/ppc64/asm9.go | 10 ++-- src/cmd/internal/obj/ppc64/obj9.go | 28 +++++------ src/cmd/internal/obj/riscv/obj.go | 38 +++++++-------- src/cmd/internal/obj/s390x/asmz.go | 6 +-- src/cmd/internal/obj/s390x/objz.go | 28 +++++------ src/cmd/internal/obj/sym.go | 6 ++- src/cmd/internal/obj/wasm/wasmobj.go | 34 +++++++------- src/cmd/internal/obj/x86/asm6.go | 8 ++-- src/cmd/internal/obj/x86/obj6.go | 26 +++++------ 30 files changed, 304 insertions(+), 274 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 7878d74549..b9efa454ed 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -181,7 +181,7 @@ func (p *Parser) asmText(operands [][]lex.Token) { // Argsize set below. }, } - nameAddr.Sym.Func.Text = prog + nameAddr.Sym.Func().Text = prog prog.To.Val = int32(argSize) p.append(prog, "", true) } diff --git a/src/cmd/compile/internal/gc/dwinl.go b/src/cmd/compile/internal/gc/dwinl.go index 27e2cbcd98..5120fa1166 100644 --- a/src/cmd/compile/internal/gc/dwinl.go +++ b/src/cmd/compile/internal/gc/dwinl.go @@ -34,7 +34,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { // Walk progs to build up the InlCalls data structure var prevpos src.XPos - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := fnsym.Func().Text; p != nil; p = p.Link { if p.Pos == prevpos { continue } @@ -150,7 +150,7 @@ func assembleInlines(fnsym *obj.LSym, dwVars []*dwarf.Var) dwarf.InlCalls { start := int64(-1) curii := -1 var prevp *obj.Prog - for p := fnsym.Func.Text; p != nil; prevp, p = p, p.Link { + for p := fnsym.Func().Text; p != nil; prevp, p = p, p.Link { if prevp != nil && p.Pos == prevp.Pos { continue } diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go index 480d411f49..14c217ff3b 100644 --- a/src/cmd/compile/internal/gc/gsubr.go +++ b/src/cmd/compile/internal/gc/gsubr.go @@ -199,7 +199,7 @@ func (pp *Progs) settext(fn *Node) { ptxt := pp.Prog(obj.ATEXT) pp.Text = ptxt - fn.Func.lsym.Func.Text = ptxt + fn.Func.lsym.Func().Text = ptxt ptxt.From.Type = obj.TYPE_MEM ptxt.From.Name = obj.NAME_EXTERN ptxt.From.Sym = fn.Func.lsym diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index d7f4a94041..f6557e2d15 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -305,20 +305,21 @@ func dumpglobls() { // global symbols can't be declared during parallel compilation. func addGCLocals() { for _, s := range Ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } - for _, gcsym := range []*obj.LSym{s.Func.GCArgs, s.Func.GCLocals, s.Func.GCRegs} { + for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} { if gcsym != nil && !gcsym.OnList() { ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK) } } - if x := s.Func.StackObjects; x != nil { + if x := fn.StackObjects; x != nil { attr := int16(obj.RODATA) ggloblsym(x, int32(len(x.P)), attr) x.Set(obj.AttrStatic, true) } - if x := s.Func.OpenCodedDeferInfo; x != nil { + if x := fn.OpenCodedDeferInfo; x != nil { ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.DUPOK) } } diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 52b1ed351d..353f4b08c9 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -266,8 +266,8 @@ func compile(fn *Node) { dtypesym(n.Type) // Also make sure we allocate a linker symbol // for the stack object data, for the same reason. - if fn.Func.lsym.Func.StackObjects == nil { - fn.Func.lsym.Func.StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") + if fn.Func.lsym.Func().StackObjects == nil { + fn.Func.lsym.Func().StackObjects = Ctxt.Lookup(fn.Func.lsym.Name + ".stkobj") } } } @@ -415,7 +415,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S case PAUTO: if !n.Name.Used() { // Text == nil -> generating abstract function - if fnsym.Func.Text != nil { + if fnsym.Func().Text != nil { Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)") } continue @@ -425,7 +425,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S continue } apdecls = append(apdecls, n) - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } decls, dwarfVars := createDwarfVars(fnsym, fn.Func, apdecls) @@ -435,7 +435,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S // the function symbol to insure that the type included in DWARF // processing during linking. typesyms := []*obj.LSym{} - for t, _ := range fnsym.Func.Autot { + for t, _ := range fnsym.Func().Autot { typesyms = append(typesyms, t) } sort.Sort(obj.BySymName(typesyms)) @@ -444,7 +444,7 @@ func debuginfo(fnsym *obj.LSym, infosym *obj.LSym, curfn interface{}) ([]dwarf.S r.Sym = sym r.Type = objabi.R_USETYPE } - fnsym.Func.Autot = nil + fnsym.Func().Autot = nil var varScopes []ScopeID for _, decl := range decls { @@ -522,7 +522,7 @@ func createSimpleVar(fnsym *obj.LSym, n *Node) *dwarf.Var { } typename := dwarf.InfoPrefix + typesymname(n.Type) - delete(fnsym.Func.Autot, ngotype(n).Linksym()) + delete(fnsym.Func().Autot, ngotype(n).Linksym()) inlIndex := 0 if genDwarfInline > 1 { if n.Name.InlFormal() || n.Name.InlLocal() { @@ -667,7 +667,7 @@ func createDwarfVars(fnsym *obj.LSym, fn *Func, apDecls []*Node) ([]*Node, []*dw ChildIndex: -1, }) // Record go type of to insure that it gets emitted by the linker. - fnsym.Func.RecordAutoType(ngotype(n).Linksym()) + fnsym.Func().RecordAutoType(ngotype(n).Linksym()) } return decls, vars @@ -731,7 +731,7 @@ func createComplexVar(fnsym *obj.LSym, fn *Func, varID ssa.VarID) *dwarf.Var { } gotype := ngotype(n).Linksym() - delete(fnsym.Func.Autot, gotype) + delete(fnsym.Func().Autot, gotype) typename := dwarf.InfoPrefix + gotype.Name[len("type."):] inlIndex := 0 if genDwarfInline > 1 { diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go index a9ea37701e..b471accb65 100644 --- a/src/cmd/compile/internal/gc/plive.go +++ b/src/cmd/compile/internal/gc/plive.go @@ -1552,26 +1552,27 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { // Emit the live pointer map data structures ls := e.curfn.Func.lsym - ls.Func.GCArgs, ls.Func.GCLocals, ls.Func.GCRegs = lv.emit() + fninfo := ls.Func() + fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit() p := pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCArgs + p.To.Sym = fninfo.GCArgs p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_LocalsPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCLocals + p.To.Sym = fninfo.GCLocals if !go115ReduceLiveness { p = pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCRegs + p.To.Sym = fninfo.GCRegs } return lv.livenessMap diff --git a/src/cmd/compile/internal/gc/scope.go b/src/cmd/compile/internal/gc/scope.go index d7239d5693..e66b859e10 100644 --- a/src/cmd/compile/internal/gc/scope.go +++ b/src/cmd/compile/internal/gc/scope.go @@ -62,9 +62,9 @@ func scopePCs(fnsym *obj.LSym, marks []Mark, dwarfScopes []dwarf.Scope) { if len(marks) == 0 { return } - p0 := fnsym.Func.Text + p0 := fnsym.Func().Text scope := findScope(marks, p0.Pos) - for p := fnsym.Func.Text; p != nil; p = p.Link { + for p := p0; p != nil; p = p.Link { if p.Pos == p0.Pos { continue } diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 3d5fa4cd0a..d8f627c213 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -240,7 +240,7 @@ func dvarint(x *obj.LSym, off int, v int64) int { // - Offset of where argument should be placed in the args frame when making call func (s *state) emitOpenDeferInfo() { x := Ctxt.Lookup(s.curfn.Func.lsym.Name + ".opendefer") - s.curfn.Func.lsym.Func.OpenCodedDeferInfo = x + s.curfn.Func.lsym.Func().OpenCodedDeferInfo = x off := 0 // Compute maxargsize (max size of arguments for all defers) @@ -6108,7 +6108,7 @@ func emitStackObjects(e *ssafn, pp *Progs) { // Populate the stack object data. // Format must match runtime/stack.go:stackObjectRecord. - x := e.curfn.Func.lsym.Func.StackObjects + x := e.curfn.Func.lsym.Func().StackObjects off := 0 off = duintptr(x, off, uint64(len(vars))) for _, v := range vars { @@ -6145,7 +6145,7 @@ func genssa(f *ssa.Func, pp *Progs) { s.livenessMap = liveness(e, f, pp) emitStackObjects(e, pp) - openDeferInfo := e.curfn.Func.lsym.Func.OpenCodedDeferInfo + openDeferInfo := e.curfn.Func.lsym.Func().OpenCodedDeferInfo if openDeferInfo != nil { // This function uses open-coded defers -- write out the funcdata // info that we computed at the end of genssa. @@ -6350,7 +6350,7 @@ func genssa(f *ssa.Func, pp *Progs) { // some of the inline marks. // Use this instruction instead. p.Pos = p.Pos.WithIsStmt() // promote position to a statement - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[m]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[m]) // Make the inline mark a real nop, so it doesn't generate any code. m.As = obj.ANOP m.Pos = src.NoXPos @@ -6362,7 +6362,7 @@ func genssa(f *ssa.Func, pp *Progs) { // Any unmatched inline marks now need to be added to the inlining tree (and will generate a nop instruction). for _, p := range inlMarkList { if p.As != obj.ANOP { - pp.curfn.Func.lsym.Func.AddInlMark(p, inlMarks[p]) + pp.curfn.Func.lsym.Func().AddInlMark(p, inlMarks[p]) } } } diff --git a/src/cmd/internal/obj/arm/asm5.go b/src/cmd/internal/obj/arm/asm5.go index 269a4223d5..ebb98b4859 100644 --- a/src/cmd/internal/obj/arm/asm5.go +++ b/src/cmd/internal/obj/arm/asm5.go @@ -390,7 +390,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var p *obj.Prog var op *obj.Prog - p = cursym.Func.Text + p = cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -482,8 +482,8 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bflag = 0 pc = 0 times++ - c.cursym.Func.Text.Pc = 0 // force re-layout the code. - for p = c.cursym.Func.Text; p != nil; p = p.Link { + c.cursym.Func().Text.Pc = 0 // force re-layout the code. + for p = c.cursym.Func().Text; p != nil; p = p.Link { o = c.oplook(p) if int64(pc) > p.Pc { p.Pc = int64(pc) @@ -558,7 +558,7 @@ func span5(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * perhaps we'd be able to parallelize the span loop above. */ - p = c.cursym.Func.Text + p = c.cursym.Func().Text c.autosize = p.To.Offset + 4 c.cursym.Grow(c.cursym.Size) diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go index 4d9187b530..f2bfb9679f 100644 --- a/src/cmd/internal/obj/arm/obj5.go +++ b/src/cmd/internal/obj/arm/obj5.go @@ -249,13 +249,13 @@ const ( func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset == -4 { // Historical way to mark NOFRAME. @@ -271,30 +271,30 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - cursym.Func.Locals = autoffset - cursym.Func.Args = p.To.Val.(int32) + cursym.Func().Locals = autoffset + cursym.Func().Args = p.To.Val.(int32) /* * find leaf subroutines */ - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF case ADIV, ADIVU, AMOD, AMODU: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF case ABL, ABX, obj.ADUFFZERO, obj.ADUFFCOPY: - cursym.Func.Text.Mark &^= LEAF + cursym.Func().Text.Mark &^= LEAF } } var q2 *obj.Prog - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -311,20 +311,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && cursym.Func.Text.Mark&LEAF == 0 { + if autosize == 0 && cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // are not identified as leaves but still have no frame. if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", cursym.Name) } - cursym.Func.Text.Mark |= LEAF + cursym.Func().Text.Mark |= LEAF } // FP offsets need an updated p.To.Offset. p.To.Offset = int64(autosize) - 4 - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -347,7 +347,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REGSP p.Spadj = autosize - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVW g_panic(g), R1 @@ -460,7 +460,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: nocache(p) - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AB p.From = obj.Addr{} @@ -508,7 +508,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } case ADIV, ADIVU, AMOD, AMODU: - if cursym.Func.Text.From.Sym.NoSplit() { + if cursym.Func().Text.From.Sym.NoSplit() { ctxt.Diag("cannot divide in NOSPLIT function") } const debugdivmod = false @@ -720,7 +720,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -751,7 +751,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -762,7 +762,7 @@ func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { b := obj.Appendp(pcdata, c.newprog) b.As = obj.AJMP b.To.Type = obj.TYPE_BRANCH - b.To.SetTarget(c.cursym.Func.Text.Link) + b.To.SetTarget(c.cursym.Func().Text.Link) b.Spadj = +framesize return end diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index c46066313e..6b9fe27c05 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -913,7 +913,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -943,8 +943,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { alignedValue := p.From.Offset m = pcAlignPadLength(pc, alignedValue, ctxt) // Update the current text symbol alignment value. - if int32(alignedValue) > cursym.Func.Align { - cursym.Func.Align = int32(alignedValue) + if int32(alignedValue) > cursym.Func().Align { + cursym.Func().Align = int32(alignedValue) } break case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: @@ -983,7 +983,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { if p.As == ADWORD && (pc&7) != 0 { pc += 4 } @@ -1047,7 +1047,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { psz := int32(0) var i int var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) @@ -1088,7 +1088,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index f1bc2583cb..0baf51973a 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -166,7 +166,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { end := c.ctxt.EndUnsafePoint(bls, c.newprog, -1) var last *obj.Prog - for last = c.cursym.Func.Text; last.Link != nil; last = last.Link { + for last = c.cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -209,7 +209,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { switch { case c.cursym.CFunc(): morestack = "runtime.morestackc" - case !c.cursym.Func.Text.From.Sym.NeedCtxt(): + case !c.cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = c.ctxt.Lookup(morestack) @@ -220,7 +220,7 @@ func (c *ctxt7) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { jmp := obj.Appendp(pcdata, c.newprog) jmp.As = AB jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(c.cursym.Func.Text.Link) + jmp.To.SetTarget(c.cursym.Func().Text.Link) jmp.Spadj = +framesize return end @@ -441,13 +441,13 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Historical way to mark NOFRAME. @@ -463,13 +463,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF @@ -477,18 +477,18 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF } } var q *obj.Prog var q1 *obj.Prog var retjmp *obj.LSym - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: - c.cursym.Func.Text = p + c.cursym.Func().Text = p c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { @@ -514,7 +514,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) } c.autosize += extrasize - c.cursym.Func.Locals += extrasize + c.cursym.Func().Locals += extrasize // low 32 bits for autosize // high 32 bits for extrasize @@ -524,14 +524,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Offset = 0 } - if c.autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { if c.ctxt.Debugvlog { - c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func.Text.From.Sym.Name) + c.ctxt.Logf("save suppressed in: %s\n", c.cursym.Func().Text.From.Sym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if cursym.Func.Text.Mark&LEAF != 0 { + if cursym.Func().Text.Mark&LEAF != 0 { cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -641,7 +641,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q1.To.Reg = REGFP } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retjmp = p.To.Sym p.To = obj.Addr{} - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if c.autosize != 0 { p.As = AADD p.From.Type = obj.TYPE_CONST diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go index 9abb31b558..328fb03b24 100644 --- a/src/cmd/internal/obj/dwarf.go +++ b/src/cmd/internal/obj/dwarf.go @@ -46,12 +46,12 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // we expect at the start of a new sequence. stmt := true line := int64(1) - pc := s.Func.Text.Pc + pc := s.Func().Text.Pc var lastpc int64 // last PC written to line table, not last PC in func name := "" prologue, wrotePrologue := false, false // Walk the progs, generating the DWARF table. - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd) // If we're not at a real instruction, keep looping! if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) { @@ -103,7 +103,7 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // text address before the end sequence op. If this isn't done, // GDB will assign a line number of zero the last row in the line // table, which we don't want. - lastlen := uint64(s.Size - (lastpc - s.Func.Text.Pc)) + lastlen := uint64(s.Size - (lastpc - s.Func().Text.Pc)) putpclcdelta(ctxt, dctxt, lines, lastlen, 0) dctxt.AddUint8(lines, 0) // start extended opcode dwarf.Uleb128put(dctxt, lines, 1) @@ -301,26 +301,27 @@ func (ctxt *Link) dwarfSym(s *LSym) (dwarfInfoSym, dwarfLocSym, dwarfRangesSym, if s.Type != objabi.STEXT { ctxt.Diag("dwarfSym of non-TEXT %v", s) } - if s.Func.dwarfInfoSym == nil { - s.Func.dwarfInfoSym = &LSym{ + fn := s.Func() + if fn.dwarfInfoSym == nil { + fn.dwarfInfoSym = &LSym{ Type: objabi.SDWARFFCN, } if ctxt.Flag_locationlists { - s.Func.dwarfLocSym = &LSym{ + fn.dwarfLocSym = &LSym{ Type: objabi.SDWARFLOC, } } - s.Func.dwarfRangesSym = &LSym{ + fn.dwarfRangesSym = &LSym{ Type: objabi.SDWARFRANGE, } - s.Func.dwarfDebugLinesSym = &LSym{ + fn.dwarfDebugLinesSym = &LSym{ Type: objabi.SDWARFLINES, } if s.WasInlined() { - s.Func.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) + fn.dwarfAbsFnSym = ctxt.DwFixups.AbsFuncDwarfSym(s) } } - return s.Func.dwarfInfoSym, s.Func.dwarfLocSym, s.Func.dwarfRangesSym, s.Func.dwarfAbsFnSym, s.Func.dwarfDebugLinesSym + return fn.dwarfInfoSym, fn.dwarfLocSym, fn.dwarfRangesSym, fn.dwarfAbsFnSym, fn.dwarfDebugLinesSym } func (s *LSym) Length(dwarfContext interface{}) int64 { @@ -331,7 +332,7 @@ func (s *LSym) Length(dwarfContext interface{}) int64 { // first instruction (prog) of the specified function. This will // presumably be the file in which the function is defined. func (ctxt *Link) fileSymbol(fn *LSym) *LSym { - p := fn.Func.Text + p := fn.Func().Text if p != nil { f, _ := linkgetlineFromPos(ctxt, p.Pos) fsym := ctxt.Lookup(f) @@ -405,8 +406,8 @@ func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath str if absfn.Size != 0 { ctxt.Diag("internal error: DwarfAbstractFunc double process %v", s) } - if s.Func == nil { - s.Func = new(FuncInfo) + if s.Func() == nil { + s.NewFuncInfo() } scopes, _ := ctxt.DebugInfo(s, absfn, curfn) dwctxt := dwCtxt{ctxt} @@ -527,8 +528,8 @@ func (ft *DwarfFixupTable) SetPrecursorFunc(s *LSym, fn interface{}) { // wrapper generation as opposed to the main inlining phase) it's // possible that we didn't cache the abstract function sym for the // text symbol -- do so now if needed. See issue 38068. - if s.Func != nil && s.Func.dwarfAbsFnSym == nil { - s.Func.dwarfAbsFnSym = absfn + if fn := s.Func(); fn != nil && fn.dwarfAbsFnSym == nil { + fn.dwarfAbsFnSym = absfn } ft.precursor[s] = fnState{precursor: fn, absfn: absfn} diff --git a/src/cmd/internal/obj/ld.go b/src/cmd/internal/obj/ld.go index 4ba52c7785..5d6c000dc6 100644 --- a/src/cmd/internal/obj/ld.go +++ b/src/cmd/internal/obj/ld.go @@ -59,7 +59,7 @@ func mkfwd(sym *LSym) { } i := 0 - for p := sym.Func.Text; p != nil && p.Link != nil; p = p.Link { + for p := sym.Func().Text; p != nil && p.Link != nil; p = p.Link { i-- if i < 0 { i = LOG - 1 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index f14b691802..ae85dbbe4e 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -38,6 +38,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "fmt" + "log" "sync" ) @@ -400,7 +401,7 @@ type LSym struct { P []byte R []Reloc - Func *FuncInfo + Extra *interface{} // *FuncInfo if present Pkg string PkgIdx int32 @@ -433,6 +434,26 @@ type FuncInfo struct { FuncInfoSym *LSym } +// NewFuncInfo allocates and returns a FuncInfo for LSym. +func (s *LSym) NewFuncInfo() *FuncInfo { + if s.Extra != nil { + log.Fatalf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra) + } + f := new(FuncInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// Func returns the *FuncInfo associated with s, or else nil. +func (s *LSym) Func() *FuncInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FuncInfo) + return f +} + type InlMark struct { // When unwinding from an instruction in an inlined body, mark // where we should unwind to. diff --git a/src/cmd/internal/obj/mips/asm0.go b/src/cmd/internal/obj/mips/asm0.go index 6107974745..fd29f9fa21 100644 --- a/src/cmd/internal/obj/mips/asm0.go +++ b/src/cmd/internal/obj/mips/asm0.go @@ -410,7 +410,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -455,7 +455,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -512,7 +512,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [4]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { @@ -529,7 +529,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, c.isRestartable) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) } // isUnsafePoint returns whether p is an unsafe point. @@ -1302,7 +1302,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = OP_JMP(c.opirr(p.As), uint32(v)) if p.To.Sym == nil { - p.To.Sym = c.cursym.Func.Text.From.Sym + p.To.Sym = c.cursym.Func().Text.From.Sym p.To.Offset = p.To.Target().Pc } rel := obj.Addrel(c.cursym) diff --git a/src/cmd/internal/obj/mips/obj0.go b/src/cmd/internal/obj/mips/obj0.go index f19facc00c..135a8df3aa 100644 --- a/src/cmd/internal/obj/mips/obj0.go +++ b/src/cmd/internal/obj/mips/obj0.go @@ -133,11 +133,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a switch for enabling/disabling instruction scheduling nosched := true - if c.cursym.Func.Text == nil || c.cursym.Func.Text.Link == nil { + if c.cursym.Func().Text == nil || c.cursym.Func().Text.Link == nil { return } - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -ctxt.FixedFrameSize() { // Historical way to mark NOFRAME. @@ -153,8 +153,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -162,7 +162,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { * expand BECOME pseudo */ - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -203,7 +203,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { AJAL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case AJMP, @@ -267,7 +267,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -288,19 +288,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize += 4 } - if autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 { - if c.cursym.Func.Text.From.Sym.NoSplit() { + if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.NoSplit() { if ctxt.Debugvlog { ctxt.Logf("save suppressed in: %s\n", c.cursym.Name) } - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } } p.To.Offset = int64(autosize) - ctxt.FixedFrameSize() - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) if p.From.Sym.NoFrame() { break @@ -344,7 +344,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - if c.cursym.Func.Text.From.Sym.Wrapper() && c.cursym.Func.Text.Mark&LEAF == 0 { + if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 @@ -438,7 +438,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction p.To.Sym = nil - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AJMP p.From = obj.Addr{} @@ -540,7 +540,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if c.ctxt.Arch.Family == sys.MIPS { // rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access - for p = c.cursym.Func.Text; p != nil; p = p1 { + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link if p.As != AMOVD { @@ -580,7 +580,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if nosched { // if we don't do instruction scheduling, simply add // NOP after each branch instruction. - for p = c.cursym.Func.Text; p != nil; p = p.Link { + for p = c.cursym.Func().Text; p != nil; p = p.Link { if p.Mark&BRANCH != 0 { c.addnop(p) } @@ -589,10 +589,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // instruction scheduling - q = nil // p - 1 - q1 = c.cursym.Func.Text // top of block - o := 0 // count of instructions - for p = c.cursym.Func.Text; p != nil; p = p1 { + q = nil // p - 1 + q1 = c.cursym.Func().Text // top of block + o := 0 // count of instructions + for p = c.cursym.Func().Text; p != nil; p = p1 { p1 = p.Link o++ if p.Mark&NOSCHED != 0 { @@ -791,7 +791,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -805,7 +805,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { p.As = AJMP p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) p.Mark |= BRANCH // placeholder for q1's jump target diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index fa60c9ad6d..a08de891d3 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -189,8 +189,8 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { // object file, and the Pcln variables haven't been filled in. As such, we // need to check that Pcsp exists, and assume the other pcln variables exist // as well. Tests like test/fixedbugs/issue22200.go demonstrate this issue. - if s.Func != nil && s.Func.Pcln.Pcsp != nil { - pc := &s.Func.Pcln + if fn := s.Func(); fn != nil && fn.Pcln.Pcsp != nil { + pc := &fn.Pcln w.Bytes(pc.Pcsp.P) w.Bytes(pc.Pcfile.P) w.Bytes(pc.Pcline.P) @@ -303,8 +303,8 @@ func (w *writer) Sym(s *LSym) { name = filepath.ToSlash(name) } var align uint32 - if s.Func != nil { - align = uint32(s.Func.Align) + if fn := s.Func(); fn != nil { + align = uint32(fn.Align) } if s.ContentAddressable() { // We generally assume data symbols are natually aligned, @@ -470,38 +470,38 @@ func (w *writer) Aux(s *LSym) { if s.Gotype != nil { w.aux1(goobj.AuxGotype, s.Gotype) } - if s.Func != nil { - w.aux1(goobj.AuxFuncInfo, s.Func.FuncInfoSym) + if fn := s.Func(); fn != nil { + w.aux1(goobj.AuxFuncInfo, fn.FuncInfoSym) - for _, d := range s.Func.Pcln.Funcdata { + for _, d := range fn.Pcln.Funcdata { w.aux1(goobj.AuxFuncdata, d) } - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { - w.aux1(goobj.AuxDwarfInfo, s.Func.dwarfInfoSym) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { + w.aux1(goobj.AuxDwarfInfo, fn.dwarfInfoSym) } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { - w.aux1(goobj.AuxDwarfLoc, s.Func.dwarfLocSym) + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { + w.aux1(goobj.AuxDwarfLoc, fn.dwarfLocSym) } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { - w.aux1(goobj.AuxDwarfRanges, s.Func.dwarfRangesSym) + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { + w.aux1(goobj.AuxDwarfRanges, fn.dwarfRangesSym) } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { - w.aux1(goobj.AuxDwarfLines, s.Func.dwarfDebugLinesSym) + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { + w.aux1(goobj.AuxDwarfLines, fn.dwarfDebugLinesSym) } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { - w.aux1(goobj.AuxPcsp, s.Func.Pcln.Pcsp) + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { + w.aux1(goobj.AuxPcsp, fn.Pcln.Pcsp) } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { - w.aux1(goobj.AuxPcfile, s.Func.Pcln.Pcfile) + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { + w.aux1(goobj.AuxPcfile, fn.Pcln.Pcfile) } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { - w.aux1(goobj.AuxPcline, s.Func.Pcln.Pcline) + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { + w.aux1(goobj.AuxPcline, fn.Pcln.Pcline) } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { - w.aux1(goobj.AuxPcinline, s.Func.Pcln.Pcinline) + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { + w.aux1(goobj.AuxPcinline, fn.Pcln.Pcinline) } - for _, pcSym := range s.Func.Pcln.Pcdata { + for _, pcSym := range fn.Pcln.Pcdata { w.aux1(goobj.AuxPcdata, pcSym) } @@ -571,34 +571,34 @@ func nAuxSym(s *LSym) int { if s.Gotype != nil { n++ } - if s.Func != nil { + if fn := s.Func(); fn != nil { // FuncInfo is an aux symbol, each Funcdata is an aux symbol - n += 1 + len(s.Func.Pcln.Funcdata) - if s.Func.dwarfInfoSym != nil && s.Func.dwarfInfoSym.Size != 0 { + n += 1 + len(fn.Pcln.Funcdata) + if fn.dwarfInfoSym != nil && fn.dwarfInfoSym.Size != 0 { n++ } - if s.Func.dwarfLocSym != nil && s.Func.dwarfLocSym.Size != 0 { + if fn.dwarfLocSym != nil && fn.dwarfLocSym.Size != 0 { n++ } - if s.Func.dwarfRangesSym != nil && s.Func.dwarfRangesSym.Size != 0 { + if fn.dwarfRangesSym != nil && fn.dwarfRangesSym.Size != 0 { n++ } - if s.Func.dwarfDebugLinesSym != nil && s.Func.dwarfDebugLinesSym.Size != 0 { + if fn.dwarfDebugLinesSym != nil && fn.dwarfDebugLinesSym.Size != 0 { n++ } - if s.Func.Pcln.Pcsp != nil && s.Func.Pcln.Pcsp.Size != 0 { + if fn.Pcln.Pcsp != nil && fn.Pcln.Pcsp.Size != 0 { n++ } - if s.Func.Pcln.Pcfile != nil && s.Func.Pcln.Pcfile.Size != 0 { + if fn.Pcln.Pcfile != nil && fn.Pcln.Pcfile.Size != 0 { n++ } - if s.Func.Pcln.Pcline != nil && s.Func.Pcln.Pcline.Size != 0 { + if fn.Pcln.Pcline != nil && fn.Pcln.Pcline.Size != 0 { n++ } - if s.Func.Pcln.Pcinline != nil && s.Func.Pcln.Pcinline.Size != 0 { + if fn.Pcln.Pcinline != nil && fn.Pcln.Pcinline.Size != 0 { n++ } - n += len(s.Func.Pcln.Pcdata) + n += len(fn.Pcln.Pcdata) } return n } @@ -620,15 +620,16 @@ func genFuncInfoSyms(ctxt *Link) { var b bytes.Buffer symidx := int32(len(ctxt.defs)) for _, s := range ctxt.Text { - if s.Func == nil { + fn := s.Func() + if fn == nil { continue } o := goobj.FuncInfo{ - Args: uint32(s.Func.Args), - Locals: uint32(s.Func.Locals), - FuncID: objabi.FuncID(s.Func.FuncID), + Args: uint32(fn.Args), + Locals: uint32(fn.Locals), + FuncID: objabi.FuncID(fn.FuncID), } - pc := &s.Func.Pcln + pc := &fn.Pcln o.Pcsp = makeSymRef(preparePcSym(pc.Pcsp)) o.Pcfile = makeSymRef(preparePcSym(pc.Pcfile)) o.Pcline = makeSymRef(preparePcSym(pc.Pcline)) @@ -670,10 +671,10 @@ func genFuncInfoSyms(ctxt *Link) { isym.Set(AttrIndexed, true) symidx++ infosyms = append(infosyms, isym) - s.Func.FuncInfoSym = isym + fn.FuncInfoSym = isym b.Reset() - dwsyms := []*LSym{s.Func.dwarfRangesSym, s.Func.dwarfLocSym, s.Func.dwarfDebugLinesSym, s.Func.dwarfInfoSym} + dwsyms := []*LSym{fn.dwarfRangesSym, fn.dwarfLocSym, fn.dwarfDebugLinesSym, fn.dwarfInfoSym} for _, s := range dwsyms { if s == nil || s.Size == 0 { continue @@ -744,14 +745,15 @@ func (ctxt *Link) writeSymDebugNamed(s *LSym, name string) { } fmt.Fprintf(ctxt.Bso, "size=%d", s.Size) if s.Type == objabi.STEXT { - fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(s.Func.Args), uint64(s.Func.Locals), uint64(s.Func.FuncID)) + fn := s.Func() + fmt.Fprintf(ctxt.Bso, " args=%#x locals=%#x funcid=%#x", uint64(fn.Args), uint64(fn.Locals), uint64(fn.FuncID)) if s.Leaf() { fmt.Fprintf(ctxt.Bso, " leaf") } } fmt.Fprintf(ctxt.Bso, "\n") if s.Type == objabi.STEXT { - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { fmt.Fprintf(ctxt.Bso, "\t%#04x ", uint(int(p.Pc))) if ctxt.Debugasm > 1 { io.WriteString(ctxt.Bso, p.String()) diff --git a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go index 09d520b4e9..01657dd4f6 100644 --- a/src/cmd/internal/obj/pass.go +++ b/src/cmd/internal/obj/pass.go @@ -118,7 +118,7 @@ func checkaddr(ctxt *Link, p *Prog, a *Addr) { } func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { checkaddr(ctxt, p, &p.From) if p.GetFrom3() != nil { checkaddr(ctxt, p, p.GetFrom3()) @@ -138,7 +138,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { if p.To.Sym != nil { continue } - q := sym.Func.Text + q := sym.Func().Text for q != nil && p.To.Offset != q.Pc { if q.Forwd != nil && p.To.Offset >= q.Forwd.Pc { q = q.Forwd @@ -164,7 +164,7 @@ func linkpatch(ctxt *Link, sym *LSym, newprog ProgAlloc) { } // Collapse series of jumps to jumps. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { if p.To.Target() == nil { continue } diff --git a/src/cmd/internal/obj/pcln.go b/src/cmd/internal/obj/pcln.go index ce0d3714c0..67c4f9a62b 100644 --- a/src/cmd/internal/obj/pcln.go +++ b/src/cmd/internal/obj/pcln.go @@ -35,20 +35,21 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, val := int32(-1) oldval := val - if func_.Func.Text == nil { + fn := func_.Func() + if fn.Text == nil { // Return the emtpy symbol we've built so far. return sym } - pc := func_.Func.Text.Pc + pc := fn.Text.Pc if dbg { - ctxt.Logf("%6x %6d %v\n", uint64(pc), val, func_.Func.Text) + ctxt.Logf("%6x %6d %v\n", uint64(pc), val, fn.Text) } buf := make([]byte, binary.MaxVarintLen32) started := false - for p := func_.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { // Update val. If it's not changing, keep going. val = valfunc(ctxt, func_, val, p, 0, arg) @@ -107,7 +108,7 @@ func funcpctab(ctxt *Link, func_ *LSym, desc string, valfunc func(*Link, *LSym, if started { if dbg { - ctxt.Logf("%6x done\n", uint64(func_.Func.Text.Pc+func_.Size)) + ctxt.Logf("%6x done\n", uint64(fn.Text.Pc+func_.Size)) } v := (func_.Size - pc) / int64(ctxt.Arch.MinLC) if v < 0 { @@ -257,12 +258,12 @@ func pctopcdata(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg i } func linkpcln(ctxt *Link, cursym *LSym) { - pcln := &cursym.Func.Pcln + pcln := &cursym.Func().Pcln pcln.UsedFiles = make(map[goobj.CUFileIndex]struct{}) npcdata := 0 nfuncdata := 0 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { // Find the highest ID of any used PCDATA table. This ignores PCDATA table // that consist entirely of "-1", since that's the assumed default value. // From.Offset is table ID @@ -288,11 +289,12 @@ func linkpcln(ctxt *Link, cursym *LSym) { // Check that all the Progs used as inline markers are still reachable. // See issue #40473. - inlMarkProgs := make(map[*Prog]struct{}, len(cursym.Func.InlMarks)) - for _, inlMark := range cursym.Func.InlMarks { + fn := cursym.Func() + inlMarkProgs := make(map[*Prog]struct{}, len(fn.InlMarks)) + for _, inlMark := range fn.InlMarks { inlMarkProgs[inlMark.p] = struct{}{} } - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if _, ok := inlMarkProgs[p]; ok { delete(inlMarkProgs, p) } @@ -303,7 +305,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { pcinlineState := new(pcinlineState) pcln.Pcinline = funcpctab(ctxt, cursym, "pctoinline", pcinlineState.pctoinline, nil) - for _, inlMark := range cursym.Func.InlMarks { + for _, inlMark := range fn.InlMarks { pcinlineState.setParentPC(ctxt, int(inlMark.id), int32(inlMark.p.Pc)) } pcln.InlTree = pcinlineState.localTree @@ -316,7 +318,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // tabulate which pc and func data we have. havepc := make([]uint32, (npcdata+31)/32) havefunc := make([]uint32, (nfuncdata+31)/32) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As == AFUNCDATA { if (havefunc[p.From.Offset/32]>>uint64(p.From.Offset%32))&1 != 0 { ctxt.Diag("multiple definitions for FUNCDATA $%d", p.From.Offset) @@ -344,7 +346,7 @@ func linkpcln(ctxt *Link, cursym *LSym) { // funcdata if nfuncdata > 0 { - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := fn.Text; p != nil; p = p.Link { if p.As != AFUNCDATA { continue } diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index 6e33f29959..eb54c67f6a 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -81,7 +81,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string continue } found := false - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.As == AFUNCDATA && p.From.Type == TYPE_CONST && p.From.Offset == objabi.FUNCDATA_ArgsPointerMaps { found = true break @@ -89,7 +89,7 @@ func Flushplist(ctxt *Link, plist *Plist, newprog ProgAlloc, myimportpath string } if !found { - p := Appendp(s.Func.Text, newprog) + p := Appendp(s.Func().Text, newprog) p.As = AFUNCDATA p.From.Type = TYPE_CONST p.From.Offset = objabi.FUNCDATA_ArgsPointerMaps @@ -120,15 +120,15 @@ func (ctxt *Link) InitTextSym(s *LSym, flag int) { // func _() { } return } - if s.Func != nil { + if s.Func() != nil { ctxt.Diag("InitTextSym double init for %s", s.Name) } - s.Func = new(FuncInfo) + s.NewFuncInfo() if s.OnList() { ctxt.Diag("symbol %s listed multiple times", s.Name) } name := strings.Replace(s.Name, "\"\"", ctxt.Pkgpath, -1) - s.Func.FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) + s.Func().FuncID = objabi.GetFuncID(name, flag&WRAPPER != 0) s.Set(AttrOnList, true) s.Set(AttrDuplicateOK, flag&DUPOK != 0) s.Set(AttrNoSplit, flag&NOSPLIT != 0) @@ -185,7 +185,7 @@ func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit stack map. func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_StackMapIndex @@ -198,7 +198,7 @@ func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { // Similar to EmitEntryLiveness, but just emit register map. func (ctxt *Link) EmitEntryRegMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) - pcdata.Pos = s.Func.Text.Pos + pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST pcdata.From.Offset = objabi.PCDATA_RegMapIndex diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index c2e8e9e9d0..dcabb3cd6a 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -663,8 +663,8 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { // the function alignment is not changed which might // result in 16 byte alignment but that is still fine. // TODO: alignment on AIX - if ctxt.Headtype != objabi.Haix && cursym.Func.Align < 32 { - cursym.Func.Align = 32 + if ctxt.Headtype != objabi.Haix && cursym.Func().Align < 32 { + cursym.Func().Align = 32 } default: ctxt.Diag("Unexpected alignment: %d for PCALIGN directive\n", a) @@ -673,7 +673,7 @@ func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { } func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -722,7 +722,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for bflag != 0 { bflag = 0 pc = 0 - for p = c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { p.Pc = pc o = c.oplook(p) @@ -784,7 +784,7 @@ func span9(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { bp := c.cursym.P var i int32 var out [6]uint32 - for p := c.cursym.Func.Text.Link; p != nil; p = p.Link { + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { c.pc = p.Pc o = c.oplook(p) if int(o.size) > 4*len(out) { diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index c012762a18..3ab19de602 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -402,13 +402,13 @@ func (c *ctxt9) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxt9{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. @@ -424,8 +424,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -435,7 +435,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var q *obj.Prog var q1 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: @@ -541,7 +541,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -598,7 +598,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: @@ -664,7 +664,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { rel.Type = objabi.R_ADDRPOWER_PCREL } - if !c.cursym.Func.Text.From.Sym.NoSplit() { + if !c.cursym.Func().Text.From.Sym.NoSplit() { q = c.stacksplit(q, autosize) // emit split check } @@ -732,14 +732,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) } - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } @@ -755,7 +755,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Offset = 24 } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -853,7 +853,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 || c.cursym.Name == "runtime.racecallbackthunk" { p.As = ABR p.From = obj.Addr{} @@ -1161,7 +1161,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { var morestacksym *obj.LSym if c.cursym.CFunc() { morestacksym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { morestacksym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { morestacksym = c.ctxt.Lookup("runtime.morestack") diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 841b30d85c..045c2250b5 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -427,7 +427,7 @@ func InvertBranch(as obj.As) obj.As { // instruction. Must be called after progedit. func containsCall(sym *obj.LSym) bool { // CALLs are CALL or JAL(R) with link register LR. - for p := sym.Func.Text; p != nil; p = p.Link { + for p := sym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ACALL: return true @@ -499,12 +499,12 @@ func stackOffset(a *obj.Addr, stacksize int64) { // concrete, real RISC-V instructions or directive pseudo-ops like TEXT, // PCDATA, and FUNCDATA. func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } // Generate the prologue. - text := cursym.Func.Text + text := cursym.Func().Text if text.As != obj.ATEXT { ctxt.Diag("preprocess: found symbol that does not start with TEXT directive") return @@ -538,12 +538,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { stacksize += ctxt.FixedFrameSize() } - cursym.Func.Args = text.To.Val.(int32) - cursym.Func.Locals = int32(stacksize) + cursym.Func().Args = text.To.Val.(int32) + cursym.Func().Locals = int32(stacksize) prologue := text - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check } @@ -567,7 +567,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { prologue = ctxt.EndUnsafePoint(prologue, newprog, -1) } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), X11 @@ -647,13 +647,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Update stack-based offsets. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { stackOffset(&p.From, stacksize) stackOffset(&p.To, stacksize) } // Additional instruction rewriting. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.AGETCALLERPC: if cursym.Leaf() { @@ -733,7 +733,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Rewrite MOV pseudo-instructions. This cannot be done in // progedit, as SP offsets need to be applied before we split // up some of the Addrs. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: rewriteMOV(ctxt, newprog, p) @@ -741,7 +741,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Split immediates larger than 12-bits. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { // $imm, REG, TO case AADDI, AANDI, AORI, AXORI: @@ -858,9 +858,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // a fixed point will be reached). No attempt to handle functions > 2GiB. for { rescan := false - setPCs(cursym.Func.Text, 0) + setPCs(cursym.Func().Text, 0) - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: if p.To.Type != obj.TYPE_BRANCH { @@ -917,7 +917,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // Now that there are no long branches, resolve branch and jump targets. // At this point, instruction rewriting which changes the number of // instructions will break everything--don't do it! - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: switch p.To.Type { @@ -940,7 +940,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } // Validate all instructions - this provides nice error messages. - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { for _, ins := range instructionsForProg(p) { ins.validate(ctxt) } @@ -1068,7 +1068,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.To.Type = obj.TYPE_BRANCH if cursym.CFunc() { p.To.Sym = ctxt.Lookup("runtime.morestackc") - } else if !cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = ctxt.Lookup("runtime.morestack") @@ -1083,7 +1083,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA p.As = AJAL p.To = obj.Addr{Type: obj.TYPE_BRANCH} p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} - p.To.SetTarget(cursym.Func.Text.Link) + p.To.SetTarget(cursym.Func().Text.Link) // placeholder for to_done's jump target p = obj.Appendp(p, newprog) @@ -1926,7 +1926,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var symcode []uint32 - for p := cursym.Func.Text; p != nil; p = p.Link { + for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { case AJALR: if p.To.Sym != nil { @@ -1981,7 +1981,7 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Arch.ByteOrder.PutUint32(p, symcode[i]) } - obj.MarkUnsafePoints(ctxt, cursym.Func.Text, newprog, isUnsafePoint, nil) + obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil) } func isUnsafePoint(p *obj.Prog) bool { diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index cb3a2c3196..da14dd3c41 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -447,7 +447,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Retpoline = false // don't keep printing } - p := cursym.Func.Text + p := cursym.Func().Text if p == nil || p.Link == nil { // handle external functions and ELF section symbols return } @@ -473,7 +473,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.cursym.R[nrelocs0+i] = obj.Reloc{} } c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pc := int64(len(buffer)) if pc != p.Pc { changed = true @@ -504,7 +504,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // We use REGTMP as a scratch register during call injection, // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. - obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint, nil) + obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, nil) } // Return whether p is an unsafe point. diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 625bb0f7b4..3af5425b36 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -205,13 +205,13 @@ func (c *ctxtz) rewriteToUseGot(p *obj.Prog) { func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // TODO(minux): add morestack short-cuts with small fixed frame-size. - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } c := ctxtz{ctxt: ctxt, cursym: cursym, newprog: newprog} - p := c.cursym.Func.Text + p := c.cursym.Func().Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. @@ -227,8 +227,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } } - c.cursym.Func.Args = p.To.Val.(int32) - c.cursym.Func.Locals = int32(textstksiz) + c.cursym.Func().Args = p.To.Val.(int32) + c.cursym.Func().Locals = int32(textstksiz) /* * find leaf subroutines @@ -237,7 +237,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { */ var q *obj.Prog - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: q = p @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case ABL, ABCL: q = p - c.cursym.Func.Text.Mark &^= LEAF + c.cursym.Func().Text.Mark &^= LEAF fallthrough case ABC, @@ -294,7 +294,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var pPre *obj.Prog var pPreempt *obj.Prog wasSplit := false - for p := c.cursym.Func.Text; p != nil; p = p.Link { + for p := c.cursym.Func().Text; p != nil; p = p.Link { pLast = p switch p.As { case obj.ATEXT: @@ -356,19 +356,19 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.Spadj = autosize q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) - } else if c.cursym.Func.Text.Mark&LEAF == 0 { + } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. - c.cursym.Func.Text.Mark |= LEAF + c.cursym.Func().Text.Mark |= LEAF } - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { c.cursym.Set(obj.AttrLeaf, true) break } - if c.cursym.Func.Text.From.Sym.Wrapper() { + if c.cursym.Func().Text.From.Sym.Wrapper() { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 @@ -461,7 +461,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ARET: retTarget := p.To.Sym - if c.cursym.Func.Text.Mark&LEAF != 0 { + if c.cursym.Func().Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} @@ -696,7 +696,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.To.Type = obj.TYPE_BRANCH if c.cursym.CFunc() { p.To.Sym = c.ctxt.Lookup("runtime.morestackc") - } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() { + } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() { p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt") } else { p.To.Sym = c.ctxt.Lookup("runtime.morestack") @@ -709,7 +709,7 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog, p.As = ABR p.To.Type = obj.TYPE_BRANCH - p.To.SetTarget(c.cursym.Func.Text.Link) + p.To.SetTarget(c.cursym.Func().Text.Link) return p } diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index e5d7b2cbfd..0182773f8e 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -358,7 +358,8 @@ func (ctxt *Link) traverseSyms(flag traverseFlag, fn func(*LSym)) { } func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent *LSym, aux *LSym)) { - pc := &fsym.Func.Pcln + fninfo := fsym.Func() + pc := &fninfo.Pcln if flag&traverseAux == 0 { // NB: should it become necessary to walk aux sym reloc references // without walking the aux syms themselves, this can be changed. @@ -389,7 +390,8 @@ func (ctxt *Link) traverseFuncAux(flag traverseFlag, fsym *LSym, fn func(parent fn(fsym, filesym) } } - dwsyms := []*LSym{fsym.Func.dwarfRangesSym, fsym.Func.dwarfLocSym, fsym.Func.dwarfDebugLinesSym, fsym.Func.dwarfInfoSym} + + dwsyms := []*LSym{fninfo.dwarfRangesSym, fninfo.dwarfLocSym, fninfo.dwarfDebugLinesSym, fninfo.dwarfInfoSym} for _, dws := range dwsyms { if dws == nil || dws.Size == 0 { continue diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index a9e093a8ad..f7f66a1255 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -182,14 +182,14 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { return p } - framesize := s.Func.Text.To.Offset + framesize := s.Func().Text.To.Offset if framesize < 0 { panic("bad framesize") } - s.Func.Args = s.Func.Text.To.Val.(int32) - s.Func.Locals = int32(framesize) + s.Func().Args = s.Func().Text.To.Val.(int32) + s.Func().Locals = int32(framesize) - if s.Func.Text.From.Sym.Wrapper() { + if s.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -222,7 +222,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { Offset: 0, // panic.argp } - p := s.Func.Text + p := s.Func().Text p = appendp(p, AMOVD, gpanic, regAddr(REG_R0)) p = appendp(p, AGet, regAddr(REG_R0)) @@ -245,7 +245,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } if framesize > 0 { - p := s.Func.Text + p := s.Func().Text p = appendp(p, AGet, regAddr(REG_SP)) p = appendp(p, AI32Const, constAddr(framesize)) p = appendp(p, AI32Sub) @@ -260,8 +260,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction var tableIdxs []uint64 tablePC := int64(0) - base := ctxt.PosTable.Pos(s.Func.Text.Pos).Base() - for p := s.Func.Text; p != nil; p = p.Link { + base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base() + for p := s.Func().Text; p != nil; p = p.Link { prevBase := base base = ctxt.PosTable.Pos(p.Pos).Base() switch p.As { @@ -313,8 +313,8 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { tableIdxs = append(tableIdxs, uint64(numResumePoints)) s.Size = pc + 1 - if !s.Func.Text.From.Sym.NoSplit() { - p := s.Func.Text + if !s.Func().Text.From.Sym.NoSplit() { + p := s.Func().Text if framesize <= objabi.StackSmall { // small stack: SP <= stackguard @@ -352,7 +352,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { p = appendp(p, AIf) p = appendp(p, obj.ACALL, constAddr(0)) - if s.Func.Text.From.Sym.NeedCtxt() { + if s.Func().Text.From.Sym.NeedCtxt() { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack} } else { p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt} @@ -365,7 +365,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { var entryPointLoopBranches []*obj.Prog var unwindExitBranches []*obj.Prog currentDepth := 0 - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -562,7 +562,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.From.Name { case obj.NAME_AUTO: p.From.Offset += int64(framesize) @@ -712,7 +712,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } { - p := s.Func.Text + p := s.Func().Text if len(unwindExitBranches) > 0 { p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack for _, b := range unwindExitBranches { @@ -749,7 +749,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { currentDepth = 0 blockDepths := make(map[*obj.Prog]int) - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case ABlock, ALoop, AIf: currentDepth++ @@ -850,7 +850,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { hasLocalSP = true var regUsed [MAXREG - MINREG]bool - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.From.Reg != 0 { regUsed[p.From.Reg-MINREG] = true } @@ -896,7 +896,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { updateLocalSP(w) } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { switch p.As { case AGet: if p.From.Type != obj.TYPE_REG { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 4940c79eaa..c412f4945d 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2050,7 +2050,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("x86 tables not initialized, call x86.instinit first") } - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { p.To.SetTarget(p) } @@ -2085,7 +2085,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } var count int64 // rough count of number of instructions - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { count++ p.Back = branchShort // use short branches first time through if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { @@ -2113,7 +2113,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { c = 0 var pPrev *obj.Prog nops = nops[:0] - for p := s.Func.Text; p != nil; p = p.Link { + for p := s.Func().Text; p != nil; p = p.Link { c0 := c c = pjc.padJump(ctxt, s, p, c) @@ -2227,7 +2227,7 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { // the first instruction.) return p.From.Index == REG_TLS } - obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil) + obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) } } diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 18a6afcd77..e11fa13f65 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -563,11 +563,11 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if cursym.Func.Text == nil || cursym.Func.Text.Link == nil { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return } - p := cursym.Func.Text + p := cursym.Func().Text autoffset := int32(p.To.Offset) if autoffset < 0 { autoffset = 0 @@ -602,12 +602,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } textarg := int64(p.To.Val.(int32)) - cursym.Func.Args = int32(textarg) - cursym.Func.Locals = int32(p.To.Offset) + cursym.Func().Args = int32(textarg) + cursym.Func().Locals = int32(p.To.Offset) // TODO(rsc): Remove. - if ctxt.Arch.Family == sys.I386 && cursym.Func.Locals < 0 { - cursym.Func.Locals = 0 + if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 { + cursym.Func().Locals = 0 } // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'. @@ -642,7 +642,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p = load_g_cx(ctxt, p, newprog) // load g into CX } - if !cursym.Func.Text.From.Sym.NoSplit() { + if !cursym.Func().Text.From.Sym.NoSplit() { p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check } @@ -690,7 +690,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.To.Reg = REG_BP } - if cursym.Func.Text.From.Sym.Wrapper() { + if cursym.Func().Text.From.Sym.Wrapper() { // if g._panic != nil && g._panic.argp == FP { // g._panic.argp = bottom-of-frame // } @@ -808,7 +808,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } var deltasp int32 - for p = cursym.Func.Text; p != nil; p = p.Link { + for p = cursym.Func().Text; p != nil; p = p.Link { pcsize := ctxt.Arch.RegSize switch p.From.Name { case obj.NAME_AUTO: @@ -1103,7 +1103,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA end := ctxt.EndUnsafePoint(jls, newprog, -1) var last *obj.Prog - for last = cursym.Func.Text; last.Link != nil; last = last.Link { + for last = cursym.Func().Text; last.Link != nil; last = last.Link { } // Now we are at the end of the function, but logically @@ -1117,7 +1117,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA pcdata = ctxt.StartUnsafePoint(pcdata, newprog) call := obj.Appendp(pcdata, newprog) - call.Pos = cursym.Func.Text.Pos + call.Pos = cursym.Func().Text.Pos call.As = obj.ACALL call.To.Type = obj.TYPE_BRANCH call.To.Name = obj.NAME_EXTERN @@ -1125,7 +1125,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA switch { case cursym.CFunc(): morestack = "runtime.morestackc" - case !cursym.Func.Text.From.Sym.NeedCtxt(): + case !cursym.Func().Text.From.Sym.NeedCtxt(): morestack = "runtime.morestack_noctxt" } call.To.Sym = ctxt.Lookup(morestack) @@ -1144,7 +1144,7 @@ func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgA jmp := obj.Appendp(pcdata, newprog) jmp.As = obj.AJMP jmp.To.Type = obj.TYPE_BRANCH - jmp.To.SetTarget(cursym.Func.Text.Link) + jmp.To.SetTarget(cursym.Func().Text.Link) jmp.Spadj = +framesize jls.To.SetTarget(call) -- cgit v1.3 From e981936855383883edb5fcc85a196c485b15f0f9 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Tue, 6 Oct 2020 17:08:31 -0400 Subject: cmd/internal/obj/ppc64,cmd/asm/internal/asm/testdata: fix up ppc64 testcases When a fix was made at the end of the last release related to NOPs, it was discovered that the ppc64.s testcase was out of date and contained comments that weren't being processed. Essentially the instructions in that test were being assembled but there was no verification that the encodings weres correct. The ppc64enc.s file was mostly complete and included the valid encodings for verification. This change moves ppc64enc.s to ppc64.s and adds the instructions that were missing. This also adds a minor fix to asm9.go on the assembly of the addex that was discovered during this testing. Change-Id: Iaada1563b137849ad195fa88f32ecc9ab3e1e95f Reviewed-on: https://go-review.googlesource.com/c/go/+/260217 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Reviewed-by: Keith Randall Trust: Lynn Boger --- src/cmd/asm/internal/asm/endtoend_test.go | 4 - src/cmd/asm/internal/asm/testdata/ppc64.s | 2006 +++++++++----------------- src/cmd/asm/internal/asm/testdata/ppc64enc.s | 642 --------- src/cmd/internal/obj/ppc64/asm9.go | 2 +- 4 files changed, 707 insertions(+), 1947 deletions(-) delete mode 100644 src/cmd/asm/internal/asm/testdata/ppc64enc.s (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index b21e3156ae..decf5391db 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -442,10 +442,6 @@ func TestPPC64EndToEnd(t *testing.T) { testEndToEnd(t, "ppc64", "ppc64") } -func TestPPC64Encoder(t *testing.T) { - testEndToEnd(t, "ppc64", "ppc64enc") -} - func TestRISCVEncoder(t *testing.T) { testEndToEnd(t, "riscv64", "riscvenc") } diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index ba64d84a35..2b1191c44b 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -2,1311 +2,717 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// This input was created by taking the instruction productions in -// the old assembler's (9a's) grammar and hand-writing complete -// instructions for each rule, to guarantee we cover the same space. +// This contains the majority of valid opcode combinations +// available in cmd/internal/obj/ppc64/asm9.go with +// their valid instruction encodings. #include "../../../../../runtime/textflag.h" -TEXT foo(SB),DUPOK|NOSPLIT,$0 +TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + // move constants + MOVD $1, R3 // 38600001 + MOVD $-1, R4 // 3880ffff + MOVD $65535, R5 // 6005ffff + MOVD $65536, R6 // 64060001 + MOVD $-32767, R5 // 38a08001 + MOVD $-32768, R6 // 38c08000 + MOVD $1234567, R5 // 6405001260a5d687 + MOVW $1, R3 // 38600001 + MOVW $-1, R4 // 3880ffff + MOVW $65535, R5 // 6005ffff + MOVW $65536, R6 // 64060001 + MOVW $-32767, R5 // 38a08001 + MOVW $-32768, R6 // 38c08000 + MOVW $1234567, R5 // 6405001260a5d687 + MOVD 8(R3), R4 // e8830008 + MOVD (R3)(R4), R5 // 7ca4182a + MOVW 4(R3), R4 // e8830006 + MOVW (R3)(R4), R5 // 7ca41aaa + MOVWZ 4(R3), R4 // 80830004 + MOVWZ (R3)(R4), R5 // 7ca4182e + MOVH 4(R3), R4 // a8830004 + MOVH (R3)(R4), R5 // 7ca41aae + MOVHZ 2(R3), R4 // a0830002 + MOVHZ (R3)(R4), R5 // 7ca41a2e + MOVB 1(R3), R4 // 888300017c840774 + MOVB (R3)(R4), R5 // 7ca418ae7ca50774 + MOVBZ 1(R3), R4 // 88830001 + MOVBZ (R3)(R4), R5 // 7ca418ae + MOVDBR (R3)(R4), R5 // 7ca41c28 + MOVWBR (R3)(R4), R5 // 7ca41c2c + MOVHBR (R3)(R4), R5 // 7ca41e2c + + MOVDU 8(R3), R4 // e8830009 + MOVDU (R3)(R4), R5 // 7ca4186a + MOVWU (R3)(R4), R5 // 7ca41aea + MOVWZU 4(R3), R4 // 84830004 + MOVWZU (R3)(R4), R5 // 7ca4186e + MOVHU 2(R3), R4 // ac830002 + MOVHU (R3)(R4), R5 // 7ca41aee + MOVHZU 2(R3), R4 // a4830002 + MOVHZU (R3)(R4), R5 // 7ca41a6e + MOVBU 1(R3), R4 // 8c8300017c840774 + MOVBU (R3)(R4), R5 // 7ca418ee7ca50774 + MOVBZU 1(R3), R4 // 8c830001 + MOVBZU (R3)(R4), R5 // 7ca418ee + + MOVD R4, 8(R3) // f8830008 + MOVD R5, (R3)(R4) // 7ca4192a + MOVW R4, 4(R3) // 90830004 + MOVW R5, (R3)(R4) // 7ca4192e + MOVH R4, 2(R3) // b0830002 + MOVH R5, (R3)(R4) // 7ca41b2e + MOVB R4, 1(R3) // 98830001 + MOVB R5, (R3)(R4) // 7ca419ae + MOVDBR R5, (R3)(R4) // 7ca41d28 + MOVWBR R5, (R3)(R4) // 7ca41d2c + MOVHBR R5, (R3)(R4) // 7ca41f2c + + MOVDU R4, 8(R3) // f8830009 + MOVDU R5, (R3)(R4) // 7ca4196a + MOVWU R4, 4(R3) // 94830004 + MOVWU R5, (R3)(R4) // 7ca4196e + MOVHU R4, 2(R3) // b4830002 + MOVHU R5, (R3)(R4) // 7ca41b6e + MOVBU R4, 1(R3) // 9c830001 + MOVBU R5, (R3)(R4) // 7ca419ee + + ADD $1, R3 // 38630001 + ADD $1, R3, R4 // 38830001 + ADD $-1, R4 // 3884ffff + ADD $-1, R4, R5 // 38a4ffff + ADD $65535, R5 // 601fffff7cbf2a14 + ADD $65535, R5, R6 // 601fffff7cdf2a14 + ADD $65536, R6 // 3cc60001 + ADD $65536, R6, R7 // 3ce60001 + ADD $-32767, R5 // 38a58001 + ADD $-32767, R5, R4 // 38858001 + ADD $-32768, R6 // 38c68000 + ADD $-32768, R6, R5 // 38a68000 + ADD $1234567, R5 // 641f001263ffd6877cbf2a14 + ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 + ADDEX R3, R5, $3, R6 // 7cc32f54 + ADDIS $8, R3 // 3c630008 + ADDIS $1000, R3, R4 // 3c8303e8 + + ANDCC $1, R3 // 70630001 + ANDCC $1, R3, R4 // 70640001 + ANDCC $-1, R4 // 3be0ffff7fe42039 + ANDCC $-1, R4, R5 // 3be0ffff7fe52039 + ANDCC $65535, R5 // 70a5ffff + ANDCC $65535, R5, R6 // 70a6ffff + ANDCC $65536, R6 // 74c60001 + ANDCC $65536, R6, R7 // 74c70001 + ANDCC $-32767, R5 // 3be080017fe52839 + ANDCC $-32767, R5, R4 // 3be080017fe42839 + ANDCC $-32768, R6 // 3be080007fe63039 + ANDCC $-32768, R5, R6 // 3be080007fe62839 + ANDCC $1234567, R5 // 641f001263ffd6877fe52839 + ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839 + ANDISCC $1, R3 // 74630001 + ANDISCC $1000, R3, R4 // 746403e8 + + OR $1, R3 // 60630001 + OR $1, R3, R4 // 60640001 + OR $-1, R4 // 3be0ffff7fe42378 + OR $-1, R4, R5 // 3be0ffff7fe52378 + OR $65535, R5 // 60a5ffff + OR $65535, R5, R6 // 60a6ffff + OR $65536, R6 // 64c60001 + OR $65536, R6, R7 // 64c70001 + OR $-32767, R5 // 3be080017fe52b78 + OR $-32767, R5, R6 // 3be080017fe62b78 + OR $-32768, R6 // 3be080007fe63378 + OR $-32768, R6, R7 // 3be080007fe73378 + OR $1234567, R5 // 641f001263ffd6877fe52b78 + OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 + ORIS $255, R3, R4 + + XOR $1, R3 // 68630001 + XOR $1, R3, R4 // 68640001 + XOR $-1, R4 // 3be0ffff7fe42278 + XOR $-1, R4, R5 // 3be0ffff7fe52278 + XOR $65535, R5 // 68a5ffff + XOR $65535, R5, R6 // 68a6ffff + XOR $65536, R6 // 6cc60001 + XOR $65536, R6, R7 // 6cc70001 + XOR $-32767, R5 // 3be080017fe52a78 + XOR $-32767, R5, R6 // 3be080017fe62a78 + XOR $-32768, R6 // 3be080007fe63278 + XOR $-32768, R6, R7 // 3be080007fe73278 + XOR $1234567, R5 // 641f001263ffd6877fe52a78 + XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 + XORIS $15, R3, R4 + + // TODO: the order of CR operands don't match + CMP R3, R4 // 7c232000 + CMPU R3, R4 // 7c232040 + CMPW R3, R4 // 7c032000 + CMPWU R3, R4 // 7c032040 + CMPB R3,R4,R4 // 7c6423f8 + CMPEQB R3,R4,CR6 // 7f0321c0 + + // TODO: constants for ADDC? + ADD R3, R4 // 7c841a14 + ADD R3, R4, R5 // 7ca41a14 + ADDC R3, R4 // 7c841814 + ADDC R3, R4, R5 // 7ca41814 + ADDE R3, R4 // 7c841914 + ADDECC R3, R4 // 7c841915 + ADDEV R3, R4 // 7c841d14 + ADDEVCC R3, R4 // 7c841d15 + ADDV R3, R4 // 7c841e14 + ADDVCC R3, R4 // 7c841e15 + ADDCCC R3, R4, R5 // 7ca41815 + ADDME R3, R4 // 7c8301d4 + ADDMECC R3, R4 // 7c8301d5 + ADDMEV R3, R4 // 7c8305d4 + ADDMEVCC R3, R4 // 7c8305d5 + ADDCV R3, R4 // 7c841c14 + ADDCVCC R3, R4 // 7c841c15 + ADDZE R3, R4 // 7c830194 + ADDZECC R3, R4 // 7c830195 + ADDZEV R3, R4 // 7c830594 + ADDZEVCC R3, R4 // 7c830595 + SUBME R3, R4 // 7c8301d0 + SUBMECC R3, R4 // 7c8301d1 + SUBMEV R3, R4 // 7c8305d0 + SUBZE R3, R4 // 7c830190 + SUBZECC R3, R4 // 7c830191 + SUBZEV R3, R4 // 7c830590 + SUBZEVCC R3, R4 // 7c830591 + + AND R3, R4 // 7c841838 + AND R3, R4, R5 // 7c851838 + ANDN R3, R4, R5 // 7c851878 + ANDCC R3, R4, R5 // 7c851839 + OR R3, R4 // 7c841b78 + OR R3, R4, R5 // 7c851b78 + ORN R3, R4, R5 // 7c851b38 + ORCC R3, R4, R5 // 7c851b79 + XOR R3, R4 // 7c841a78 + XOR R3, R4, R5 // 7c851a78 + XORCC R3, R4, R5 // 7c851a79 + NAND R3, R4, R5 // 7c851bb8 + NANDCC R3, R4, R5 // 7c851bb9 + EQV R3, R4, R5 // 7c851a38 + EQVCC R3, R4, R5 // 7c851a39 + NOR R3, R4, R5 // 7c8518f8 + NORCC R3, R4, R5 // 7c8518f9 + + SUB R3, R4 // 7c832050 + SUB R3, R4, R5 // 7ca32050 + SUBC R3, R4 // 7c832010 + SUBC R3, R4, R5 // 7ca32010 + + MULLW R3, R4 // 7c8419d6 + MULLW R3, R4, R5 // 7ca419d6 + MULLW $10, R3 // 1c63000a + MULLW $10000000, R3 // 641f009863ff96807c7f19d6 + + MULLWCC R3, R4, R5 // 7ca419d7 + MULHW R3, R4, R5 // 7ca41896 + + MULHWU R3, R4, R5 // 7ca41816 + MULLD R3, R4 // 7c8419d2 + MULLD R4, R4, R5 // 7ca421d2 + MULLD $20, R4 // 1c840014 + MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 + + MULLDCC R3, R4, R5 // 7ca419d3 + MULHD R3, R4, R5 // 7ca41892 + MULHDCC R3, R4, R5 // 7ca41893 + + MULLWV R3, R4 // 7c841dd6 + MULLWV R3, R4, R5 // 7ca41dd6 + MULLWVCC R3, R4, R5 // 7ca41dd7 + MULHWUCC R3, R4, R5 // 7ca41817 + MULLDV R3, R4, R5 // 7ca41dd2 + MULLDVCC R3, R4, R5 // 7ca41dd3 + + DIVD R3,R4 // 7c841bd2 + DIVD R3, R4, R5 // 7ca41bd2 + DIVDCC R3,R4, R5 // 7ca41bd3 + DIVDU R3, R4, R5 // 7ca41b92 + DIVDV R3, R4, R5 // 7ca41fd2 + DIVDUCC R3, R4, R5 // 7ca41b93 + DIVDVCC R3, R4, R5 // 7ca41fd3 + DIVDUV R3, R4, R5 // 7ca41f92 + DIVDUVCC R3, R4, R5 // 7ca41f93 + DIVDE R3, R4, R5 // 7ca41b52 + DIVDECC R3, R4, R5 // 7ca41b53 + DIVDEU R3, R4, R5 // 7ca41b12 + DIVDEUCC R3, R4, R5 // 7ca41b13 + + REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050 + REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050 + REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050 + REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050 + + MADDHD R3,R4,R5,R6 // 10c32170 + MADDHDU R3,R4,R5,R6 // 10c32171 + + MODUD R3, R4, R5 // 7ca41a12 + MODUW R3, R4, R5 // 7ca41a16 + MODSD R3, R4, R5 // 7ca41e12 + MODSW R3, R4, R5 // 7ca41e16 + + SLW $8, R3, R4 // 5464402e + SLW R3, R4, R5 // 7c851830 + SLWCC R3, R4 // 7c841831 + SLD $16, R3, R4 // 786483e4 + SLD R3, R4, R5 // 7c851836 + SLDCC R3, R4 // 7c841837 + + SRW $8, R3, R4 // 5464c23e + SRW R3, R4, R5 // 7c851c30 + SRWCC R3, R4 // 7c841c31 + SRAW $8, R3, R4 // 7c644670 + SRAW R3, R4, R5 // 7c851e30 + SRAWCC R3, R4 // 7c841e31 + SRD $16, R3, R4 // 78648402 + SRD R3, R4, R5 // 7c851c36 + SRDCC R3, R4 // 7c841c37 + SRAD $16, R3, R4 // 7c648674 + SRAD R3, R4, R5 // 7c851e34 + SRDCC R3, R4 // 7c841c37 + ROTLW $16, R3, R4 // 5464803e + ROTLW R3, R4, R5 // 5c85183e + EXTSWSLI $3, R4, R5 // 7c851ef4 + RLWMI $7, R3, $65535, R6 // 50663c3e + RLWMICC $7, R3, $65535, R6 // 50663c3f + RLWNM $3, R4, $7, R6 // 54861f7e + RLWNMCC $3, R4, $7, R6 // 54861f7f + RLDMI $0, R4, $7, R6 // 7886076c + RLDMICC $0, R4, $7, R6 // 7886076d + RLDIMI $0, R4, $7, R6 // 788601cc + RLDIMICC $0, R4, $7, R6 // 788601cd + RLDC $0, R4, $15, R6 // 78860728 + RLDCCC $0, R4, $15, R6 // 78860729 + RLDCL $0, R4, $7, R6 // 78860770 + RLDCLCC $0, R4, $15, R6 // 78860721 + RLDCR $0, R4, $-16, R6 // 788606f2 + RLDCRCC $0, R4, $-16, R6 // 788606f3 + RLDICL $0, R4, $15, R6 // 788603c0 + RLDICLCC $0, R4, $15, R6 // 788603c1 + RLDICR $0, R4, $15, R6 // 788603c4 + RLDICRCC $0, R4, $15, R6 // 788603c5 + RLDIC $0, R4, $15, R6 // 788603c8 + RLDICCC $0, R4, $15, R6 // 788603c9 + CLRLSLWI $16, R5, $8, R4 // 54a4422e + CLRLSLDI $24, R4, $2, R3 // 78831588 + + BEQ 0(PC) // 41820000 + BEQ CR1,0(PC) // 41860000 + BGE 0(PC) // 40800000 + BGE CR2,0(PC) // 40880000 + BGT 4(PC) // 41810010 + BGT CR3,4(PC) // 418d0010 + BLE 0(PC) // 40810000 + BLE CR4,0(PC) // 40910000 + BLT 0(PC) // 41800000 + BLT CR5,0(PC) // 41940000 + BNE 0(PC) // 40820000 + BLT CR6,0(PC) // 41980000 + JMP 8(PC) // 48000010 -//inst: -// -// load ints and bytes -// -// LMOVW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, R2 - -// LMOVW addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW foo<>+4(SB), R2 - MOVW 16(R1), R2 - -// LMOVW regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW (R1), R2 - MOVW (R1+R2), R3 // MOVW (R1)(R2*1), R3 - -// LMOVB rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, R2 - -// LMOVB addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB foo<>+3(SB), R2 - MOVB 16(R1), R2 - -// LMOVB regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB (R1), R2 - MOVB (R1+R2), R3 // MOVB (R1)(R2*1), R3 - -// -// load floats -// -// LFMOV addr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD foo<>+4(SB), F2 - FMOVD 16(R1), F2 - -// LFMOV regaddr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD (R1), F2 - -// LFMOV fimm ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD $0.1, F2 // FMOVD $(0.10000000000000001), F2 - -// LFMOV freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, F2 - -// LFMOV freg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F2, foo<>+4(SB) - FMOVD F2, 16(R1) - -// LFMOV freg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F2, (R1) - -// -// store ints and bytes -// -// LMOVW rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, foo<>+3(SB) - MOVW R1, 16(R2) - -// LMOVW rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, (R1) - MOVW R1, (R2+R3) // MOVW R1, (R2)(R3*1) - -// LMOVB rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB R1, foo<>+3(SB) - MOVB R1, 16(R2) - -// LMOVB rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVB R1, (R1) - MOVB R1, (R2+R3) // MOVB R1, (R2)(R3*1) -// -// store floats -// -// LMOVW freg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, foo<>+4(SB) - FMOVD F1, 16(R2) - -// LMOVW freg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - FMOVD F1, (R1) - -// -// floating point status -// -// LMOVW fpscr ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL FPSCR, F1 - -// LMOVW freg ',' fpscr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL F1, FPSCR - -// LMOVW freg ',' imm ',' fpscr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - MOVFL F1, $4, FPSCR - -// LMOVW fpscr ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL FPSCR, CR0 - -// LMTFSB imm ',' con -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -//TODO 9a doesn't work MTFSB0 $4, 4 - -// -// field moves (mtcrf) -// -// LMOVW rreg ',' imm ',' lcr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } -// TODO 9a doesn't work MOVFL R1,$4,CR - -// LMOVW rreg ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, CR1 - -// LMOVW rreg ',' lcr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, CR - -// -// integer operations -// logical instructions -// shift instructions -// unary instructions -// -// LADDW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADD R1, R2, R3 - -// LADDW imm ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADD $1, R2, R3 - -// LADDW rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } -//TODO 9a trouble ADD R1, $2, R3 maybe swap rreg and imm - -// LADDW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADD R1, R2 - -// LADDW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADD $4, R1 - -// LLOGW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - ADDE R1, R2, R3 - -// LLOGW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - ADDE R1, R2 - -// LSHW rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - SLW R1, R2, R3 - -// LSHW rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW R1, R2 - -// LSHW imm ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } - SLW $4, R1, R2 - -// LSHW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW $4, R1 - -// LABS rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - SLW $4, R1 - -// LABS rreg -// { -// outcode(int($1), &$2, 0, &$2); -// } - SUBME R1 // SUBME R1, R1 - -// -// multiply-accumulate -// -// LMA rreg ',' sreg ',' rreg -// { -// outcode(int($1), &$2, int($4), &$6); -// } -//TODO this instruction is undefined in lex.go LMA R1, R2, R3 NOT SUPPORTED (called MAC) - -// -// move immediate: macro for cau+or, addi, addis, and other combinations -// -// LMOVW imm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW $1, R1 - -// LMOVW ximm ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW $1, R1 - MOVW $foo(SB), R1 - -// condition register operations -// -// LCROP cbit ',' cbit -// { -// outcode(int($1), &$2, int($4.Reg), &$4); -// } -//TODO 9a trouble CREQV 1, 2 delete? liblink encodes like a divide (maybe wrong too) - -// LCROP cbit ',' con ',' cbit -// { -// outcode(int($1), &$2, int($4), &$6); -// } -//TODO 9a trouble CREQV 1, 2, 3 - -// -// condition register moves -// move from machine state register -// -// LMOVW creg ',' creg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVFL CR0, CR1 - -// LMOVW psr ',' creg // TODO: should psr should be fpscr -// { -// outcode(int($1), &$2, 0, &$4); -// } -//TODO 9a trouble MOVW FPSCR, CR1 - -// LMOVW lcr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW CR, R1 - -// LMOVW psr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW SPR(0), R1 - MOVW SPR(7), R1 - -// LMOVW xlreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW LR, R1 - MOVW CTR, R1 - -// LMOVW rreg ',' xlreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, LR - MOVW R1, CTR - -// LMOVW creg ',' psr // TODO doesn't exist -// { -// outcode(int($1), &$2, 0, &$4); -// } -//TODO 9a trouble MOVW CR1, SPR(7) - -// LMOVW rreg ',' psr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVW R1, SPR(7) - -// -// branch, branch conditional -// branch conditional register -// branch conditional to count register -// -// LBRA rel -// { -// outcode(int($1), &nullgen, 0, &$2); -// } - BEQ CR1, 2(PC) -label0: - BR 1(PC) // JMP 1(PC) - BEQ CR1, 2(PC) - BR label0+0 // JMP 62 - -// LBRA addr -// { -// outcode(int($1), &nullgen, 0, &$2); -// } - BEQ CR1, 2(PC) - BR LR // JMP LR - BEQ CR1, 2(PC) -// BR 0(R1) // TODO should work - BEQ CR1, 2(PC) - BR foo+0(SB) // JMP foo(SB) - -// LBRA '(' xlreg ')' -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - BEQ CR1, 2(PC) - BR (CTR) // JMP CTR - -// LBRA ',' rel // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$3); -// } -// LBRA ',' addr // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$3); -// } -// LBRA ',' '(' xlreg ')' // asm doesn't support the leading comma -// { -// outcode(int($1), &nullgen, 0, &$4); -// } -// LBRA creg ',' rel -// { -// outcode(int($1), &$2, 0, &$4); -// } -label1: - BEQ CR1, 1(PC) - BEQ CR1, label1 // BEQ CR1, 72 - -// LBRA creg ',' addr // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &$2, 0, &$4); -// } - -// LBRA creg ',' '(' xlreg ')' // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &$2, 0, &$5); -// } - -// LBRA con ',' rel // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &nullgen, int($2), &$4); -// } - -// LBRA con ',' addr // TODO DOES NOT WORK in 9a -// { -// outcode(int($1), &nullgen, int($2), &$4); -// } - -// LBRA con ',' '(' xlreg ')' -// { -// outcode(int($1), &nullgen, int($2), &$5); -// } -// BC 4, (CTR) // TODO - should work - -// LBRA con ',' con ',' rel -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$6); -// } -// BC 3, 4, label1 // TODO - should work - -// LBRA con ',' con ',' addr // TODO mystery -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$6); -// } -//TODO 9a trouble BC 3, 3, 4(R1) - -// LBRA con ',' con ',' '(' xlreg ')' -// { -// var g obj.Addr -// g = nullgen; -// g.Type = obj.TYPE_CONST; -// g.Offset = $2; -// outcode(int($1), &g, int(REG_R0+$4), &$7); -// } - BC 3, 3, (LR) // BC $3, R3, LR - -// -// conditional trap // TODO NOT DEFINED -// TODO these instructions are not in lex.go -// -// LTRAP rreg ',' sreg -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -// LTRAP imm ',' sreg -// { -// outcode(int($1), &$2, int($4), &nullgen); -// } -// LTRAP rreg comma -// { -// outcode(int($1), &$2, 0, &nullgen); -// } -// LTRAP comma -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - -// -// floating point operate -// -// LFCONV freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FABS F1, F2 - -// LFADD freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FADD F1, F2 - -// LFADD freg ',' freg ',' freg -// { -// outcode(int($1), &$2, int($4.Reg), &$6); -// } - FADD F1, F2, F3 - -// LFMA freg ',' freg ',' freg ',' freg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - FMADD F1, F2, F3, F4 - -// LFCMP freg ',' freg -// { -// outcode(int($1), &$2, 0, &$4); -// } - FCMPU F1, F2 - -// LFCMP freg ',' freg ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } -// FCMPU F1, F2, CR0 - -// FTDIV FRA, FRB, BF produces -// ftdiv BF, FRA, FRB - FTDIV F1,F2,$7 - -// FTSQRT FRB, BF produces -// ftsqrt BF, FRB - FTSQRT F2,$7 - -// FCFID -// FCFIDS - - FCFID F2,F3 - FCFIDCC F3,F3 - FCFIDS F2,F3 - FCFIDSCC F2,F3 - -// -// CMP -// -// LCMP rreg ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - CMP R1, R2 - -// LCMP rreg ',' imm -// { -// outcode(int($1), &$2, 0, &$4); -// } - CMP R1, $4 - -// LCMP rreg ',' rreg ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } - CMP R1, R2, CR0 // CMP R1, CR0, R2 - -// LCMP rreg ',' imm ',' creg -// { -// outcode(int($1), &$2, int($6.Reg), &$4); -// } - CMP R1, $4, CR0 // CMP R1, CR0, $4 - -// CMPB RS,RB,RA produces -// cmpb RA,RS,RB - CMPB R2,R2,R1 - -// CMPEQB RA,RB,BF produces -// cmpeqb BF,RA,RB - CMPEQB R1, R2, CR0 - -// -// rotate extended mnemonics map onto other shift instructions -// - - ROTL $12,R2,R3 - ROTL R2,R3,R4 - ROTLW $9,R2,R3 - ROTLW R2,R3,R4 - -// -// rotate and mask -// -// LRLWM imm ',' rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDC $4, R1, $16, R2 - -// LRLWM imm ',' rreg ',' mask ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDC $26, R1, 4, 5, R2 // RLDC $26, R1, $201326592, R2 - -// LRLWM rreg ',' rreg ',' imm ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLDCL R1, R2, $7, R3 - -// LRLWM rreg ',' rreg ',' mask ',' rreg -// { -// outgcode(int($1), &$2, int($4.Reg), &$6, &$8); -// } - RLWMI R1, R2, 4, 5, R3 // RLWMI R1, R2, $201326592, R3 - - -// opcodes added with constant shift counts, not masks - - RLDICR $3, R2, $24, R4 - - RLDICL $1, R2, $61, R6 - - RLDIMI $7, R2, $52, R7 - -// opcodes for right and left shifts, const and reg shift counts - - SLD $4, R3, R4 - SLD R2, R3, R4 - SLW $4, R3, R4 - SLW R2, R3, R4 - SRD $8, R3, R4 - SRD R2, R3, R4 - SRW $8, R3, R4 - SRW R2, R3, R4 - -// -// load/store multiple -// -// LMOVMW addr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } -// MOVMW foo+0(SB), R2 // TODO TLS broke this! - MOVMW 4(R1), R2 - -// LMOVMW rreg ',' addr -// { -// outcode(int($1), &$2, 0, &$4); -// } -// MOVMW R1, foo+0(SB) // TODO TLS broke this! - MOVMW R1, 4(R2) - -// -// various indexed load/store -// indexed unary (eg, cache clear) -// -// LXLD regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - LSW (R1), R2 - LSW (R1+R2), R3 // LSW (R1)(R2*1), R3 - -// LXLD regaddr ',' imm ',' rreg -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - LSW (R1), $1, R2 - LSW (R1+R2), $1, R3 // LSW (R1)(R2*1), $1, R3 - -// LXST rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - STSW R1, (R2) - STSW R1, (R2+R3) // STSW R1, (R2)(R3*1) - -// LXST rreg ',' imm ',' regaddr -// { -// outgcode(int($1), &$2, 0, &$4, &$6); -// } - STSW R1, $1, (R2) - STSW R1, $1, (R2+R3) // STSW R1, $1, (R2)(R3*1) - -// LXMV regaddr ',' rreg -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVHBR (R1), R2 - MOVHBR (R1+R2), R3 // MOVHBR (R1)(R2*1), R3 - -// LXMV rreg ',' regaddr -// { -// outcode(int($1), &$2, 0, &$4); -// } - MOVHBR R1, (R2) - MOVHBR R1, (R2+R3) // MOVHBR R1, (R2)(R3*1) - -// LXOP regaddr -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - DCBF (R1) - DCBF (R1+R2) // DCBF (R1)(R2*1) - DCBF (R1), $1 - DCBF (R1)(R2*1), $1 - DCBT (R1), $1 - DCBT (R1)(R2*1), $1 - -// LDMX (RB)(RA*1),RT produces -// ldmx RT,RA,RB - LDMX (R2)(R1*1), R3 - -// Population count, X-form -// RS,RA produces -// RA,RS - POPCNTD R1,R2 - POPCNTW R1,R2 - POPCNTB R1,R2 - -// Copysign - FCPSGN F1,F2,F3 - -// Random number generator, X-form -// DARN L,RT produces -// darn RT,L - DARN $1, R1 - -// Copy/Paste facility -// RB,RA produces -// RA,RB - COPY R2,R1 - PASTECC R2,R1 - -// Modulo signed/unsigned double/word X-form -// RA,RB,RT produces -// RT,RA,RB - MODUD R3,R4,R5 - MODUW R3,R4,R5 - MODSD R3,R4,R5 - MODSW R3,R4,R5 - -// VMX instructions - -// Described as: -// , -// produces -// - -// Vector load, VX-form -// (RB)(RA*1),VRT produces -// VRT,RA,RB - LVEBX (R1)(R2*1), V0 - LVEHX (R3)(R4*1), V1 - LVEWX (R5)(R6*1), V2 - LVX (R7)(R8*1), V3 - LVXL (R9)(R10*1), V4 - LVSL (R11)(R12*1), V5 - LVSR (R14)(R15*1), V6 - -// Vector store, VX-form -// VRT,(RB)(RA*1) produces -// VRT,RA,RB - STVEBX V31, (R1)(R2*1) - STVEHX V30, (R2)(R3*1) - STVEWX V29, (R4)(R5*1) - STVX V28, (R6)(R7*1) - STVXL V27, (R9)(R9*1) - -// Vector AND, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VAND V10, V9, V8 - VANDC V15, V14, V13 - VNAND V19, V18, V17 - -// Vector OR, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VOR V26, V25, V24 - VORC V23, V22, V21 - VNOR V20, V19, V18 - VXOR V17, V16, V15 - VEQV V14, V13, V12 - -// Vector ADD, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VADDUBM V3, V2, V1 - VADDUHM V3, V2, V1 - VADDUWM V3, V2, V1 - VADDUDM V3, V2, V1 - VADDUQM V3, V2, V1 - VADDCUQ V3, V2, V1 - VADDCUW V3, V2, V1 - VADDUBS V3, V2, V1 - VADDUHS V3, V2, V1 - VADDUWS V3, V2, V1 - VADDSBS V3, V2, V1 - VADDSHS V3, V2, V1 - VADDSWS V3, V2, V1 - -// Vector ADD extended, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VADDEUQM V4, V3, V2, V1 - VADDECUQ V4, V3, V2, V1 - -// Vector multiply, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VMULESB V2, V3, V1 - VMULOSB V2, V3, V1 - VMULEUB V2, V3, V1 - VMULOUB V2, V3, V1 - VMULESH V2, V3, V1 - VMULOSH V2, V3, V1 - VMULEUH V2, V3, V1 - VMULOUH V2, V3, V1 - VMULESW V2, V3, V1 - VMULOSW V2, V3, V1 - VMULEUW V2, V3, V1 - VMULOUW V2, V3, V1 - VMULUWM V2, V3, V1 - -// Vector polynomial multiply-sum, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VPMSUMB V2, V3, V1 - VPMSUMH V2, V3, V1 - VPMSUMW V2, V3, V1 - VPMSUMD V2, V3, V1 - -// Vector multiply-sum, VA-form -// VRA, VRB, VRC, VRT produces -// VRT, VRA, VRB, VRC - VMSUMUDM V4, V3, V2, V1 - -// Vector SUB, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VSUBUBM V3, V2, V1 - VSUBUHM V3, V2, V1 - VSUBUWM V3, V2, V1 - VSUBUDM V3, V2, V1 - VSUBUQM V3, V2, V1 - VSUBCUQ V3, V2, V1 - VSUBCUW V3, V2, V1 - VSUBUBS V3, V2, V1 - VSUBUHS V3, V2, V1 - VSUBUWS V3, V2, V1 - VSUBSBS V3, V2, V1 - VSUBSHS V3, V2, V1 - VSUBSWS V3, V2, V1 - -// Vector SUB extended, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VSUBEUQM V4, V3, V2, V1 - VSUBECUQ V4, V3, V2, V1 - -// Vector rotate, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VRLB V2, V1, V0 - VRLH V2, V1, V0 - VRLW V2, V1, V0 - VRLD V2, V1, V0 - -// Vector shift, VX-form -// VRA,VRB,VRT -// VRT,VRA,VRB - VSLB V2, V1, V0 - VSLH V2, V1, V0 - VSLW V2, V1, V0 - VSL V2, V1, V0 - VSLO V2, V1, V0 - VSRB V2, V1, V0 - VSRH V2, V1, V0 - VSRW V2, V1, V0 - VSR V2, V1, V0 - VSRO V2, V1, V0 - VSLD V2, V1, V0 - VSRD V2, V1, V0 - VSRAB V2, V1, V0 - VSRAH V2, V1, V0 - VSRAW V2, V1, V0 - VSRAD V2, V1, V0 - -// Vector shift by octect immediate, VA-form with SHB 4-bit field -// SHB,VRA,VRB,VRT produces -// VRT,VRA,VRB,SHB - VSLDOI $4, V2, V1, V0 - -// Vector merge odd and even word -// VRA,VRB,VRT produces -// VRT,VRA,VRB - - VMRGOW V4,V5,V6 - VMRGEW V4,V5,V6 - -// Vector count, VX-form -// VRB,VRT produces -// VRT,VRB - VCLZB V4, V5 - VCLZH V4, V5 - VCLZW V4, V5 - VCLZD V4, V5 - VPOPCNTB V4, V5 - VPOPCNTH V4, V5 - VPOPCNTW V4, V5 - VPOPCNTD V4, V5 - -// Vector compare, VC-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB -// * Note: 'CC' suffix denotes Rc=1 -// i.e. vcmpequb. v3,v1,v2 equals VCMPEQUBCC V1,V2,V3 - VCMPEQUB V3, V2, V1 - VCMPEQUBCC V3, V2, V1 - VCMPEQUH V3, V2, V1 - VCMPEQUHCC V3, V2, V1 - VCMPEQUW V3, V2, V1 - VCMPEQUWCC V3, V2, V1 - VCMPEQUD V3, V2, V1 - VCMPEQUDCC V3, V2, V1 - VCMPGTUB V3, V2, V1 - VCMPGTUBCC V3, V2, V1 - VCMPGTUH V3, V2, V1 - VCMPGTUHCC V3, V2, V1 - VCMPGTUW V3, V2, V1 - VCMPGTUWCC V3, V2, V1 - VCMPGTUD V3, V2, V1 - VCMPGTUDCC V3, V2, V1 - VCMPGTSB V3, V2, V1 - VCMPGTSBCC V3, V2, V1 - VCMPGTSH V3, V2, V1 - VCMPGTSHCC V3, V2, V1 - VCMPGTSW V3, V2, V1 - VCMPGTSWCC V3, V2, V1 - VCMPGTSD V3, V2, V1 - VCMPGTSDCC V3, V2, V1 - VCMPNEZB V3, V2, V1 - VCMPNEZBCC V3, V2, V1 - VCMPNEB V3, V2, V1 - VCMPNEBCC V3, V2, V1 - VCMPNEH V3, V2, V1 - VCMPNEHCC V3, V2, V1 - VCMPNEW V3, V2, V1 - VCMPNEWCC V3, V2, V1 - -// Vector permute, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VPERM V3, V2, V1, V0 - VPERMXOR V3, V2, V1, V0 - VPERMR V3, V2, V1, V0 - -// Vector bit permute, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VBPERMQ V3,V1,V2 - VBPERMD V3,V1,V2 - -// Vector select, VA-form -// VRA,VRB,VRC,VRT produces -// VRT,VRA,VRB,VRC - VSEL V3, V2, V1, V0 - -// Vector splat, VX-form with 4-bit UIM field -// UIM,VRB,VRT produces -// VRT,VRB,UIM - VSPLTB $15, V1, V0 - VSPLTH $7, V1, V0 - VSPLTW $3, V1, V0 - -// Vector splat immediate signed, VX-form with 5-bit SIM field -// SIM,VRT produces -// VRT,SIM - VSPLTISB $31, V4 - VSPLTISH $31, V4 - VSPLTISW $31, V4 - -// Vector AES cipher, VX-form -// VRA,VRB,VRT produces -// VRT,VRA,VRB - VCIPHER V3, V2, V1 - VCIPHERLAST V3, V2, V1 - VNCIPHER V3, V2, V1 - VNCIPHERLAST V3, V2, V1 - -// Vector AES subbytes, VX-form -// VRA,VRT produces -// VRT,VRA - VSBOX V2, V1 - -// Vector SHA, VX-form with ST bit field and 4-bit SIX field -// SIX,VRA,ST,VRT produces -// VRT,VRA,ST,SIX - VSHASIGMAW $15, V1, $1, V0 - VSHASIGMAD $15, V1, $1, V0 - -// VSX instructions -// Described as: -// , -// produces -// - -// VSX load, XX1-form -// (RB)(RA*1),XT produces -// XT,RA,RB - LXVD2X (R1)(R2*1), VS0 - LXVW4X (R1)(R2*1), VS0 - LXVH8X (R1)(R2*1), VS0 - LXVB16X (R1)(R2*1), VS0 - LXVDSX (R1)(R2*1), VS0 - LXSDX (R1)(R2*1), VS0 - LXSIWAX (R1)(R2*1), VS0 - LXSIWZX (R1)(R2*1), VS0 - -// VSX load with length X-form (also left-justified) - LXVL R3,R4, VS0 - LXVLL R3,R4, VS0 - LXVX R3,R4, VS0 -// VSX load, DQ-form -// DQ(RA), XS produces -// XS, DQ(RA) - LXV 32752(R1), VS0 - -// VSX store, XX1-form -// XS,(RB)(RA*1) produces -// XS,RA,RB - STXVD2X VS63, (R1)(R2*1) - STXVW4X VS63, (R1)(R2*1) - STXVH8X VS63, (R1)(R2*1) - STXVB16X VS63, (R1)(R2*1) - STXSDX VS63, (R1)(R2*1) - STXSIWX VS63, (R1)(R2*1) - -// VSX store, DQ-form -// DQ(RA), XS produces -// XS, DQ(RA) - STXV VS63, -32752(R1) - -// VSX store with length, X-form (also left-justified) - STXVL VS0, R3,R4 - STXVLL VS0, R3,R4 - STXVX VS0, R3,R4 - -// VSX move from VSR, XX1-form -// XS,RA produces -// RA,XS -// Extended mnemonics accept VMX and FP registers as sources - MFVSRD VS0, R1 - MFVSRWZ VS33, R1 - MFVSRLD VS63, R1 - MFVRD V0, R1 - MFFPRD F0, R1 - -// VSX move to VSR, XX1-form -// RA,XT produces -// XT,RA -// Extended mnemonics accept VMX and FP registers as targets - MTVSRD R1, VS0 - MTVSRWA R1, VS31 - MTVSRWZ R1, VS63 - MTVSRDD R1, R2, VS0 - MTVSRWS R1, VS32 - MTVRD R1, V13 - MTFPRD R1, F24 - -// VSX AND, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXLAND VS0,VS1,VS32 - XXLANDC VS0,VS1,VS32 - XXLEQV VS0,VS1,VS32 - XXLNAND VS0,VS1,VS32 - -// VSX OR, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXLORC VS0,VS1,VS32 - XXLNOR VS0,VS1,VS32 - XXLORQ VS0,VS1,VS32 - XXLXOR VS0,VS1,VS32 - XXLOR VS0,VS1,VS32 - -// VSX select, XX4-form -// XA,XB,XC,XT produces -// XT,XA,XB,XC - XXSEL VS0,VS1,VS3,VS32 - -// VSX merge, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXMRGHW VS0,VS1,VS32 - XXMRGLW VS0,VS1,VS32 - -// VSX splat, XX2-form -// XB,UIM,XT produces -// XT,XB,UIM - XXSPLTW VS0,$3,VS32 - XXSPLTIB $26,VS0 - -// VSX permute, XX3-form -// XA,XB,XT produces -// XT,XA,XB - XXPERM VS0,VS1,VS32 - -// VSX permute, XX3-form -// XA,XB,DM,XT produces -// XT,XA,XB,DM - XXPERMDI VS0,VS1,$3,VS32 - -// VSX shift, XX3-form -// XA,XB,SHW,XT produces -// XT,XA,XB,SHW - XXSLDWI VS0,VS1,$3,VS32 - -// VSX byte-reverse XX2-form -// XB,XT produces -// XT,XB - XXBRQ VS0,VS1 - XXBRD VS0,VS1 - XXBRW VS0,VS1 - XXBRH VS0,VS1 - -// VSX scalar FP-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVDPSP VS0,VS32 - XSCVSPDP VS0,VS32 - XSCVDPSPN VS0,VS32 - XSCVSPDPN VS0,VS32 - -// VSX vector FP-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVDPSP VS0,VS32 - XVCVSPDP VS0,VS32 - -// VSX scalar FP-integer conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVDPSXDS VS0,VS32 - XSCVDPSXWS VS0,VS32 - XSCVDPUXDS VS0,VS32 - XSCVDPUXWS VS0,VS32 - -// VSX scalar integer-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XSCVSXDDP VS0,VS32 - XSCVUXDDP VS0,VS32 - XSCVSXDSP VS0,VS32 - XSCVUXDSP VS0,VS32 - -// VSX vector FP-integer conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVDPSXDS VS0,VS32 - XVCVDPSXWS VS0,VS32 - XVCVDPUXDS VS0,VS32 - XVCVDPUXWS VS0,VS32 - XVCVSPSXDS VS0,VS32 - XVCVSPSXWS VS0,VS32 - XVCVSPUXDS VS0,VS32 - XVCVSPUXWS VS0,VS32 - -// VSX scalar integer-FP conversion, XX2-form -// XB,XT produces -// XT,XB - XVCVSXDDP VS0,VS32 - XVCVSXWDP VS0,VS32 - XVCVUXDDP VS0,VS32 - XVCVUXWDP VS0,VS32 - XVCVSXDSP VS0,VS32 - XVCVSXWSP VS0,VS32 - XVCVUXDSP VS0,VS32 - XVCVUXWSP VS0,VS32 - -// Multiply-Add High Doubleword -// RA,RB,RC,RT produces -// RT,RA,RB,RC - MADDHD R1,R2,R3,R4 - MADDHDU R1,R2,R3,R4 - -// Add Extended using alternate carry bit -// ADDEX RA,RB,CY,RT produces -// addex RT, RA, RB, CY - ADDEX R1, R2, $0, R3 - -// Immediate-shifted operations -// ADDIS SI, RA, RT produces -// addis RT, RA, SI - ADDIS $8, R3, R4 - ADDIS $-1, R3, R4 - -// ANDISCC UI, RS, RA produces -// andis. RA, RS, UI - ANDISCC $7, R4, R5 - -// ORIS UI, RS, RA produces -// oris RA, RS, UI - ORIS $4, R2, R3 - -// XORIS UI, RS, RA produces -// xoris RA, RS, UI - XORIS $1, R1, R2 - -// -// NOP -// -// LNOP comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } NOP - -// LNOP rreg comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } NOP R2 + NOP F2 + NOP $4 + + CRAND CR1, CR2, CR3 // 4c620a02 + CRANDN CR1, CR2, CR3 // 4c620902 + CREQV CR1, CR2, CR3 // 4c620a42 + CRNAND CR1, CR2, CR3 // 4c6209c2 + CRNOR CR1, CR2, CR3 // 4c620842 + CROR CR1, CR2, CR3 // 4c620b82 + CRORN CR1, CR2, CR3 // 4c620b42 + CRXOR CR1, CR2, CR3 // 4c620982 + + ISEL $1, R3, R4, R5 // 7ca3205e + ISEL $0, R3, R4, R5 // 7ca3201e + ISEL $2, R3, R4, R5 // 7ca3209e + ISEL $3, R3, R4, R5 // 7ca320de + ISEL $4, R3, R4, R5 // 7ca3211e + POPCNTB R3, R4 // 7c6400f4 + POPCNTW R3, R4 // 7c6402f4 + POPCNTD R3, R4 // 7c6403f4 + + PASTECC R3, R4 // 7c23270d + COPY R3, R4 // 7c23260c + + // load-and-reserve + LBAR (R4)(R3*1),$1,R5 // 7ca32069 + LBAR (R4),$0,R5 // 7ca02068 + LBAR (R3),R5 // 7ca01868 + LHAR (R4)(R3*1),$1,R5 // 7ca320e9 + LHAR (R4),$0,R5 // 7ca020e8 + LHAR (R3),R5 // 7ca018e8 + LWAR (R4)(R3*1),$1,R5 // 7ca32029 + LWAR (R4),$0,R5 // 7ca02028 + LWAR (R3),R5 // 7ca01828 + LDAR (R4)(R3*1),$1,R5 // 7ca320a9 + LDAR (R4),$0,R5 // 7ca020a8 + LDAR (R3),R5 // 7ca018a8 + + STBCCC R3, (R4)(R5) // 7c65256d + STWCCC R3, (R4)(R5) // 7c65212d + STDCCC R3, (R4)(R5) // 7c6521ad + STHCCC R3, (R4)(R5) + STSW R3, (R4)(R5) + + SYNC // 7c0004ac + ISYNC // 4c00012c + LWSYNC // 7c2004ac + + DARN $1, R5 // 7ca105e6 + + DCBF (R3)(R4) // 7c0418ac + DCBI (R3)(R4) // 7c041bac + DCBST (R3)(R4) // 7c04186c + DCBZ (R3)(R4) // 7c041fec + DCBT (R3)(R4) // 7c041a2c + ICBI (R3)(R4) // 7c041fac + + // float constants + FMOVD $(0.0), F1 // f0210cd0 + FMOVD $(-0.0), F1 // f0210cd0fc200850 + + FMOVD 8(R3), F1 // c8230008 + FMOVD (R3)(R4), F1 // 7c241cae + FMOVDU 8(R3), F1 // cc230008 + FMOVDU (R3)(R4), F1 // 7c241cee + FMOVS 4(R3), F1 // c0230004 + FMOVS (R3)(R4), F1 // 7c241c2e + FMOVSU 4(R3), F1 // c4230004 + FMOVSU (R3)(R4), F1 // 7c241c6e + + FMOVD F1, 8(R3) // d8230008 + FMOVD F1, (R3)(R4) // 7c241dae + FMOVDU F1, 8(R3) // dc230008 + FMOVDU F1, (R3)(R4) // 7c241dee + FMOVS F1, 4(R3) // d0230004 + FMOVS F1, (R3)(R4) // 7c241d2e + FMOVSU F1, 4(R3) // d4230004 + FMOVSU F1, (R3)(R4) // 7c241d6e + FADD F1, F2 // fc42082a + FADD F1, F2, F3 // fc62082a + FADDCC F1, F2, F3 // fc62082b + FADDS F1, F2 // ec42082a + FADDS F1, F2, F3 // ec62082a + FADDSCC F1, F2, F3 // ec62082b + FSUB F1, F2 // fc420828 + FSUB F1, F2, F3 // fc620828 + FSUBCC F1, F2, F3 // fc620829 + FSUBS F1, F2 // ec420828 + FSUBS F1, F2, F3 // ec620828 + FSUBCC F1, F2, F3 // fc620829 + FMUL F1, F2 // fc420072 + FMUL F1, F2, F3 // fc620072 + FMULCC F1, F2, F3 // fc620073 + FMULS F1, F2 // ec420072 + FMULS F1, F2, F3 // ec620072 + FMULSCC F1, F2, F3 // ec620073 + FDIV F1, F2 // fc420824 + FDIV F1, F2, F3 // fc620824 + FDIVCC F1, F2, F3 // fc620825 + FDIVS F1, F2 // ec420824 + FDIVS F1, F2, F3 // ec620824 + FDIVSCC F1, F2, F3 // ec620825 + FMADD F1, F2, F3, F4 // fc8110fa + FMADDCC F1, F2, F3, F4 // fc8110fb + FMADDS F1, F2, F3, F4 // ec8110fa + FMADDSCC F1, F2, F3, F4 // ec8110fb + FMSUB F1, F2, F3, F4 // fc8110f8 + FMSUBCC F1, F2, F3, F4 // fc8110f9 + FMSUBS F1, F2, F3, F4 // ec8110f8 + FMSUBSCC F1, F2, F3, F4 // ec8110f9 + FNMADD F1, F2, F3, F4 // fc8110fe + FNMADDCC F1, F2, F3, F4 // fc8110ff + FNMADDS F1, F2, F3, F4 // ec8110fe + FNMADDSCC F1, F2, F3, F4 // ec8110ff + FNMSUB F1, F2, F3, F4 // fc8110fc + FNMSUBCC F1, F2, F3, F4 // fc8110fd + FNMSUBS F1, F2, F3, F4 // ec8110fc + FNMSUBSCC F1, F2, F3, F4 // ec8110fd + FSEL F1, F2, F3, F4 // fc8110ee + FSELCC F1, F2, F3, F4 // fc8110ef + FABS F1, F2 // fc400a10 + FABSCC F1, F2 // fc400a11 + FNEG F1, F2 // fc400850 + FABSCC F1, F2 // fc400a11 + FRSP F1, F2 // fc400818 + FRSPCC F1, F2 // fc400819 + FCTIW F1, F2 // fc40081c + FCTIWCC F1, F2 // fc40081d + FCTIWZ F1, F2 // fc40081e + FCTIWZCC F1, F2 // fc40081f + FCTID F1, F2 // fc400e5c + FCTIDCC F1, F2 // fc400e5d + FCTIDZ F1, F2 // fc400e5e + FCTIDZCC F1, F2 // fc400e5f + FCFID F1, F2 // fc400e9c + FCFIDCC F1, F2 // fc400e9d + FCFIDU F1, F2 // fc400f9c + FCFIDUCC F1, F2 // fc400f9d + FCFIDS F1, F2 // ec400e9c + FCFIDSCC F1, F2 // ec400e9d + FRES F1, F2 // ec400830 + FRESCC F1, F2 // ec400831 + FRIM F1, F2 // fc400bd0 + FRIMCC F1, F2 // fc400bd1 + FRIP F1, F2 // fc400b90 + FRIPCC F1, F2 // fc400b91 + FRIZ F1, F2 // fc400b50 + FRIZCC F1, F2 // fc400b51 + FRIN F1, F2 // fc400b10 + FRINCC F1, F2 // fc400b11 + FRSQRTE F1, F2 // fc400834 + FRSQRTECC F1, F2 // fc400835 + FSQRT F1, F2 // fc40082c + FSQRTCC F1, F2 // fc40082d + FSQRTS F1, F2 // ec40082c + FSQRTSCC F1, F2 // ec40082d + FCPSGN F1, F2 // fc420810 + FCPSGNCC F1, F2 // fc420811 + FCMPO F1, F2 // fc011040 + FCMPU F1, F2 // fc011000 + LVX (R3)(R4), V1 // 7c2418ce + LVXL (R3)(R4), V1 // 7c241ace + LVSL (R3)(R4), V1 // 7c24180c + LVSR (R3)(R4), V1 // 7c24184c + LVEBX (R3)(R4), V1 // 7c24180e + LVEHX (R3)(R4), V1 // 7c24184e + LVEWX (R3)(R4), V1 // 7c24188e + STVX V1, (R3)(R4) // 7c2419ce + STVXL V1, (R3)(R4) // 7c241bce + STVEBX V1, (R3)(R4) // 7c24190e + STVEHX V1, (R3)(R4) // 7c24194e + STVEWX V1, (R3)(R4) // 7c24198e + + VAND V1, V2, V3 // 10611404 + VANDC V1, V2, V3 // 10611444 + VNAND V1, V2, V3 // 10611584 + VOR V1, V2, V3 // 10611484 + VORC V1, V2, V3 // 10611544 + VXOR V1, V2, V3 // 106114c4 + VNOR V1, V2, V3 // 10611504 + VEQV V1, V2, V3 // 10611684 + VADDUBM V1, V2, V3 // 10611000 + VADDUHM V1, V2, V3 // 10611040 + VADDUWM V1, V2, V3 // 10611080 + VADDUDM V1, V2, V3 // 106110c0 + VADDUQM V1, V2, V3 // 10611100 + VADDCUQ V1, V2, V3 // 10611140 + VADDCUW V1, V2, V3 // 10611180 + VADDUBS V1, V2, V3 // 10611200 + VADDUHS V1, V2, V3 // 10611240 + VADDUWS V1, V2, V3 // 10611280 + VSUBUBM V1, V2, V3 // 10611400 + VSUBUHM V1, V2, V3 // 10611440 + VSUBUWM V1, V2, V3 // 10611480 + VSUBUDM V1, V2, V3 // 106114c0 + VSUBUQM V1, V2, V3 // 10611500 + VSUBCUQ V1, V2, V3 // 10611540 + VSUBCUW V1, V2, V3 // 10611580 + VSUBUBS V1, V2, V3 // 10611600 + VSUBUHS V1, V2, V3 // 10611640 + VSUBUWS V1, V2, V3 // 10611680 + VSUBSBS V1, V2, V3 // 10611700 + VSUBSHS V1, V2, V3 // 10611740 + VSUBSWS V1, V2, V3 // 10611780 + VSUBEUQM V1, V2, V3, V4 // 108110fe + VSUBECUQ V1, V2, V3, V4 // 108110ff + VMULESB V1, V2, V3 // 10611308 + VMULOSB V1, V2, V3 // 10611108 + VMULEUB V1, V2, V3 // 10611208 + VMULOUB V1, V2, V3 // 10611008 + VMULESH V1, V2, V3 // 10611348 + VMULOSH V1, V2, V3 // 10611148 + VMULEUH V1, V2, V3 // 10611248 + VMULOUH V1, V2, V3 // 10611048 + VMULESH V1, V2, V3 // 10611348 + VMULOSW V1, V2, V3 // 10611188 + VMULEUW V1, V2, V3 // 10611288 + VMULOUW V1, V2, V3 // 10611088 + VMULUWM V1, V2, V3 // 10611089 + VPMSUMB V1, V2, V3 // 10611408 + VPMSUMH V1, V2, V3 // 10611448 + VPMSUMW V1, V2, V3 // 10611488 + VPMSUMD V1, V2, V3 // 106114c8 + VMSUMUDM V1, V2, V3, V4 // 108110e3 + VRLB V1, V2, V3 // 10611004 + VRLH V1, V2, V3 // 10611044 + VRLW V1, V2, V3 // 10611084 + VRLD V1, V2, V3 // 106110c4 + VSLB V1, V2, V3 // 10611104 + VSLH V1, V2, V3 // 10611144 + VSLW V1, V2, V3 // 10611184 + VSL V1, V2, V3 // 106111c4 + VSLO V1, V2, V3 // 1061140c + VSRB V1, V2, V3 // 10611204 + VSRH V1, V2, V3 // 10611244 + VSRW V1, V2, V3 // 10611284 + VSR V1, V2, V3 // 106112c4 + VSRO V1, V2, V3 // 1061144c + VSLD V1, V2, V3 // 106115c4 + VSRAB V1, V2, V3 // 10611304 + VSRAH V1, V2, V3 // 10611344 + VSRAW V1, V2, V3 // 10611384 + VSRAD V1, V2, V3 // 106113c4 + VSLDOI $3, V1, V2, V3 // 106110ec + VCLZB V1, V2 // 10400f02 + VCLZH V1, V2 // 10400f42 + VCLZW V1, V2 // 10400f82 + VCLZD V1, V2 // 10400fc2 + VPOPCNTB V1, V2 // 10400f03 + VPOPCNTH V1, V2 // 10400f43 + VPOPCNTW V1, V2 // 10400f83 + VPOPCNTD V1, V2 // 10400fc3 + VCMPEQUB V1, V2, V3 // 10611006 + VCMPEQUBCC V1, V2, V3 // 10611406 + VCMPEQUH V1, V2, V3 // 10611046 + VCMPEQUHCC V1, V2, V3 // 10611446 + VCMPEQUW V1, V2, V3 // 10611086 + VCMPEQUWCC V1, V2, V3 // 10611486 + VCMPEQUD V1, V2, V3 // 106110c7 + VCMPEQUDCC V1, V2, V3 // 106114c7 + VCMPGTUB V1, V2, V3 // 10611206 + VCMPGTUBCC V1, V2, V3 // 10611606 + VCMPGTUH V1, V2, V3 // 10611246 + VCMPGTUHCC V1, V2, V3 // 10611646 + VCMPGTUW V1, V2, V3 // 10611286 + VCMPGTUWCC V1, V2, V3 // 10611686 + VCMPGTUD V1, V2, V3 // 106112c7 + VCMPGTUDCC V1, V2, V3 // 106116c7 + VCMPGTSB V1, V2, V3 // 10611306 + VCMPGTSBCC V1, V2, V3 // 10611706 + VCMPGTSH V1, V2, V3 // 10611346 + VCMPGTSHCC V1, V2, V3 // 10611746 + VCMPGTSW V1, V2, V3 // 10611386 + VCMPGTSWCC V1, V2, V3 // 10611786 + VCMPGTSD V1, V2, V3 // 106113c7 + VCMPGTSDCC V1, V2, V3 // 106117c7 + VCMPNEZB V1, V2, V3 // 10611107 + VCMPNEZBCC V1, V2, V3 // 10611507 + VCMPNEB V1, V2, V3 // 10611007 + VCMPNEBCC V1, V2, V3 // 10611407 + VCMPNEH V1, V2, V3 // 10611047 + VCMPNEHCC V1, V2, V3 // 10611447 + VCMPNEW V1, V2, V3 // 10611087 + VCMPNEWCC V1, V2, V3 // 10611487 + VPERM V1, V2, V3, V4 // 108110eb + VPERMR V1, V2, V3, V4 // 108110fb + VPERMXOR V1, V2, V3, V4 // 108110ed + VBPERMQ V1, V2, V3 // 1061154c + VBPERMD V1, V2, V3 // 106115cc + VSEL V1, V2, V3, V4 // 108110ea + VSPLTB $1, V1, V2 // 10410a0c + VSPLTH $1, V1, V2 // 10410a4c + VSPLTW $1, V1, V2 // 10410a8c + VSPLTISB $1, V1 // 1021030c + VSPLTISW $1, V1 // 1021038c + VSPLTISH $1, V1 // 1021034c + VCIPHER V1, V2, V3 // 10611508 + VCIPHERLAST V1, V2, V3 // 10611509 + VNCIPHER V1, V2, V3 // 10611548 + VNCIPHERLAST V1, V2, V3 // 10611549 + VSBOX V1, V2 // 104105c8 + VSHASIGMAW $1, V1, $15, V2 // 10418e82 + VSHASIGMAD $2, V1, $15, V2 // 104196c2 + + LXVD2X (R3)(R4), VS1 // 7c241e98 + LXVDSX (R3)(R4), VS1 // 7c241a98 + LXVH8X (R3)(R4), VS1 // 7c241e58 + LXVB16X (R3)(R4), VS1 // 7c241ed8 + LXVW4X (R3)(R4), VS1 // 7c241e18 + LXV 16(R3), VS1 // f4230011 + LXVL R3, R4, VS1 // 7c23221a + LXVLL R3, R4, VS1 // 7c23225a + LXVX R3, R4, VS1 // 7c232218 + LXSDX (R3)(R4), VS1 // 7c241c98 + STXVD2X VS1, (R3)(R4) // 7c241f98 + STXV VS1,16(R3) // f4230015 + STXVL VS1, R3, R4 // 7c23231a + STXVLL VS1, R3, R4 // 7c23235a + STXVX VS1, R3, R4 // 7c232318 + STXVB16X VS1, (R4)(R5) // 7c2527d8 + STXVH8X VS1, (R4)(R5) // 7c252758 + + STXSDX VS1, (R3)(R4) // 7c241d98 + LXSIWAX (R3)(R4), VS1 // 7c241898 + STXSIWX VS1, (R3)(R4) // 7c241918 + MFVSRD VS1, R3 // 7c230066 + MTFPRD R3, F0 // 7c030166 + MFVRD V0, R3 // 7c030067 + MFVSRLD VS63,R4 // 7fe40267 + MFVSRWZ VS33,R4 // 7c2400e7 + MTVSRD R3, VS1 // 7c230166 + MTVRD R3, V13 // 7da30167 + MTVSRWA R4, VS31 // 7fe401a6 + MTVSRWS R4, VS32 // 7c040327 + MTVSRWZ R4, VS63 // 7fe401e7 + XXBRD VS0, VS1 // f037076c + XXBRW VS1, VS2 // f04f0f6c + XXBRH VS2, VS3 // f067176c + XXLAND VS1, VS2, VS3 // f0611410 + XXLANDC VS1, VS2, VS3 // f0611450 + XXLEQV VS0, VS1, VS2 // f0400dd0 + XXLNAND VS0, VS1, VS2 // f0400d90 + XXLNOR VS0, VS1, VS32 // f0000d11 + XXLOR VS1, VS2, VS3 // f0611490 + XXLORC VS1, VS2, VS3 // f0611550 + XXLORQ VS1, VS2, VS3 // f0611490 + XXLXOR VS1, VS2, VS3 // f06114d0 + XXSEL VS1, VS2, VS3, VS4 // f08110f0 + XXMRGHW VS1, VS2, VS3 // f0611090 + XXMRGLW VS1, VS2, VS3 // f0611190 + XXSPLTW VS1, $1, VS2 // f0410a90 + XXPERM VS1, VS2, VS3 // f06110d0 + XXSLDWI VS1, VS2, $1, VS3 // f0611110 + XSCVDPSP VS1, VS2 // f0400c24 + XVCVDPSP VS1, VS2 // f0400e24 + XSCVSXDDP VS1, VS2 // f0400de0 + XVCVDPSXDS VS1, VS2 // f0400f60 + XVCVSXDDP VS1, VS2 // f0400fe0 + XSCVDPSPN VS1,VS32 // f0000c2d + XSCVDPSP VS1,VS32 // f0000c25 + XSCVDPSXDS VS1,VS32 // f0000d61 + XSCVDPSXWS VS1,VS32 // f0000961 + XSCVDPUXDS VS1,VS32 // f0000d21 + XSCVDPUXWS VS1,VS32 // f0000921 + XSCVSPDPN VS1,VS32 // f0000d2d + XSCVSPDP VS1,VS32 // f0000d25 + XSCVSXDDP VS1,VS32 // f0000de1 + XSCVSXDSP VS1,VS32 // f0000ce1 + XSCVUXDDP VS1,VS32 // f0000da1 + XSCVUXDSP VS1,VS32 // f0000ca1 + XVCVDPSP VS1,VS32 // f0000e25 + XVCVDPSXDS VS1,VS32 // f0000f61 + XVCVDPSXWS VS1,VS32 // f0000b61 + XVCVDPUXDS VS1,VS32 // f0000f21 + XVCVDPUXWS VS1,VS32 // f0000b21 + XVCVSPDP VS1,VS32 // f0000f25 + XVCVSPSXDS VS1,VS32 // f0000e61 + XVCVSPSXWS VS1,VS32 // f0000a61 + XVCVSPUXDS VS1,VS32 // f0000e21 + XVCVSPUXWS VS1,VS32 // f0000a21 + XVCVSXDDP VS1,VS32 // f0000fe1 + XVCVSXDSP VS1,VS32 // f0000ee1 + XVCVSXWDP VS1,VS32 // f0000be1 + XVCVSXWSP VS1,VS32 // f0000ae1 + XVCVUXDDP VS1,VS32 // f0000fa1 + XVCVUXDSP VS1,VS32 // f0000ea1 + XVCVUXWDP VS1,VS32 // f0000ba1 + XVCVUXWSP VS1,VS32 // f0000aa1 + + MOVD R3, LR // 7c6803a6 + MOVD R3, CTR // 7c6903a6 + MOVD R3, XER // 7c6103a6 + MOVD LR, R3 // 7c6802a6 + MOVD CTR, R3 // 7c6902a6 + MOVD XER, R3 // 7c6102a6 + MOVFL CR3, CR1 // 4c8c0000 -// LNOP freg comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - NOP F2 - -// LNOP ',' rreg // asm doesn't support the leading comma. -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - NOP R2 - -// LNOP ',' freg // asm doesn't support the leading comma. -// { -// outcode(int($1), &nullgen, 0, &$3); -// } - NOP F2 - -// LNOP imm // SYSCALL $num: load $num to R0 before syscall and restore R0 to 0 afterwards. -// { -// outcode(int($1), &$2, 0, &nullgen); -// } - NOP $4 - -// RET -// -// LRETRN comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - BEQ 2(PC) RET - -// More BR/BL cases, and canonical names JMP, CALL. - - BEQ 2(PC) - BR foo(SB) // JMP foo(SB) - BL foo(SB) // CALL foo(SB) - BEQ 2(PC) - JMP foo(SB) - CALL foo(SB) - RET foo(SB) - -// load-and-reserve -// L*AR (RB)(RA*1),EH,RT produces -// l*arx RT,RA,RB,EH -// -// Extended forms also accepted. Assumes RA=0, EH=0: -// L*AR (RB),RT -// L*AR (RB),EH,RT - LBAR (R4)(R3*1), $1, R5 - LBAR (R4), $0, R5 - LBAR (R3), R5 - LHAR (R4)(R3*1), $1, R5 - LHAR (R4), $0, R5 - LHAR (R3), R5 - LWAR (R4)(R3*1), $1, R5 - LWAR (R4), $0, R5 - LWAR (R3), R5 - LDAR (R4)(R3*1), $1, R5 - LDAR (R4), $0, R5 - LDAR (R3), R5 - -// END -// -// LEND comma // asm doesn't support the trailing comma. -// { -// outcode(int($1), &nullgen, 0, &nullgen); -// } - END diff --git a/src/cmd/asm/internal/asm/testdata/ppc64enc.s b/src/cmd/asm/internal/asm/testdata/ppc64enc.s deleted file mode 100644 index c6d7b59aad..0000000000 --- a/src/cmd/asm/internal/asm/testdata/ppc64enc.s +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Initial set of opcode combinations based on -// improvements to processing of constant -// operands. - -// Full set will be added at a later date. - -#include "../../../../../runtime/textflag.h" - -TEXT asmtest(SB),DUPOK|NOSPLIT,$0 - // move constants - MOVD $1, R3 // 38600001 - MOVD $-1, R4 // 3880ffff - MOVD $65535, R5 // 6005ffff - MOVD $65536, R6 // 64060001 - MOVD $-32767, R5 // 38a08001 - MOVD $-32768, R6 // 38c08000 - MOVD $1234567, R5 // 6405001260a5d687 - MOVW $1, R3 // 38600001 - MOVW $-1, R4 // 3880ffff - MOVW $65535, R5 // 6005ffff - MOVW $65536, R6 // 64060001 - MOVW $-32767, R5 // 38a08001 - MOVW $-32768, R6 // 38c08000 - MOVW $1234567, R5 // 6405001260a5d687 - MOVD 8(R3), R4 // e8830008 - MOVD (R3)(R4), R5 // 7ca4182a - MOVW 4(R3), R4 // e8830006 - MOVW (R3)(R4), R5 // 7ca41aaa - MOVWZ 4(R3), R4 // 80830004 - MOVWZ (R3)(R4), R5 // 7ca4182e - MOVH 4(R3), R4 // a8830004 - MOVH (R3)(R4), R5 // 7ca41aae - MOVHZ 2(R3), R4 // a0830002 - MOVHZ (R3)(R4), R5 // 7ca41a2e - MOVB 1(R3), R4 // 888300017c840774 - MOVB (R3)(R4), R5 // 7ca418ae7ca50774 - MOVBZ 1(R3), R4 // 88830001 - MOVBZ (R3)(R4), R5 // 7ca418ae - MOVDBR (R3)(R4), R5 // 7ca41c28 - MOVWBR (R3)(R4), R5 // 7ca41c2c - MOVHBR (R3)(R4), R5 // 7ca41e2c - - MOVDU 8(R3), R4 // e8830009 - MOVDU (R3)(R4), R5 // 7ca4186a - MOVWU (R3)(R4), R5 // 7ca41aea - MOVWZU 4(R3), R4 // 84830004 - MOVWZU (R3)(R4), R5 // 7ca4186e - MOVHU 2(R3), R4 // ac830002 - MOVHU (R3)(R4), R5 // 7ca41aee - MOVHZU 2(R3), R4 // a4830002 - MOVHZU (R3)(R4), R5 // 7ca41a6e - MOVBU 1(R3), R4 // 8c8300017c840774 - MOVBU (R3)(R4), R5 // 7ca418ee7ca50774 - MOVBZU 1(R3), R4 // 8c830001 - MOVBZU (R3)(R4), R5 // 7ca418ee - - MOVD R4, 8(R3) // f8830008 - MOVD R5, (R3)(R4) // 7ca4192a - MOVW R4, 4(R3) // 90830004 - MOVW R5, (R3)(R4) // 7ca4192e - MOVH R4, 2(R3) // b0830002 - MOVH R5, (R3)(R4) // 7ca41b2e - MOVB R4, 1(R3) // 98830001 - MOVB R5, (R3)(R4) // 7ca419ae - MOVDBR R5, (R3)(R4) // 7ca41d28 - MOVWBR R5, (R3)(R4) // 7ca41d2c - MOVHBR R5, (R3)(R4) // 7ca41f2c - - MOVDU R4, 8(R3) // f8830009 - MOVDU R5, (R3)(R4) // 7ca4196a - MOVWU R4, 4(R3) // 94830004 - MOVWU R5, (R3)(R4) // 7ca4196e - MOVHU R4, 2(R3) // b4830002 - MOVHU R5, (R3)(R4) // 7ca41b6e - MOVBU R4, 1(R3) // 9c830001 - MOVBU R5, (R3)(R4) // 7ca419ee - - ADD $1, R3 // 38630001 - ADD $1, R3, R4 // 38830001 - ADD $-1, R4 // 3884ffff - ADD $-1, R4, R5 // 38a4ffff - ADD $65535, R5 // 601fffff7cbf2a14 - ADD $65535, R5, R6 // 601fffff7cdf2a14 - ADD $65536, R6 // 3cc60001 - ADD $65536, R6, R7 // 3ce60001 - ADD $-32767, R5 // 38a58001 - ADD $-32767, R5, R4 // 38858001 - ADD $-32768, R6 // 38c68000 - ADD $-32768, R6, R5 // 38a68000 - ADD $1234567, R5 // 641f001263ffd6877cbf2a14 - ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 - ADDIS $8, R3 // 3c630008 - ADDIS $1000, R3, R4 // 3c8303e8 - - ANDCC $1, R3 // 70630001 - ANDCC $1, R3, R4 // 70640001 - ANDCC $-1, R4 // 3be0ffff7fe42039 - ANDCC $-1, R4, R5 // 3be0ffff7fe52039 - ANDCC $65535, R5 // 70a5ffff - ANDCC $65535, R5, R6 // 70a6ffff - ANDCC $65536, R6 // 74c60001 - ANDCC $65536, R6, R7 // 74c70001 - ANDCC $-32767, R5 // 3be080017fe52839 - ANDCC $-32767, R5, R4 // 3be080017fe42839 - ANDCC $-32768, R6 // 3be080007fe63039 - ANDCC $-32768, R5, R6 // 3be080007fe62839 - ANDCC $1234567, R5 // 641f001263ffd6877fe52839 - ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839 - ANDISCC $1, R3 // 74630001 - ANDISCC $1000, R3, R4 // 746403e8 - - OR $1, R3 // 60630001 - OR $1, R3, R4 // 60640001 - OR $-1, R4 // 3be0ffff7fe42378 - OR $-1, R4, R5 // 3be0ffff7fe52378 - OR $65535, R5 // 60a5ffff - OR $65535, R5, R6 // 60a6ffff - OR $65536, R6 // 64c60001 - OR $65536, R6, R7 // 64c70001 - OR $-32767, R5 // 3be080017fe52b78 - OR $-32767, R5, R6 // 3be080017fe62b78 - OR $-32768, R6 // 3be080007fe63378 - OR $-32768, R6, R7 // 3be080007fe73378 - OR $1234567, R5 // 641f001263ffd6877fe52b78 - OR $1234567, R5, R3 // 641f001263ffd6877fe32b78 - - XOR $1, R3 // 68630001 - XOR $1, R3, R4 // 68640001 - XOR $-1, R4 // 3be0ffff7fe42278 - XOR $-1, R4, R5 // 3be0ffff7fe52278 - XOR $65535, R5 // 68a5ffff - XOR $65535, R5, R6 // 68a6ffff - XOR $65536, R6 // 6cc60001 - XOR $65536, R6, R7 // 6cc70001 - XOR $-32767, R5 // 3be080017fe52a78 - XOR $-32767, R5, R6 // 3be080017fe62a78 - XOR $-32768, R6 // 3be080007fe63278 - XOR $-32768, R6, R7 // 3be080007fe73278 - XOR $1234567, R5 // 641f001263ffd6877fe52a78 - XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78 - - // TODO: the order of CR operands don't match - CMP R3, R4 // 7c232000 - CMPU R3, R4 // 7c232040 - CMPW R3, R4 // 7c032000 - CMPWU R3, R4 // 7c032040 - - // TODO: constants for ADDC? - ADD R3, R4 // 7c841a14 - ADD R3, R4, R5 // 7ca41a14 - ADDC R3, R4 // 7c841814 - ADDC R3, R4, R5 // 7ca41814 - ADDE R3, R4 // 7c841914 - ADDECC R3, R4 // 7c841915 - ADDEV R3, R4 // 7c841d14 - ADDEVCC R3, R4 // 7c841d15 - ADDV R3, R4 // 7c841e14 - ADDVCC R3, R4 // 7c841e15 - ADDCCC R3, R4, R5 // 7ca41815 - ADDME R3, R4 // 7c8301d4 - ADDMECC R3, R4 // 7c8301d5 - ADDMEV R3, R4 // 7c8305d4 - ADDMEVCC R3, R4 // 7c8305d5 - ADDCV R3, R4 // 7c841c14 - ADDCVCC R3, R4 // 7c841c15 - ADDZE R3, R4 // 7c830194 - ADDZECC R3, R4 // 7c830195 - ADDZEV R3, R4 // 7c830594 - ADDZEVCC R3, R4 // 7c830595 - SUBME R3, R4 // 7c8301d0 - SUBMECC R3, R4 // 7c8301d1 - SUBMEV R3, R4 // 7c8305d0 - SUBZE R3, R4 // 7c830190 - SUBZECC R3, R4 // 7c830191 - SUBZEV R3, R4 // 7c830590 - SUBZEVCC R3, R4 // 7c830591 - - AND R3, R4 // 7c841838 - AND R3, R4, R5 // 7c851838 - ANDN R3, R4, R5 // 7c851878 - ANDCC R3, R4, R5 // 7c851839 - OR R3, R4 // 7c841b78 - OR R3, R4, R5 // 7c851b78 - ORN R3, R4, R5 // 7c851b38 - ORCC R3, R4, R5 // 7c851b79 - XOR R3, R4 // 7c841a78 - XOR R3, R4, R5 // 7c851a78 - XORCC R3, R4, R5 // 7c851a79 - NAND R3, R4, R5 // 7c851bb8 - NANDCC R3, R4, R5 // 7c851bb9 - EQV R3, R4, R5 // 7c851a38 - EQVCC R3, R4, R5 // 7c851a39 - NOR R3, R4, R5 // 7c8518f8 - NORCC R3, R4, R5 // 7c8518f9 - - SUB R3, R4 // 7c832050 - SUB R3, R4, R5 // 7ca32050 - SUBC R3, R4 // 7c832010 - SUBC R3, R4, R5 // 7ca32010 - - MULLW R3, R4 // 7c8419d6 - MULLW R3, R4, R5 // 7ca419d6 - MULLW $10, R3 // 1c63000a - MULLW $10000000, R3 // 641f009863ff96807c7f19d6 - MULLWCC R3, R4, R5 // 7ca419d7 - MULHW R3, R4, R5 // 7ca41896 - - MULHWU R3, R4, R5 // 7ca41816 - MULLD R3, R4 // 7c8419d2 - MULLD R4, R4, R5 // 7ca421d2 - MULLD $20, R4 // 1c840014 - MULLD $200000000, R4 // 641f0beb63ffc2007c9f21d2 - MULLDCC R3, R4, R5 // 7ca419d3 - MULHD R3, R4, R5 // 7ca41892 - MULHDCC R3, R4, R5 // 7ca41893 - - MULLWV R3, R4 // 7c841dd6 - MULLWV R3, R4, R5 // 7ca41dd6 - MULLWVCC R3, R4, R5 // 7ca41dd7 - MULHWUCC R3, R4, R5 // 7ca41817 - MULLDV R3, R4, R5 // 7ca41dd2 - MULLDVCC R3, R4, R5 // 7ca41dd3 - - DIVD R3,R4 // 7c841bd2 - DIVD R3, R4, R5 // 7ca41bd2 - DIVDCC R3,R4, R5 // 7ca41bd3 - DIVDU R3, R4, R5 // 7ca41b92 - DIVDV R3, R4, R5 // 7ca41fd2 - DIVDUCC R3, R4, R5 // 7ca41b93 - DIVDVCC R3, R4, R5 // 7ca41fd3 - DIVDUV R3, R4, R5 // 7ca41f92 - DIVDUVCC R3, R4, R5 // 7ca41f93 - DIVDE R3, R4, R5 // 7ca41b52 - DIVDECC R3, R4, R5 // 7ca41b53 - DIVDEU R3, R4, R5 // 7ca41b12 - DIVDEUCC R3, R4, R5 // 7ca41b13 - - REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050 - REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050 - REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050 - REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050 - - MODUD R3, R4, R5 // 7ca41a12 - MODUW R3, R4, R5 // 7ca41a16 - MODSD R3, R4, R5 // 7ca41e12 - MODSW R3, R4, R5 // 7ca41e16 - - SLW $8, R3, R4 // 5464402e - SLW R3, R4, R5 // 7c851830 - SLWCC R3, R4 // 7c841831 - SLD $16, R3, R4 // 786483e4 - SLD R3, R4, R5 // 7c851836 - SLDCC R3, R4 // 7c841837 - - SRW $8, R3, R4 // 5464c23e - SRW R3, R4, R5 // 7c851c30 - SRWCC R3, R4 // 7c841c31 - SRAW $8, R3, R4 // 7c644670 - SRAW R3, R4, R5 // 7c851e30 - SRAWCC R3, R4 // 7c841e31 - SRD $16, R3, R4 // 78648402 - SRD R3, R4, R5 // 7c851c36 - SRDCC R3, R4 // 7c841c37 - SRAD $16, R3, R4 // 7c648674 - SRAD R3, R4, R5 // 7c851e34 - SRDCC R3, R4 // 7c841c37 - ROTLW $16, R3, R4 // 5464803e - ROTLW R3, R4, R5 // 5c85183e - EXTSWSLI $3, R4, R5 // 7c851ef4 - RLWMI $7, R3, $65535, R6 // 50663c3e - RLWMICC $7, R3, $65535, R6 // 50663c3f - RLWNM $3, R4, $7, R6 // 54861f7e - RLWNMCC $3, R4, $7, R6 // 54861f7f - RLDMI $0, R4, $7, R6 // 7886076c - RLDMICC $0, R4, $7, R6 // 7886076d - RLDIMI $0, R4, $7, R6 // 788601cc - RLDIMICC $0, R4, $7, R6 // 788601cd - RLDC $0, R4, $15, R6 // 78860728 - RLDCCC $0, R4, $15, R6 // 78860729 - RLDCL $0, R4, $7, R6 // 78860770 - RLDCLCC $0, R4, $15, R6 // 78860721 - RLDCR $0, R4, $-16, R6 // 788606f2 - RLDCRCC $0, R4, $-16, R6 // 788606f3 - RLDICL $0, R4, $15, R6 // 788603c0 - RLDICLCC $0, R4, $15, R6 // 788603c1 - RLDICR $0, R4, $15, R6 // 788603c4 - RLDICRCC $0, R4, $15, R6 // 788603c5 - RLDIC $0, R4, $15, R6 // 788603c8 - RLDICCC $0, R4, $15, R6 // 788603c9 - CLRLSLWI $8, R5, $6, R4 // 54a430b2 - CLRLSLDI $24, R4, $4, R3 // 78832508 - - BEQ 0(PC) // 41820000 - BGE 0(PC) // 40800000 - BGT 4(PC) // 41810030 - BLE 0(PC) // 40810000 - BLT 0(PC) // 41800000 - BNE 0(PC) // 40820000 - JMP 8(PC) // 48000020 - - CRAND CR1, CR2, CR3 // 4c620a02 - CRANDN CR1, CR2, CR3 // 4c620902 - CREQV CR1, CR2, CR3 // 4c620a42 - CRNAND CR1, CR2, CR3 // 4c6209c2 - CRNOR CR1, CR2, CR3 // 4c620842 - CROR CR1, CR2, CR3 // 4c620b82 - CRORN CR1, CR2, CR3 // 4c620b42 - CRXOR CR1, CR2, CR3 // 4c620982 - - ISEL $1, R3, R4, R5 // 7ca3205e - ISEL $0, R3, R4, R5 // 7ca3201e - ISEL $2, R3, R4, R5 // 7ca3209e - ISEL $3, R3, R4, R5 // 7ca320de - ISEL $4, R3, R4, R5 // 7ca3211e - POPCNTB R3, R4 // 7c6400f4 - POPCNTW R3, R4 // 7c6402f4 - POPCNTD R3, R4 // 7c6403f4 - - PASTECC R3, R4 // 7c23270d - COPY R3, R4 // 7c23260c - - // load-and-reserve - LBAR (R4)(R3*1),$1,R5 // 7ca32069 - LBAR (R4),$0,R5 // 7ca02068 - LBAR (R3),R5 // 7ca01868 - LHAR (R4)(R3*1),$1,R5 // 7ca320e9 - LHAR (R4),$0,R5 // 7ca020e8 - LHAR (R3),R5 // 7ca018e8 - LWAR (R4)(R3*1),$1,R5 // 7ca32029 - LWAR (R4),$0,R5 // 7ca02028 - LWAR (R3),R5 // 7ca01828 - LDAR (R4)(R3*1),$1,R5 // 7ca320a9 - LDAR (R4),$0,R5 // 7ca020a8 - LDAR (R3),R5 // 7ca018a8 - - STBCCC R3, (R4)(R5) // 7c65256d - STWCCC R3, (R4)(R5) // 7c65212d - STDCCC R3, (R4)(R5) // 7c6521ad - STHCCC R3, (R4)(R5) - - SYNC // 7c0004ac - ISYNC // 4c00012c - LWSYNC // 7c2004ac - - DCBF (R3)(R4) // 7c0418ac - DCBI (R3)(R4) // 7c041bac - DCBST (R3)(R4) // 7c04186c - DCBZ (R3)(R4) // 7c041fec - DCBT (R3)(R4) // 7c041a2c - ICBI (R3)(R4) // 7c041fac - - // float constants - FMOVD $(0.0), F1 // f0210cd0 - FMOVD $(-0.0), F1 // f0210cd0fc200850 - - FMOVD 8(R3), F1 // c8230008 - FMOVD (R3)(R4), F1 // 7c241cae - FMOVDU 8(R3), F1 // cc230008 - FMOVDU (R3)(R4), F1 // 7c241cee - FMOVS 4(R3), F1 // c0230004 - FMOVS (R3)(R4), F1 // 7c241c2e - FMOVSU 4(R3), F1 // c4230004 - FMOVSU (R3)(R4), F1 // 7c241c6e - - FMOVD F1, 8(R3) // d8230008 - FMOVD F1, (R3)(R4) // 7c241dae - FMOVDU F1, 8(R3) // dc230008 - FMOVDU F1, (R3)(R4) // 7c241dee - FMOVS F1, 4(R3) // d0230004 - FMOVS F1, (R3)(R4) // 7c241d2e - FMOVSU F1, 4(R3) // d4230004 - FMOVSU F1, (R3)(R4) // 7c241d6e - FADD F1, F2 // fc42082a - FADD F1, F2, F3 // fc62082a - FADDCC F1, F2, F3 // fc62082b - FADDS F1, F2 // ec42082a - FADDS F1, F2, F3 // ec62082a - FADDSCC F1, F2, F3 // ec62082b - FSUB F1, F2 // fc420828 - FSUB F1, F2, F3 // fc620828 - FSUBCC F1, F2, F3 // fc620829 - FSUBS F1, F2 // ec420828 - FSUBS F1, F2, F3 // ec620828 - FSUBCC F1, F2, F3 // fc620829 - FMUL F1, F2 // fc420072 - FMUL F1, F2, F3 // fc620072 - FMULCC F1, F2, F3 // fc620073 - FMULS F1, F2 // ec420072 - FMULS F1, F2, F3 // ec620072 - FMULSCC F1, F2, F3 // ec620073 - FDIV F1, F2 // fc420824 - FDIV F1, F2, F3 // fc620824 - FDIVCC F1, F2, F3 // fc620825 - FDIVS F1, F2 // ec420824 - FDIVS F1, F2, F3 // ec620824 - FDIVSCC F1, F2, F3 // ec620825 - FMADD F1, F2, F3, F4 // fc8110fa - FMADDCC F1, F2, F3, F4 // fc8110fb - FMADDS F1, F2, F3, F4 // ec8110fa - FMADDSCC F1, F2, F3, F4 // ec8110fb - FMSUB F1, F2, F3, F4 // fc8110f8 - FMSUBCC F1, F2, F3, F4 // fc8110f9 - FMSUBS F1, F2, F3, F4 // ec8110f8 - FMSUBSCC F1, F2, F3, F4 // ec8110f9 - FNMADD F1, F2, F3, F4 // fc8110fe - FNMADDCC F1, F2, F3, F4 // fc8110ff - FNMADDS F1, F2, F3, F4 // ec8110fe - FNMADDSCC F1, F2, F3, F4 // ec8110ff - FNMSUB F1, F2, F3, F4 // fc8110fc - FNMSUBCC F1, F2, F3, F4 // fc8110fd - FNMSUBS F1, F2, F3, F4 // ec8110fc - FNMSUBSCC F1, F2, F3, F4 // ec8110fd - FSEL F1, F2, F3, F4 // fc8110ee - FSELCC F1, F2, F3, F4 // fc8110ef - FABS F1, F2 // fc400a10 - FABSCC F1, F2 // fc400a11 - FNEG F1, F2 // fc400850 - FABSCC F1, F2 // fc400a11 - FRSP F1, F2 // fc400818 - FRSPCC F1, F2 // fc400819 - FCTIW F1, F2 // fc40081c - FCTIWCC F1, F2 // fc40081d - FCTIWZ F1, F2 // fc40081e - FCTIWZCC F1, F2 // fc40081f - FCTID F1, F2 // fc400e5c - FCTIDCC F1, F2 // fc400e5d - FCTIDZ F1, F2 // fc400e5e - FCTIDZCC F1, F2 // fc400e5f - FCFID F1, F2 // fc400e9c - FCFIDCC F1, F2 // fc400e9d - FCFIDU F1, F2 // fc400f9c - FCFIDUCC F1, F2 // fc400f9d - FCFIDS F1, F2 // ec400e9c - FCFIDSCC F1, F2 // ec400e9d - FRES F1, F2 // ec400830 - FRESCC F1, F2 // ec400831 - FRIM F1, F2 // fc400bd0 - FRIMCC F1, F2 // fc400bd1 - FRIP F1, F2 // fc400b90 - FRIPCC F1, F2 // fc400b91 - FRIZ F1, F2 // fc400b50 - FRIZCC F1, F2 // fc400b51 - FRIN F1, F2 // fc400b10 - FRINCC F1, F2 // fc400b11 - FRSQRTE F1, F2 // fc400834 - FRSQRTECC F1, F2 // fc400835 - FSQRT F1, F2 // fc40082c - FSQRTCC F1, F2 // fc40082d - FSQRTS F1, F2 // ec40082c - FSQRTSCC F1, F2 // ec40082d - FCPSGN F1, F2 // fc420810 - FCPSGNCC F1, F2 // fc420811 - FCMPO F1, F2 // fc011040 - FCMPU F1, F2 // fc011000 - LVX (R3)(R4), V1 // 7c2418ce - LVXL (R3)(R4), V1 // 7c241ace - LVSL (R3)(R4), V1 // 7c24180c - LVSR (R3)(R4), V1 // 7c24184c - LVEBX (R3)(R4), V1 // 7c24180e - LVEHX (R3)(R4), V1 // 7c24184e - LVEWX (R3)(R4), V1 // 7c24188e - STVX V1, (R3)(R4) // 7c2419ce - STVXL V1, (R3)(R4) // 7c241bce - STVEBX V1, (R3)(R4) // 7c24190e - STVEHX V1, (R3)(R4) // 7c24194e - STVEWX V1, (R3)(R4) // 7c24198e - - VAND V1, V2, V3 // 10611404 - VANDC V1, V2, V3 // 10611444 - VNAND V1, V2, V3 // 10611584 - VOR V1, V2, V3 // 10611484 - VORC V1, V2, V3 // 10611544 - VXOR V1, V2, V3 // 106114c4 - VNOR V1, V2, V3 // 10611504 - VEQV V1, V2, V3 // 10611684 - VADDUBM V1, V2, V3 // 10611000 - VADDUHM V1, V2, V3 // 10611040 - VADDUWM V1, V2, V3 // 10611080 - VADDUDM V1, V2, V3 // 106110c0 - VADDUQM V1, V2, V3 // 10611100 - VADDCUQ V1, V2, V3 // 10611140 - VADDCUW V1, V2, V3 // 10611180 - VADDUBS V1, V2, V3 // 10611200 - VADDUHS V1, V2, V3 // 10611240 - VADDUWS V1, V2, V3 // 10611280 - VSUBUBM V1, V2, V3 // 10611400 - VSUBUHM V1, V2, V3 // 10611440 - VSUBUWM V1, V2, V3 // 10611480 - VSUBUDM V1, V2, V3 // 106114c0 - VSUBUQM V1, V2, V3 // 10611500 - VSUBCUQ V1, V2, V3 // 10611540 - VSUBCUW V1, V2, V3 // 10611580 - VSUBUBS V1, V2, V3 // 10611600 - VSUBUHS V1, V2, V3 // 10611640 - VSUBUWS V1, V2, V3 // 10611680 - VSUBSBS V1, V2, V3 // 10611700 - VSUBSHS V1, V2, V3 // 10611740 - VSUBSWS V1, V2, V3 // 10611780 - VSUBEUQM V1, V2, V3, V4 // 108110fe - VSUBECUQ V1, V2, V3, V4 // 108110ff - VMULESB V1, V2, V3 // 10611308 - VMULOSB V1, V2, V3 // 10611108 - VMULEUB V1, V2, V3 // 10611208 - VMULOUB V1, V2, V3 // 10611008 - VMULESH V1, V2, V3 // 10611348 - VMULOSH V1, V2, V3 // 10611148 - VMULEUH V1, V2, V3 // 10611248 - VMULOUH V1, V2, V3 // 10611048 - VMULESH V1, V2, V3 // 10611348 - VMULOSW V1, V2, V3 // 10611188 - VMULEUW V1, V2, V3 // 10611288 - VMULOUW V1, V2, V3 // 10611088 - VMULUWM V1, V2, V3 // 10611089 - VPMSUMB V1, V2, V3 // 10611408 - VPMSUMH V1, V2, V3 // 10611448 - VPMSUMW V1, V2, V3 // 10611488 - VPMSUMD V1, V2, V3 // 106114c8 - VMSUMUDM V1, V2, V3, V4 // 108110e3 - VRLB V1, V2, V3 // 10611004 - VRLH V1, V2, V3 // 10611044 - VRLW V1, V2, V3 // 10611084 - VRLD V1, V2, V3 // 106110c4 - VSLB V1, V2, V3 // 10611104 - VSLH V1, V2, V3 // 10611144 - VSLW V1, V2, V3 // 10611184 - VSL V1, V2, V3 // 106111c4 - VSLO V1, V2, V3 // 1061140c - VSRB V1, V2, V3 // 10611204 - VSRH V1, V2, V3 // 10611244 - VSRW V1, V2, V3 // 10611284 - VSR V1, V2, V3 // 106112c4 - VSRO V1, V2, V3 // 1061144c - VSLD V1, V2, V3 // 106115c4 - VSRAB V1, V2, V3 // 10611304 - VSRAH V1, V2, V3 // 10611344 - VSRAW V1, V2, V3 // 10611384 - VSRAD V1, V2, V3 // 106113c4 - VSLDOI $3, V1, V2, V3 // 106110ec - VCLZB V1, V2 // 10400f02 - VCLZH V1, V2 // 10400f42 - VCLZW V1, V2 // 10400f82 - VCLZD V1, V2 // 10400fc2 - VPOPCNTB V1, V2 // 10400f03 - VPOPCNTH V1, V2 // 10400f43 - VPOPCNTW V1, V2 // 10400f83 - VPOPCNTD V1, V2 // 10400fc3 - VCMPEQUB V1, V2, V3 // 10611006 - VCMPEQUBCC V1, V2, V3 // 10611406 - VCMPEQUH V1, V2, V3 // 10611046 - VCMPEQUHCC V1, V2, V3 // 10611446 - VCMPEQUW V1, V2, V3 // 10611086 - VCMPEQUWCC V1, V2, V3 // 10611486 - VCMPEQUD V1, V2, V3 // 106110c7 - VCMPEQUDCC V1, V2, V3 // 106114c7 - VCMPGTUB V1, V2, V3 // 10611206 - VCMPGTUBCC V1, V2, V3 // 10611606 - VCMPGTUH V1, V2, V3 // 10611246 - VCMPGTUHCC V1, V2, V3 // 10611646 - VCMPGTUW V1, V2, V3 // 10611286 - VCMPGTUWCC V1, V2, V3 // 10611686 - VCMPGTUD V1, V2, V3 // 106112c7 - VCMPGTUDCC V1, V2, V3 // 106116c7 - VCMPGTSB V1, V2, V3 // 10611306 - VCMPGTSBCC V1, V2, V3 // 10611706 - VCMPGTSH V1, V2, V3 // 10611346 - VCMPGTSHCC V1, V2, V3 // 10611746 - VCMPGTSW V1, V2, V3 // 10611386 - VCMPGTSWCC V1, V2, V3 // 10611786 - VCMPGTSD V1, V2, V3 // 106113c7 - VCMPGTSDCC V1, V2, V3 // 106117c7 - VCMPNEZB V1, V2, V3 // 10611107 - VCMPNEZBCC V1, V2, V3 // 10611507 - VCMPNEB V1, V2, V3 // 10611007 - VCMPNEBCC V1, V2, V3 // 10611407 - VCMPNEH V1, V2, V3 // 10611047 - VCMPNEHCC V1, V2, V3 // 10611447 - VCMPNEW V1, V2, V3 // 10611087 - VCMPNEWCC V1, V2, V3 // 10611487 - VPERM V1, V2, V3, V4 // 108110eb - VPERMR V1, V2, V3, V4 // 108110fb - VPERMXOR V1, V2, V3, V4 // 108110ed - VBPERMQ V1, V2, V3 // 1061154c - VBPERMD V1, V2, V3 // 106115cc - VSEL V1, V2, V3, V4 // 108110ea - VSPLTB $1, V1, V2 // 10410a0c - VSPLTH $1, V1, V2 // 10410a4c - VSPLTW $1, V1, V2 // 10410a8c - VSPLTISB $1, V1 // 1021030c - VSPLTISW $1, V1 // 1021038c - VSPLTISH $1, V1 // 1021034c - VCIPHER V1, V2, V3 // 10611508 - VCIPHERLAST V1, V2, V3 // 10611509 - VNCIPHER V1, V2, V3 // 10611548 - VNCIPHERLAST V1, V2, V3 // 10611549 - VSBOX V1, V2 // 104105c8 - VSHASIGMAW $1, V1, $15, V2 // 10418e82 - VSHASIGMAD $2, V1, $15, V2 // 104196c2 - - LXVD2X (R3)(R4), VS1 // 7c241e98 - LXV 16(R3), VS1 // f4230011 - LXVL R3, R4, VS1 // 7c23221a - LXVLL R3, R4, VS1 // 7c23225a - LXVX R3, R4, VS1 // 7c232218 - LXSDX (R3)(R4), VS1 // 7c241c98 - STXVD2X VS1, (R3)(R4) // 7c241f98 - STXV VS1,16(R3) // f4230015 - STXVL VS1, R3, R4 // 7c23231a - STXVLL VS1, R3, R4 // 7c23235a - STXVX VS1, R3, R4 // 7c232318 - STXSDX VS1, (R3)(R4) // 7c241d98 - LXSIWAX (R3)(R4), VS1 // 7c241898 - STXSIWX VS1, (R3)(R4) // 7c241918 - MFVSRD VS1, R3 // 7c230066 - MTVSRD R3, VS1 // 7c230166 - XXLAND VS1, VS2, VS3 // f0611410 - XXLOR VS1, VS2, VS3 // f0611490 - XXLORC VS1, VS2, VS3 // f0611550 - XXLXOR VS1, VS2, VS3 // f06114d0 - XXSEL VS1, VS2, VS3, VS4 // f08110f0 - XXMRGHW VS1, VS2, VS3 // f0611090 - XXSPLTW VS1, $1, VS2 // f0410a90 - XXPERM VS1, VS2, VS3 // f06110d0 - XXSLDWI VS1, VS2, $1, VS3 // f0611110 - XSCVDPSP VS1, VS2 // f0400c24 - XVCVDPSP VS1, VS2 // f0400e24 - XSCVSXDDP VS1, VS2 // f0400de0 - XVCVDPSXDS VS1, VS2 // f0400f60 - XVCVSXDDP VS1, VS2 // f0400fe0 - - MOVD R3, LR // 7c6803a6 - MOVD R3, CTR // 7c6903a6 - MOVD R3, XER // 7c6103a6 - MOVD LR, R3 // 7c6802a6 - MOVD CTR, R3 // 7c6902a6 - MOVD XER, R3 // 7c6102a6 - MOVFL CR3, CR1 // 4c8c0000 - - RET diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index dcabb3cd6a..090fefb4d8 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -2159,7 +2159,7 @@ func AOP_DQ(op uint32, d uint32, a uint32, b uint32) uint32 { /* Z23-form, 3-register operands + CY field */ func AOP_Z23I(op uint32, d uint32, a uint32, b uint32, c uint32) uint32 { - return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<7 + return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 | (c&3)<<9 } /* X-form, 3-register operands + EH field */ -- cgit v1.3 From dd58239dd20f7002b3e219a477514b91dd0cc5fc Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Wed, 7 Oct 2020 12:31:05 -0400 Subject: cmd/asm: allow def/ref of func ABI when compiling runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function symbols defined and referenced by assembly source currently always default to ABI0; this patch adds preliminary support for accepting an explicit ABI selector clause for func defs/refs. This functionality is currently only enabled when compiling runtime-related packages (runtime, syscall, reflect). Examples: TEXT ·DefinedAbi0Symbol(SB),NOSPLIT,$0 RET TEXT ·DefinedAbi1Symbol(SB),NOSPLIT,$0 CALL ·AbiZerolSym(SB) ... JMP ·AbiInternalSym(SB) RET Also included is a small change to the code in the compiler that reads the symabis file emitted by the assembler. New behavior is currently gated under GOEXPERIMENT=regabi. Updates #27539, #40724. Change-Id: Ia22221fe26df0fa002191cfb13bdfaaa38d7df38 Reviewed-on: https://go-review.googlesource.com/c/go/+/260477 Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Than McIntosh --- src/cmd/asm/internal/asm/endtoend_test.go | 4 +- src/cmd/asm/internal/asm/expr_test.go | 4 +- src/cmd/asm/internal/asm/line_test.go | 2 +- src/cmd/asm/internal/asm/operand_test.go | 24 ++++- src/cmd/asm/internal/asm/parse.go | 148 ++++++++++++++++++++---------- src/cmd/asm/internal/asm/pseudo_test.go | 1 + src/cmd/asm/main.go | 3 +- src/cmd/compile/internal/gc/main.go | 11 ++- src/cmd/internal/obj/link.go | 14 +++ src/cmd/internal/obj/util.go | 39 +++++++- src/cmd/internal/objabi/path.go | 22 +++++ 11 files changed, 207 insertions(+), 65 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index decf5391db..989b7a5405 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -31,7 +31,7 @@ func testEndToEnd(t *testing.T, goarch, file string) { architecture, ctxt := setArch(goarch) architecture.Init(ctxt) lexer := lex.NewLexer(input) - parser := NewParser(ctxt, architecture, lexer) + parser := NewParser(ctxt, architecture, lexer, false) pList := new(obj.Plist) var ok bool testOut = new(bytes.Buffer) // The assembler writes test output to this buffer. @@ -273,7 +273,7 @@ func testErrors(t *testing.T, goarch, file string) { input := filepath.Join("testdata", file+".s") architecture, ctxt := setArch(goarch) lexer := lex.NewLexer(input) - parser := NewParser(ctxt, architecture, lexer) + parser := NewParser(ctxt, architecture, lexer, false) pList := new(obj.Plist) var ok bool testOut = new(bytes.Buffer) // The assembler writes test output to this buffer. diff --git a/src/cmd/asm/internal/asm/expr_test.go b/src/cmd/asm/internal/asm/expr_test.go index 1251594349..e9c92df1f3 100644 --- a/src/cmd/asm/internal/asm/expr_test.go +++ b/src/cmd/asm/internal/asm/expr_test.go @@ -57,7 +57,7 @@ var exprTests = []exprTest{ } func TestExpr(t *testing.T) { - p := NewParser(nil, nil, nil) // Expression evaluation uses none of these fields of the parser. + p := NewParser(nil, nil, nil, false) // Expression evaluation uses none of these fields of the parser. for i, test := range exprTests { p.start(lex.Tokenize(test.input)) result := int64(p.expr()) @@ -113,7 +113,7 @@ func TestBadExpr(t *testing.T) { } func runBadTest(i int, test badExprTest, t *testing.T) (err error) { - p := NewParser(nil, nil, nil) // Expression evaluation uses none of these fields of the parser. + p := NewParser(nil, nil, nil, false) // Expression evaluation uses none of these fields of the parser. p.start(lex.Tokenize(test.input)) return tryParse(t, func() { p.expr() diff --git a/src/cmd/asm/internal/asm/line_test.go b/src/cmd/asm/internal/asm/line_test.go index 01b058bd95..da857ced3a 100644 --- a/src/cmd/asm/internal/asm/line_test.go +++ b/src/cmd/asm/internal/asm/line_test.go @@ -39,7 +39,7 @@ func testBadInstParser(t *testing.T, goarch string, tests []badInstTest) { for i, test := range tests { arch, ctxt := setArch(goarch) tokenizer := lex.NewTokenizer("", strings.NewReader(test.input+"\n"), nil) - parser := NewParser(ctxt, arch, tokenizer) + parser := NewParser(ctxt, arch, tokenizer, false) err := tryParse(t, func() { parser.Parse() diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go index f187d0b166..2e83e176b2 100644 --- a/src/cmd/asm/internal/asm/operand_test.go +++ b/src/cmd/asm/internal/asm/operand_test.go @@ -28,7 +28,7 @@ func setArch(goarch string) (*arch.Arch, *obj.Link) { func newParser(goarch string) *Parser { architecture, ctxt := setArch(goarch) - return NewParser(ctxt, architecture, nil) + return NewParser(ctxt, architecture, nil, false) } // tryParse executes parse func in panicOnError=true context. @@ -75,7 +75,12 @@ func testOperandParser(t *testing.T, parser *Parser, tests []operandTest) { parser.start(lex.Tokenize(test.input)) addr := obj.Addr{} parser.operand(&addr) - result := obj.Dconv(&emptyProg, &addr) + var result string + if parser.compilingRuntime { + result = obj.DconvWithABIDetail(&emptyProg, &addr) + } else { + result = obj.Dconv(&emptyProg, &addr) + } if result != test.output { t.Errorf("fail at %s: got %s; expected %s\n", test.input, result, test.output) } @@ -86,6 +91,9 @@ func TestAMD64OperandParser(t *testing.T) { parser := newParser("amd64") testOperandParser(t, parser, amd64OperandTests) testBadOperandParser(t, parser, amd64BadOperandTests) + parser.compilingRuntime = true + testOperandParser(t, parser, amd64RuntimeOperandTests) + testBadOperandParser(t, parser, amd64BadOperandRuntimeTests) } func Test386OperandParser(t *testing.T) { @@ -141,7 +149,7 @@ func TestFuncAddress(t *testing.T) { parser := newParser(sub.arch) for _, test := range sub.tests { parser.start(lex.Tokenize(test.input)) - name, ok := parser.funcAddress() + name, _, ok := parser.funcAddress() isFuncSym := strings.HasSuffix(test.input, "(SB)") && // Ignore static symbols. @@ -298,6 +306,11 @@ var amd64OperandTests = []operandTest{ {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms. } +var amd64RuntimeOperandTests = []operandTest{ + {"$bar(SB)", "$bar(SB)"}, + {"$foo(SB)", "$foo(SB)"}, +} + var amd64BadOperandTests = []badOperandTest{ {"[", "register list: expected ']', found EOF"}, {"[4", "register list: bad low register in `[4`"}, @@ -311,6 +324,11 @@ var amd64BadOperandTests = []badOperandTest{ {"[X0-X1-X2]", "register list: expected ']' after `[X0-X1`, found '-'"}, {"[X0,X3]", "register list: expected '-' after `[X0`, found ','"}, {"[X0,X1,X2,X3]", "register list: expected '-' after `[X0`, found ','"}, + {"$foo", "ABI selector only permitted when compiling runtime, reference was to \"foo\""}, +} + +var amd64BadOperandRuntimeTests = []badOperandTest{ + {"$foo", "malformed ABI selector \"bletch\" in reference to \"foo\""}, } var x86OperandTests = []operandTest{ diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go index d9dbd92cb0..154cf9c7a7 100644 --- a/src/cmd/asm/internal/asm/parse.go +++ b/src/cmd/asm/internal/asm/parse.go @@ -25,25 +25,26 @@ import ( ) type Parser struct { - lex lex.TokenReader - lineNum int // Line number in source file. - errorLine int // Line number of last error. - errorCount int // Number of errors. - sawCode bool // saw code in this file (as opposed to comments and blank lines) - pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. - input []lex.Token - inputPos int - pendingLabels []string // Labels to attach to next instruction. - labels map[string]*obj.Prog - toPatch []Patch - addr []obj.Addr - arch *arch.Arch - ctxt *obj.Link - firstProg *obj.Prog - lastProg *obj.Prog - dataAddr map[string]int64 // Most recent address for DATA for this symbol. - isJump bool // Instruction being assembled is a jump. - errorWriter io.Writer + lex lex.TokenReader + lineNum int // Line number in source file. + errorLine int // Line number of last error. + errorCount int // Number of errors. + sawCode bool // saw code in this file (as opposed to comments and blank lines) + pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. + input []lex.Token + inputPos int + pendingLabels []string // Labels to attach to next instruction. + labels map[string]*obj.Prog + toPatch []Patch + addr []obj.Addr + arch *arch.Arch + ctxt *obj.Link + firstProg *obj.Prog + lastProg *obj.Prog + dataAddr map[string]int64 // Most recent address for DATA for this symbol. + isJump bool // Instruction being assembled is a jump. + compilingRuntime bool + errorWriter io.Writer } type Patch struct { @@ -51,14 +52,15 @@ type Patch struct { label string } -func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser { +func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader, compilingRuntime bool) *Parser { return &Parser{ - ctxt: ctxt, - arch: ar, - lex: lexer, - labels: make(map[string]*obj.Prog), - dataAddr: make(map[string]int64), - errorWriter: os.Stderr, + ctxt: ctxt, + arch: ar, + lex: lexer, + labels: make(map[string]*obj.Prog), + dataAddr: make(map[string]int64), + errorWriter: os.Stderr, + compilingRuntime: compilingRuntime, } } @@ -310,8 +312,8 @@ func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) { // Defines text symbol in operands[0]. if len(operands) > 0 { p.start(operands[0]) - if name, ok := p.funcAddress(); ok { - fmt.Fprintf(w, "def %s ABI0\n", name) + if name, abi, ok := p.funcAddress(); ok { + fmt.Fprintf(w, "def %s %s\n", name, abi) } } return @@ -329,8 +331,8 @@ func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) { // Search for symbol references. for _, op := range operands { p.start(op) - if name, ok := p.funcAddress(); ok { - fmt.Fprintf(w, "ref %s ABI0\n", name) + if name, abi, ok := p.funcAddress(); ok { + fmt.Fprintf(w, "ref %s %s\n", name, abi) } } } @@ -765,20 +767,19 @@ func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) { case '*': a.Type = obj.TYPE_INDIR } - // Weirdness with statics: Might now have "<>". - isStatic := false - if p.peek() == '<' { - isStatic = true - p.next() - p.get('>') - } + + // Parse optional <> (indicates a static symbol) or + // (selecting text symbol with specific ABI). + doIssueError := true + isStatic, abi := p.symRefAttrs(name, doIssueError) + if p.peek() == '+' || p.peek() == '-' { a.Offset = int64(p.expr()) } if isStatic { a.Sym = p.ctxt.LookupStatic(name) } else { - a.Sym = p.ctxt.Lookup(name) + a.Sym = p.ctxt.LookupABI(name, abi) } if p.peek() == scanner.EOF { if prefix == 0 && p.isJump { @@ -823,12 +824,60 @@ func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, pr } } +// symRefAttrs parses an optional function symbol attribute clause for +// the function symbol 'name', logging an error for a malformed +// attribute clause if 'issueError' is true. The return value is a +// (boolean, ABI) pair indicating that the named symbol is either +// static or a particular ABI specification. +// +// The expected form of the attribute clause is: +// +// empty, yielding (false, obj.ABI0) +// "<>", yielding (true, obj.ABI0) +// "" yielding (false, obj.ABI0) +// "" yielding (false, obj.ABIInternal) +// +// Anything else beginning with "<" logs an error if issueError is +// true, otherwise returns (false, obj.ABI0). +// +func (p *Parser) symRefAttrs(name string, issueError bool) (bool, obj.ABI) { + abi := obj.ABI0 + isStatic := false + if p.peek() != '<' { + return isStatic, abi + } + p.next() + tok := p.peek() + if tok == '>' { + isStatic = true + } else if tok == scanner.Ident { + abistr := p.get(scanner.Ident).String() + if !p.compilingRuntime { + if issueError { + p.errorf("ABI selector only permitted when compiling runtime, reference was to %q", name) + } + } else { + theabi, valid := obj.ParseABI(abistr) + if !valid { + if issueError { + p.errorf("malformed ABI selector %q in reference to %q", + abistr, name) + } + } else { + abi = theabi + } + } + } + p.get('>') + return isStatic, abi +} + // funcAddress parses an external function address. This is a // constrained form of the operand syntax that's always SB-based, // non-static, and has at most a simple integer offset: // -// [$|*]sym[+Int](SB) -func (p *Parser) funcAddress() (string, bool) { +// [$|*]sym[][+Int](SB) +func (p *Parser) funcAddress() (string, obj.ABI, bool) { switch p.peek() { case '$', '*': // Skip prefix. @@ -838,25 +887,32 @@ func (p *Parser) funcAddress() (string, bool) { tok := p.next() name := tok.String() if tok.ScanToken != scanner.Ident || p.atStartOfRegister(name) { - return "", false + return "", obj.ABI0, false + } + // Parse optional <> (indicates a static symbol) or + // (selecting text symbol with specific ABI). + noErrMsg := false + isStatic, abi := p.symRefAttrs(name, noErrMsg) + if isStatic { + return "", obj.ABI0, false // This function rejects static symbols. } tok = p.next() if tok.ScanToken == '+' { if p.next().ScanToken != scanner.Int { - return "", false + return "", obj.ABI0, false } tok = p.next() } if tok.ScanToken != '(' { - return "", false + return "", obj.ABI0, false } if reg := p.next(); reg.ScanToken != scanner.Ident || reg.String() != "SB" { - return "", false + return "", obj.ABI0, false } if p.next().ScanToken != ')' || p.peek() != scanner.EOF { - return "", false + return "", obj.ABI0, false } - return name, true + return name, abi, true } // registerIndirect parses the general form of a register indirection. diff --git a/src/cmd/asm/internal/asm/pseudo_test.go b/src/cmd/asm/internal/asm/pseudo_test.go index 100bef91cf..622ee25ce7 100644 --- a/src/cmd/asm/internal/asm/pseudo_test.go +++ b/src/cmd/asm/internal/asm/pseudo_test.go @@ -37,6 +37,7 @@ func TestErroneous(t *testing.T) { {"TEXT", "$0É:0, 0, $1", "expected end of operand, found É"}, // Issue #12467. {"TEXT", "$:0:(SB, 0, $1", "expected '(', found 0"}, // Issue 12468. {"TEXT", "@B(SB),0,$0", "expected '(', found B"}, // Issue 23580. + {"TEXT", "foo(SB),0", "ABI selector only permitted when compiling runtime, reference was to \"foo\""}, {"FUNCDATA", "", "expect two operands for FUNCDATA"}, {"FUNCDATA", "(SB ", "expect two operands for FUNCDATA"}, {"DATA", "", "expect two operands for DATA"}, diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index fd079a2ccd..01c963ac72 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -52,6 +52,7 @@ func main() { case "all", "ret": ctxt.Retpoline = true } + compilingRuntime := objabi.IsRuntimePackagePath(*flags.Importpath) ctxt.Bso = bufio.NewWriter(os.Stdout) defer ctxt.Bso.Flush() @@ -74,7 +75,7 @@ func main() { var failedFile string for _, f := range flag.Args() { lexer := lex.NewLexer(f) - parser := asm.NewParser(ctxt, architecture, lexer) + parser := asm.NewParser(ctxt, architecture, lexer, compilingRuntime) ctxt.DiagFunc = func(format string, args ...interface{}) { diag = true log.Printf(format, args...) diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index e4e4ce72fd..2fffe625cd 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -968,9 +968,10 @@ func readSymABIs(file, myimportpath string) { if len(parts) != 3 { log.Fatalf(`%s:%d: invalid symabi: syntax is "%s sym abi"`, file, lineNum, parts[0]) } - sym, abi := parts[1], parts[2] - if abi != "ABI0" { // Only supported external ABI right now - log.Fatalf(`%s:%d: invalid symabi: unknown abi "%s"`, file, lineNum, abi) + sym, abistr := parts[1], parts[2] + abi, valid := obj.ParseABI(abistr) + if !valid { + log.Fatalf(`%s:%d: invalid symabi: unknown abi "%s"`, file, lineNum, abistr) } // If the symbol is already prefixed with @@ -983,9 +984,9 @@ func readSymABIs(file, myimportpath string) { // Record for later. if parts[0] == "def" { - symabiDefs[sym] = obj.ABI0 + symabiDefs[sym] = abi } else { - symabiRefs[sym] = obj.ABI0 + symabiRefs[sym] = abi } default: log.Fatalf(`%s:%d: invalid symabi type "%s"`, file, lineNum, parts[0]) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ae85dbbe4e..ad4708138f 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -502,6 +502,20 @@ const ( ABICount ) +// ParseABI converts from a string representation in 'abistr' to the +// corresponding ABI value. Second return value is TRUE if the +// abi string is recognized, FALSE otherwise. +func ParseABI(abistr string) (ABI, bool) { + switch abistr { + default: + return ABI0, false + case "ABI0": + return ABI0, true + case "ABIInternal": + return ABIInternal, true + } +} + // Attribute is a set of symbol attributes. type Attribute uint32 diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go index a30ccf0564..21e28807a6 100644 --- a/src/cmd/internal/obj/util.go +++ b/src/cmd/internal/obj/util.go @@ -210,13 +210,30 @@ func (ctxt *Link) CanReuseProgs() bool { return ctxt.Debugasm == 0 } +// Dconv accepts an argument 'a' within a prog 'p' and returns a string +// with a formatted version of the argument. func Dconv(p *Prog, a *Addr) string { buf := new(bytes.Buffer) - WriteDconv(buf, p, a) + writeDconv(buf, p, a, false) return buf.String() } +// DconvDconvWithABIDetail accepts an argument 'a' within a prog 'p' +// and returns a string with a formatted version of the argument, in +// which text symbols are rendered with explicit ABI selectors. +func DconvWithABIDetail(p *Prog, a *Addr) string { + buf := new(bytes.Buffer) + writeDconv(buf, p, a, true) + return buf.String() +} + +// WriteDconv accepts an argument 'a' within a prog 'p' +// and writes a formatted version of the arg to the writer. func WriteDconv(w io.Writer, p *Prog, a *Addr) { + writeDconv(w, p, a, false) +} + +func writeDconv(w io.Writer, p *Prog, a *Addr, abiDetail bool) { switch a.Type { default: fmt.Fprintf(w, "type=%d", a.Type) @@ -250,7 +267,7 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { case TYPE_BRANCH: if a.Sym != nil { - fmt.Fprintf(w, "%s(SB)", a.Sym.Name) + fmt.Fprintf(w, "%s%s(SB)", a.Sym.Name, abiDecorate(a, abiDetail)) } else if a.Target() != nil { fmt.Fprint(w, a.Target().Pc) } else { @@ -259,7 +276,7 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { case TYPE_INDIR: io.WriteString(w, "*") - a.WriteNameTo(w) + a.writeNameTo(w, abiDetail) case TYPE_MEM: a.WriteNameTo(w) @@ -299,7 +316,7 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { case TYPE_ADDR: io.WriteString(w, "$") - a.WriteNameTo(w) + a.writeNameTo(w, abiDetail) case TYPE_SHIFT: v := int(a.Offset) @@ -335,6 +352,11 @@ func WriteDconv(w io.Writer, p *Prog, a *Addr) { } func (a *Addr) WriteNameTo(w io.Writer) { + a.writeNameTo(w, false) +} + +func (a *Addr) writeNameTo(w io.Writer, abiDetail bool) { + switch a.Name { default: fmt.Fprintf(w, "name=%d", a.Name) @@ -356,7 +378,7 @@ func (a *Addr) WriteNameTo(w io.Writer) { reg = Rconv(int(a.Reg)) } if a.Sym != nil { - fmt.Fprintf(w, "%s%s(%s)", a.Sym.Name, offConv(a.Offset), reg) + fmt.Fprintf(w, "%s%s%s(%s)", a.Sym.Name, abiDecorate(a, abiDetail), offConv(a.Offset), reg) } else { fmt.Fprintf(w, "%s(%s)", offConv(a.Offset), reg) } @@ -596,3 +618,10 @@ func Bool2int(b bool) int { } return i } + +func abiDecorate(a *Addr, abiDetail bool) string { + if !abiDetail || a.Sym == nil { + return "" + } + return fmt.Sprintf("<%s>", a.Sym.ABI()) +} diff --git a/src/cmd/internal/objabi/path.go b/src/cmd/internal/objabi/path.go index 2a42179a36..fd1c9981c6 100644 --- a/src/cmd/internal/objabi/path.go +++ b/src/cmd/internal/objabi/path.go @@ -39,3 +39,25 @@ func PathToPrefix(s string) string { return string(p) } + +// IsRuntimePackagePath examines 'pkgpath' and returns TRUE if it +// belongs to the collection of "runtime-related" packages, including +// "runtime" itself, "reflect", "syscall", and the +// "runtime/internal/*" packages. The compiler and/or assembler in +// some cases need to be aware of when they are building such a +// package, for example to enable features such as ABI selectors in +// assembly sources. +func IsRuntimePackagePath(pkgpath string) bool { + rval := false + switch pkgpath { + case "runtime": + rval = true + case "reflect": + rval = true + case "syscall": + rval = true + default: + rval = strings.HasPrefix(pkgpath, "runtime/internal") + } + return rval +} -- cgit v1.3 From c9c64886ef041b096d7f93c4e7d2ef5faf87ad43 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 19 Oct 2020 10:40:24 -0400 Subject: cmd/internal/obj: reject too large symbols We never supported symbol larger than 2GB (issue #9862), so the object file uses 32-bit for symbol sizes. Check and reject too large symbol before truncating its size. Fixes #42054. Change-Id: I0d1d585ebdba9556f2fd3a97043bd4296d5cc9e4 Reviewed-on: https://go-review.googlesource.com/c/go/+/263641 Trust: Cherry Zhang Trust: Cuong Manh Le Run-TryBot: Cherry Zhang Reviewed-by: Cuong Manh Le TryBot-Result: Go Bot --- src/cmd/internal/obj/objfile.go | 7 +++++++ src/cmd/internal/obj/objfile_test.go | 36 ++++++++++++++++++++++++++++++++++++ test/fixedbugs/issue4348.go | 4 +++- 3 files changed, 46 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index a08de891d3..a24a7b878f 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -261,6 +261,10 @@ func (w *writer) StringTable() { } } +// cutoff is the maximum data section size permitted by the linker +// (see issue #9862). +const cutoff = int64(2e9) // 2 GB (or so; looks better in errors than 2^31) + func (w *writer) Sym(s *LSym) { abi := uint16(s.ABI()) if s.Static() { @@ -325,6 +329,9 @@ func (w *writer) Sym(s *LSym) { // don't bother setting align to 1. } } + if s.Size > cutoff { + w.ctxt.Diag("%s: symbol too large (%d bytes > %d bytes)", s.Name, s.Size, cutoff) + } var o goobj.Sym o.SetName(name, w.Writer) o.SetABI(abi) diff --git a/src/cmd/internal/obj/objfile_test.go b/src/cmd/internal/obj/objfile_test.go index 155701fa4e..146627b62b 100644 --- a/src/cmd/internal/obj/objfile_test.go +++ b/src/cmd/internal/obj/objfile_test.go @@ -5,9 +5,16 @@ package obj import ( + "bytes" "cmd/internal/goobj" "cmd/internal/sys" + "internal/testenv" + "io/ioutil" + "os" + "os/exec" + "path/filepath" "testing" + "unsafe" ) var dummyArch = LinkArch{Arch: sys.ArchAMD64} @@ -85,3 +92,32 @@ func TestContentHash(t *testing.T) { } } } + +func TestSymbolTooLarge(t *testing.T) { // Issue 42054 + testenv.MustHaveGoBuild(t) + if unsafe.Sizeof(uintptr(0)) < 8 { + t.Skip("skip on 32-bit architectures") + } + + tmpdir, err := ioutil.TempDir("", "TestSymbolTooLarge") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + src := filepath.Join(tmpdir, "p.go") + err = ioutil.WriteFile(src, []byte("package p; var x [1<<32]byte"), 0666) + if err != nil { + t.Fatalf("failed to write source file: %v\n", err) + } + obj := filepath.Join(tmpdir, "p.o") + cmd := exec.Command(testenv.GoToolPath(t), "tool", "compile", "-o", obj, src) + out, err := cmd.CombinedOutput() + if err == nil { + t.Fatalf("did not fail\noutput: %s", out) + } + const want = "symbol too large" + if !bytes.Contains(out, []byte(want)) { + t.Errorf("unexpected error message: want: %q, got: %s", want, out) + } +} diff --git a/test/fixedbugs/issue4348.go b/test/fixedbugs/issue4348.go index c59b6b8caa..8b1a56c1d5 100644 --- a/test/fixedbugs/issue4348.go +++ b/test/fixedbugs/issue4348.go @@ -1,4 +1,4 @@ -// compile +// skip // Copyright 2012 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style @@ -7,6 +7,8 @@ // Issue 4348. After switch to 64-bit ints the compiler generates // illegal instructions when using large array bounds or indexes. +// Skip. We reject symbols larger that 2GB (Issue #9862). + package main // 1<<32 on a 64-bit machine, 1 otherwise. -- cgit v1.3 From 6f45b39e4dbabf0b179a60ffacf434e55b2d5eab Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 19 May 2020 18:55:31 +1000 Subject: cmd/compile,cmd/internal/obj/riscv: move g register on riscv64 The original riscv64 port used the thread pointer (TP aka X4) register for the g pointer, however this register is also used when TLS support is required, resulting in a conflict (for example, when a signal is received we have no way of readily knowing if X4 contains a pointer to the TCB or a pointer to a g). In order to support cgo, free up the X4 register by moving g to X27. This unfortunately means that the X4 register is unused in non-cgo mode, however the alternative is to not support cgo on this platform. Update #36641 Change-Id: Idcaf3e8ccbe42972a1b8943aeefde7149d9c960a Reviewed-on: https://go-review.googlesource.com/c/go/+/263477 Trust: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 7 +- src/cmd/compile/internal/ssa/opGen.go | 406 ++++++++++++------------- src/cmd/internal/obj/riscv/cpu.go | 12 +- src/runtime/asm_riscv64.s | 12 +- 4 files changed, 219 insertions(+), 218 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index b06b86075e..fb944f3132 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -24,10 +24,11 @@ import ( // L = 64 bit int, used when the opcode starts with F const ( - riscv64REG_G = 4 + riscv64REG_G = 27 riscv64REG_CTXT = 20 riscv64REG_LR = 1 riscv64REG_SP = 2 + riscv64REG_TP = 4 riscv64REG_TMP = 31 riscv64REG_ZERO = 0 ) @@ -78,8 +79,8 @@ func init() { // Add general purpose registers to gpMask. switch r { - // ZERO, and TMP are not in any gp mask. - case riscv64REG_ZERO, riscv64REG_TMP: + // ZERO, TP and TMP are not in any gp mask. + case riscv64REG_ZERO, riscv64REG_TP, riscv64REG_TMP: case riscv64REG_G: gpgMask |= mask gpspsbgMask |= mask diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index eae30c79ba..e9c63cdddf 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -27039,11 +27039,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AADD, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27054,10 +27054,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AADDI, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27068,10 +27068,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AADDIW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27081,10 +27081,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ANEG, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27094,10 +27094,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ANEGW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27107,11 +27107,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASUB, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27121,11 +27121,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASUBW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27136,11 +27136,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMUL, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27151,11 +27151,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMULW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27166,11 +27166,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMULH, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27181,11 +27181,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMULHU, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27195,11 +27195,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ADIV, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27209,11 +27209,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ADIVU, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27223,11 +27223,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ADIVW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27237,11 +27237,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ADIVUW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27251,11 +27251,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AREM, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27265,11 +27265,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AREMU, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27279,11 +27279,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AREMW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27293,11 +27293,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AREMUW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27310,10 +27310,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27325,7 +27325,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27337,7 +27337,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27349,7 +27349,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27361,7 +27361,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27374,10 +27374,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVB, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27390,10 +27390,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVH, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27406,10 +27406,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVW, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27422,10 +27422,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27438,10 +27438,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVBU, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27454,10 +27454,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVHU, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27470,10 +27470,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVWU, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27486,8 +27486,8 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVB, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27500,8 +27500,8 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVH, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27514,8 +27514,8 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVW, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27528,8 +27528,8 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27542,7 +27542,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVB, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27555,7 +27555,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVH, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27568,7 +27568,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVW, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27581,7 +27581,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27591,11 +27591,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASLL, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27605,11 +27605,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASRA, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27619,11 +27619,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASRL, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27634,10 +27634,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASLLI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27648,10 +27648,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASRAI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27662,10 +27662,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASRLI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27676,11 +27676,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AXOR, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27691,10 +27691,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AXORI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27705,11 +27705,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AOR, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27720,10 +27720,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AORI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27734,11 +27734,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.AAND, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27749,10 +27749,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AANDI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27762,10 +27762,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ANOT, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27775,10 +27775,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASEQZ, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27788,10 +27788,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASNEZ, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27801,11 +27801,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASLT, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27816,10 +27816,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASLTI, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27829,11 +27829,11 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASLTU, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27844,10 +27844,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.ASLTIU, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27857,10 +27857,10 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOV, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27870,7 +27870,7 @@ var opcodeTable = [...]opInfo{ argLen: 1, call: true, reg: regInfo{ - clobbers: 9223372035781033980, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + clobbers: 9223372035781033972, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, }, { @@ -27881,9 +27881,9 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {1, 524288}, // X20 - {0, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, - clobbers: 9223372035781033980, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + clobbers: 9223372035781033972, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, }, { @@ -27893,9 +27893,9 @@ var opcodeTable = [...]opInfo{ call: true, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, - clobbers: 9223372035781033980, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + clobbers: 9223372035781033972, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, }, { @@ -27906,7 +27906,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {0, 16}, // X5 - {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, clobbers: 16, // X5 }, @@ -27921,7 +27921,7 @@ var opcodeTable = [...]opInfo{ inputs: []inputInfo{ {0, 16}, // X5 {1, 32}, // X6 - {2, 1073741748}, // X3 X5 X6 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {2, 1006632884}, // X3 X5 X6 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, clobbers: 112, // X5 X6 X7 }, @@ -27932,10 +27932,10 @@ var opcodeTable = [...]opInfo{ faultOnNilArg0: true, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27945,10 +27945,10 @@ var opcodeTable = [...]opInfo{ faultOnNilArg0: true, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27958,10 +27958,10 @@ var opcodeTable = [...]opInfo{ faultOnNilArg0: true, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -27972,8 +27972,8 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27984,8 +27984,8 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -27996,8 +27996,8 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, }, }, @@ -28009,11 +28009,11 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517630}, // SP X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28025,11 +28025,11 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517630}, // SP X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28042,11 +28042,11 @@ var opcodeTable = [...]opInfo{ unsafePoint: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517630}, // SP X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28059,11 +28059,11 @@ var opcodeTable = [...]opInfo{ unsafePoint: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517630}, // SP X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28076,12 +28076,12 @@ var opcodeTable = [...]opInfo{ unsafePoint: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {2, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517630}, // SP X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {2, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28094,12 +28094,12 @@ var opcodeTable = [...]opInfo{ unsafePoint: true, reg: regInfo{ inputs: []inputInfo{ - {1, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {2, 1073741820}, // X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 - {0, 9223372037928517630}, // SP X3 g X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {1, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {2, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 + {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 SB }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28110,7 +28110,7 @@ var opcodeTable = [...]opInfo{ faultOnNilArg0: true, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741814}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632950}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28129,7 +28129,7 @@ var opcodeTable = [...]opInfo{ rematerializeable: true, reg: regInfo{ outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28139,7 +28139,7 @@ var opcodeTable = [...]opInfo{ rematerializeable: true, reg: regInfo{ outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28283,7 +28283,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AFMVSX, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28296,7 +28296,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AFCVTSW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28309,7 +28309,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AFCVTSL, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28325,7 +28325,7 @@ var opcodeTable = [...]opInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28338,7 +28338,7 @@ var opcodeTable = [...]opInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28351,7 +28351,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVF, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28367,7 +28367,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVF, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, }, @@ -28383,7 +28383,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28398,7 +28398,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28412,7 +28412,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28426,7 +28426,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28520,7 +28520,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AFMVDX, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28533,7 +28533,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AFCVTDW, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28546,7 +28546,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AFCVTDL, reg: regInfo{ inputs: []inputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28562,7 +28562,7 @@ var opcodeTable = [...]opInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28575,7 +28575,7 @@ var opcodeTable = [...]opInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28614,7 +28614,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVD, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB }, outputs: []outputInfo{ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 @@ -28630,7 +28630,7 @@ var opcodeTable = [...]opInfo{ asm: riscv.AMOVD, reg: regInfo{ inputs: []inputInfo{ - {0, 9223372037928517622}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 SB + {0, 9223372037861408758}, // SP X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 SB {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, }, @@ -28646,7 +28646,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28661,7 +28661,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28675,7 +28675,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -28689,7 +28689,7 @@ var opcodeTable = [...]opInfo{ {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, outputs: []outputInfo{ - {0, 1073741812}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 + {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 }, }, }, @@ -35931,7 +35931,7 @@ var registersRISCV64 = [...]Register{ {0, riscv.REG_X0, -1, "X0"}, {1, riscv.REGSP, -1, "SP"}, {2, riscv.REG_X3, 0, "X3"}, - {3, riscv.REGG, -1, "g"}, + {3, riscv.REG_X4, -1, "X4"}, {4, riscv.REG_X5, 1, "X5"}, {5, riscv.REG_X6, 2, "X6"}, {6, riscv.REG_X7, 3, "X7"}, @@ -35954,10 +35954,10 @@ var registersRISCV64 = [...]Register{ {23, riscv.REG_X24, 20, "X24"}, {24, riscv.REG_X25, 21, "X25"}, {25, riscv.REG_X26, 22, "X26"}, - {26, riscv.REG_X27, 23, "X27"}, - {27, riscv.REG_X28, 24, "X28"}, - {28, riscv.REG_X29, 25, "X29"}, - {29, riscv.REG_X30, 26, "X30"}, + {26, riscv.REGG, -1, "g"}, + {27, riscv.REG_X28, 23, "X28"}, + {28, riscv.REG_X29, 24, "X29"}, + {29, riscv.REG_X30, 25, "X30"}, {30, riscv.REG_X31, -1, "X31"}, {31, riscv.REG_F0, -1, "F0"}, {32, riscv.REG_F1, -1, "F1"}, @@ -35993,7 +35993,7 @@ var registersRISCV64 = [...]Register{ {62, riscv.REG_F31, -1, "F31"}, {63, 0, -1, "SB"}, } -var gpRegMaskRISCV64 = regMask(1073741812) +var gpRegMaskRISCV64 = regMask(1006632948) var fpRegMaskRISCV64 = regMask(9223372034707292160) var specialRegMaskRISCV64 = regMask(0) var framepointerRegRISCV64 = int8(-1) diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go index 482f9e0b6d..b1324b62a0 100644 --- a/src/cmd/internal/obj/riscv/cpu.go +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -109,7 +109,7 @@ const ( REG_RA = REG_X1 // aka REG_LR REG_SP = REG_X2 REG_GP = REG_X3 // aka REG_SB - REG_TP = REG_X4 // aka REG_G + REG_TP = REG_X4 REG_T0 = REG_X5 REG_T1 = REG_X6 REG_T2 = REG_X7 @@ -132,17 +132,17 @@ const ( REG_S8 = REG_X24 REG_S9 = REG_X25 REG_S10 = REG_X26 - REG_S11 = REG_X27 + REG_S11 = REG_X27 // aka REG_G REG_T3 = REG_X28 REG_T4 = REG_X29 REG_T5 = REG_X30 REG_T6 = REG_X31 // aka REG_TMP // Go runtime register names. - REG_G = REG_TP // G pointer. - REG_CTXT = REG_S4 // Context for closures. - REG_LR = REG_RA // Link register. - REG_TMP = REG_T6 // Reserved for assembler use. + REG_G = REG_S11 // G pointer. + REG_CTXT = REG_S4 // Context for closures. + REG_LR = REG_RA // Link register. + REG_TMP = REG_T6 // Reserved for assembler use. // ABI names for floating point registers. REG_FT0 = REG_F0 diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index 4084ced7f8..a136085084 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -519,12 +519,12 @@ flush: MOV T1, 16(X2) // Also second argument to wbBufFlush // TODO: Optimise - // R3 is g. - // R4 already saved (T0) - // R5 already saved (T1) - // R9 already saved (A0) - // R10 already saved (A1) - // R30 is tmp register. + // X5 already saved (T0) + // X6 already saved (T1) + // X10 already saved (A0) + // X11 already saved (A1) + // X27 is g. + // X31 is tmp register. MOV X0, 24(X2) MOV X1, 32(X2) MOV X2, 40(X2) -- cgit v1.3 From 2ad44158af373f68c1aef528738a5baade77d316 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Thu, 22 Oct 2020 10:05:44 -0400 Subject: cmd/internal/obj: use correct symbol size for Hashed64 classification Use sym.Size, instead of len(sym.P), to decide whether a content-addressable symbol is "short" and hashed as Hashed64. So we don't dedup a small symbol with a gigantic almost-zero symbol. Fixes #42140. Change-Id: Ic65869e1eaf51947517b3ece49c8b0be1b94bb75 Reviewed-on: https://go-review.googlesource.com/c/go/+/264337 Trust: Cherry Zhang Trust: Cuong Manh Le Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Cuong Manh Le Reviewed-by: Than McIntosh --- src/cmd/internal/obj/sym.go | 2 +- src/cmd/link/link_test.go | 53 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index 0182773f8e..4515bdd0d3 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -205,7 +205,7 @@ func (ctxt *Link) NumberSyms() { // if Pkgpath is unknown, cannot hash symbols with relocations, as it // may reference named symbols whose names are not fully expanded. if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) { - if len(s.P) <= 8 && len(s.R) == 0 && !strings.HasPrefix(s.Name, "type.") { + if s.Size <= 8 && len(s.R) == 0 && !strings.HasPrefix(s.Name, "type.") { // We can use short hash only for symbols without relocations. // Don't use short hash for type symbols, as they need special handling. s.PkgIdx = goobj.PkgIdxHashed64 diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go index 968da4837d..204410e976 100644 --- a/src/cmd/link/link_test.go +++ b/src/cmd/link/link_test.go @@ -820,3 +820,56 @@ func TestReadOnly(t *testing.T) { t.Errorf("running test program did not fail. output:\n%s", out) } } + +const testIssue38554Src = ` +package main + +type T [10<<20]byte + +//go:noinline +func f() T { + return T{} // compiler will make a large stmp symbol, but not used. +} + +func main() { + x := f() + println(x[1]) +} +` + +func TestIssue38554(t *testing.T) { + testenv.MustHaveGoBuild(t) + + t.Parallel() + + tmpdir, err := ioutil.TempDir("", "TestIssue38554") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + src := filepath.Join(tmpdir, "x.go") + err = ioutil.WriteFile(src, []byte(testIssue38554Src), 0666) + if err != nil { + t.Fatalf("failed to write source file: %v", err) + } + exe := filepath.Join(tmpdir, "x.exe") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", exe, src) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("build failed: %v\n%s", err, out) + } + + fi, err := os.Stat(exe) + if err != nil { + t.Fatalf("failed to stat output file: %v", err) + } + + // The test program is not much different from a helloworld, which is + // typically a little over 1 MB. We allow 5 MB. If the bad stmp is live, + // it will be over 10 MB. + const want = 5 << 20 + if got := fi.Size(); got > want { + t.Errorf("binary too big: got %d, want < %d", got, want) + } +} -- cgit v1.3 From 8bde9b320e25b2d6edf96fa5e694046fea0c04c8 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 19 Jul 2020 00:32:02 -0400 Subject: cmd/compile: add //go:embed support This commit contains the compiler support for //go:embed lines. The go command passes to the compiler an "embed config" that maps literal patterns like *.txt to the set of files to embed. The compiler then lays out the content of those files as static data in the form of an embed.Files or string or []byte in the final object file. The test for this code is the end-to-end test hooking up the embed, cmd/compile, and cmd/go changes, in the next CL. For #41191. Change-Id: I916e57f8cc65871dc0044c13d3f90c252a3fe1bf Reviewed-on: https://go-review.googlesource.com/c/go/+/243944 Trust: Russ Cox Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/gc/embed.go | 273 +++++++++++++++++++++++++++++++ src/cmd/compile/internal/gc/main.go | 8 +- src/cmd/compile/internal/gc/noder.go | 145 ++++++++++++++-- src/cmd/compile/internal/gc/obj.go | 132 +++++++++++++-- src/cmd/compile/internal/gc/syntax.go | 86 +++++++++- src/cmd/compile/internal/gc/typecheck.go | 8 +- src/cmd/internal/obj/link.go | 29 +++- src/cmd/internal/obj/objfile.go | 49 +++++- 8 files changed, 682 insertions(+), 48 deletions(-) create mode 100644 src/cmd/compile/internal/gc/embed.go (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/gc/embed.go b/src/cmd/compile/internal/gc/embed.go new file mode 100644 index 0000000000..103949c1f9 --- /dev/null +++ b/src/cmd/compile/internal/gc/embed.go @@ -0,0 +1,273 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gc + +import ( + "cmd/compile/internal/syntax" + "cmd/compile/internal/types" + "cmd/internal/obj" + "encoding/json" + "io/ioutil" + "log" + "path" + "sort" + "strconv" + "strings" +) + +var embedlist []*Node + +var embedCfg struct { + Patterns map[string][]string + Files map[string]string +} + +func readEmbedCfg(file string) { + data, err := ioutil.ReadFile(file) + if err != nil { + log.Fatalf("-embedcfg: %v", err) + } + if err := json.Unmarshal(data, &embedCfg); err != nil { + log.Fatalf("%s: %v", file, err) + } + if embedCfg.Patterns == nil { + log.Fatalf("%s: invalid embedcfg: missing Patterns", file) + } + if embedCfg.Files == nil { + log.Fatalf("%s: invalid embedcfg: missing Files", file) + } +} + +const ( + embedUnknown = iota + embedBytes + embedString + embedFiles +) + +var numLocalEmbed int + +func varEmbed(p *noder, names []*Node, typ *Node, exprs []*Node, embeds []PragmaEmbed) (newExprs []*Node) { + haveEmbed := false + for _, decl := range p.file.DeclList { + imp, ok := decl.(*syntax.ImportDecl) + if !ok { + // imports always come first + break + } + path, _ := strconv.Unquote(imp.Path.Value) + if path == "embed" { + haveEmbed = true + break + } + } + + pos := embeds[0].Pos + if !haveEmbed { + p.yyerrorpos(pos, "invalid go:embed: missing import \"embed\"") + return exprs + } + if embedCfg.Patterns == nil { + p.yyerrorpos(pos, "invalid go:embed: build system did not supply embed configuration") + return exprs + } + if len(names) > 1 { + p.yyerrorpos(pos, "go:embed cannot apply to multiple vars") + return exprs + } + if len(exprs) > 0 { + p.yyerrorpos(pos, "go:embed cannot apply to var with initializer") + return exprs + } + if typ == nil { + // Should not happen, since len(exprs) == 0 now. + p.yyerrorpos(pos, "go:embed cannot apply to var without type") + return exprs + } + + kind := embedKindApprox(typ) + if kind == embedUnknown { + p.yyerrorpos(pos, "go:embed cannot apply to var of type %v", typ) + return exprs + } + + // Build list of files to store. + have := make(map[string]bool) + var list []string + for _, e := range embeds { + for _, pattern := range e.Patterns { + files, ok := embedCfg.Patterns[pattern] + if !ok { + p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map pattern: %s", pattern) + } + for _, file := range files { + if embedCfg.Files[file] == "" { + p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map file: %s", file) + continue + } + if !have[file] { + have[file] = true + list = append(list, file) + } + if kind == embedFiles { + for dir := path.Dir(file); dir != "." && !have[dir]; dir = path.Dir(dir) { + have[dir] = true + list = append(list, dir+"/") + } + } + } + } + } + sort.Slice(list, func(i, j int) bool { + return embedFileLess(list[i], list[j]) + }) + + if kind == embedString || kind == embedBytes { + if len(list) > 1 { + p.yyerrorpos(pos, "invalid go:embed: multiple files for type %v", typ) + return exprs + } + } + + v := names[0] + if dclcontext != PEXTERN { + numLocalEmbed++ + v = newnamel(v.Pos, lookupN("embed.", numLocalEmbed)) + v.Sym.Def = asTypesNode(v) + v.Name.Param.Ntype = typ + v.SetClass(PEXTERN) + externdcl = append(externdcl, v) + exprs = []*Node{v} + } + + v.Name.Param.SetEmbedFiles(list) + embedlist = append(embedlist, v) + return exprs +} + +// embedKindApprox determines the kind of embedding variable, approximately. +// The match is approximate because we haven't done scope resolution yet and +// can't tell whether "string" and "byte" really mean "string" and "byte". +// The result must be confirmed later, after type checking, using embedKind. +func embedKindApprox(typ *Node) int { + if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) { + return embedFiles + } + // These are not guaranteed to match only string and []byte - + // maybe the local package has redefined one of those words. + // But it's the best we can do now during the noder. + // The stricter check happens later, in initEmbed calling embedKind. + if typ.Sym != nil && typ.Sym.Name == "string" && typ.Sym.Pkg == localpkg { + return embedString + } + if typ.Op == OTARRAY && typ.Left == nil && typ.Right.Sym != nil && typ.Right.Sym.Name == "byte" && typ.Right.Sym.Pkg == localpkg { + return embedBytes + } + return embedUnknown +} + +// embedKind determines the kind of embedding variable. +func embedKind(typ *types.Type) int { + if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) { + return embedFiles + } + if typ == types.Types[TSTRING] { + return embedString + } + if typ.Sym == nil && typ.IsSlice() && typ.Elem() == types.Bytetype { + return embedBytes + } + return embedUnknown +} + +func embedFileNameSplit(name string) (dir, elem string, isDir bool) { + if name[len(name)-1] == '/' { + isDir = true + name = name[:len(name)-1] + } + i := len(name) - 1 + for i >= 0 && name[i] != '/' { + i-- + } + if i < 0 { + return ".", name, isDir + } + return name[:i], name[i+1:], isDir +} + +// embedFileLess implements the sort order for a list of embedded files. +// See the comment inside ../../../../embed/embed.go's Files struct for rationale. +func embedFileLess(x, y string) bool { + xdir, xelem, _ := embedFileNameSplit(x) + ydir, yelem, _ := embedFileNameSplit(y) + return xdir < ydir || xdir == ydir && xelem < yelem +} + +func dumpembeds() { + for _, v := range embedlist { + initEmbed(v) + } +} + +// initEmbed emits the init data for a //go:embed variable, +// which is either a string, a []byte, or an embed.FS. +func initEmbed(v *Node) { + files := v.Name.Param.EmbedFiles() + switch kind := embedKind(v.Type); kind { + case embedUnknown: + yyerrorl(v.Pos, "go:embed cannot apply to var of type %v", v.Type) + + case embedString, embedBytes: + file := files[0] + fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], kind == embedString, nil) + if err != nil { + yyerrorl(v.Pos, "embed %s: %v", file, err) + } + sym := v.Sym.Linksym() + off := 0 + off = dsymptr(sym, off, fsym, 0) // data string + off = duintptr(sym, off, uint64(size)) // len + if kind == embedBytes { + duintptr(sym, off, uint64(size)) // cap for slice + } + + case embedFiles: + slicedata := Ctxt.Lookup(`"".` + v.Sym.Name + `.files`) + off := 0 + // []files pointed at by Files + off = dsymptr(slicedata, off, slicedata, 3*Widthptr) // []file, pointing just past slice + off = duintptr(slicedata, off, uint64(len(files))) + off = duintptr(slicedata, off, uint64(len(files))) + + // embed/embed.go type file is: + // name string + // data string + // hash [16]byte + // Emit one of these per file in the set. + const hashSize = 16 + hash := make([]byte, hashSize) + for _, file := range files { + off = dsymptr(slicedata, off, stringsym(v.Pos, file), 0) // file string + off = duintptr(slicedata, off, uint64(len(file))) + if strings.HasSuffix(file, "/") { + // entry for directory - no data + off = duintptr(slicedata, off, 0) + off = duintptr(slicedata, off, 0) + off += hashSize + } else { + fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], true, hash) + if err != nil { + yyerrorl(v.Pos, "embed %s: %v", file, err) + } + off = dsymptr(slicedata, off, fsym, 0) // data string + off = duintptr(slicedata, off, uint64(size)) + off = int(slicedata.WriteBytes(Ctxt, int64(off), hash)) + } + } + ggloblsym(slicedata, int32(off), obj.RODATA|obj.LOCAL) + sym := v.Sym.Linksym() + dsymptr(sym, 0, slicedata, 0) + } +} diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 949755a0e2..4b401f2aa4 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -34,8 +34,6 @@ import ( "strings" ) -var imported_unsafe bool - var ( buildid string spectre string @@ -240,6 +238,7 @@ func Main(archInit func(*Arch)) { flag.BoolVar(&flagDWARF, "dwarf", !Wasm, "generate DWARF symbols") flag.BoolVar(&Ctxt.Flag_locationlists, "dwarflocationlists", true, "add location lists to DWARF in optimized mode") flag.IntVar(&genDwarfInline, "gendwarfinl", 2, "generate DWARF inline info records") + objabi.Flagfn1("embedcfg", "read go:embed configuration from `file`", readEmbedCfg) objabi.Flagfn1("importmap", "add `definition` of the form source=actual to import map", addImportMap) objabi.Flagfn1("importcfg", "read import configuration from `file`", readImportCfg) flag.StringVar(&flag_installsuffix, "installsuffix", "", "set pkg directory `suffix`") @@ -597,7 +596,7 @@ func Main(archInit func(*Arch)) { timings.Start("fe", "typecheck", "top1") for i := 0; i < len(xtop); i++ { n := xtop[i] - if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias) { + if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias()) { xtop[i] = typecheck(n, ctxStmt) } } @@ -609,7 +608,7 @@ func Main(archInit func(*Arch)) { timings.Start("fe", "typecheck", "top2") for i := 0; i < len(xtop); i++ { n := xtop[i] - if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias { + if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias() { xtop[i] = typecheck(n, ctxStmt) } } @@ -1177,7 +1176,6 @@ func importfile(f *Val) *types.Pkg { } if path_ == "unsafe" { - imported_unsafe = true return unsafepkg } diff --git a/src/cmd/compile/internal/gc/noder.go b/src/cmd/compile/internal/gc/noder.go index 85e710086a..67d24ef0bc 100644 --- a/src/cmd/compile/internal/gc/noder.go +++ b/src/cmd/compile/internal/gc/noder.go @@ -11,6 +11,7 @@ import ( "runtime" "strconv" "strings" + "unicode" "unicode/utf8" "cmd/compile/internal/syntax" @@ -90,7 +91,11 @@ func (p *noder) makeSrcPosBase(b0 *syntax.PosBase) *src.PosBase { } else { // line directive base p0 := b0.Pos() - p1 := src.MakePos(p.makeSrcPosBase(p0.Base()), p0.Line(), p0.Col()) + p0b := p0.Base() + if p0b == b0 { + panic("infinite recursion in makeSrcPosBase") + } + p1 := src.MakePos(p.makeSrcPosBase(p0b), p0.Line(), p0.Col()) b1 = src.NewLinePragmaBase(p1, fn, fileh(fn), b0.Line(), b0.Col()) } p.basemap[b0] = b1 @@ -130,11 +135,13 @@ type noder struct { base *src.PosBase } - file *syntax.File - linknames []linkname - pragcgobuf [][]string - err chan syntax.Error - scope ScopeID + file *syntax.File + linknames []linkname + pragcgobuf [][]string + err chan syntax.Error + scope ScopeID + importedUnsafe bool + importedEmbed bool // scopeVars is a stack tracking the number of variables declared in the // current function at the moment each open scope was opened. @@ -236,7 +243,8 @@ type linkname struct { func (p *noder) node() { types.Block = 1 - imported_unsafe = false + p.importedUnsafe = false + p.importedEmbed = false p.setlineno(p.file.PkgName) mkpackage(p.file.PkgName.Value) @@ -249,7 +257,7 @@ func (p *noder) node() { xtop = append(xtop, p.decls(p.file.DeclList)...) for _, n := range p.linknames { - if !imported_unsafe { + if !p.importedUnsafe { p.yyerrorpos(n.pos, "//go:linkname only allowed in Go files that import \"unsafe\"") continue } @@ -324,7 +332,6 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) { val := p.basicLit(imp.Path) ipkg := importfile(&val) - if ipkg == nil { if nerrors == 0 { Fatalf("phase error in import") @@ -332,6 +339,13 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) { return } + if ipkg == unsafepkg { + p.importedUnsafe = true + } + if ipkg.Path == "embed" { + p.importedEmbed = true + } + ipkg.Direct = true var my *types.Sym @@ -373,6 +387,20 @@ func (p *noder) varDecl(decl *syntax.VarDecl) []*Node { } if pragma, ok := decl.Pragma.(*Pragma); ok { + if len(pragma.Embeds) > 0 { + if !p.importedEmbed { + // This check can't be done when building the list pragma.Embeds + // because that list is created before the noder starts walking over the file, + // so at that point it hasn't seen the imports. + // We're left to check now, just before applying the //go:embed lines. + for _, e := range pragma.Embeds { + p.yyerrorpos(e.Pos, "//go:embed only allowed in Go files that import \"embed\"") + } + } else { + exprs = varEmbed(p, names, typ, exprs, pragma.Embeds) + } + pragma.Embeds = nil + } p.checkUnused(pragma) } @@ -455,17 +483,17 @@ func (p *noder) typeDecl(decl *syntax.TypeDecl) *Node { param := n.Name.Param param.Ntype = typ - param.Alias = decl.Alias + param.SetAlias(decl.Alias) if pragma, ok := decl.Pragma.(*Pragma); ok { if !decl.Alias { - param.Pragma = pragma.Flag & TypePragmas + param.SetPragma(pragma.Flag & TypePragmas) pragma.Flag &^= TypePragmas } p.checkUnused(pragma) } nod := p.nod(decl, ODCLTYPE, n, nil) - if param.Alias && !langSupported(1, 9, localpkg) { + if param.Alias() && !langSupported(1, 9, localpkg) { yyerrorl(nod.Pos, "type aliases only supported as of -lang=go1.9") } return nod @@ -1493,13 +1521,15 @@ var allowedStdPragmas = map[string]bool{ "go:cgo_import_dynamic": true, "go:cgo_ldflag": true, "go:cgo_dynamic_linker": true, + "go:embed": true, "go:generate": true, } // *Pragma is the value stored in a syntax.Pragma during parsing. type Pragma struct { - Flag PragmaFlag // collected bits - Pos []PragmaPos // position of each individual flag + Flag PragmaFlag // collected bits + Pos []PragmaPos // position of each individual flag + Embeds []PragmaEmbed } type PragmaPos struct { @@ -1507,12 +1537,22 @@ type PragmaPos struct { Pos syntax.Pos } +type PragmaEmbed struct { + Pos syntax.Pos + Patterns []string +} + func (p *noder) checkUnused(pragma *Pragma) { for _, pos := range pragma.Pos { if pos.Flag&pragma.Flag != 0 { p.yyerrorpos(pos.Pos, "misplaced compiler directive") } } + if len(pragma.Embeds) > 0 { + for _, e := range pragma.Embeds { + p.yyerrorpos(e.Pos, "misplaced go:embed directive") + } + } } func (p *noder) checkUnusedDuringParse(pragma *Pragma) { @@ -1521,6 +1561,11 @@ func (p *noder) checkUnusedDuringParse(pragma *Pragma) { p.error(syntax.Error{Pos: pos.Pos, Msg: "misplaced compiler directive"}) } } + if len(pragma.Embeds) > 0 { + for _, e := range pragma.Embeds { + p.error(syntax.Error{Pos: e.Pos, Msg: "misplaced go:embed directive"}) + } + } } // pragma is called concurrently if files are parsed concurrently. @@ -1565,6 +1610,17 @@ func (p *noder) pragma(pos syntax.Pos, blankLine bool, text string, old syntax.P } p.linknames = append(p.linknames, linkname{pos, f[1], target}) + case text == "go:embed", strings.HasPrefix(text, "go:embed "): + args, err := parseGoEmbed(text[len("go:embed"):]) + if err != nil { + p.error(syntax.Error{Pos: pos, Msg: err.Error()}) + } + if len(args) == 0 { + p.error(syntax.Error{Pos: pos, Msg: "usage: //go:embed pattern..."}) + break + } + pragma.Embeds = append(pragma.Embeds, PragmaEmbed{pos, args}) + case strings.HasPrefix(text, "go:cgo_import_dynamic "): // This is permitted for general use because Solaris // code relies on it in golang.org/x/sys/unix and others. @@ -1637,3 +1693,64 @@ func mkname(sym *types.Sym) *Node { } return n } + +// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns. +// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings. +// go/build/read.go also processes these strings and contains similar logic. +func parseGoEmbed(args string) ([]string, error) { + var list []string + for args = strings.TrimSpace(args); args != ""; args = strings.TrimSpace(args) { + var path string + Switch: + switch args[0] { + default: + i := len(args) + for j, c := range args { + if unicode.IsSpace(c) { + i = j + break + } + } + path = args[:i] + args = args[i:] + + case '`': + i := strings.Index(args[1:], "`") + if i < 0 { + return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) + } + path = args[1 : 1+i] + args = args[1+i+1:] + + case '"': + i := 1 + for ; i < len(args); i++ { + if args[i] == '\\' { + i++ + continue + } + if args[i] == '"' { + q, err := strconv.Unquote(args[:i+1]) + if err != nil { + return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1]) + } + path = q + args = args[i+1:] + break Switch + } + } + if i >= len(args) { + return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) + } + } + + if args != "" { + r, _ := utf8.DecodeRuneInString(args) + if !unicode.IsSpace(r) { + return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args) + } + } + list = append(list, path) + } + return list, nil +} diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index f6557e2d15..226eb45252 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -14,6 +14,8 @@ import ( "encoding/json" "fmt" "io" + "io/ioutil" + "os" "sort" "strconv" ) @@ -125,6 +127,7 @@ func dumpdata() { itabsLen := len(itabs) dumpimportstrings() dumpbasictypes() + dumpembeds() // Calls to dumpsignats can generate functions, // like method wrappers and hash and equality routines. @@ -358,28 +361,31 @@ func dbvec(s *obj.LSym, off int, bv bvec) int { return off } +const ( + stringSymPrefix = "go.string." + stringSymPattern = ".gostring.%d.%x" +) + +// stringsym returns a symbol containing the string s. +// The symbol contains the string data, not a string header. func stringsym(pos src.XPos, s string) (data *obj.LSym) { var symname string if len(s) > 100 { // Huge strings are hashed to avoid long names in object files. // Indulge in some paranoia by writing the length of s, too, // as protection against length extension attacks. + // Same pattern is known to fileStringSym below. h := sha256.New() io.WriteString(h, s) - symname = fmt.Sprintf(".gostring.%d.%x", len(s), h.Sum(nil)) + symname = fmt.Sprintf(stringSymPattern, len(s), h.Sum(nil)) } else { // Small strings get named directly by their contents. symname = strconv.Quote(s) } - const prefix = "go.string." - symdataname := prefix + symname - - symdata := Ctxt.Lookup(symdataname) - + symdata := Ctxt.Lookup(stringSymPrefix + symname) if !symdata.OnList() { - // string data - off := dsname(symdata, 0, s, pos, "string") + off := dstringdata(symdata, 0, s, pos, "string") ggloblsym(symdata, int32(off), obj.DUPOK|obj.RODATA|obj.LOCAL) symdata.Set(obj.AttrContentAddressable, true) } @@ -387,26 +393,122 @@ func stringsym(pos src.XPos, s string) (data *obj.LSym) { return symdata } -var slicebytes_gen int +// fileStringSym returns a symbol for the contents and the size of file. +// If readonly is true, the symbol shares storage with any literal string +// or other file with the same content and is placed in a read-only section. +// If readonly is false, the symbol is a read-write copy separate from any other, +// for use as the backing store of a []byte. +// The content hash of file is copied into hash. (If hash is nil, nothing is copied.) +// The returned symbol contains the data itself, not a string header. +func fileStringSym(pos src.XPos, file string, readonly bool, hash []byte) (*obj.LSym, int64, error) { + f, err := os.Open(file) + if err != nil { + return nil, 0, err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return nil, 0, err + } + if !info.Mode().IsRegular() { + return nil, 0, fmt.Errorf("not a regular file") + } + size := info.Size() + if size <= 1*1024 { + data, err := ioutil.ReadAll(f) + if err != nil { + return nil, 0, err + } + if int64(len(data)) != size { + return nil, 0, fmt.Errorf("file changed between reads") + } + var sym *obj.LSym + if readonly { + sym = stringsym(pos, string(data)) + } else { + sym = slicedata(pos, string(data)).Sym.Linksym() + } + if len(hash) > 0 { + sum := sha256.Sum256(data) + copy(hash, sum[:]) + } + return sym, size, nil + } + if size > 2e9 { + // ggloblsym takes an int32, + // and probably the rest of the toolchain + // can't handle such big symbols either. + // See golang.org/issue/9862. + return nil, 0, fmt.Errorf("file too large") + } -func slicebytes(nam *Node, s string) { - slicebytes_gen++ - symname := fmt.Sprintf(".gobytes.%d", slicebytes_gen) + // File is too big to read and keep in memory. + // Compute hash if needed for read-only content hashing or if the caller wants it. + var sum []byte + if readonly || len(hash) > 0 { + h := sha256.New() + n, err := io.Copy(h, f) + if err != nil { + return nil, 0, err + } + if n != size { + return nil, 0, fmt.Errorf("file changed between reads") + } + sum = h.Sum(nil) + copy(hash, sum) + } + + var symdata *obj.LSym + if readonly { + symname := fmt.Sprintf(stringSymPattern, size, sum) + symdata = Ctxt.Lookup(stringSymPrefix + symname) + if !symdata.OnList() { + info := symdata.NewFileInfo() + info.Name = file + info.Size = size + ggloblsym(symdata, int32(size), obj.DUPOK|obj.RODATA|obj.LOCAL) + // Note: AttrContentAddressable cannot be set here, + // because the content-addressable-handling code + // does not know about file symbols. + } + } else { + // Emit a zero-length data symbol + // and then fix up length and content to use file. + symdata = slicedata(pos, "").Sym.Linksym() + symdata.Size = size + symdata.Type = objabi.SNOPTRDATA + info := symdata.NewFileInfo() + info.Name = file + info.Size = size + } + + return symdata, size, nil +} + +var slicedataGen int + +func slicedata(pos src.XPos, s string) *Node { + slicedataGen++ + symname := fmt.Sprintf(".gobytes.%d", slicedataGen) sym := localpkg.Lookup(symname) symnode := newname(sym) sym.Def = asTypesNode(symnode) lsym := sym.Linksym() - off := dsname(lsym, 0, s, nam.Pos, "slice") + off := dstringdata(lsym, 0, s, pos, "slice") ggloblsym(lsym, int32(off), obj.NOPTR|obj.LOCAL) + return symnode +} + +func slicebytes(nam *Node, s string) { if nam.Op != ONAME { Fatalf("slicebytes %v", nam) } - slicesym(nam, symnode, int64(len(s))) + slicesym(nam, slicedata(nam.Pos, s), int64(len(s))) } -func dsname(s *obj.LSym, off int, t string, pos src.XPos, what string) int { +func dstringdata(s *obj.LSym, off int, t string, pos src.XPos, what string) int { // Objects that are too large will cause the data section to overflow right away, // causing a cryptic error message by the linker. Check for oversize objects here // and provide a useful error message instead. diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go index e3b4963977..83b5db834f 100644 --- a/src/cmd/compile/internal/gc/syntax.go +++ b/src/cmd/compile/internal/gc/syntax.go @@ -480,11 +480,87 @@ type Param struct { Innermost *Node Outer *Node - // OTYPE - // - // TODO: Should Func pragmas also be stored on the Name? - Pragma PragmaFlag - Alias bool // node is alias for Ntype (only used when type-checking ODCLTYPE) + // OTYPE & ONAME //go:embed info, + // sharing storage to reduce gc.Param size. + // Extra is nil, or else *Extra is a *paramType or an *embedFileList. + Extra *interface{} +} + +type paramType struct { + flag PragmaFlag + alias bool +} + +type embedFileList []string + +// Pragma returns the PragmaFlag for p, which must be for an OTYPE. +func (p *Param) Pragma() PragmaFlag { + if p.Extra == nil { + return 0 + } + return (*p.Extra).(*paramType).flag +} + +// SetPragma sets the PragmaFlag for p, which must be for an OTYPE. +func (p *Param) SetPragma(flag PragmaFlag) { + if p.Extra == nil { + if flag == 0 { + return + } + p.Extra = new(interface{}) + *p.Extra = ¶mType{flag: flag} + return + } + (*p.Extra).(*paramType).flag = flag +} + +// Alias reports whether p, which must be for an OTYPE, is a type alias. +func (p *Param) Alias() bool { + if p.Extra == nil { + return false + } + t, ok := (*p.Extra).(*paramType) + if !ok { + return false + } + return t.alias +} + +// SetAlias sets whether p, which must be for an OTYPE, is a type alias. +func (p *Param) SetAlias(alias bool) { + if p.Extra == nil { + if !alias { + return + } + p.Extra = new(interface{}) + *p.Extra = ¶mType{alias: alias} + return + } + (*p.Extra).(*paramType).alias = alias +} + +// EmbedFiles returns the list of embedded files for p, +// which must be for an ONAME var. +func (p *Param) EmbedFiles() []string { + if p.Extra == nil { + return nil + } + return *(*p.Extra).(*embedFileList) +} + +// SetEmbedFiles sets the list of embedded files for p, +// which must be for an ONAME var. +func (p *Param) SetEmbedFiles(list []string) { + if p.Extra == nil { + if len(list) == 0 { + return + } + f := embedFileList(list) + p.Extra = new(interface{}) + *p.Extra = &f + return + } + *(*p.Extra).(*embedFileList) = list } // Functions diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go index a4b462da1d..ce817db446 100644 --- a/src/cmd/compile/internal/gc/typecheck.go +++ b/src/cmd/compile/internal/gc/typecheck.go @@ -257,12 +257,12 @@ func typecheck(n *Node, top int) (res *Node) { // are substituted. cycle := cycleFor(n) for _, n1 := range cycle { - if n1.Name != nil && !n1.Name.Param.Alias { + if n1.Name != nil && !n1.Name.Param.Alias() { // Cycle is ok. But if n is an alias type and doesn't // have a type yet, we have a recursive type declaration // with aliases that we can't handle properly yet. // Report an error rather than crashing later. - if n.Name != nil && n.Name.Param.Alias && n.Type == nil { + if n.Name != nil && n.Name.Param.Alias() && n.Type == nil { lineno = n.Pos Fatalf("cannot handle alias type declaration (issue #25838): %v", n) } @@ -3504,7 +3504,7 @@ func setUnderlying(t, underlying *types.Type) { } // Propagate go:notinheap pragma from the Name to the Type. - if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma&NotInHeap != 0 { + if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma()&NotInHeap != 0 { t.SetNotInHeap(true) } @@ -3676,7 +3676,7 @@ func typecheckdef(n *Node) { n.Name.Defn = typecheck(n.Name.Defn, ctxStmt) // fills in n.Type case OTYPE: - if p := n.Name.Param; p.Alias { + if p := n.Name.Param; p.Alias() { // Type alias declaration: Simply use the rhs type - no need // to create a new type. // If we have a syntax error, p.Ntype may be nil. diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ad4708138f..2037beca72 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -401,7 +401,7 @@ type LSym struct { P []byte R []Reloc - Extra *interface{} // *FuncInfo if present + Extra *interface{} // *FuncInfo or *FileInfo, if present Pkg string PkgIdx int32 @@ -454,6 +454,33 @@ func (s *LSym) Func() *FuncInfo { return f } +// A FileInfo contains extra fields for SDATA symbols backed by files. +// (If LSym.Extra is a *FileInfo, LSym.P == nil.) +type FileInfo struct { + Name string // name of file to read into object file + Size int64 // length of file +} + +// NewFileInfo allocates and returns a FileInfo for LSym. +func (s *LSym) NewFileInfo() *FileInfo { + if s.Extra != nil { + log.Fatalf("invalid use of LSym - NewFileInfo with Extra of type %T", *s.Extra) + } + f := new(FileInfo) + s.Extra = new(interface{}) + *s.Extra = f + return f +} + +// File returns the *FileInfo associated with s, or else nil. +func (s *LSym) File() *FileInfo { + if s.Extra == nil { + return nil + } + f, _ := (*s.Extra).(*FileInfo) + return f +} + type InlMark struct { // When unwinding from an instruction in an inlined body, mark // where we should unwind to. diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go index a24a7b878f..bb58b4f0c2 100644 --- a/src/cmd/internal/obj/objfile.go +++ b/src/cmd/internal/obj/objfile.go @@ -16,6 +16,8 @@ import ( "encoding/binary" "fmt" "io" + "log" + "os" "path/filepath" "sort" "strings" @@ -147,14 +149,20 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { // Data indexes h.Offsets[goobj.BlkDataIdx] = w.Offset() - dataOff := uint32(0) + dataOff := int64(0) for _, list := range lists { for _, s := range list { - w.Uint32(dataOff) - dataOff += uint32(len(s.P)) + w.Uint32(uint32(dataOff)) + dataOff += int64(len(s.P)) + if file := s.File(); file != nil { + dataOff += int64(file.Size) + } } } - w.Uint32(dataOff) + if int64(uint32(dataOff)) != dataOff { + log.Fatalf("data too large") + } + w.Uint32(uint32(dataOff)) // Relocs h.Offsets[goobj.BlkReloc] = w.Offset() @@ -179,6 +187,9 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { for _, list := range lists { for _, s := range list { w.Bytes(s.P) + if file := s.File(); file != nil { + w.writeFile(ctxt, file) + } } } @@ -218,6 +229,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer) { type writer struct { *goobj.Writer + filebuf []byte ctxt *Link pkgpath string // the package import path (escaped), "" if unknown pkglist []string // list of packages referenced, indexed by ctxt.pkgIdx @@ -232,6 +244,35 @@ func (w *writer) init() { } } +func (w *writer) writeFile(ctxt *Link, file *FileInfo) { + f, err := os.Open(file.Name) + if err != nil { + ctxt.Diag("%v", err) + return + } + defer f.Close() + if w.filebuf == nil { + w.filebuf = make([]byte, 1024) + } + buf := w.filebuf + written := int64(0) + for { + n, err := f.Read(buf) + w.Bytes(buf[:n]) + written += int64(n) + if err == io.EOF { + break + } + if err != nil { + ctxt.Diag("%v", err) + return + } + } + if written != file.Size { + ctxt.Diag("copy %s: unexpected length %d != %d", file.Name, written, file.Size) + } +} + func (w *writer) StringTable() { w.AddString("") for _, p := range w.ctxt.Imports { -- cgit v1.3 From 4a6782562828779025d1423c9a04bc47ca9c3688 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sat, 24 Oct 2020 03:53:53 +1100 Subject: cmd/internal/obj/riscv: support additional register to register moves Add support for signed and unsigned register to register moves of various sizes. This makes it easier to handle zero and sign extension and will allow for further changes that improve the compiler optimisations for riscv64. While here, change the existing register to register moves from obj.Prog rewriting to instruction generation. Change-Id: Id21911019b76922367a134da13c3449a84a1fb08 Reviewed-on: https://go-review.googlesource.com/c/go/+/264657 Trust: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/riscvenc.s | 29 ++++++++------ src/cmd/internal/obj/riscv/obj.go | 56 ++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 26 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/riscvenc.s b/src/cmd/asm/internal/asm/testdata/riscvenc.s index 8d301f2dd5..e30a576473 100644 --- a/src/cmd/asm/internal/asm/testdata/riscvenc.s +++ b/src/cmd/asm/internal/asm/testdata/riscvenc.s @@ -297,6 +297,13 @@ start: MOVW X5, (X6) // 23205300 MOVW X5, 4(X6) // 23225300 + MOVB X5, X6 // 1393820313538343 + MOVH X5, X6 // 1393020313530343 + MOVW X5, X6 // 1b830200 + MOVBU X5, X6 // 13f3f20f + MOVHU X5, X6 // 1393020313530303 + MOVWU X5, X6 // 1393020213530302 + MOVF 4(X5), F0 // 07a04200 MOVF F0, 4(X5) // 27a20200 MOVF F0, F1 // d3000020 @@ -318,7 +325,7 @@ start: // These jumps can get printed as jumps to 2 because they go to the // second instruction in the function (the first instruction is an // invisible stack pointer adjustment). - JMP start // JMP 2 // 6ff01fc5 + JMP start // JMP 2 // 6ff09fc2 JMP (X5) // 67800200 JMP 4(X5) // 67804200 @@ -331,16 +338,16 @@ start: JMP asmtest(SB) // 970f0000 // Branch pseudo-instructions - BEQZ X5, start // BEQZ X5, 2 // e38a02c2 - BGEZ X5, start // BGEZ X5, 2 // e3d802c2 - BGT X5, X6, start // BGT X5, X6, 2 // e3c662c2 - BGTU X5, X6, start // BGTU X5, X6, 2 // e3e462c2 - BGTZ X5, start // BGTZ X5, 2 // e34250c2 - BLE X5, X6, start // BLE X5, X6, 2 // e3d062c2 - BLEU X5, X6, start // BLEU X5, X6, 2 // e3fe62c0 - BLEZ X5, start // BLEZ X5, 2 // e35c50c0 - BLTZ X5, start // BLTZ X5, 2 // e3ca02c0 - BNEZ X5, start // BNEZ X5, 2 // e39802c0 + BEQZ X5, start // BEQZ X5, 2 // e38602c0 + BGEZ X5, start // BGEZ X5, 2 // e3d402c0 + BGT X5, X6, start // BGT X5, X6, 2 // e3c262c0 + BGTU X5, X6, start // BGTU X5, X6, 2 // e3e062c0 + BGTZ X5, start // BGTZ X5, 2 // e34e50be + BLE X5, X6, start // BLE X5, X6, 2 // e3dc62be + BLEU X5, X6, start // BLEU X5, X6, 2 // e3fa62be + BLEZ X5, start // BLEZ X5, 2 // e35850be + BLTZ X5, start // BLTZ X5, 2 // e3c602be + BNEZ X5, start // BNEZ X5, 2 // e39402be // Set pseudo-instructions SEQZ X15, X15 // 93b71700 diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 045c2250b5..7bd3984e51 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -252,19 +252,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { switch p.To.Type { case obj.TYPE_REG: switch p.As { - case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb - p.As = AADDI - p.Reg = p.From.Reg - p.From = obj.Addr{Type: obj.TYPE_CONST} - - case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb - p.As = AFSGNJS - p.Reg = p.From.Reg - - case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb - p.As = AFSGNJD - p.Reg = p.From.Reg - + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: default: ctxt.Diag("unsupported register-register move at %v", p) } @@ -1805,6 +1793,44 @@ func instructionsForProg(p *obj.Prog) []*instruction { } ins.imm = p.To.Offset + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + // Handle register to register moves. + if p.From.Type != obj.TYPE_REG || p.To.Type != obj.TYPE_REG { + break + } + switch p.As { + case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255 + case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg) + case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg) + case AMOVB, AMOVH: + // Use SLLI/SRAI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVB { + ins.imm = 56 + } else if p.As == AMOVH { + ins.imm = 48 + } + ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + case AMOVHU, AMOVWU: + // Use SLLI/SRLI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVHU { + ins.imm = 48 + } else if p.As == AMOVWU { + ins.imm = 32 + } + ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + } + case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD: if p.From.Type != obj.TYPE_MEM { p.Ctxt.Diag("%v requires memory for source", p) @@ -1859,13 +1885,13 @@ func instructionsForProg(p *obj.Prog) []*instruction { } else { ins.as = AFEQD } - ins = &instruction{ + ins2 := &instruction{ as: AXORI, // [bit] xor 1 = not [bit] rd: ins.rd, rs1: ins.rd, imm: 1, } - inss = append(inss, ins) + inss = append(inss, ins2) case AFSQRTS, AFSQRTD: // These instructions expect a zero (i.e. float register 0) -- cgit v1.3 From 751c37fd629e0ebb18c39a219fab2b224d016b70 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 21 Oct 2020 21:29:43 -0400 Subject: cmd/internal/obj/{arm,s390x}: make return jump print nicer When a function with non-zero frame size makes a return jump (RET target), it assembles to, conceptually, MOV (SP), LR ADD $framesize, SP JMP target We did not clear some fields in the first instruction's Prog.To, causing it printed like (on ARM) MOVW.P 4(R13), (R14)(R14)(REG) Clear the fields to make it print nicer. Change-Id: I180901aeea41f1ff287d7c6034a6d69005927744 Reviewed-on: https://go-review.googlesource.com/c/go/+/264343 Trust: Cherry Zhang Reviewed-by: Joel Sing --- src/cmd/internal/obj/arm/obj5.go | 2 ++ src/cmd/internal/obj/s390x/objz.go | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/arm/obj5.go b/src/cmd/internal/obj/arm/obj5.go index f2bfb9679f..29d3a5867d 100644 --- a/src/cmd/internal/obj/arm/obj5.go +++ b/src/cmd/internal/obj/arm/obj5.go @@ -487,6 +487,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // If there are instructions following // this ARET, they come from a branch // with the same stackframe, so no spadj. + if p.To.Sym != nil { // retjmp p.To.Reg = REGLINK q2 = obj.Appendp(p, newprog) @@ -494,6 +495,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q2.To.Type = obj.TYPE_BRANCH q2.To.Sym = p.To.Sym p.To.Sym = nil + p.To.Name = obj.NAME_NONE p = q2 } diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 3af5425b36..970cf827d6 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -497,8 +497,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP p.From.Offset = 0 - p.To.Type = obj.TYPE_REG - p.To.Reg = REG_LR + p.To = obj.Addr{ + Type: obj.TYPE_REG, + Reg: REG_LR, + } q = p -- cgit v1.3 From 3a63d04d2edcbdb0937d775e852692279f73a2e2 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 19 May 2020 18:55:10 +1000 Subject: cmd/link,cmd/internal/obj/riscv: add TLS support for linux/riscv64 Add support for Thread Local Storage (TLS) for linux/riscv64 with external linking, using the initial-exec model. Update #36641 Change-Id: I3106ef9a29cde73215830b00deff43dbec1c76e0 Reviewed-on: https://go-review.googlesource.com/c/go/+/263478 Trust: Joel Sing Run-TryBot: Joel Sing TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/riscv/obj.go | 7 +++++++ src/cmd/internal/objabi/reloctype.go | 8 ++++++++ src/cmd/internal/objabi/reloctype_string.go | 18 ++++++++++-------- src/cmd/link/internal/riscv64/asm.go | 23 +++++++++++++++++++---- 4 files changed, 44 insertions(+), 12 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 7bd3984e51..da49f67138 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -1984,6 +1984,13 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("AUIPC needing PC-relative reloc missing symbol") break } + if addr.Sym.Type == objabi.STLSBSS { + if rt == objabi.R_RISCV_PCREL_ITYPE { + rt = objabi.R_RISCV_TLS_IE_ITYPE + } else if rt == objabi.R_RISCV_PCREL_STYPE { + rt = objabi.R_RISCV_TLS_IE_STYPE + } + } rel := obj.Addrel(cursym) rel.Off = int32(p.Pc) diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go index 9e2e4a150a..938954e07a 100644 --- a/src/cmd/internal/objabi/reloctype.go +++ b/src/cmd/internal/objabi/reloctype.go @@ -223,6 +223,14 @@ const ( // AUIPC + S-type instruction pair. R_RISCV_PCREL_STYPE + // R_RISCV_TLS_IE_ITYPE resolves a 32-bit TLS initial-exec TOC offset + // address using an AUIPC + I-type instruction pair. + R_RISCV_TLS_IE_ITYPE + + // R_RISCV_TLS_IE_STYPE resolves a 32-bit TLS initial-exec TOC offset + // address using an AUIPC + S-type instruction pair. + R_RISCV_TLS_IE_STYPE + // R_PCRELDBL relocates s390x 2-byte aligned PC-relative addresses. // TODO(mundaym): remove once variants can be serialized - see issue 14218. R_PCRELDBL diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go index 01df4cce62..693d9631f5 100644 --- a/src/cmd/internal/objabi/reloctype_string.go +++ b/src/cmd/internal/objabi/reloctype_string.go @@ -59,17 +59,19 @@ func _() { _ = x[R_ADDRPOWER_TOCREL_DS-49] _ = x[R_RISCV_PCREL_ITYPE-50] _ = x[R_RISCV_PCREL_STYPE-51] - _ = x[R_PCRELDBL-52] - _ = x[R_ADDRMIPSU-53] - _ = x[R_ADDRMIPSTLS-54] - _ = x[R_ADDRCUOFF-55] - _ = x[R_WASMIMPORT-56] - _ = x[R_XCOFFREF-57] + _ = x[R_RISCV_TLS_IE_ITYPE-52] + _ = x[R_RISCV_TLS_IE_STYPE-53] + _ = x[R_PCRELDBL-54] + _ = x[R_ADDRMIPSU-55] + _ = x[R_ADDRMIPSTLS-56] + _ = x[R_ADDRCUOFF-57] + _ = x[R_WASMIMPORT-58] + _ = x[R_XCOFFREF-59] } -const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_WEAKADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CALLRISCVR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" +const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_WEAKADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CALLRISCVR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" -var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 60, 66, 72, 81, 92, 101, 112, 122, 133, 140, 147, 155, 163, 171, 177, 183, 189, 199, 208, 218, 234, 245, 256, 266, 275, 288, 302, 316, 330, 346, 357, 370, 383, 397, 411, 426, 440, 454, 465, 479, 494, 511, 529, 550, 569, 588, 598, 609, 622, 633, 645, 655} +var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 60, 66, 72, 81, 92, 101, 112, 122, 133, 140, 147, 155, 163, 171, 177, 183, 189, 199, 208, 218, 234, 245, 256, 266, 275, 288, 302, 316, 330, 346, 357, 370, 383, 397, 411, 426, 440, 454, 465, 479, 494, 511, 529, 550, 569, 588, 608, 628, 638, 649, 662, 673, 685, 695} func (i RelocType) String() string { i -= 1 diff --git a/src/cmd/link/internal/riscv64/asm.go b/src/cmd/link/internal/riscv64/asm.go index 66c47c69f8..c18e0540d8 100644 --- a/src/cmd/link/internal/riscv64/asm.go +++ b/src/cmd/link/internal/riscv64/asm.go @@ -38,7 +38,8 @@ func genSymsLate(ctxt *ld.Link, ldr *loader.Loader) { relocs := ldr.Relocs(s) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) - if r.Type() != objabi.R_RISCV_PCREL_ITYPE && r.Type() != objabi.R_RISCV_PCREL_STYPE { + if r.Type() != objabi.R_RISCV_PCREL_ITYPE && r.Type() != objabi.R_RISCV_PCREL_STYPE && + r.Type() != objabi.R_RISCV_TLS_IE_ITYPE && r.Type() != objabi.R_RISCV_TLS_IE_STYPE { continue } if r.Off() == 0 && ldr.SymType(s) == sym.STEXT { @@ -100,7 +101,7 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, // TODO(jsing): Consider generating elf.R_RISCV_CALL instead of a // HI20/LO12_I pair. - case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE: + case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE: // Find the text symbol for the AUIPC instruction targeted // by this relocation. relocs := ldr.Relocs(s) @@ -126,6 +127,10 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, hiRel, loRel = elf.R_RISCV_PCREL_HI20, elf.R_RISCV_PCREL_LO12_I case objabi.R_RISCV_PCREL_STYPE: hiRel, loRel = elf.R_RISCV_PCREL_HI20, elf.R_RISCV_PCREL_LO12_S + case objabi.R_RISCV_TLS_IE_ITYPE: + hiRel, loRel = elf.R_RISCV_TLS_GOT_HI20, elf.R_RISCV_PCREL_LO12_I + case objabi.R_RISCV_TLS_IE_STYPE: + hiRel, loRel = elf.R_RISCV_TLS_GOT_HI20, elf.R_RISCV_PCREL_LO12_S } out.Write64(uint64(sectoff)) out.Write64(uint64(hiRel) | uint64(elfsym)<<32) @@ -156,7 +161,7 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade case objabi.R_CALLRISCV: return val, 0, true - case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE: + case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE: return val, 2, true } @@ -170,6 +175,16 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade // Nothing to do. return val, 0, true + case objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE: + // TLS relocations are not currently handled for internal linking. + // For now, TLS is only used when cgo is in use and cgo currently + // requires external linking. However, we need to accept these + // relocations so that code containing TLS variables will link, + // even when they're not being used. For now, replace these + // instructions with EBREAK to detect accidental use. + const ebreakIns = 0x00100073 + return ebreakIns<<32 | ebreakIns, 0, true + case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE: pc := ldr.SymValue(s) + int64(r.Off()) off := ldr.SymValue(rs) + r.Add() - pc @@ -222,7 +237,7 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) { switch r.Type() { - case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE: + case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE: return ld.ExtrelocViaOuterSym(ldr, r, s), true } return loader.ExtReloc{}, false -- cgit v1.3 From 150d2448e5a213cd679396371c0a147918dc2125 Mon Sep 17 00:00:00 2001 From: Michał Derkacz Date: Sun, 14 Jun 2020 00:06:24 +0200 Subject: cmd/compile,cmd/internal/obj/riscv,runtime: use Duff's devices on riscv64 Implement runtime.duffzero and runtime.duffcopy for riscv64. Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned zeroing/moving. Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c Reviewed-on: https://go-review.googlesource.com/c/go/+/237797 Run-TryBot: Cherry Zhang Reviewed-by: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/riscv64/ggen.go | 10 +- src/cmd/compile/internal/riscv64/ssa.go | 14 + src/cmd/compile/internal/ssa/gen/RISCV64.rules | 14 + src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 38 ++ src/cmd/compile/internal/ssa/opGen.go | 28 + src/cmd/compile/internal/ssa/rewriteRISCV64.go | 33 + src/cmd/internal/obj/riscv/obj.go | 8 +- src/runtime/duff_riscv64.s | 907 +++++++++++++++++++++++++ src/runtime/mkduff.go | 28 + 9 files changed, 1076 insertions(+), 4 deletions(-) create mode 100644 src/runtime/duff_riscv64.s (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/riscv64/ggen.go b/src/cmd/compile/internal/riscv64/ggen.go index be31fad441..f7c03fe7c2 100644 --- a/src/cmd/compile/internal/riscv64/ggen.go +++ b/src/cmd/compile/internal/riscv64/ggen.go @@ -25,7 +25,15 @@ func zeroRange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog { return p } - // TODO(jsing): Add a duff zero implementation for medium sized ranges. + if cnt <= int64(128*gc.Widthptr) { + p = pp.Appendpp(p, riscv.AADDI, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_A0, 0) + p.Reg = riscv.REG_SP + p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffzero + p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr)) + return p + } // Loop, zeroing pointer width bytes at a time. // ADD $(off), SP, T0 diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go index 064a1ca111..0beb5b4bd1 100644 --- a/src/cmd/compile/internal/riscv64/ssa.go +++ b/src/cmd/compile/internal/riscv64/ssa.go @@ -608,6 +608,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpRISCV64DUFFZERO: + p := s.Prog(obj.ADUFFZERO) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffzero + p.To.Offset = v.AuxInt + + case ssa.OpRISCV64DUFFCOPY: + p := s.Prog(obj.ADUFFCOPY) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffcopy + p.To.Offset = v.AuxInt + default: v.Fatalf("Unhandled op %v", v.Op) } diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules index 3bc2e8498a..325cbeb825 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules @@ -360,6 +360,13 @@ (Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem) (Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem) +// Medium zeroing uses a Duff's device +// 8 and 128 are magic constants, see runtime/mkduff.go +(Zero [s] {t} ptr mem) + && s%8 == 0 && s >= 16 && s <= 8*128 + && t.Alignment()%8 == 0 && !config.noDuffDevice => + (DUFFZERO [8 * (128 - s/8)] ptr mem) + // Generic zeroing uses a loop (Zero [s] {t} ptr mem) => (LoweredZero [t.Alignment()] @@ -395,6 +402,13 @@ (Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) +// Medium move uses a Duff's device +// 16 and 128 are magic constants, see runtime/mkduff.go +(Move [s] {t} dst src mem) + && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 + && !config.noDuffDevice && logLargeCopy(v, s) => + (DUFFCOPY [16 * (128 - s/8)] dst src mem) + // Generic move uses a loop (Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) => (LoweredMove [t.Alignment()] diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index ebd515b7fc..f64319230b 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -240,6 +240,44 @@ func init() { {name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem + // duffzero + // arg0 = address of memory to zero (in X10, changed as side effect) + // arg1 = mem + // auxint = offset into duffzero code to start executing + // X1 (link register) changed because of function call + // returns mem + { + name: "DUFFZERO", + aux: "Int64", + argLength: 2, + reg: regInfo{ + inputs: []regMask{regNamed["X10"]}, + clobbers: regNamed["X1"] | regNamed["X10"], + }, + typ: "Mem", + faultOnNilArg0: true, + }, + + // duffcopy + // arg0 = address of dst memory (in X11, changed as side effect) + // arg1 = address of src memory (in X10, changed as side effect) + // arg2 = mem + // auxint = offset into duffcopy code to start executing + // X1 (link register) changed because of function call + // returns mem + { + name: "DUFFCOPY", + aux: "Int64", + argLength: 3, + reg: regInfo{ + inputs: []regMask{regNamed["X11"], regNamed["X10"]}, + clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"], + }, + typ: "Mem", + faultOnNilArg0: true, + faultOnNilArg1: true, + }, + // Generic moves and zeros // general unaligned zeroing diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index bb1cbc0baa..96aa3adedd 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2111,6 +2111,8 @@ const ( OpRISCV64CALLstatic OpRISCV64CALLclosure OpRISCV64CALLinter + OpRISCV64DUFFZERO + OpRISCV64DUFFCOPY OpRISCV64LoweredZero OpRISCV64LoweredMove OpRISCV64LoweredAtomicLoad8 @@ -28162,6 +28164,32 @@ var opcodeTable = [...]opInfo{ clobbers: 9223372035781033972, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 }, }, + { + name: "DUFFZERO", + auxType: auxInt64, + argLen: 2, + faultOnNilArg0: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 512}, // X10 + }, + clobbers: 512, // X10 + }, + }, + { + name: "DUFFCOPY", + auxType: auxInt64, + argLen: 3, + faultOnNilArg0: true, + faultOnNilArg1: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1024}, // X11 + {1, 512}, // X10 + }, + clobbers: 1536, // X10 X11 + }, + }, { name: "LoweredZero", auxType: auxInt64, diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index ac92945753..c337ffbfe7 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -2017,6 +2017,23 @@ func rewriteValueRISCV64_OpMove(v *Value) bool { return true } // match: (Move [s] {t} dst src mem) + // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s) + // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem) + for { + s := auxIntToInt64(v.AuxInt) + t := auxToType(v.Aux) + dst := v_0 + src := v_1 + mem := v_2 + if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) { + break + } + v.reset(OpRISCV64DUFFCOPY) + v.AuxInt = int64ToAuxInt(16 * (128 - s/8)) + v.AddArg3(dst, src, mem) + return true + } + // match: (Move [s] {t} dst src mem) // cond: (s <= 16 || logLargeCopy(v, s)) // result: (LoweredMove [t.Alignment()] dst src (ADDI [s-moveSize(t.Alignment(), config)] src) mem) for { @@ -5650,6 +5667,22 @@ func rewriteValueRISCV64_OpZero(v *Value) bool { return true } // match: (Zero [s] {t} ptr mem) + // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice + // result: (DUFFZERO [8 * (128 - s/8)] ptr mem) + for { + s := auxIntToInt64(v.AuxInt) + t := auxToType(v.Aux) + ptr := v_0 + mem := v_1 + if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) { + break + } + v.reset(OpRISCV64DUFFZERO) + v.AuxInt = int64ToAuxInt(8 * (128 - s/8)) + v.AddArg2(ptr, mem) + return true + } + // match: (Zero [s] {t} ptr mem) // result: (LoweredZero [t.Alignment()] ptr (ADD ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem) for { s := auxIntToInt64(v.AuxInt) diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index da49f67138..5301e44002 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -33,7 +33,7 @@ func buildop(ctxt *obj.Link) {} // lr is the link register to use for the JALR. // p must be a CALL, JMP or RET. func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog { - if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET { + if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY { ctxt.Diag("unexpected Prog in jalrToSym: %v", p) return p } @@ -417,7 +417,7 @@ func containsCall(sym *obj.LSym) bool { // CALLs are CALL or JAL(R) with link register LR. for p := sym.Func().Text; p != nil; p = p.Link { switch p.As { - case obj.ACALL: + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: return true case AJAL, AJALR: if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR { @@ -656,7 +656,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Reg = REG_SP } - case obj.ACALL: + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: switch p.To.Type { case obj.TYPE_MEM: jalrToSym(ctxt, p, newprog, REG_LR) @@ -1696,6 +1696,8 @@ var encodings = [ALAST & obj.AMask]encoding{ obj.APCDATA: pseudoOpEncoding, obj.ATEXT: pseudoOpEncoding, obj.ANOP: pseudoOpEncoding, + obj.ADUFFZERO: pseudoOpEncoding, + obj.ADUFFCOPY: pseudoOpEncoding, } // encodingForAs returns the encoding for an obj.As. diff --git a/src/runtime/duff_riscv64.s b/src/runtime/duff_riscv64.s new file mode 100644 index 0000000000..f7bd3f326e --- /dev/null +++ b/src/runtime/duff_riscv64.s @@ -0,0 +1,907 @@ +// Code generated by mkduff.go; DO NOT EDIT. +// Run go generate from src/runtime to update. +// See mkduff.go for comments. + +#include "textflag.h" + +TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + RET + +TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + RET diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index 8859ed68cc..6ddf0256e9 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -38,6 +38,7 @@ func main() { gen("arm64", notags, zeroARM64, copyARM64) gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x) + gen("riscv64", notags, zeroRISCV64, copyRISCV64) } func gen(arch string, tags, zero, copy func(io.Writer)) { @@ -227,3 +228,30 @@ func copyMIPS64x(w io.Writer) { } fmt.Fprintln(w, "\tRET") } + +func zeroRISCV64(w io.Writer) { + // ZERO: always zero + // X10: ptr to memory to be zeroed + // X10 is updated as a side effect. + fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { + fmt.Fprintln(w, "\tMOV\tZERO, (X10)") + fmt.Fprintln(w, "\tADD\t$8, X10") + } + fmt.Fprintln(w, "\tRET") +} + +func copyRISCV64(w io.Writer) { + // X10: ptr to source memory + // X11: ptr to destination memory + // X10 and X11 are updated as a side effect + fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { + fmt.Fprintln(w, "\tMOV\t(X10), X31") + fmt.Fprintln(w, "\tADD\t$8, X10") + fmt.Fprintln(w, "\tMOV\tX31, (X11)") + fmt.Fprintln(w, "\tADD\t$8, X11") + fmt.Fprintln(w) + } + fmt.Fprintln(w, "\tRET") +} -- cgit v1.3 From 308ec220a2e55e0948505881b051cbefa05cf696 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 16 Oct 2020 18:35:30 +0800 Subject: cmd/asm: refactor some encoding functions for load/store with immediate offset on arm64 Some of the current functions for encoding load/store with immediate offset instructions, like opstr12(), opstr9(), opldr12(), opldr9() and opldrpp(), etc., they have the same code, so this patch refactors them and merges them into two functions opstr() and opldr(). Change-Id: I60367f8b720b77c7ebe6d66905a950dcf7701836 Reviewed-on: https://go-review.googlesource.com/c/go/+/263479 Run-TryBot: fannie zhang TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: fannie zhang --- src/cmd/internal/obj/arm64/asm7.go | 153 +++++++++++-------------------------- 1 file changed, 44 insertions(+), 109 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 6b9fe27c05..92c5729d25 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -205,12 +205,8 @@ func SYSHINT(x uint32) uint32 { return SYSOP(0, 0, 3, 2, 0, x, 0x1F) } -func LDSTR12U(sz uint32, v uint32, opc uint32) uint32 { - return sz<<30 | 7<<27 | v<<26 | 1<<24 | opc<<22 -} - -func LDSTR9S(sz uint32, v uint32, opc uint32) uint32 { - return sz<<30 | 7<<27 | v<<26 | 0<<24 | opc<<22 +func LDSTR(sz uint32, v uint32, opc uint32) uint32 { + return sz<<30 | 7<<27 | v<<26 | opc<<22 } func LD2STR(o uint32) uint32 { @@ -3392,10 +3388,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { r = int(o.param) } if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ - o1 = c.olsr9s(p, int32(c.opstr9(p, p.As)), v, r, int(p.From.Reg)) + o1 = c.olsr9s(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) } else { v = int32(c.offsetshift(p, int64(v), int(o.a4))) - o1 = c.olsr12u(p, int32(c.opstr12(p, p.As)), v, r, int(p.From.Reg)) + o1 = c.olsr12u(p, int32(c.opstr(p, p.As)), v, r, int(p.From.Reg)) } case 21: /* movT O(R),R -> ldrT */ @@ -3407,11 +3403,11 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { r = int(o.param) } if v < 0 || v%sz != 0 { /* unscaled 9-bit signed */ - o1 = c.olsr9s(p, int32(c.opldr9(p, p.As)), v, r, int(p.To.Reg)) + o1 = c.olsr9s(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) } else { v = int32(c.offsetshift(p, int64(v), int(o.a1))) //print("offset=%lld v=%ld a1=%d\n", instoffset, v, o->a1); - o1 = c.olsr12u(p, int32(c.opldr12(p, p.As)), v, r, int(p.To.Reg)) + o1 = c.olsr12u(p, int32(c.opldr(p, p.As)), v, r, int(p.To.Reg)) } case 22: /* movT (R)O!,R; movT O(R)!, R -> ldrT */ @@ -3424,7 +3420,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if v < -256 || v > 255 { c.ctxt.Diag("offset out of range [-255,254]: %v", p) } - o1 = c.opldrpp(p, p.As) + o1 = c.opldr(p, p.As) if o.scond == C_XPOST { o1 |= 1 << 10 } else { @@ -3442,7 +3438,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if v < -256 || v > 255 { c.ctxt.Diag("offset out of range [-255,254]: %v", p) } - o1 = LD2STR(c.opldrpp(p, p.As)) + o1 = c.opstr(p, p.As) if o.scond == C_XPOST { o1 |= 1 << 10 } else { @@ -3594,7 +3590,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = c.oaddi(p, int32(c.opirr(p, AADD)), hi, r, REGTMP) - o2 = c.olsr12u(p, int32(c.opstr12(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.From.Reg)) + o2 = c.olsr12u(p, int32(c.opstr(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.From.Reg)) break storeusepool: @@ -3638,7 +3634,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 = c.oaddi(p, int32(c.opirr(p, AADD)), hi, r, REGTMP) - o2 = c.olsr12u(p, int32(c.opldr12(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.To.Reg)) + o2 = c.olsr12u(p, int32(c.opldr(p, p.As)), ((v-hi)>>uint(s))&0xFFF, REGTMP, int(p.To.Reg)) break loadusepool: @@ -4127,7 +4123,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Sym = p.To.Sym rel.Add = p.To.Offset rel.Type = objabi.R_ADDRARM64 - o3 = c.olsr12u(p, int32(c.opstr12(p, p.As)), 0, REGTMP, int(p.From.Reg)) + o3 = c.olsr12u(p, int32(c.opstr(p, p.As)), 0, REGTMP, int(p.From.Reg)) case 65: /* movT addr,R -> adrp + add + movT (REGTMP), R */ o1 = ADR(1, 0, REGTMP) @@ -4138,7 +4134,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rel.Sym = p.From.Sym rel.Add = p.From.Offset rel.Type = objabi.R_ADDRARM64 - o3 = c.olsr12u(p, int32(c.opldr12(p, p.As)), 0, REGTMP, int(p.To.Reg)) + o3 = c.olsr12u(p, int32(c.opldr(p, p.As)), 0, REGTMP, int(p.To.Reg)) case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */ v := int32(c.regoff(&p.From)) @@ -4191,7 +4187,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 70: /* IE model movd $tlsvar, reg -> adrp REGTMP, 0; ldr reg, [REGTMP, #0] + relocs */ o1 = ADR(1, 0, REGTMP) - o2 = c.olsr12u(p, int32(c.opldr12(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 @@ -4204,7 +4200,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { case 71: /* movd sym@GOT, reg -> adrp REGTMP, #0; ldr reg, [REGTMP, #0] + relocs */ o1 = ADR(1, 0, REGTMP) - o2 = c.olsr12u(p, int32(c.opldr12(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) + o2 = c.olsr12u(p, int32(c.opldr(p, AMOVD)), 0, REGTMP, int(p.To.Reg)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 @@ -4765,7 +4761,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } } - o1 = c.opldrpp(p, p.As) + o1 = c.opirr(p, p.As) o1 |= (uint32(r&31) << 5) | (uint32((imm>>3)&0xfff) << 10) | (uint32(v & 31)) case 92: /* vmov Vn.[index], Vd.[index] */ @@ -6106,8 +6102,12 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVUSHLL2, AVUXTL2: return 3<<29 | 15<<24 | 0x29<<10 + case AVXAR: return 0xCE<<24 | 1<<23 + + case APRFM: + return 0xf9<<24 | 2<<22 } c.ctxt.Diag("%v: bad irr %v", p, a) @@ -6502,7 +6502,7 @@ func (c *ctxt7) opstore(p *obj.Prog, a obj.As) uint32 { } /* - * load/store register (unsigned immediate) C3.3.13 + * load/store register (scaled 12-bit unsigned immediate) C3.3.13 * these produce 64-bit values (when there's an option) */ func (c *ctxt7) olsr12u(p *obj.Prog, o int32, v int32, b int, r int) uint32 { @@ -6512,49 +6512,12 @@ func (c *ctxt7) olsr12u(p *obj.Prog, o int32, v int32, b int, r int) uint32 { o |= (v & 0xFFF) << 10 o |= int32(b&31) << 5 o |= int32(r & 31) + o |= 1 << 24 return uint32(o) } -func (c *ctxt7) opldr12(p *obj.Prog, a obj.As) uint32 { - switch a { - case AMOVD: - return LDSTR12U(3, 0, 1) /* imm12<<10 | Rn<<5 | Rt */ - - case AMOVW: - return LDSTR12U(2, 0, 2) - - case AMOVWU: - return LDSTR12U(2, 0, 1) - - case AMOVH: - return LDSTR12U(1, 0, 2) - - case AMOVHU: - return LDSTR12U(1, 0, 1) - - case AMOVB: - return LDSTR12U(0, 0, 2) - - case AMOVBU: - return LDSTR12U(0, 0, 1) - - case AFMOVS: - return LDSTR12U(2, 1, 1) - - case AFMOVD: - return LDSTR12U(3, 1, 1) - } - - c.ctxt.Diag("bad opldr12 %v\n%v", a, p) - return 0 -} - -func (c *ctxt7) opstr12(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr12(p, a)) -} - /* - * load/store register (unscaled immediate) C3.3.12 + * load/store register (unscaled 9-bit signed immediate) C3.3.12 */ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { if v < -256 || v > 255 { @@ -6566,76 +6529,48 @@ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { return uint32(o) } -func (c *ctxt7) opldr9(p *obj.Prog, a obj.As) uint32 { - switch a { - case AMOVD: - return LDSTR9S(3, 0, 1) /* simm9<<12 | Rn<<5 | Rt */ - - case AMOVW: - return LDSTR9S(2, 0, 2) - - case AMOVWU: - return LDSTR9S(2, 0, 1) - - case AMOVH: - return LDSTR9S(1, 0, 2) - - case AMOVHU: - return LDSTR9S(1, 0, 1) - - case AMOVB: - return LDSTR9S(0, 0, 2) - - case AMOVBU: - return LDSTR9S(0, 0, 1) - - case AFMOVS: - return LDSTR9S(2, 1, 1) - - case AFMOVD: - return LDSTR9S(3, 1, 1) - } - - c.ctxt.Diag("bad opldr9 %v\n%v", a, p) - return 0 -} - -func (c *ctxt7) opstr9(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr9(p, a)) +// store(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed store. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { + return LD2STR(c.opldr(p, a)) } -func (c *ctxt7) opldrpp(p *obj.Prog, a obj.As) uint32 { +// load(immediate) +// scaled 12-bit unsigned immediate offset. +// unscaled 9-bit signed immediate offset. +// pre/post-indexed load. +// and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. +func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { switch a { case AMOVD: - return 3<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 /* simm9<<12 | Rn<<5 | Rt */ + return LDSTR(3, 0, 1) /* simm9<<12 | Rn<<5 | Rt */ case AMOVW: - return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22 + return LDSTR(2, 0, 2) case AMOVWU: - return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + return LDSTR(2, 0, 1) case AMOVH: - return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22 + return LDSTR(1, 0, 2) case AMOVHU: - return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + return LDSTR(1, 0, 1) case AMOVB: - return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22 + return LDSTR(0, 0, 2) case AMOVBU: - return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22 + return LDSTR(0, 0, 1) case AFMOVS: - return 2<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 + return LDSTR(2, 1, 1) case AFMOVD: - return 3<<30 | 7<<27 | 1<<26 | 0<<24 | 1<<22 - - case APRFM: - return 0xf9<<24 | 2<<22 - + return LDSTR(3, 1, 1) } c.ctxt.Diag("bad opldr %v\n%v", a, p) -- cgit v1.3 From 3089ef6bd7ecc7af4b23eb68e3d7879d340aa673 Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Mon, 28 Sep 2020 18:58:51 +0800 Subject: cmd/asm: add several arm64 SIMD instructions This patch enables VSLI, VUADDW(2), VUSRA and FMOVQ SIMD instructions required by the issue #40725. And the GNU syntax of 'FMOVQ' is 128-bit ldr/str(immediate, simd&fp). Add test cases. Fixes #40725 Change-Id: Ide968ef4a9385ce4cd8f69bce854289014d30456 Reviewed-on: https://go-review.googlesource.com/c/go/+/258397 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 11 + src/cmd/asm/internal/asm/testdata/arm64error.s | 16 +- src/cmd/internal/obj/arm64/a.out.go | 59 +++-- src/cmd/internal/obj/arm64/anames.go | 5 + src/cmd/internal/obj/arm64/anames7.go | 10 + src/cmd/internal/obj/arm64/asm7.go | 300 ++++++++++++++++++------- 6 files changed, 290 insertions(+), 111 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 7f495b90bb..b6c22e0d6f 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -454,6 +454,17 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 FMOVD F20, (R2) // 540000fd FMOVD.P F20, 8(R1) // 348400fc FMOVD.W 8(R1), F20 // 348c40fc + FMOVQ.P F13, 11(R10) // 4db5803c + FMOVQ.W F15, 11(R20) // 8fbe803c + FMOVQ.P 11(R10), F13 // 4db5c03c + FMOVQ.W 11(R20), F15 // 8fbec03c + FMOVQ F10, 65520(R10) // 4afdbf3d + FMOVQ F11, 64(RSP) // eb13803d + FMOVQ F11, 8(R20) // 8b82803c + FMOVQ F11, 4(R20) // 8b42803c + FMOVQ 32(R5), F2 // a208c03d + FMOVQ 65520(R10), F10 // 4afdff3d + FMOVQ 64(RSP), F11 // eb13c03d PRFM (R2), PLDL1KEEP // 400080f9 PRFM 16(R2), PLDL1KEEP // 400880f9 PRFM 48(R6), PSTL2STRM // d31880f9 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 20b1f3e9f0..c3a617066a 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -87,13 +87,13 @@ TEXT errors(SB),$0 VLD1.P 32(R1), [V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset" VLD1.P 48(R1), [V7.S4, V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset" VPMULL V1.D1, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "invalid arrangement" - VPMULL V1.B16, V2.B16, V3.H8 // ERROR "invalid arrangement" + VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch" + VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "operand mismatch" + VPMULL V1.B16, V2.B16, V3.H8 // ERROR "operand mismatch" VPMULL2 V1.D2, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "invalid arrangement" - VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "invalid arrangement" + VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch" + VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "operand mismatch" + VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "operand mismatch" VEXT $8, V1.B16, V2.B8, V2.B16 // ERROR "invalid arrangement" VEXT $8, V1.H8, V2.H8, V2.H8 // ERROR "invalid arrangement" VRBIT V1.B16, V2.B8 // ERROR "invalid arrangement" @@ -353,4 +353,8 @@ TEXT errors(SB),$0 VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range" VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch" VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement" + VUADDW V9.B8, V12.H8, V14.B8 // ERROR "invalid arrangement" + VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch" + VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" + VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" RET diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 33319e48df..98504353e2 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -419,27 +419,33 @@ const ( C_SBRA // for TYPE_BRANCH C_LBRA - C_ZAUTO // 0(RSP) - C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 - C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 - C_NSAUTO // -256 <= x < 0 - C_NPAUTO // -512 <= x < 0, 0 mod 8 - C_NAUTO4K // -4095 <= x < 0 - C_PSAUTO_8 // 0 to 255, 0 mod 8 - C_PSAUTO_4 // 0 to 255, 0 mod 4 - C_PSAUTO // 0 to 255 - C_PPAUTO // 0 to 504, 0 mod 8 - C_UAUTO4K_8 // 0 to 4095, 0 mod 8 - C_UAUTO4K_4 // 0 to 4095, 0 mod 4 - C_UAUTO4K_2 // 0 to 4095, 0 mod 2 - C_UAUTO4K // 0 to 4095 - C_UAUTO8K_8 // 0 to 8190, 0 mod 8 - C_UAUTO8K_4 // 0 to 8190, 0 mod 4 - C_UAUTO8K // 0 to 8190, 0 mod 2 - C_UAUTO16K_8 // 0 to 16380, 0 mod 8 - C_UAUTO16K // 0 to 16380, 0 mod 4 - C_UAUTO32K // 0 to 32760, 0 mod 8 - C_LAUTO // any other 32-bit constant + C_ZAUTO // 0(RSP) + C_NSAUTO_8 // -256 <= x < 0, 0 mod 8 + C_NSAUTO_4 // -256 <= x < 0, 0 mod 4 + C_NSAUTO // -256 <= x < 0 + C_NPAUTO // -512 <= x < 0, 0 mod 8 + C_NAUTO4K // -4095 <= x < 0 + C_PSAUTO_8 // 0 to 255, 0 mod 8 + C_PSAUTO_4 // 0 to 255, 0 mod 4 + C_PSAUTO // 0 to 255 + C_PPAUTO_16 // 0 to 504, 0 mod 16 + C_PPAUTO // 0 to 504, 0 mod 8 + C_UAUTO4K_16 // 0 to 4095, 0 mod 16 + C_UAUTO4K_8 // 0 to 4095, 0 mod 8 + C_UAUTO4K_4 // 0 to 4095, 0 mod 4 + C_UAUTO4K_2 // 0 to 4095, 0 mod 2 + C_UAUTO4K // 0 to 4095 + C_UAUTO8K_16 // 0 to 8190, 0 mod 16 + C_UAUTO8K_8 // 0 to 8190, 0 mod 8 + C_UAUTO8K_4 // 0 to 8190, 0 mod 4 + C_UAUTO8K // 0 to 8190, 0 mod 2 + C_PSAUTO + C_UAUTO16K_16 // 0 to 16380, 0 mod 16 + C_UAUTO16K_8 // 0 to 16380, 0 mod 8 + C_UAUTO16K // 0 to 16380, 0 mod 4 + C_PSAUTO + C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO + C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO + C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO + C_LAUTO // any other 32-bit constant C_SEXT1 // 0 to 4095, direct C_SEXT2 // 0 to 8190 @@ -457,17 +463,23 @@ const ( C_PSOREG_8 C_PSOREG_4 C_PSOREG + C_PPOREG_16 C_PPOREG + C_UOREG4K_16 C_UOREG4K_8 C_UOREG4K_4 C_UOREG4K_2 C_UOREG4K + C_UOREG8K_16 C_UOREG8K_8 C_UOREG8K_4 C_UOREG8K + C_UOREG16K_16 C_UOREG16K_8 C_UOREG16K + C_UOREG32K_16 C_UOREG32K + C_UOREG64K C_LOREG C_ADDR // TODO(aram): explain difference from C_VCONADDR @@ -873,6 +885,7 @@ const ( AFDIVS AFLDPD AFLDPS + AFMOVQ AFMOVD AFMOVS AVMOVQ @@ -1001,6 +1014,7 @@ const ( AVUZP2 AVSHL AVSRI + AVSLI AVBSL AVBIT AVTBL @@ -1008,6 +1022,9 @@ const ( AVZIP1 AVZIP2 AVCMTST + AVUADDW2 + AVUADDW + AVUSRA ALAST AB = obj.AJMP ABL = obj.ACALL diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index e5534e26b9..126eefd032 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -379,6 +379,7 @@ var Anames = []string{ "FDIVS", "FLDPD", "FLDPS", + "FMOVQ", "FMOVD", "FMOVS", "VMOVQ", @@ -507,6 +508,7 @@ var Anames = []string{ "VUZP2", "VSHL", "VSRI", + "VSLI", "VBSL", "VBIT", "VTBL", @@ -514,5 +516,8 @@ var Anames = []string{ "VZIP1", "VZIP2", "VCMTST", + "VUADDW2", + "VUADDW", + "VUSRA", "LAST", } diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index 96c9f788d9..f7e99517ce 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -51,16 +51,21 @@ var cnames7 = []string{ "PSAUTO_4", "PSAUTO", "PPAUTO", + "UAUTO4K_16", "UAUTO4K_8", "UAUTO4K_4", "UAUTO4K_2", "UAUTO4K", + "UAUTO8K_16", "UAUTO8K_8", "UAUTO8K_4", "UAUTO8K", + "UAUTO16K_16", "UAUTO16K_8", "UAUTO16K", + "UAUTO32K_8", "UAUTO32K", + "UAUTO64K", "LAUTO", "SEXT1", "SEXT2", @@ -78,16 +83,21 @@ var cnames7 = []string{ "PSOREG_4", "PSOREG", "PPOREG", + "UOREG4K_16", "UOREG4K_8", "UOREG4K_4", "UOREG4K_2", "UOREG4K", + "UOREG8K_16", "UOREG8K_8", "UOREG8K_4", "UOREG8K", + "UOREG16K_16", "UOREG16K_8", "UOREG16K", + "UOREG32K_16", "UOREG32K", + "UOREG64K", "LOREG", "ADDR", "GOTADDR", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 92c5729d25..8cbf5e719f 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -483,6 +483,7 @@ var optab = []Optab{ {AVZIP1, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0}, {AVUSHLL, C_VCON, C_ARNG, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, {AVUXTL, C_ARNG, C_NONE, C_NONE, C_ARNG, 102, 4, 0, 0, 0}, + {AVUADDW, C_ARNG, C_ARNG, C_NONE, C_ARNG, 105, 4, 0, 0, 0}, /* conditional operations */ {ACSEL, C_COND, C_REG, C_REG, C_REG, 18, 4, 0, 0, 0}, @@ -509,6 +510,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_UOREG16K, 20, 4, 0, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_UAUTO32K, 20, 4, REGSP, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_UOREG32K, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UAUTO64K, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_UOREG64K, 20, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement store */ {AMOVB, C_REG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, @@ -526,6 +529,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0}, /* scaled 12-bit unsigned displacement load */ {AMOVB, C_UAUTO4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, @@ -543,6 +548,8 @@ var optab = []Optab{ {AFMOVS, C_UOREG16K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, {AFMOVD, C_UAUTO32K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, {AFMOVD, C_UOREG32K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_UAUTO64K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_UOREG64K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* unscaled 9-bit signed displacement load */ {AMOVB, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0}, @@ -560,6 +567,8 @@ var optab = []Optab{ {AFMOVS, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, {AFMOVD, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, {AFMOVD, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, + {AFMOVQ, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0}, + {AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0}, /* long displacement store */ {AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, @@ -577,6 +586,8 @@ var optab = []Optab{ {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, 30, 8, REGSP, LTO, 0}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 30, 8, 0, LTO, 0}, /* long displacement load */ {AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, 31, 8, REGSP, LFROM, 0}, @@ -594,6 +605,8 @@ var optab = []Optab{ {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, {AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, + {AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 31, 8, 0, LFROM, 0}, /* pre/post-indexed load (unscaled, signed 9-bit offset) */ {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, @@ -603,6 +616,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPOST}, {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPOST}, {AMOVD, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, @@ -611,6 +625,7 @@ var optab = []Optab{ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, 22, 4, 0, 0, C_XPRE}, {AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, {AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, 22, 4, 0, 0, C_XPRE}, /* pre/post-indexed store (unscaled, signed 9-bit offset) */ {AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, @@ -620,6 +635,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPOST}, {AMOVD, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, @@ -628,6 +644,7 @@ var optab = []Optab{ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, {AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, + {AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, 23, 4, 0, 0, C_XPRE}, /* load with shifted or extended register offset */ {AMOVD, C_ROFF, C_NONE, C_NONE, C_REG, 98, 4, 0, 0, 0}, @@ -1207,37 +1224,49 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) { C_PSAUTO, C_PSAUTO_8, C_PSAUTO_4, + C_PPAUTO_16, C_PPAUTO, + C_UAUTO4K_16, C_UAUTO4K_8, C_UAUTO4K_4, C_UAUTO4K_2, C_UAUTO4K, + C_UAUTO8K_16, C_UAUTO8K_8, C_UAUTO8K_4, C_UAUTO8K, + C_UAUTO16K_16, C_UAUTO16K_8, C_UAUTO16K, + C_UAUTO32K_16, C_UAUTO32K, + C_UAUTO64K, C_NSAUTO_8, C_NSAUTO_4, C_NSAUTO, C_NPAUTO, C_NAUTO4K, C_LAUTO, - C_PPOREG, C_PSOREG, - C_PSOREG_4, C_PSOREG_8, + C_PSOREG_4, + C_PPOREG_16, + C_PPOREG, + C_UOREG4K_16, C_UOREG4K_8, C_UOREG4K_4, C_UOREG4K_2, C_UOREG4K, + C_UOREG8K_16, C_UOREG8K_8, C_UOREG8K_4, C_UOREG8K, + C_UOREG16K_16, C_UOREG16K_8, C_UOREG16K, + C_UOREG32K_16, C_UOREG32K, + C_UOREG64K, C_NSOREG_8, C_NSOREG_4, C_NSOREG, @@ -1532,10 +1561,18 @@ func autoclass(l int64) int { } return C_PSAUTO } - if l <= 504 && l&7 == 0 { - return C_PPAUTO + if l <= 504 { + if l&15 == 0 { + return C_PPAUTO_16 + } + if l&7 == 0 { + return C_PPAUTO + } } if l <= 4095 { + if l&15 == 0 { + return C_UAUTO4K_16 + } if l&7 == 0 { return C_UAUTO4K_8 } @@ -1548,6 +1585,9 @@ func autoclass(l int64) int { return C_UAUTO4K } if l <= 8190 { + if l&15 == 0 { + return C_UAUTO8K_16 + } if l&7 == 0 { return C_UAUTO8K_8 } @@ -1559,6 +1599,9 @@ func autoclass(l int64) int { } } if l <= 16380 { + if l&15 == 0 { + return C_UAUTO16K_16 + } if l&7 == 0 { return C_UAUTO16K_8 } @@ -1566,8 +1609,16 @@ func autoclass(l int64) int { return C_UAUTO16K } } - if l <= 32760 && (l&7) == 0 { - return C_UAUTO32K + if l <= 32760 { + if l&15 == 0 { + return C_UAUTO32K_16 + } + if l&7 == 0 { + return C_UAUTO32K + } + } + if l <= 65520 && (l&15) == 0 { + return C_UAUTO64K } return C_LAUTO } @@ -1595,6 +1646,8 @@ func (c *ctxt7) offsetshift(p *obj.Prog, v int64, cls int) int64 { s = 2 case C_UAUTO32K, C_UOREG32K: s = 3 + case C_UAUTO64K, C_UOREG64K: + s = 4 default: c.ctxt.Diag("bad class: %v\n%v", DRconv(cls), p) } @@ -2126,46 +2179,66 @@ func cmp(a int, b int) bool { case C_PPAUTO: switch b { - case C_ZAUTO, C_PSAUTO_8: + case C_ZAUTO, C_PSAUTO_8, C_PPAUTO_16: return true } case C_UAUTO4K: switch b { case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, - C_PPAUTO, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8: + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16: return true } case C_UAUTO8K: switch b { - case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8: + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16: return true } case C_UAUTO16K: switch b { - case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO16K_8: + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16: return true } case C_UAUTO32K: switch b { case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, - C_PPAUTO, C_UAUTO4K_8, C_UAUTO8K_8, C_UAUTO16K_8: + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K_16: + return true + } + + case C_UAUTO64K: + switch b { + case C_ZAUTO, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO_16, C_UAUTO4K_16, C_UAUTO8K_16, C_UAUTO16K_16, + C_UAUTO32K_16: return true } case C_LAUTO: switch b { - case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, - C_NAUTO4K, C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, C_PPAUTO, - C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, - C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, - C_UAUTO16K, C_UAUTO16K_8, - C_UAUTO32K: + case C_ZAUTO, C_NSAUTO, C_NSAUTO_4, C_NSAUTO_8, C_NPAUTO, C_NAUTO4K, + C_PSAUTO, C_PSAUTO_4, C_PSAUTO_8, + C_PPAUTO, C_PPAUTO_16, + C_UAUTO4K, C_UAUTO4K_2, C_UAUTO4K_4, C_UAUTO4K_8, C_UAUTO4K_16, + C_UAUTO8K, C_UAUTO8K_4, C_UAUTO8K_8, C_UAUTO8K_16, + C_UAUTO16K, C_UAUTO16K_8, C_UAUTO16K_16, + C_UAUTO32K, C_UAUTO32K_16, + C_UAUTO64K: return true } @@ -2192,6 +2265,11 @@ func cmp(a int, b int) bool { return true } + case C_PSOREG_8: + if b == C_ZOREG { + return true + } + case C_PSOREG_4: switch b { case C_ZOREG, C_PSOREG_8: @@ -2206,48 +2284,66 @@ func cmp(a int, b int) bool { case C_PPOREG: switch b { - case C_ZOREG, C_PSOREG_8: + case C_ZOREG, C_PSOREG_8, C_PPOREG_16: return true } case C_UOREG4K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16: return true } case C_UOREG8K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, - C_UOREG8K_4, C_UOREG8K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16: return true } case C_UOREG16K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_4, C_UOREG4K_8, C_UOREG8K_4, - C_UOREG8K_8, C_UOREG16K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16: return true } case C_UOREG32K: switch b { - case C_ZOREG, C_PSOREG_4, C_PSOREG_8, C_PSOREG, - C_PPOREG, C_UOREG4K_8, C_UOREG8K_8, C_UOREG16K_8: + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K_16: + return true + } + + case C_UOREG64K: + switch b { + case C_ZOREG, C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG_16, C_UOREG4K_16, C_UOREG8K_16, C_UOREG16K_16, + C_UOREG32K_16: return true } case C_LOREG: switch b { - case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, - C_NOREG4K, C_PSOREG_4, C_PSOREG_8, C_PSOREG, C_PPOREG, - C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, - C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, - C_UOREG16K, C_UOREG16K_8, - C_UOREG32K: + case C_ZOREG, C_NSOREG, C_NSOREG_4, C_NSOREG_8, C_NPOREG, C_NOREG4K, + C_PSOREG, C_PSOREG_4, C_PSOREG_8, + C_PPOREG, C_PPOREG_16, + C_UOREG4K, C_UOREG4K_2, C_UOREG4K_4, C_UOREG4K_8, C_UOREG4K_16, + C_UOREG8K, C_UOREG8K_4, C_UOREG8K_8, C_UOREG8K_16, + C_UOREG16K, C_UOREG16K_8, C_UOREG16K_16, + C_UOREG32K, C_UOREG32K_16, + C_UOREG64K: return true } @@ -2675,7 +2771,8 @@ func buildop(ctxt *obj.Link) { case AFCSELD: oprangeset(AFCSELS, t) - case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS: + case AFMOVQ, AFMOVD, AFMOVS, + AVMOVQ, AVMOVD, AVMOVS: break case AFCVTZSD: @@ -2802,6 +2899,8 @@ func buildop(ctxt *obj.Link) { case AVUSHR: oprangeset(AVSHL, t) oprangeset(AVSRI, t) + oprangeset(AVSLI, t) + oprangeset(AVUSRA, t) case AVREV32: oprangeset(AVCNT, t) @@ -2829,6 +2928,9 @@ func buildop(ctxt *obj.Link) { case AVEOR3: oprangeset(AVBCAX, t) + case AVUADDW: + oprangeset(AVUADDW2, t) + case ASHA1H, AVCNT, AVMOV, @@ -4805,47 +4907,31 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(imm5&0x1f) << 16) | (uint32(imm4&0xf) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) - case 93: /* vpmull{2} Vm., Vn., Vd */ - af := int((p.From.Reg >> 5) & 15) - at := int((p.To.Reg >> 5) & 15) - a := int((p.Reg >> 5) & 15) + case 93: /* vpmull{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if af != a { + c.ctxt.Diag("invalid arrangement: %v", p) + } var Q, size uint32 - if p.As == AVPMULL { - Q = 0 - } else { + if p.As == AVPMULL2 { Q = 1 } - - var fArng int - switch at { - case ARNG_8H: - if Q == 0 { - fArng = ARNG_8B - } else { - fArng = ARNG_16B - } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): size = 0 - case ARNG_1Q: - if Q == 0 { - fArng = ARNG_1D - } else { - fArng = ARNG_2D - } + case pack(0, ARNG_1Q, ARNG_1D), pack(1, ARNG_1Q, ARNG_2D): size = 3 default: - c.ctxt.Diag("invalid arrangement on Vd.: %v", p) - } - - if af != a || af != fArng { - c.ctxt.Diag("invalid arrangement: %v", p) + c.ctxt.Diag("operand mismatch: %v\n", p) } o1 = c.oprrr(p, p.As) rf := int((p.From.Reg) & 31) rt := int((p.To.Reg) & 31) r := int((p.Reg) & 31) - o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) case 94: /* vext $imm4, Vm., Vn., Vd. */ @@ -4883,7 +4969,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= ((Q & 1) << 30) | (uint32(r&31) << 16) | (uint32(index&15) << 11) | (uint32(rf&31) << 5) | uint32(rt&31) - case 95: /* vushr $shift, Vn., Vd. */ + case 95: /* vushr/vshl/vsri/vsli/vusra $shift, Vn., Vd. */ at := int((p.To.Reg >> 5) & 15) af := int((p.Reg >> 5) & 15) shift := int(p.From.Offset) @@ -4920,14 +5006,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } imm := 0 - switch p.As { - case AVUSHR, AVSRI: + case AVUSHR, AVSRI, AVUSRA: imm = esize*2 - shift if imm < esize || imm > imax { c.ctxt.Diag("shift out of range: %v", p) } - case AVSHL: + case AVSHL, AVSLI: imm = esize + shift if imm > imax { c.ctxt.Diag("shift out of range: %v", p) @@ -4940,7 +5025,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int((p.To.Reg) & 31) rf := int((p.Reg) & 31) - o1 |= ((Q & 1) << 30) | (uint32(imm&127) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) + o1 |= ((Q & 1) << 30) | (uint32(imm&0x7f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) case 96: /* vst1 Vt1.[index], offset(Rn) */ af := int((p.From.Reg >> 5) & 15) @@ -5169,11 +5254,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { shift = 0 } - pack := func(q, x, y uint8) uint32 { - return uint32(q)<<16 | uint32(x)<<8 | uint32(y) - } - - var Q uint8 = uint8(o1>>30) & 1 + Q := (o1 >> 30) & 1 var immh, width uint8 switch pack(Q, af, at) { case pack(0, ARNG_8B, ARNG_8H): @@ -5195,6 +5276,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { c.ctxt.Diag("shift amount out of range: %v\n", p) } o1 |= uint32(immh)<<19 | uint32(shift)<<16 | uint32(rf&31)<<5 | uint32(p.To.Reg&31) + case 103: /* VEOR3/VBCAX Va.B16, Vm.B16, Vn.B16, Vd.B16 */ ta := (p.From.Reg >> 5) & 15 tm := (p.Reg >> 5) & 15 @@ -5240,6 +5322,35 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= (uint32(r&31) << 16) | (uint32(index&63) << 10) | (uint32(rf&31) << 5) | uint32(rt&31) + case 105: /* vuaddw{2} Vm., Vn., Vd. */ + af := uint8((p.From.Reg >> 5) & 15) + at := uint8((p.To.Reg >> 5) & 15) + a := uint8((p.Reg >> 5) & 15) + if at != a { + c.ctxt.Diag("invalid arrangement: %v", p) + break + } + + var Q, size uint32 + if p.As == AVUADDW2 { + Q = 1 + } + switch pack(Q, at, af) { + case pack(0, ARNG_8H, ARNG_8B), pack(1, ARNG_8H, ARNG_16B): + size = 0 + case pack(0, ARNG_4S, ARNG_4H), pack(1, ARNG_4S, ARNG_8H): + size = 1 + case pack(0, ARNG_2D, ARNG_2S), pack(1, ARNG_2D, ARNG_4S): + size = 2 + default: + c.ctxt.Diag("operand mismatch: %v\n", p) + } + + o1 = c.oprrr(p, p.As) + rf := int((p.From.Reg) & 31) + rt := int((p.To.Reg) & 31) + r := int((p.Reg) & 31) + o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) } out[0] = o1 out[1] = o2 @@ -5898,6 +6009,9 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVUZP2: return 7<<25 | 1<<14 | 3<<11 + + case AVUADDW, AVUADDW2: + return 0x17<<25 | 1<<21 | 1<<12 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6097,6 +6211,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVSRI: return 0x5E<<23 | 17<<10 + case AVSLI: + return 0x5E<<23 | 21<<10 + case AVUSHLL, AVUXTL: return 1<<29 | 15<<24 | 0x29<<10 @@ -6106,6 +6223,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 { case AVXAR: return 0xCE<<24 | 1<<23 + case AVUSRA: + return 1<<29 | 15<<24 | 5<<10 + case APRFM: return 0xf9<<24 | 2<<22 } @@ -6535,7 +6655,14 @@ func (c *ctxt7) olsr9s(p *obj.Prog, o int32, v int32, b int, r int) uint32 { // pre/post-indexed store. // and the 12-bit and 9-bit are distinguished in olsr12u and oslr9s. func (c *ctxt7) opstr(p *obj.Prog, a obj.As) uint32 { - return LD2STR(c.opldr(p, a)) + enc := c.opldr(p, a) + switch p.As { + case AFMOVQ: + enc = enc &^ (1 << 22) + default: + enc = LD2STR(enc) + } + return enc } // load(immediate) @@ -6571,6 +6698,9 @@ func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 { case AFMOVD: return LDSTR(3, 1, 1) + + case AFMOVQ: + return LDSTR(0, 1, 3) } c.ctxt.Diag("bad opldr %v\n%v", a, p) @@ -7063,10 +7193,13 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 { */ func movesize(a obj.As) int { switch a { - case AMOVD: + case AFMOVQ: + return 4 + + case AMOVD, AFMOVD: return 3 - case AMOVW, AMOVWU: + case AMOVW, AMOVWU, AFMOVS: return 2 case AMOVH, AMOVHU: @@ -7075,12 +7208,6 @@ func movesize(a obj.As) int { case AMOVB, AMOVBU: return 0 - case AFMOVS: - return 2 - - case AFMOVD: - return 3 - default: return -1 } @@ -7145,3 +7272,8 @@ func (c *ctxt7) encRegShiftOrExt(a *obj.Addr, r int16) uint32 { return 0 } + +// pack returns the encoding of the "Q" field and two arrangement specifiers. +func pack(q uint32, arngA, arngB uint8) uint32 { + return uint32(q)<<16 | uint32(arngA)<<8 | uint32(arngB) +} -- cgit v1.3 From 15131caeaa83e57fd8bdf87dde2801443f5602db Mon Sep 17 00:00:00 2001 From: fanzha02 Date: Fri, 8 May 2020 10:51:29 +0800 Subject: cmd/internal/obj/arm64: add CASx/CASPx instructions This patch adds support for CASx and CASPx atomic instructions. go syntax gnu syntax CASD Rs, (Rn|RSP), Rt => cas Xs, Xt, (Xn|SP) CASALW Rs, (Rn|RSP), Rt => casal Ws, Wt, (Xn|SP) CASPD (Rs, Rs+1), (Rn|RSP), (Rt, Rt+1) => casp Xs, Xs+1, Xt, Xt+1, (Xn|SP) CASPW (Rs, Rs+1), (Rn|RSP), (Rt, Rt+1) => casp Ws, Ws+1, Wt, Wt+1, (Xn|SP) This patch changes the type of prog.RestArgs from "[]Addr" to "[]struct{Addr, Pos}", Pos is a enum, indicating the position of the operand. This patch also adds test cases. Change-Id: Ib971cfda7890b7aa895d17bab22dea326c7fcaa4 Reviewed-on: https://go-review.googlesource.com/c/go/+/233277 Trust: fannie zhang Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/arch/arm64.go | 13 ++- src/cmd/asm/internal/asm/asm.go | 18 +++- src/cmd/asm/internal/asm/testdata/arm64.s | 18 ++++ src/cmd/asm/internal/asm/testdata/arm64error.s | 4 + src/cmd/compile/internal/s390x/ssa.go | 12 +-- src/cmd/internal/obj/arm64/a.out.go | 14 +++ src/cmd/internal/obj/arm64/anames.go | 14 +++ src/cmd/internal/obj/arm64/asm7.go | 132 +++++++++++++++++++------ src/cmd/internal/obj/link.go | 84 +++++++++++----- src/cmd/internal/obj/riscv/obj.go | 8 +- src/cmd/internal/obj/s390x/asmz.go | 2 +- src/cmd/internal/obj/util.go | 15 ++- src/cmd/internal/obj/x86/asm6.go | 6 +- 13 files changed, 266 insertions(+), 74 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go index e643889aef..e557630ca6 100644 --- a/src/cmd/asm/internal/arch/arm64.go +++ b/src/cmd/asm/internal/arch/arm64.go @@ -75,7 +75,7 @@ func IsARM64STLXR(op obj.As) bool { arm64.ASTXP, arm64.ASTXPW, arm64.ASTLXP, arm64.ASTLXPW: return true } - // atomic instructions + // LDADDx/SWPx/CASx atomic instructions if arm64.IsAtomicInstruction(op) { return true } @@ -93,6 +93,17 @@ func IsARM64TBL(op obj.As) bool { return false } +// IsARM64CASP reports whether the op (as defined by an arm64.A* +// constant) is one of the CASP-like instructions, and its 2nd +// destination is a register pair that require special handling. +func IsARM64CASP(op obj.As) bool { + switch op { + case arm64.ACASPD, arm64.ACASPW: + return true + } + return false +} + // ARM64Suffix handles the special suffix for the ARM64. // It returns a boolean to indicate success; failure means // cond was unrecognized. diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index b9efa454ed..c4032759bb 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -637,6 +637,18 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.From = a[0] prog.SetFrom3(a[1]) prog.To = a[2] + case arch.IsARM64CASP(op): + prog.From = a[0] + prog.To = a[1] + // both 1st operand and 3rd operand are (Rs, Rs+1) register pair. + // And the register pair must be contiguous. + if (a[0].Type != obj.TYPE_REGREG) || (a[2].Type != obj.TYPE_REGREG) { + p.errorf("invalid addressing modes for 1st or 3rd operand to %s instruction, must be register pair", op) + return + } + // For ARM64 CASP-like instructions, its 2nd destination operand is register pair(Rt, Rt+1) that can + // not fit into prog.RegTo2, so save it to the prog.RestArgs. + prog.SetTo2(a[2]) default: prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) @@ -725,7 +737,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } if p.arch.Family == sys.AMD64 { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2]} + prog.SetRestArgs([]obj.Addr{a[1], a[2]}) prog.To = a[3] break } @@ -808,13 +820,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } if p.arch.Family == sys.AMD64 { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2], a[3]} + prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]}) prog.To = a[4] break } if p.arch.Family == sys.S390X { prog.From = a[0] - prog.RestArgs = []obj.Addr{a[1], a[2], a[3]} + prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]}) prog.To = a[4] break } diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 7943990e16..5547cf634c 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -777,6 +777,24 @@ again: LDORLH R5, (RSP), R7 // e7336578 LDORLB R5, (R6), R7 // c7306538 LDORLB R5, (RSP), R7 // e7336538 + CASD R1, (R2), ZR // 5f7ca1c8 + CASW R1, (RSP), ZR // ff7fa188 + CASB ZR, (R5), R3 // a37cbf08 + CASH R3, (RSP), ZR // ff7fa348 + CASW R5, (R7), R6 // e67ca588 + CASLD ZR, (RSP), R8 // e8ffbfc8 + CASLW R9, (R10), ZR // 5ffda988 + CASAD R7, (R11), R15 // 6f7de7c8 + CASAW R10, (RSP), R19 // f37fea88 + CASALD R5, (R6), R7 // c7fce5c8 + CASALD R5, (RSP), R7 // e7ffe5c8 + CASALW R5, (R6), R7 // c7fce588 + CASALW R5, (RSP), R7 // e7ffe588 + CASALH ZR, (R5), R8 // a8fcff48 + CASALB R8, (R9), ZR // 3ffde808 + CASPD (R30, ZR), (RSP), (R8, R9) // e87f3e48 + CASPW (R6, R7), (R8), (R4, R5) // 047d2608 + CASPD (R2, R3), (R2), (R8, R9) // 487c2248 // RET RET diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index c3a617066a..99e4d62d25 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -357,4 +357,8 @@ TEXT errors(SB),$0 VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch" VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range" VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range" + CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register" + CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register" + CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous" + CASPD (R2, R3), (R2), (R8, R10) // ERROR "destination register pair must be contiguous" RET diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index e23b31f385..84b9f491e4 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -182,11 +182,11 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { i := v.Aux.(s390x.RotateParams) p := s.Prog(v.Op.Asm()) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(i.Start)} - p.RestArgs = []obj.Addr{ + p.SetRestArgs([]obj.Addr{ {Type: obj.TYPE_CONST, Offset: int64(i.End)}, {Type: obj.TYPE_CONST, Offset: int64(i.Amount)}, {Type: obj.TYPE_REG, Reg: r2}, - } + }) p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} case ssa.OpS390XADD, ssa.OpS390XADDW, ssa.OpS390XSUB, ssa.OpS390XSUBW, @@ -913,7 +913,7 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { p.From.Type = obj.TYPE_CONST p.From.Offset = int64(s390x.NotEqual & s390x.NotUnordered) // unordered is not possible p.Reg = s390x.REG_R3 - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: 0}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 0}) if b.Succs[0].Block() != next { s.Br(s390x.ABR, b.Succs[0].Block()) } @@ -956,17 +956,17 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: b.Controls[1].Reg()}) case ssa.BlockS390XCGIJ, ssa.BlockS390XCIJ: p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(int8(b.AuxInt))}) case ssa.BlockS390XCLGIJ, ssa.BlockS390XCLIJ: p.From.Type = obj.TYPE_CONST p.From.Offset = int64(mask & s390x.NotUnordered) // unordered is not possible p.Reg = b.Controls[0].Reg() - p.RestArgs = []obj.Addr{{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(uint8(b.AuxInt))}) default: b.Fatalf("branch not implemented: %s", b.LongString()) } diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 98504353e2..5844b71ca7 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -843,6 +843,20 @@ const ( ASWPLW ASWPLH ASWPLB + ACASD + ACASW + ACASH + ACASB + ACASAD + ACASAW + ACASLD + ACASLW + ACASALD + ACASALW + ACASALH + ACASALB + ACASPD + ACASPW ABEQ ABNE ABCS diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 126eefd032..fb216f9a94 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -337,6 +337,20 @@ var Anames = []string{ "SWPLW", "SWPLH", "SWPLB", + "CASD", + "CASW", + "CASH", + "CASB", + "CASAD", + "CASAW", + "CASLD", + "CASLW", + "CASALD", + "CASALW", + "CASALH", + "CASALB", + "CASPD", + "CASPW", "BEQ", "BNE", "BCS", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 8cbf5e719f..4fc62d5c7f 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -80,12 +80,17 @@ type Optab struct { } func IsAtomicInstruction(as obj.As) bool { - _, ok := atomicInstructions[as] - return ok + if _, ok := atomicLDADD[as]; ok { + return true + } + if _, ok := atomicSWP[as]; ok { + return true + } + return false } // known field values of an instruction. -var atomicInstructions = map[obj.As]uint32{ +var atomicLDADD = map[obj.As]uint32{ ALDADDAD: 3<<30 | 0x1c5<<21 | 0x00<<10, ALDADDAW: 2<<30 | 0x1c5<<21 | 0x00<<10, ALDADDAH: 1<<30 | 0x1c5<<21 | 0x00<<10, @@ -150,22 +155,41 @@ var atomicInstructions = map[obj.As]uint32{ ALDORLW: 2<<30 | 0x1c3<<21 | 0x0c<<10, ALDORLH: 1<<30 | 0x1c3<<21 | 0x0c<<10, ALDORLB: 0<<30 | 0x1c3<<21 | 0x0c<<10, - ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, - ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, - ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, - ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, - ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, - ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, - ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, - ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, - ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, - ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, +} + +var atomicSWP = map[obj.As]uint32{ + ASWPAD: 3<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAW: 2<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAH: 1<<30 | 0x1c5<<21 | 0x20<<10, + ASWPAB: 0<<30 | 0x1c5<<21 | 0x20<<10, + ASWPALD: 3<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALW: 2<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALH: 1<<30 | 0x1c7<<21 | 0x20<<10, + ASWPALB: 0<<30 | 0x1c7<<21 | 0x20<<10, + ASWPD: 3<<30 | 0x1c1<<21 | 0x20<<10, + ASWPW: 2<<30 | 0x1c1<<21 | 0x20<<10, + ASWPH: 1<<30 | 0x1c1<<21 | 0x20<<10, + ASWPB: 0<<30 | 0x1c1<<21 | 0x20<<10, + ASWPLD: 3<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLW: 2<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLH: 1<<30 | 0x1c3<<21 | 0x20<<10, + ASWPLB: 0<<30 | 0x1c3<<21 | 0x20<<10, + ACASD: 3<<30 | 0x45<<21 | 0x1f<<10, + ACASW: 2<<30 | 0x45<<21 | 0x1f<<10, + ACASH: 1<<30 | 0x45<<21 | 0x1f<<10, + ACASB: 0<<30 | 0x45<<21 | 0x1f<<10, + ACASAD: 3<<30 | 0x47<<21 | 0x1f<<10, + ACASAW: 2<<30 | 0x47<<21 | 0x1f<<10, + ACASLD: 3<<30 | 0x45<<21 | 0x3f<<10, + ACASLW: 2<<30 | 0x45<<21 | 0x3f<<10, + ACASALD: 3<<30 | 0x47<<21 | 0x3f<<10, + ACASALW: 2<<30 | 0x47<<21 | 0x3f<<10, + ACASALH: 1<<30 | 0x47<<21 | 0x3f<<10, + ACASALB: 0<<30 | 0x47<<21 | 0x3f<<10, +} +var atomicCASP = map[obj.As]uint32{ + ACASPD: 1<<30 | 0x41<<21 | 0x1f<<10, + ACASPW: 0<<30 | 0x41<<21 | 0x1f<<10, } var oprange [ALAST & obj.AMask][]Optab @@ -794,8 +818,10 @@ var optab = []Optab{ {ASTPW, C_PAIR, C_NONE, C_NONE, C_LOREG, 77, 12, 0, LTO, C_XPOST}, {ASTPW, C_PAIR, C_NONE, C_NONE, C_ADDR, 87, 12, 0, 0, 0}, - {ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG - {ASWPD, C_REG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ASWPD, C_REG, C_NONE, C_NONE, C_ZOREG, 47, 4, 0, 0, 0}, // RegTo2=C_REG + {ASWPD, C_REG, C_NONE, C_NONE, C_ZAUTO, 47, 4, REGSP, 0, 0}, // RegTo2=C_REG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZOREG, 106, 4, 0, 0, 0}, // RegTo2=C_REGREG + {ACASPD, C_PAIR, C_NONE, C_NONE, C_ZAUTO, 106, 4, REGSP, 0, 0}, // RegTo2=C_REGREG {ALDAR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDXR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, {ALDAXR, C_ZOREG, C_NONE, C_NONE, C_REG, 58, 4, 0, 0, 0}, @@ -2011,7 +2037,7 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab { a1-- a3 := C_NONE + 1 - if p.GetFrom3() != nil { + if p.GetFrom3() != nil && p.RestArgs[0].Pos == 0 { a3 = int(p.GetFrom3().Class) if a3 == 0 { a3 = c.aclass(p.GetFrom3()) + 1 @@ -2496,10 +2522,18 @@ func buildop(ctxt *obj.Link) { oprangeset(AMOVZW, t) case ASWPD: - for i := range atomicInstructions { + for i := range atomicLDADD { + oprangeset(i, t) + } + for i := range atomicSWP { + if i == ASWPD { + continue + } oprangeset(i, t) } + case ACASPD: + oprangeset(ACASPW, t) case ABEQ: oprangeset(ABNE, t) oprangeset(ABCS, t) @@ -3994,17 +4028,27 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= uint32(p.From.Reg&31) << 5 o1 |= uint32(p.To.Reg & 31) - case 47: /* SWPx/LDADDx/LDANDx/LDEORx/LDORx Rs, (Rb), Rt */ + case 47: // SWPx/LDADDx/LDANDx/LDEORx/LDORx/CASx Rs, (Rb), Rt rs := p.From.Reg rt := p.RegTo2 rb := p.To.Reg - fields := atomicInstructions[p.As] - // rt can't be sp. rt can't be r31 when field A is 0, A bit is the 23rd bit. - if rt == REG_RSP || (rt == REGZERO && (fields&(1<<23) == 0)) { + // rt can't be sp. + if rt == REG_RSP { c.ctxt.Diag("illegal destination register: %v\n", p) } - o1 |= fields | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) + if enc, ok := atomicLDADD[p.As]; ok { + // for LDADDx-like instructions, rt can't be r31 when field.enc A is 0, A bit is the 23rd bit. + if (rt == REGZERO) && (enc&(1<<23) == 0) { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc + } else if enc, ok := atomicSWP[p.As]; ok { + o1 |= enc + } else { + c.ctxt.Diag("invalid atomic instructions: %v\n", p) + } + o1 |= uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) case 48: /* ADD $C_ADDCON2, Rm, Rd */ // NOTE: this case does not use REGTMP. If it ever does, @@ -5351,6 +5395,38 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { rt := int((p.To.Reg) & 31) r := int((p.Reg) & 31) o1 |= ((Q & 1) << 30) | ((size & 3) << 22) | (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31) + + case 106: // CASPx (Rs, Rs+1), (Rb), (Rt, Rt+1) + rs := p.From.Reg + rt := p.GetTo2().Reg + rb := p.To.Reg + rs1 := int16(p.From.Offset) + rt1 := int16(p.GetTo2().Offset) + + enc, ok := atomicCASP[p.As] + if !ok { + c.ctxt.Diag("invalid CASP-like atomic instructions: %v\n", p) + } + // for CASPx-like instructions, Rs<0> != 1 && Rt<0> != 1 + switch { + case rs&1 != 0: + c.ctxt.Diag("source register pair must start from even register: %v\n", p) + break + case rt&1 != 0: + c.ctxt.Diag("destination register pair must start from even register: %v\n", p) + break + case rs != rs1-1: + c.ctxt.Diag("source register pair must be contiguous: %v\n", p) + break + case rt != rt1-1: + c.ctxt.Diag("destination register pair must be contiguous: %v\n", p) + break + } + // rt can't be sp. + if rt == REG_RSP { + c.ctxt.Diag("illegal destination register: %v\n", p) + } + o1 |= enc | uint32(rs&31)<<16 | uint32(rb&31)<<5 | uint32(rt&31) } out[0] = o1 out[1] = o2 diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 2037beca72..b578b6a09a 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -283,28 +283,41 @@ func (a *Addr) SetTarget(t *Prog) { // The other fields not yet mentioned are for use by the back ends and should // be left zeroed by creators of Prog lists. type Prog struct { - Ctxt *Link // linker context - Link *Prog // next Prog in linked list - From Addr // first source operand - RestArgs []Addr // can pack any operands that not fit into {Prog.From, Prog.To} - To Addr // destination operand (second is RegTo2 below) - Pool *Prog // constant pool entry, for arm,arm64 back ends - Forwd *Prog // for x86 back end - Rel *Prog // for x86, arm back ends - Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase - Pos src.XPos // source position of this instruction - Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) - As As // assembler opcode - Reg int16 // 2nd source operand - RegTo2 int16 // 2nd destination operand - Mark uint16 // bitmask of arch-specific items - Optab uint16 // arch-specific opcode index - Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) - Back uint8 // for x86 back end: backwards branch state - Ft uint8 // for x86 back end: type index of Prog.From - Tt uint8 // for x86 back end: type index of Prog.To - Isize uint8 // for x86 back end: size of the instruction in bytes -} + Ctxt *Link // linker context + Link *Prog // next Prog in linked list + From Addr // first source operand + RestArgs []AddrPos // can pack any operands that not fit into {Prog.From, Prog.To} + To Addr // destination operand (second is RegTo2 below) + Pool *Prog // constant pool entry, for arm,arm64 back ends + Forwd *Prog // for x86 back end + Rel *Prog // for x86, arm back ends + Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase + Pos src.XPos // source position of this instruction + Spadj int32 // effect of instruction on stack pointer (increment or decrement amount) + As As // assembler opcode + Reg int16 // 2nd source operand + RegTo2 int16 // 2nd destination operand + Mark uint16 // bitmask of arch-specific items + Optab uint16 // arch-specific opcode index + Scond uint8 // bits that describe instruction suffixes (e.g. ARM conditions) + Back uint8 // for x86 back end: backwards branch state + Ft uint8 // for x86 back end: type index of Prog.From + Tt uint8 // for x86 back end: type index of Prog.To + Isize uint8 // for x86 back end: size of the instruction in bytes +} + +// Pos indicates whether the oprand is the source or the destination. +type AddrPos struct { + Addr + Pos OperandPos +} + +type OperandPos int8 + +const ( + Source OperandPos = iota + Destination +) // From3Type returns p.GetFrom3().Type, or TYPE_NONE when // p.GetFrom3() returns nil. @@ -330,15 +343,36 @@ func (p *Prog) GetFrom3() *Addr { if p.RestArgs == nil { return nil } - return &p.RestArgs[0] + return &p.RestArgs[0].Addr } -// SetFrom3 assigns []Addr{a} to p.RestArgs. +// SetFrom3 assigns []Args{{a, 0}} to p.RestArgs. // In pair with Prog.GetFrom3 it can help in emulation of Prog.From3. // // Deprecated: for the same reasons as Prog.GetFrom3. func (p *Prog) SetFrom3(a Addr) { - p.RestArgs = []Addr{a} + p.RestArgs = []AddrPos{{a, Source}} +} + +// SetTo2 assings []Args{{a, 1}} to p.RestArgs when the second destination +// operand does not fit into prog.RegTo2. +func (p *Prog) SetTo2(a Addr) { + p.RestArgs = []AddrPos{{a, Destination}} +} + +// GetTo2 returns the second destination operand. +func (p *Prog) GetTo2() *Addr { + if p.RestArgs == nil { + return nil + } + return &p.RestArgs[0].Addr +} + +// SetRestArgs assigns more than one source operands to p.RestArgs. +func (p *Prog) SetRestArgs(args []Addr) { + for i := range args { + p.RestArgs = append(p.RestArgs, AddrPos{args[i], Source}) + } } // An As denotes an assembler opcode. diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 5301e44002..0cffa54fa6 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -48,7 +48,7 @@ func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *ob p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} @@ -234,7 +234,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: to.Reg} @@ -275,7 +275,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_STYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} @@ -340,7 +340,7 @@ func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { p.As = AAUIPC p.Mark |= NEED_PCREL_ITYPE_RELOC - p.RestArgs = []obj.Addr{obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}} + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} p.Reg = 0 p.To = to diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index da14dd3c41..f0f9d5cefc 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -718,7 +718,7 @@ func (c *ctxtz) oplook(p *obj.Prog) *Optab { p.From.Class = int8(c.aclass(&p.From) + 1) p.To.Class = int8(c.aclass(&p.To) + 1) for i := range p.RestArgs { - p.RestArgs[i].Class = int8(c.aclass(&p.RestArgs[i]) + 1) + p.RestArgs[i].Addr.Class = int8(c.aclass(&p.RestArgs[i].Addr) + 1) } // Mirrors the argument list in Optab. diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go index 21e28807a6..b9bacb7a22 100644 --- a/src/cmd/internal/obj/util.go +++ b/src/cmd/internal/obj/util.go @@ -175,9 +175,11 @@ func (p *Prog) WriteInstructionString(w io.Writer) { sep = ", " } for i := range p.RestArgs { - io.WriteString(w, sep) - WriteDconv(w, p, &p.RestArgs[i]) - sep = ", " + if p.RestArgs[i].Pos == Source { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } } if p.As == ATEXT { @@ -198,6 +200,13 @@ func (p *Prog) WriteInstructionString(w io.Writer) { if p.RegTo2 != REG_NONE { fmt.Fprintf(w, "%s%v", sep, Rconv(int(p.RegTo2))) } + for i := range p.RestArgs { + if p.RestArgs[i].Pos == Destination { + io.WriteString(w, sep) + WriteDconv(w, p, &p.RestArgs[i].Addr) + sep = ", " + } + } } func (ctxt *Link) NewProg() *Prog { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index c412f4945d..94aed44871 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -4270,7 +4270,7 @@ func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { args = append(args, ft) } for i := range p.RestArgs { - args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax) + args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax) } if tt != Ynone*Ymax { args = append(args, tt) @@ -5438,10 +5438,10 @@ func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { // unpackOps4 extracts 4 operands from p. func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { - return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To } // unpackOps5 extracts 5 operands from p. func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { - return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To + return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To } -- cgit v1.3 From 50af50d136551e2009b2b52e829570536271cdaa Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Wed, 14 Oct 2020 08:36:11 -0400 Subject: reflect,runtime: use internal ABI for selected ASM routines Change the definitions of selected runtime assembly routines from ABI0 (the default) to ABIInternal. The ABIInternal def is intended to indicate that these functions don't follow the existing Go runtime ABI. In addition, convert the assembly reference to runtime.main (from runtime.mainPC) to ABIInternal. Finally, for functions such as "runtime.duffzero" that are called directly from generated code, make sure that the compiler looks up the correct ABI version. This is intended to support the register abi work, however these changes should not have any issues even when GOEXPERIMENT=regabi is not in effect. Updates #27539, #40724. Change-Id: I9846f8dcaccc95718cf2e61a18b7e924a0677e4c Reviewed-on: https://go-review.googlesource.com/c/go/+/262319 Run-TryBot: Than McIntosh TryBot-Result: Go Bot Trust: Than McIntosh Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/gc/ssa.go | 86 ++++++++++++++++---------------- src/cmd/internal/obj/wasm/wasmobj.go | 6 +-- src/reflect/asm_amd64.s | 12 +++-- src/runtime/asm_amd64.s | 96 ++++++++++++++++++++++-------------- src/runtime/asm_wasm.s | 4 +- src/runtime/duff_amd64.s | 4 +- src/runtime/mkpreempt.go | 3 +- src/runtime/preempt_386.s | 3 +- src/runtime/preempt_amd64.s | 3 +- src/runtime/preempt_arm.s | 3 +- src/runtime/preempt_arm64.s | 3 +- src/runtime/preempt_mips64x.s | 3 +- src/runtime/preempt_mipsx.s | 3 +- src/runtime/preempt_ppc64x.s | 3 +- src/runtime/preempt_riscv64.s | 3 +- src/runtime/preempt_s390x.s | 3 +- src/runtime/preempt_wasm.s | 3 +- src/runtime/race_amd64.s | 12 +++-- src/runtime/sys_linux_amd64.s | 13 +++-- 19 files changed, 154 insertions(+), 112 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 45d628cc5e..7388e4e3e8 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -72,9 +72,9 @@ func initssaconfig() { deferproc = sysfunc("deferproc") deferprocStack = sysfunc("deferprocStack") Deferreturn = sysfunc("deferreturn") - Duffcopy = sysvar("duffcopy") // asm func with special ABI - Duffzero = sysvar("duffzero") // asm func with special ABI - gcWriteBarrier = sysvar("gcWriteBarrier") // asm func with special ABI + Duffcopy = sysfunc("duffcopy") + Duffzero = sysfunc("duffzero") + gcWriteBarrier = sysfunc("gcWriteBarrier") goschedguarded = sysfunc("goschedguarded") growslice = sysfunc("growslice") msanread = sysfunc("msanread") @@ -105,51 +105,51 @@ func initssaconfig() { // asm funcs with special ABI if thearch.LinkArch.Name == "amd64" { GCWriteBarrierReg = map[int16]*obj.LSym{ - x86.REG_AX: sysvar("gcWriteBarrier"), - x86.REG_CX: sysvar("gcWriteBarrierCX"), - x86.REG_DX: sysvar("gcWriteBarrierDX"), - x86.REG_BX: sysvar("gcWriteBarrierBX"), - x86.REG_BP: sysvar("gcWriteBarrierBP"), - x86.REG_SI: sysvar("gcWriteBarrierSI"), - x86.REG_R8: sysvar("gcWriteBarrierR8"), - x86.REG_R9: sysvar("gcWriteBarrierR9"), + x86.REG_AX: sysfunc("gcWriteBarrier"), + x86.REG_CX: sysfunc("gcWriteBarrierCX"), + x86.REG_DX: sysfunc("gcWriteBarrierDX"), + x86.REG_BX: sysfunc("gcWriteBarrierBX"), + x86.REG_BP: sysfunc("gcWriteBarrierBP"), + x86.REG_SI: sysfunc("gcWriteBarrierSI"), + x86.REG_R8: sysfunc("gcWriteBarrierR8"), + x86.REG_R9: sysfunc("gcWriteBarrierR9"), } } if thearch.LinkArch.Family == sys.Wasm { - BoundsCheckFunc[ssa.BoundsIndex] = sysvar("goPanicIndex") - BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("goPanicIndexU") - BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("goPanicSliceAlen") - BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("goPanicSliceAlenU") - BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("goPanicSliceAcap") - BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("goPanicSliceAcapU") - BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("goPanicSliceB") - BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("goPanicSliceBU") - BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("goPanicSlice3Alen") - BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("goPanicSlice3AlenU") - BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("goPanicSlice3Acap") - BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("goPanicSlice3AcapU") - BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("goPanicSlice3B") - BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("goPanicSlice3BU") - BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("goPanicSlice3C") - BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("goPanicSlice3CU") + BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("goPanicIndex") + BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("goPanicIndexU") + BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("goPanicSliceAlen") + BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("goPanicSliceAlenU") + BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("goPanicSliceAcap") + BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("goPanicSliceAcapU") + BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("goPanicSliceB") + BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("goPanicSliceBU") + BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("goPanicSlice3Alen") + BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("goPanicSlice3AlenU") + BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("goPanicSlice3Acap") + BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("goPanicSlice3AcapU") + BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("goPanicSlice3B") + BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("goPanicSlice3BU") + BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("goPanicSlice3C") + BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("goPanicSlice3CU") } else { - BoundsCheckFunc[ssa.BoundsIndex] = sysvar("panicIndex") - BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("panicIndexU") - BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("panicSliceAlen") - BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("panicSliceAlenU") - BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("panicSliceAcap") - BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("panicSliceAcapU") - BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("panicSliceB") - BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("panicSliceBU") - BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("panicSlice3Alen") - BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("panicSlice3AlenU") - BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("panicSlice3Acap") - BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("panicSlice3AcapU") - BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("panicSlice3B") - BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("panicSlice3BU") - BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("panicSlice3C") - BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("panicSlice3CU") + BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("panicIndex") + BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("panicIndexU") + BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("panicSliceAlen") + BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("panicSliceAlenU") + BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("panicSliceAcap") + BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("panicSliceAcapU") + BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("panicSliceB") + BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("panicSliceBU") + BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("panicSlice3Alen") + BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("panicSlice3AlenU") + BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("panicSlice3Acap") + BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("panicSlice3AcapU") + BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("panicSlice3B") + BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("panicSlice3BU") + BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("panicSlice3C") + BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("panicSlice3CU") } if thearch.LinkArch.PtrSize == 4 { ExtendCheckFunc[ssa.BoundsIndex] = sysvar("panicExtendIndex") diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index f7f66a1255..2e9890d86c 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -129,7 +129,6 @@ var ( morestackNoCtxt *obj.LSym gcWriteBarrier *obj.LSym sigpanic *obj.LSym - sigpanic0 *obj.LSym deferreturn *obj.LSym jmpdefer *obj.LSym ) @@ -142,9 +141,8 @@ const ( func instinit(ctxt *obj.Link) { morestack = ctxt.Lookup("runtime.morestack") morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt") - gcWriteBarrier = ctxt.Lookup("runtime.gcWriteBarrier") + gcWriteBarrier = ctxt.LookupABI("runtime.gcWriteBarrier", obj.ABIInternal) sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal) - sigpanic0 = ctxt.LookupABI("runtime.sigpanic", 0) // sigpanic called from assembly, which has ABI0 deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal) // jmpdefer is defined in assembly as ABI0, but what we're // looking for is the *call* to jmpdefer from the Go function @@ -493,7 +491,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } // return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack - if call.As == ACALLNORESUME && call.To.Sym != sigpanic && call.To.Sym != sigpanic0 { // sigpanic unwinds the stack, but it never resumes + if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes // trying to unwind WebAssembly stack but call has no resume point, terminate with error p = appendp(p, AIf) p = appendp(p, obj.AUNDEF) diff --git a/src/reflect/asm_amd64.s b/src/reflect/asm_amd64.s index fb28ab87f1..5c8e56558c 100644 --- a/src/reflect/asm_amd64.s +++ b/src/reflect/asm_amd64.s @@ -9,7 +9,9 @@ // See the comment on the declaration of makeFuncStub in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 +// makeFuncStub must be ABIInternal because it is placed directly +// in function values. +TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 NO_LOCAL_POINTERS MOVQ DX, 0(SP) LEAQ argframe+0(FP), CX @@ -17,14 +19,16 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 MOVB $0, 24(SP) LEAQ 24(SP), AX MOVQ AX, 16(SP) - CALL ·callReflect(SB) + CALL ·callReflect(SB) RET // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 +// methodValueCall must be ABIInternal because it is placed directly +// in function values. +TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 NO_LOCAL_POINTERS MOVQ DX, 0(SP) LEAQ argframe+0(FP), CX @@ -32,5 +36,5 @@ TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 MOVB $0, 24(SP) LEAQ 24(SP), AX MOVQ AX, 16(SP) - CALL ·callMethod(SB) + CALL ·callMethod(SB) RET diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 19a3bb2d7d..196252e1dd 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -84,7 +84,9 @@ GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8 DATA _rt0_amd64_lib_argv<>(SB)/8, $0 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8 -TEXT runtime·rt0_go(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stack-based Go ABI (and +// in addition there are no calls to this entry point from Go code). +TEXT runtime·rt0_go(SB),NOSPLIT,$0 // copy arguments forward on an even stack MOVQ DI, AX // argc MOVQ SI, BX // argv @@ -229,10 +231,13 @@ ok: // Prevent dead-code elimination of debugCallV1, which is // intended to be called by debuggers. - MOVQ $runtime·debugCallV1(SB), AX + MOVQ $runtime·debugCallV1(SB), AX RET -DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) +// mainPC is a function value for runtime.main, to be passed to newproc. +// The reference to runtime.main is made via ABIInternal, since the +// actual function (not the ABI0 wrapper) is needed by newproc. +DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) GLOBL runtime·mainPC(SB),RODATA,$8 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 @@ -468,7 +473,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0 JMP AX // Note: can't just "JMP NAME(SB)" - bad inlining results. -TEXT ·reflectcall(SB), NOSPLIT, $0-32 +TEXT ·reflectcall(SB), NOSPLIT, $0-32 MOVLQZX argsize+24(FP), CX DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) @@ -1354,8 +1359,11 @@ TEXT _cgo_topofstack(SB),NOSPLIT,$0 RET // The top-most function running on a goroutine -// returns to goexit+PCQuantum. -TEXT runtime·goexit(SB),NOSPLIT,$0-0 +// returns to goexit+PCQuantum. Defined as ABIInternal +// so as to make it identifiable to traceback (this +// function it used as a sentinel; traceback wants to +// see the func PC, not a wrapper PC). +TEXT runtime·goexit(SB),NOSPLIT,$0-0 BYTE $0x90 // NOP CALL runtime·goexit1(SB) // does not return // traceback from goexit1 must hit code range of goexit @@ -1377,7 +1385,8 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 // - AX is the value being written at DI // It clobbers FLAGS. It does not clobber any general-purpose registers, // but may clobber others (e.g., SSE registers). -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 // Save the registers clobbered by the fast path. This is slightly // faster than having the caller spill these. MOVQ R14, 104(SP) @@ -1461,51 +1470,58 @@ flush: JMP ret // gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX. -TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0 XCHGQ CX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ CX, AX RET // gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX. -TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0 XCHGQ DX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ DX, AX RET // gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX. -TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0 XCHGQ BX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ BX, AX RET // gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP. -TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0 XCHGQ BP, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ BP, AX RET // gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI. -TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0 XCHGQ SI, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ SI, AX RET // gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8. -TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0 XCHGQ R8, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ R8, AX RET // gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9. -TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0 XCHGQ R9, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ R9, AX RET @@ -1544,7 +1560,10 @@ GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below // obey escape analysis requirements. Specifically, it must not pass // a stack pointer to an escaping argument. debugCallV1 cannot check // this invariant. -TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0 +// +// This is ABIInternal because Go code injects its PC directly into new +// goroutine stacks. +TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0 // Save all registers that may contain pointers so they can be // conservatively scanned. // @@ -1705,67 +1724,68 @@ TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16 // in the caller's stack frame. These stubs write the args into that stack space and // then tail call to the corresponding runtime handler. // The tail call makes these stubs disappear in backtraces. -TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 +// Defined as ABIInternal since they do not use the stack-based Go ABI. +TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicIndex(SB) -TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 +TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicIndexU(SB) -TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAlen(SB) -TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAlenU(SB) -TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAcap(SB) -TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAcapU(SB) -TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSliceB(SB) -TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSliceBU(SB) -TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3Alen(SB) -TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3AlenU(SB) -TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3Acap(SB) -TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3AcapU(SB) -TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSlice3B(SB) -TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSlice3BU(SB) -TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3C(SB) -TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3CU(SB) diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index 67e81adf0b..fcb780f1dc 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -196,7 +196,7 @@ TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16 Get CTXT I64Eqz If - CALLNORESUME runtime·sigpanic(SB) + CALLNORESUME runtime·sigpanic(SB) End // caller sp after CALL @@ -300,7 +300,7 @@ TEXT ·reflectcall(SB), NOSPLIT, $0-32 I64Load fn+8(FP) I64Eqz If - CALLNORESUME runtime·sigpanic(SB) + CALLNORESUME runtime·sigpanic(SB) End MOVW argsize+24(FP), R0 diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s index 44dc75d297..2ff5bf6dbc 100644 --- a/src/runtime/duff_amd64.s +++ b/src/runtime/duff_amd64.s @@ -4,7 +4,7 @@ #include "textflag.h" -TEXT runtime·duffzero(SB), NOSPLIT, $0-0 +TEXT runtime·duffzero(SB), NOSPLIT, $0-0 MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) @@ -103,7 +103,7 @@ TEXT runtime·duffzero(SB), NOSPLIT, $0-0 RET -TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 +TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 MOVUPS (SI), X0 ADDQ $16, SI MOVUPS X0, (DI) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 286f81489a..1d614dd003 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -126,7 +126,8 @@ func header(arch string) { } fmt.Fprintf(out, "#include \"go_asm.h\"\n") fmt.Fprintf(out, "#include \"textflag.h\"\n\n") - fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n") + fmt.Fprintf(out, "// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally.\n") + fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n") } func p(f string, args ...interface{}) { diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s index c3a5fa1f36..a803b24dc6 100644 --- a/src/runtime/preempt_386.s +++ b/src/runtime/preempt_386.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHFL ADJSP $156 NOP SP diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index 4765e9f448..92c664d79a 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHQ BP MOVQ SP, BP // Save flags before clobbering them diff --git a/src/runtime/preempt_arm.s b/src/runtime/preempt_arm.s index 8f243c0dcd..bbc9fbb1ea 100644 --- a/src/runtime/preempt_arm.s +++ b/src/runtime/preempt_arm.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVW.W R14, -188(R13) MOVW R0, 4(R13) MOVW R1, 8(R13) diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index 36ee13282c..2b70a28479 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R30, -496(RSP) SUB $496, RSP #ifdef GOOS_linux diff --git a/src/runtime/preempt_mips64x.s b/src/runtime/preempt_mips64x.s index 1e123e8077..0d0c157c36 100644 --- a/src/runtime/preempt_mips64x.s +++ b/src/runtime/preempt_mips64x.s @@ -5,7 +5,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVV R31, -488(R29) SUBV $488, R29 MOVV R1, 8(R29) diff --git a/src/runtime/preempt_mipsx.s b/src/runtime/preempt_mipsx.s index afac33e0a0..86d3a918d3 100644 --- a/src/runtime/preempt_mipsx.s +++ b/src/runtime/preempt_mipsx.s @@ -5,7 +5,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVW R31, -244(R29) SUB $244, R29 MOVW R1, 4(R29) diff --git a/src/runtime/preempt_ppc64x.s b/src/runtime/preempt_ppc64x.s index b2d7e30ec7..90634386db 100644 --- a/src/runtime/preempt_ppc64x.s +++ b/src/runtime/preempt_ppc64x.s @@ -5,7 +5,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R31, -488(R1) MOVD LR, R31 MOVDU R31, -520(R1) diff --git a/src/runtime/preempt_riscv64.s b/src/runtime/preempt_riscv64.s index eb68dcba2b..d4f9cc277f 100644 --- a/src/runtime/preempt_riscv64.s +++ b/src/runtime/preempt_riscv64.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOV X1, -472(X2) ADD $-472, X2 MOV X3, 8(X2) diff --git a/src/runtime/preempt_s390x.s b/src/runtime/preempt_s390x.s index ca9e47cde1..c6f11571df 100644 --- a/src/runtime/preempt_s390x.s +++ b/src/runtime/preempt_s390x.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 IPM R10 MOVD R14, -248(R15) ADD $-248, R15 diff --git a/src/runtime/preempt_wasm.s b/src/runtime/preempt_wasm.s index 0cf57d3d22..da90e8aa6d 100644 --- a/src/runtime/preempt_wasm.s +++ b/src/runtime/preempt_wasm.s @@ -3,6 +3,7 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 // No async preemption on wasm UNDEF diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s index 758d543203..4a86b3371a 100644 --- a/src/runtime/race_amd64.s +++ b/src/runtime/race_amd64.s @@ -41,7 +41,9 @@ // func runtime·raceread(addr uintptr) // Called from instrumented code. -TEXT runtime·raceread(SB), NOSPLIT, $0-8 +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would render runtime.getcallerpc ineffective. +TEXT runtime·raceread(SB), NOSPLIT, $0-8 MOVQ addr+0(FP), RARG1 MOVQ (SP), RARG2 // void __tsan_read(ThreadState *thr, void *addr, void *pc); @@ -65,7 +67,9 @@ TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 // func runtime·racewrite(addr uintptr) // Called from instrumented code. -TEXT runtime·racewrite(SB), NOSPLIT, $0-8 +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would render runtime.getcallerpc ineffective. +TEXT runtime·racewrite(SB), NOSPLIT, $0-8 MOVQ addr+0(FP), RARG1 MOVQ (SP), RARG2 // void __tsan_write(ThreadState *thr, void *addr, void *pc); @@ -114,7 +118,9 @@ TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 // func runtime·racewriterange(addr, size uintptr) // Called from instrumented code. -TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would render runtime.getcallerpc ineffective. +TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 MOVQ addr+0(FP), RARG1 MOVQ size+8(FP), RARG2 MOVQ (SP), RARG3 diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 681cd20274..37cb8dad03 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -380,7 +380,8 @@ TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 POPQ BP RET -TEXT runtime·sigtramp(SB),NOSPLIT,$72 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·sigtramp(SB),NOSPLIT,$72 // Save callee-saved C registers, since the caller may be a C signal handler. MOVQ BX, bx-8(SP) MOVQ BP, bp-16(SP) // save in case GOEXPERIMENT=noframepointer is set @@ -407,7 +408,8 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$72 // Used instead of sigtramp in programs that use cgo. // Arguments from kernel are in DI, SI, DX. -TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 // If no traceback function, do usual sigtramp. MOVQ runtime·cgoTraceback(SB), AX TESTQ AX, AX @@ -450,12 +452,12 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 // The first three arguments, and the fifth, are already in registers. // Set the two remaining arguments now. MOVQ runtime·cgoTraceback(SB), CX - MOVQ $runtime·sigtramp(SB), R9 + MOVQ $runtime·sigtramp(SB), R9 MOVQ _cgo_callers(SB), AX JMP AX sigtramp: - JMP runtime·sigtramp(SB) + JMP runtime·sigtramp(SB) sigtrampnog: // Signal arrived on a non-Go thread. If this is SIGPROF, get a @@ -486,7 +488,8 @@ sigtrampnog: // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/x86_64/sigaction.c // The code that cares about the precise instructions used is: // https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup -TEXT runtime·sigreturn(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·sigreturn(SB),NOSPLIT,$0 MOVQ $SYS_rt_sigreturn, AX SYSCALL INT $3 // not reached -- cgit v1.3 From ddc7e1d16f58c73a2587bba130a4a49ffac8b0d1 Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Thu, 29 Oct 2020 15:37:35 +0000 Subject: Revert "reflect,runtime: use internal ABI for selected ASM routines" This reverts commit 50af50d136551e2009b2b52e829570536271cdaa. Reason for revert: Causes failures in the runtime package test on Darwin, apparently. Change-Id: I006bc1b3443fa7207e92fb4a93e3fb438d4d3de3 Reviewed-on: https://go-review.googlesource.com/c/go/+/266257 Trust: Than McIntosh Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/gc/ssa.go | 86 ++++++++++++++++---------------- src/cmd/internal/obj/wasm/wasmobj.go | 6 ++- src/reflect/asm_amd64.s | 12 ++--- src/runtime/asm_amd64.s | 96 ++++++++++++++---------------------- src/runtime/asm_wasm.s | 4 +- src/runtime/duff_amd64.s | 4 +- src/runtime/mkpreempt.go | 3 +- src/runtime/preempt_386.s | 3 +- src/runtime/preempt_amd64.s | 3 +- src/runtime/preempt_arm.s | 3 +- src/runtime/preempt_arm64.s | 3 +- src/runtime/preempt_mips64x.s | 3 +- src/runtime/preempt_mipsx.s | 3 +- src/runtime/preempt_ppc64x.s | 3 +- src/runtime/preempt_riscv64.s | 3 +- src/runtime/preempt_s390x.s | 3 +- src/runtime/preempt_wasm.s | 3 +- src/runtime/race_amd64.s | 12 ++--- src/runtime/sys_linux_amd64.s | 13 ++--- 19 files changed, 112 insertions(+), 154 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 7388e4e3e8..45d628cc5e 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -72,9 +72,9 @@ func initssaconfig() { deferproc = sysfunc("deferproc") deferprocStack = sysfunc("deferprocStack") Deferreturn = sysfunc("deferreturn") - Duffcopy = sysfunc("duffcopy") - Duffzero = sysfunc("duffzero") - gcWriteBarrier = sysfunc("gcWriteBarrier") + Duffcopy = sysvar("duffcopy") // asm func with special ABI + Duffzero = sysvar("duffzero") // asm func with special ABI + gcWriteBarrier = sysvar("gcWriteBarrier") // asm func with special ABI goschedguarded = sysfunc("goschedguarded") growslice = sysfunc("growslice") msanread = sysfunc("msanread") @@ -105,51 +105,51 @@ func initssaconfig() { // asm funcs with special ABI if thearch.LinkArch.Name == "amd64" { GCWriteBarrierReg = map[int16]*obj.LSym{ - x86.REG_AX: sysfunc("gcWriteBarrier"), - x86.REG_CX: sysfunc("gcWriteBarrierCX"), - x86.REG_DX: sysfunc("gcWriteBarrierDX"), - x86.REG_BX: sysfunc("gcWriteBarrierBX"), - x86.REG_BP: sysfunc("gcWriteBarrierBP"), - x86.REG_SI: sysfunc("gcWriteBarrierSI"), - x86.REG_R8: sysfunc("gcWriteBarrierR8"), - x86.REG_R9: sysfunc("gcWriteBarrierR9"), + x86.REG_AX: sysvar("gcWriteBarrier"), + x86.REG_CX: sysvar("gcWriteBarrierCX"), + x86.REG_DX: sysvar("gcWriteBarrierDX"), + x86.REG_BX: sysvar("gcWriteBarrierBX"), + x86.REG_BP: sysvar("gcWriteBarrierBP"), + x86.REG_SI: sysvar("gcWriteBarrierSI"), + x86.REG_R8: sysvar("gcWriteBarrierR8"), + x86.REG_R9: sysvar("gcWriteBarrierR9"), } } if thearch.LinkArch.Family == sys.Wasm { - BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("goPanicIndex") - BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("goPanicIndexU") - BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("goPanicSliceAlen") - BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("goPanicSliceAlenU") - BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("goPanicSliceAcap") - BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("goPanicSliceAcapU") - BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("goPanicSliceB") - BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("goPanicSliceBU") - BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("goPanicSlice3Alen") - BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("goPanicSlice3AlenU") - BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("goPanicSlice3Acap") - BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("goPanicSlice3AcapU") - BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("goPanicSlice3B") - BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("goPanicSlice3BU") - BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("goPanicSlice3C") - BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("goPanicSlice3CU") + BoundsCheckFunc[ssa.BoundsIndex] = sysvar("goPanicIndex") + BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("goPanicIndexU") + BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("goPanicSliceAlen") + BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("goPanicSliceAlenU") + BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("goPanicSliceAcap") + BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("goPanicSliceAcapU") + BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("goPanicSliceB") + BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("goPanicSliceBU") + BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("goPanicSlice3Alen") + BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("goPanicSlice3AlenU") + BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("goPanicSlice3Acap") + BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("goPanicSlice3AcapU") + BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("goPanicSlice3B") + BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("goPanicSlice3BU") + BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("goPanicSlice3C") + BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("goPanicSlice3CU") } else { - BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("panicIndex") - BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("panicIndexU") - BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("panicSliceAlen") - BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("panicSliceAlenU") - BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("panicSliceAcap") - BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("panicSliceAcapU") - BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("panicSliceB") - BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("panicSliceBU") - BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("panicSlice3Alen") - BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("panicSlice3AlenU") - BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("panicSlice3Acap") - BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("panicSlice3AcapU") - BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("panicSlice3B") - BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("panicSlice3BU") - BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("panicSlice3C") - BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("panicSlice3CU") + BoundsCheckFunc[ssa.BoundsIndex] = sysvar("panicIndex") + BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("panicIndexU") + BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("panicSliceAlen") + BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("panicSliceAlenU") + BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("panicSliceAcap") + BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("panicSliceAcapU") + BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("panicSliceB") + BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("panicSliceBU") + BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("panicSlice3Alen") + BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("panicSlice3AlenU") + BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("panicSlice3Acap") + BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("panicSlice3AcapU") + BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("panicSlice3B") + BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("panicSlice3BU") + BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("panicSlice3C") + BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("panicSlice3CU") } if thearch.LinkArch.PtrSize == 4 { ExtendCheckFunc[ssa.BoundsIndex] = sysvar("panicExtendIndex") diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index 2e9890d86c..f7f66a1255 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -129,6 +129,7 @@ var ( morestackNoCtxt *obj.LSym gcWriteBarrier *obj.LSym sigpanic *obj.LSym + sigpanic0 *obj.LSym deferreturn *obj.LSym jmpdefer *obj.LSym ) @@ -141,8 +142,9 @@ const ( func instinit(ctxt *obj.Link) { morestack = ctxt.Lookup("runtime.morestack") morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt") - gcWriteBarrier = ctxt.LookupABI("runtime.gcWriteBarrier", obj.ABIInternal) + gcWriteBarrier = ctxt.Lookup("runtime.gcWriteBarrier") sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal) + sigpanic0 = ctxt.LookupABI("runtime.sigpanic", 0) // sigpanic called from assembly, which has ABI0 deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal) // jmpdefer is defined in assembly as ABI0, but what we're // looking for is the *call* to jmpdefer from the Go function @@ -491,7 +493,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } // return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack - if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes + if call.As == ACALLNORESUME && call.To.Sym != sigpanic && call.To.Sym != sigpanic0 { // sigpanic unwinds the stack, but it never resumes // trying to unwind WebAssembly stack but call has no resume point, terminate with error p = appendp(p, AIf) p = appendp(p, obj.AUNDEF) diff --git a/src/reflect/asm_amd64.s b/src/reflect/asm_amd64.s index 5c8e56558c..fb28ab87f1 100644 --- a/src/reflect/asm_amd64.s +++ b/src/reflect/asm_amd64.s @@ -9,9 +9,7 @@ // See the comment on the declaration of makeFuncStub in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -// makeFuncStub must be ABIInternal because it is placed directly -// in function values. -TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 +TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 NO_LOCAL_POINTERS MOVQ DX, 0(SP) LEAQ argframe+0(FP), CX @@ -19,16 +17,14 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 MOVB $0, 24(SP) LEAQ 24(SP), AX MOVQ AX, 16(SP) - CALL ·callReflect(SB) + CALL ·callReflect(SB) RET // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -// methodValueCall must be ABIInternal because it is placed directly -// in function values. -TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 +TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 NO_LOCAL_POINTERS MOVQ DX, 0(SP) LEAQ argframe+0(FP), CX @@ -36,5 +32,5 @@ TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 MOVB $0, 24(SP) LEAQ 24(SP), AX MOVQ AX, 16(SP) - CALL ·callMethod(SB) + CALL ·callMethod(SB) RET diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 196252e1dd..19a3bb2d7d 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -84,9 +84,7 @@ GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8 DATA _rt0_amd64_lib_argv<>(SB)/8, $0 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8 -// Defined as ABIInternal since it does not use the stack-based Go ABI (and -// in addition there are no calls to this entry point from Go code). -TEXT runtime·rt0_go(SB),NOSPLIT,$0 +TEXT runtime·rt0_go(SB),NOSPLIT,$0 // copy arguments forward on an even stack MOVQ DI, AX // argc MOVQ SI, BX // argv @@ -231,13 +229,10 @@ ok: // Prevent dead-code elimination of debugCallV1, which is // intended to be called by debuggers. - MOVQ $runtime·debugCallV1(SB), AX + MOVQ $runtime·debugCallV1(SB), AX RET -// mainPC is a function value for runtime.main, to be passed to newproc. -// The reference to runtime.main is made via ABIInternal, since the -// actual function (not the ABI0 wrapper) is needed by newproc. -DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) +DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) GLOBL runtime·mainPC(SB),RODATA,$8 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 @@ -473,7 +468,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0 JMP AX // Note: can't just "JMP NAME(SB)" - bad inlining results. -TEXT ·reflectcall(SB), NOSPLIT, $0-32 +TEXT ·reflectcall(SB), NOSPLIT, $0-32 MOVLQZX argsize+24(FP), CX DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) @@ -1359,11 +1354,8 @@ TEXT _cgo_topofstack(SB),NOSPLIT,$0 RET // The top-most function running on a goroutine -// returns to goexit+PCQuantum. Defined as ABIInternal -// so as to make it identifiable to traceback (this -// function it used as a sentinel; traceback wants to -// see the func PC, not a wrapper PC). -TEXT runtime·goexit(SB),NOSPLIT,$0-0 +// returns to goexit+PCQuantum. +TEXT runtime·goexit(SB),NOSPLIT,$0-0 BYTE $0x90 // NOP CALL runtime·goexit1(SB) // does not return // traceback from goexit1 must hit code range of goexit @@ -1385,8 +1377,7 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 // - AX is the value being written at DI // It clobbers FLAGS. It does not clobber any general-purpose registers, // but may clobber others (e.g., SSE registers). -// Defined as ABIInternal since it does not use the stack-based Go ABI. -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 +TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 // Save the registers clobbered by the fast path. This is slightly // faster than having the caller spill these. MOVQ R14, 104(SP) @@ -1470,58 +1461,51 @@ flush: JMP ret // gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0 XCHGQ CX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ CX, AX RET // gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0 XCHGQ DX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ DX, AX RET // gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0 XCHGQ BX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ BX, AX RET // gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0 XCHGQ BP, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ BP, AX RET // gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0 XCHGQ SI, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ SI, AX RET // gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0 XCHGQ R8, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ R8, AX RET // gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9. -// Defined as ABIInternal since it does not use the stable Go ABI. -TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0 +TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0 XCHGQ R9, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ R9, AX RET @@ -1560,10 +1544,7 @@ GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below // obey escape analysis requirements. Specifically, it must not pass // a stack pointer to an escaping argument. debugCallV1 cannot check // this invariant. -// -// This is ABIInternal because Go code injects its PC directly into new -// goroutine stacks. -TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0 +TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0 // Save all registers that may contain pointers so they can be // conservatively scanned. // @@ -1724,68 +1705,67 @@ TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16 // in the caller's stack frame. These stubs write the args into that stack space and // then tail call to the corresponding runtime handler. // The tail call makes these stubs disappear in backtraces. -// Defined as ABIInternal since they do not use the stack-based Go ABI. -TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 +TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicIndex(SB) -TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 +TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicIndexU(SB) -TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAlen(SB) -TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAlenU(SB) -TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAcap(SB) -TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAcapU(SB) -TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSliceB(SB) -TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSliceBU(SB) -TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3Alen(SB) -TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3AlenU(SB) -TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3Acap(SB) -TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3AcapU(SB) -TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSlice3B(SB) -TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSlice3BU(SB) -TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3C(SB) -TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3CU(SB) diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index fcb780f1dc..67e81adf0b 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -196,7 +196,7 @@ TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16 Get CTXT I64Eqz If - CALLNORESUME runtime·sigpanic(SB) + CALLNORESUME runtime·sigpanic(SB) End // caller sp after CALL @@ -300,7 +300,7 @@ TEXT ·reflectcall(SB), NOSPLIT, $0-32 I64Load fn+8(FP) I64Eqz If - CALLNORESUME runtime·sigpanic(SB) + CALLNORESUME runtime·sigpanic(SB) End MOVW argsize+24(FP), R0 diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s index 2ff5bf6dbc..44dc75d297 100644 --- a/src/runtime/duff_amd64.s +++ b/src/runtime/duff_amd64.s @@ -4,7 +4,7 @@ #include "textflag.h" -TEXT runtime·duffzero(SB), NOSPLIT, $0-0 +TEXT runtime·duffzero(SB), NOSPLIT, $0-0 MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) @@ -103,7 +103,7 @@ TEXT runtime·duffzero(SB), NOSPLIT, $0-0 RET -TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 +TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 MOVUPS (SI), X0 ADDQ $16, SI MOVUPS X0, (DI) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 1d614dd003..286f81489a 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -126,8 +126,7 @@ func header(arch string) { } fmt.Fprintf(out, "#include \"go_asm.h\"\n") fmt.Fprintf(out, "#include \"textflag.h\"\n\n") - fmt.Fprintf(out, "// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally.\n") - fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n") + fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n") } func p(f string, args ...interface{}) { diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s index a803b24dc6..c3a5fa1f36 100644 --- a/src/runtime/preempt_386.s +++ b/src/runtime/preempt_386.s @@ -3,8 +3,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHFL ADJSP $156 NOP SP diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index 92c664d79a..4765e9f448 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -3,8 +3,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHQ BP MOVQ SP, BP // Save flags before clobbering them diff --git a/src/runtime/preempt_arm.s b/src/runtime/preempt_arm.s index bbc9fbb1ea..8f243c0dcd 100644 --- a/src/runtime/preempt_arm.s +++ b/src/runtime/preempt_arm.s @@ -3,8 +3,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVW.W R14, -188(R13) MOVW R0, 4(R13) MOVW R1, 8(R13) diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index 2b70a28479..36ee13282c 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -3,8 +3,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R30, -496(RSP) SUB $496, RSP #ifdef GOOS_linux diff --git a/src/runtime/preempt_mips64x.s b/src/runtime/preempt_mips64x.s index 0d0c157c36..1e123e8077 100644 --- a/src/runtime/preempt_mips64x.s +++ b/src/runtime/preempt_mips64x.s @@ -5,8 +5,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVV R31, -488(R29) SUBV $488, R29 MOVV R1, 8(R29) diff --git a/src/runtime/preempt_mipsx.s b/src/runtime/preempt_mipsx.s index 86d3a918d3..afac33e0a0 100644 --- a/src/runtime/preempt_mipsx.s +++ b/src/runtime/preempt_mipsx.s @@ -5,8 +5,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVW R31, -244(R29) SUB $244, R29 MOVW R1, 4(R29) diff --git a/src/runtime/preempt_ppc64x.s b/src/runtime/preempt_ppc64x.s index 90634386db..b2d7e30ec7 100644 --- a/src/runtime/preempt_ppc64x.s +++ b/src/runtime/preempt_ppc64x.s @@ -5,8 +5,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R31, -488(R1) MOVD LR, R31 MOVDU R31, -520(R1) diff --git a/src/runtime/preempt_riscv64.s b/src/runtime/preempt_riscv64.s index d4f9cc277f..eb68dcba2b 100644 --- a/src/runtime/preempt_riscv64.s +++ b/src/runtime/preempt_riscv64.s @@ -3,8 +3,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOV X1, -472(X2) ADD $-472, X2 MOV X3, 8(X2) diff --git a/src/runtime/preempt_s390x.s b/src/runtime/preempt_s390x.s index c6f11571df..ca9e47cde1 100644 --- a/src/runtime/preempt_s390x.s +++ b/src/runtime/preempt_s390x.s @@ -3,8 +3,7 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 IPM R10 MOVD R14, -248(R15) ADD $-248, R15 diff --git a/src/runtime/preempt_wasm.s b/src/runtime/preempt_wasm.s index da90e8aa6d..0cf57d3d22 100644 --- a/src/runtime/preempt_wasm.s +++ b/src/runtime/preempt_wasm.s @@ -3,7 +3,6 @@ #include "go_asm.h" #include "textflag.h" -// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 // No async preemption on wasm UNDEF diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s index 4a86b3371a..758d543203 100644 --- a/src/runtime/race_amd64.s +++ b/src/runtime/race_amd64.s @@ -41,9 +41,7 @@ // func runtime·raceread(addr uintptr) // Called from instrumented code. -// Defined as ABIInternal so as to avoid introducing a wrapper, -// which would render runtime.getcallerpc ineffective. -TEXT runtime·raceread(SB), NOSPLIT, $0-8 +TEXT runtime·raceread(SB), NOSPLIT, $0-8 MOVQ addr+0(FP), RARG1 MOVQ (SP), RARG2 // void __tsan_read(ThreadState *thr, void *addr, void *pc); @@ -67,9 +65,7 @@ TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 // func runtime·racewrite(addr uintptr) // Called from instrumented code. -// Defined as ABIInternal so as to avoid introducing a wrapper, -// which would render runtime.getcallerpc ineffective. -TEXT runtime·racewrite(SB), NOSPLIT, $0-8 +TEXT runtime·racewrite(SB), NOSPLIT, $0-8 MOVQ addr+0(FP), RARG1 MOVQ (SP), RARG2 // void __tsan_write(ThreadState *thr, void *addr, void *pc); @@ -118,9 +114,7 @@ TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 // func runtime·racewriterange(addr, size uintptr) // Called from instrumented code. -// Defined as ABIInternal so as to avoid introducing a wrapper, -// which would render runtime.getcallerpc ineffective. -TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 +TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 MOVQ addr+0(FP), RARG1 MOVQ size+8(FP), RARG2 MOVQ (SP), RARG3 diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 37cb8dad03..681cd20274 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -380,8 +380,7 @@ TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 POPQ BP RET -// Defined as ABIInternal since it does not use the stack-based Go ABI. -TEXT runtime·sigtramp(SB),NOSPLIT,$72 +TEXT runtime·sigtramp(SB),NOSPLIT,$72 // Save callee-saved C registers, since the caller may be a C signal handler. MOVQ BX, bx-8(SP) MOVQ BP, bp-16(SP) // save in case GOEXPERIMENT=noframepointer is set @@ -408,8 +407,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$72 // Used instead of sigtramp in programs that use cgo. // Arguments from kernel are in DI, SI, DX. -// Defined as ABIInternal since it does not use the stack-based Go ABI. -TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 +TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 // If no traceback function, do usual sigtramp. MOVQ runtime·cgoTraceback(SB), AX TESTQ AX, AX @@ -452,12 +450,12 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 // The first three arguments, and the fifth, are already in registers. // Set the two remaining arguments now. MOVQ runtime·cgoTraceback(SB), CX - MOVQ $runtime·sigtramp(SB), R9 + MOVQ $runtime·sigtramp(SB), R9 MOVQ _cgo_callers(SB), AX JMP AX sigtramp: - JMP runtime·sigtramp(SB) + JMP runtime·sigtramp(SB) sigtrampnog: // Signal arrived on a non-Go thread. If this is SIGPROF, get a @@ -488,8 +486,7 @@ sigtrampnog: // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/x86_64/sigaction.c // The code that cares about the precise instructions used is: // https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup -// Defined as ABIInternal since it does not use the stack-based Go ABI. -TEXT runtime·sigreturn(SB),NOSPLIT,$0 +TEXT runtime·sigreturn(SB),NOSPLIT,$0 MOVQ $SYS_rt_sigreturn, AX SYSCALL INT $3 // not reached -- cgit v1.3 From 38d1ec8c9d09c680065404fe77168fe992e09e82 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 29 Oct 2020 13:57:12 -0400 Subject: cmd/internal/obj: use panic instead of log.Fatalf for two messages These messages can happen if there are duplicate body-less function declarations. Using panic gives the panic handler a chance to handle the panic by printing the queued error messages instead of an internal error. And if there are no queued error messages, using panic pinpoints the stack trace leading to the incorrect use of NewFuncInfo/NewFileInfo. Change-Id: I7e7ea9822ff9a1e7140f5e5b7cfd6437ff9318a7 Reviewed-on: https://go-review.googlesource.com/c/go/+/266338 Trust: Russ Cox Run-TryBot: Russ Cox Reviewed-by: Cherry Zhang TryBot-Result: Go Bot --- src/cmd/internal/obj/link.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index b578b6a09a..c652e3adbb 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -38,7 +38,6 @@ import ( "cmd/internal/src" "cmd/internal/sys" "fmt" - "log" "sync" ) @@ -471,7 +470,7 @@ type FuncInfo struct { // NewFuncInfo allocates and returns a FuncInfo for LSym. func (s *LSym) NewFuncInfo() *FuncInfo { if s.Extra != nil { - log.Fatalf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra) + panic(fmt.Sprintf("invalid use of LSym - NewFuncInfo with Extra of type %T", *s.Extra)) } f := new(FuncInfo) s.Extra = new(interface{}) @@ -498,7 +497,7 @@ type FileInfo struct { // NewFileInfo allocates and returns a FileInfo for LSym. func (s *LSym) NewFileInfo() *FileInfo { if s.Extra != nil { - log.Fatalf("invalid use of LSym - NewFileInfo with Extra of type %T", *s.Extra) + panic(fmt.Sprintf("invalid use of LSym - NewFileInfo with Extra of type %T", *s.Extra)) } f := new(FileInfo) s.Extra = new(interface{}) -- cgit v1.3 From a313eec3869c609c0da2402a4cac2d32113d599c Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Wed, 14 Oct 2020 08:36:11 -0400 Subject: reflect,runtime: use internal ABI for selected ASM routines, attempt 2 [This is a roll-forward of CL 262319, with a fix for some Darwin test failures]. Change the definitions of selected runtime assembly routines from ABI0 (the default) to ABIInternal. The ABIInternal def is intended to indicate that these functions don't follow the existing Go runtime ABI. In addition, convert the assembly reference to runtime.main (from runtime.mainPC) to ABIInternal. Finally, for functions such as "runtime.duffzero" that are called directly from generated code, make sure that the compiler looks up the correct ABI version. This is intended to support the register abi work, however these changes should not have any issues even when GOEXPERIMENT=regabi is not in effect. Updates #27539, #40724. Change-Id: Idf507f1c06176073563845239e1a54dad51a9ea9 Reviewed-on: https://go-review.googlesource.com/c/go/+/266638 Trust: Than McIntosh Run-TryBot: Than McIntosh Reviewed-by: Cherry Zhang TryBot-Result: Go Bot --- src/cmd/compile/internal/gc/ssa.go | 86 ++++++++++++++++---------------- src/cmd/internal/obj/wasm/wasmobj.go | 6 +-- src/cmd/internal/obj/x86/obj6.go | 4 +- src/reflect/asm_amd64.s | 12 +++-- src/runtime/asm_amd64.s | 96 ++++++++++++++++++++++-------------- src/runtime/asm_wasm.s | 4 +- src/runtime/duff_amd64.s | 4 +- src/runtime/mkpreempt.go | 3 +- src/runtime/preempt_386.s | 3 +- src/runtime/preempt_amd64.s | 3 +- src/runtime/preempt_arm.s | 3 +- src/runtime/preempt_arm64.s | 3 +- src/runtime/preempt_mips64x.s | 3 +- src/runtime/preempt_mipsx.s | 3 +- src/runtime/preempt_ppc64x.s | 3 +- src/runtime/preempt_riscv64.s | 3 +- src/runtime/preempt_s390x.s | 3 +- src/runtime/preempt_wasm.s | 3 +- src/runtime/race_amd64.s | 12 +++-- src/runtime/sys_linux_amd64.s | 13 +++-- 20 files changed, 156 insertions(+), 114 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 45d628cc5e..7388e4e3e8 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -72,9 +72,9 @@ func initssaconfig() { deferproc = sysfunc("deferproc") deferprocStack = sysfunc("deferprocStack") Deferreturn = sysfunc("deferreturn") - Duffcopy = sysvar("duffcopy") // asm func with special ABI - Duffzero = sysvar("duffzero") // asm func with special ABI - gcWriteBarrier = sysvar("gcWriteBarrier") // asm func with special ABI + Duffcopy = sysfunc("duffcopy") + Duffzero = sysfunc("duffzero") + gcWriteBarrier = sysfunc("gcWriteBarrier") goschedguarded = sysfunc("goschedguarded") growslice = sysfunc("growslice") msanread = sysfunc("msanread") @@ -105,51 +105,51 @@ func initssaconfig() { // asm funcs with special ABI if thearch.LinkArch.Name == "amd64" { GCWriteBarrierReg = map[int16]*obj.LSym{ - x86.REG_AX: sysvar("gcWriteBarrier"), - x86.REG_CX: sysvar("gcWriteBarrierCX"), - x86.REG_DX: sysvar("gcWriteBarrierDX"), - x86.REG_BX: sysvar("gcWriteBarrierBX"), - x86.REG_BP: sysvar("gcWriteBarrierBP"), - x86.REG_SI: sysvar("gcWriteBarrierSI"), - x86.REG_R8: sysvar("gcWriteBarrierR8"), - x86.REG_R9: sysvar("gcWriteBarrierR9"), + x86.REG_AX: sysfunc("gcWriteBarrier"), + x86.REG_CX: sysfunc("gcWriteBarrierCX"), + x86.REG_DX: sysfunc("gcWriteBarrierDX"), + x86.REG_BX: sysfunc("gcWriteBarrierBX"), + x86.REG_BP: sysfunc("gcWriteBarrierBP"), + x86.REG_SI: sysfunc("gcWriteBarrierSI"), + x86.REG_R8: sysfunc("gcWriteBarrierR8"), + x86.REG_R9: sysfunc("gcWriteBarrierR9"), } } if thearch.LinkArch.Family == sys.Wasm { - BoundsCheckFunc[ssa.BoundsIndex] = sysvar("goPanicIndex") - BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("goPanicIndexU") - BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("goPanicSliceAlen") - BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("goPanicSliceAlenU") - BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("goPanicSliceAcap") - BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("goPanicSliceAcapU") - BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("goPanicSliceB") - BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("goPanicSliceBU") - BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("goPanicSlice3Alen") - BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("goPanicSlice3AlenU") - BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("goPanicSlice3Acap") - BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("goPanicSlice3AcapU") - BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("goPanicSlice3B") - BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("goPanicSlice3BU") - BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("goPanicSlice3C") - BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("goPanicSlice3CU") + BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("goPanicIndex") + BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("goPanicIndexU") + BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("goPanicSliceAlen") + BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("goPanicSliceAlenU") + BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("goPanicSliceAcap") + BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("goPanicSliceAcapU") + BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("goPanicSliceB") + BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("goPanicSliceBU") + BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("goPanicSlice3Alen") + BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("goPanicSlice3AlenU") + BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("goPanicSlice3Acap") + BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("goPanicSlice3AcapU") + BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("goPanicSlice3B") + BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("goPanicSlice3BU") + BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("goPanicSlice3C") + BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("goPanicSlice3CU") } else { - BoundsCheckFunc[ssa.BoundsIndex] = sysvar("panicIndex") - BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("panicIndexU") - BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("panicSliceAlen") - BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("panicSliceAlenU") - BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("panicSliceAcap") - BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("panicSliceAcapU") - BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("panicSliceB") - BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("panicSliceBU") - BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("panicSlice3Alen") - BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("panicSlice3AlenU") - BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("panicSlice3Acap") - BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("panicSlice3AcapU") - BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("panicSlice3B") - BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("panicSlice3BU") - BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("panicSlice3C") - BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("panicSlice3CU") + BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("panicIndex") + BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("panicIndexU") + BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("panicSliceAlen") + BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("panicSliceAlenU") + BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("panicSliceAcap") + BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("panicSliceAcapU") + BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("panicSliceB") + BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("panicSliceBU") + BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("panicSlice3Alen") + BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("panicSlice3AlenU") + BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("panicSlice3Acap") + BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("panicSlice3AcapU") + BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("panicSlice3B") + BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("panicSlice3BU") + BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("panicSlice3C") + BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("panicSlice3CU") } if thearch.LinkArch.PtrSize == 4 { ExtendCheckFunc[ssa.BoundsIndex] = sysvar("panicExtendIndex") diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index f7f66a1255..2e9890d86c 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -129,7 +129,6 @@ var ( morestackNoCtxt *obj.LSym gcWriteBarrier *obj.LSym sigpanic *obj.LSym - sigpanic0 *obj.LSym deferreturn *obj.LSym jmpdefer *obj.LSym ) @@ -142,9 +141,8 @@ const ( func instinit(ctxt *obj.Link) { morestack = ctxt.Lookup("runtime.morestack") morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt") - gcWriteBarrier = ctxt.Lookup("runtime.gcWriteBarrier") + gcWriteBarrier = ctxt.LookupABI("runtime.gcWriteBarrier", obj.ABIInternal) sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal) - sigpanic0 = ctxt.LookupABI("runtime.sigpanic", 0) // sigpanic called from assembly, which has ABI0 deferreturn = ctxt.LookupABI("runtime.deferreturn", obj.ABIInternal) // jmpdefer is defined in assembly as ABI0, but what we're // looking for is the *call* to jmpdefer from the Go function @@ -493,7 +491,7 @@ func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { } // return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack - if call.As == ACALLNORESUME && call.To.Sym != sigpanic && call.To.Sym != sigpanic0 { // sigpanic unwinds the stack, but it never resumes + if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes // trying to unwind WebAssembly stack but call has no resume point, terminate with error p = appendp(p, AIf) p = appendp(p, obj.AUNDEF) diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index e11fa13f65..184fb4308b 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -324,9 +324,9 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { // flags and duffzero on 386 does not otherwise do so). var sym *obj.LSym if p.As == obj.ADUFFZERO { - sym = ctxt.Lookup("runtime.duffzero") + sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal) } else { - sym = ctxt.Lookup("runtime.duffcopy") + sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal) } offset := p.To.Offset p.As = mov diff --git a/src/reflect/asm_amd64.s b/src/reflect/asm_amd64.s index fb28ab87f1..5c8e56558c 100644 --- a/src/reflect/asm_amd64.s +++ b/src/reflect/asm_amd64.s @@ -9,7 +9,9 @@ // See the comment on the declaration of makeFuncStub in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 +// makeFuncStub must be ABIInternal because it is placed directly +// in function values. +TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 NO_LOCAL_POINTERS MOVQ DX, 0(SP) LEAQ argframe+0(FP), CX @@ -17,14 +19,16 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$32 MOVB $0, 24(SP) LEAQ 24(SP), AX MOVQ AX, 16(SP) - CALL ·callReflect(SB) + CALL ·callReflect(SB) RET // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 +// methodValueCall must be ABIInternal because it is placed directly +// in function values. +TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 NO_LOCAL_POINTERS MOVQ DX, 0(SP) LEAQ argframe+0(FP), CX @@ -32,5 +36,5 @@ TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$32 MOVB $0, 24(SP) LEAQ 24(SP), AX MOVQ AX, 16(SP) - CALL ·callMethod(SB) + CALL ·callMethod(SB) RET diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 19a3bb2d7d..196252e1dd 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -84,7 +84,9 @@ GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8 DATA _rt0_amd64_lib_argv<>(SB)/8, $0 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8 -TEXT runtime·rt0_go(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stack-based Go ABI (and +// in addition there are no calls to this entry point from Go code). +TEXT runtime·rt0_go(SB),NOSPLIT,$0 // copy arguments forward on an even stack MOVQ DI, AX // argc MOVQ SI, BX // argv @@ -229,10 +231,13 @@ ok: // Prevent dead-code elimination of debugCallV1, which is // intended to be called by debuggers. - MOVQ $runtime·debugCallV1(SB), AX + MOVQ $runtime·debugCallV1(SB), AX RET -DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) +// mainPC is a function value for runtime.main, to be passed to newproc. +// The reference to runtime.main is made via ABIInternal, since the +// actual function (not the ABI0 wrapper) is needed by newproc. +DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) GLOBL runtime·mainPC(SB),RODATA,$8 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 @@ -468,7 +473,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0 JMP AX // Note: can't just "JMP NAME(SB)" - bad inlining results. -TEXT ·reflectcall(SB), NOSPLIT, $0-32 +TEXT ·reflectcall(SB), NOSPLIT, $0-32 MOVLQZX argsize+24(FP), CX DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) @@ -1354,8 +1359,11 @@ TEXT _cgo_topofstack(SB),NOSPLIT,$0 RET // The top-most function running on a goroutine -// returns to goexit+PCQuantum. -TEXT runtime·goexit(SB),NOSPLIT,$0-0 +// returns to goexit+PCQuantum. Defined as ABIInternal +// so as to make it identifiable to traceback (this +// function it used as a sentinel; traceback wants to +// see the func PC, not a wrapper PC). +TEXT runtime·goexit(SB),NOSPLIT,$0-0 BYTE $0x90 // NOP CALL runtime·goexit1(SB) // does not return // traceback from goexit1 must hit code range of goexit @@ -1377,7 +1385,8 @@ TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 // - AX is the value being written at DI // It clobbers FLAGS. It does not clobber any general-purpose registers, // but may clobber others (e.g., SSE registers). -TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120 // Save the registers clobbered by the fast path. This is slightly // faster than having the caller spill these. MOVQ R14, 104(SP) @@ -1461,51 +1470,58 @@ flush: JMP ret // gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX. -TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0 XCHGQ CX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ CX, AX RET // gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX. -TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0 XCHGQ DX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ DX, AX RET // gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX. -TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0 XCHGQ BX, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ BX, AX RET // gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP. -TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0 XCHGQ BP, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ BP, AX RET // gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI. -TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0 XCHGQ SI, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ SI, AX RET // gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8. -TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0 XCHGQ R8, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ R8, AX RET // gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9. -TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stable Go ABI. +TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0 XCHGQ R9, AX - CALL runtime·gcWriteBarrier(SB) + CALL runtime·gcWriteBarrier(SB) XCHGQ R9, AX RET @@ -1544,7 +1560,10 @@ GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below // obey escape analysis requirements. Specifically, it must not pass // a stack pointer to an escaping argument. debugCallV1 cannot check // this invariant. -TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0 +// +// This is ABIInternal because Go code injects its PC directly into new +// goroutine stacks. +TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0 // Save all registers that may contain pointers so they can be // conservatively scanned. // @@ -1705,67 +1724,68 @@ TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16 // in the caller's stack frame. These stubs write the args into that stack space and // then tail call to the corresponding runtime handler. // The tail call makes these stubs disappear in backtraces. -TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 +// Defined as ABIInternal since they do not use the stack-based Go ABI. +TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicIndex(SB) -TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 +TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicIndexU(SB) -TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAlen(SB) -TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAlenU(SB) -TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAcap(SB) -TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSliceAcapU(SB) -TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSliceB(SB) -TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSliceBU(SB) -TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3Alen(SB) -TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3AlenU(SB) -TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3Acap(SB) -TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 MOVQ DX, x+0(FP) MOVQ BX, y+8(FP) JMP runtime·goPanicSlice3AcapU(SB) -TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSlice3B(SB) -TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 MOVQ CX, x+0(FP) MOVQ DX, y+8(FP) JMP runtime·goPanicSlice3BU(SB) -TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3C(SB) -TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 +TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 MOVQ AX, x+0(FP) MOVQ CX, y+8(FP) JMP runtime·goPanicSlice3CU(SB) diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index 67e81adf0b..fcb780f1dc 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -196,7 +196,7 @@ TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16 Get CTXT I64Eqz If - CALLNORESUME runtime·sigpanic(SB) + CALLNORESUME runtime·sigpanic(SB) End // caller sp after CALL @@ -300,7 +300,7 @@ TEXT ·reflectcall(SB), NOSPLIT, $0-32 I64Load fn+8(FP) I64Eqz If - CALLNORESUME runtime·sigpanic(SB) + CALLNORESUME runtime·sigpanic(SB) End MOVW argsize+24(FP), R0 diff --git a/src/runtime/duff_amd64.s b/src/runtime/duff_amd64.s index 44dc75d297..2ff5bf6dbc 100644 --- a/src/runtime/duff_amd64.s +++ b/src/runtime/duff_amd64.s @@ -4,7 +4,7 @@ #include "textflag.h" -TEXT runtime·duffzero(SB), NOSPLIT, $0-0 +TEXT runtime·duffzero(SB), NOSPLIT, $0-0 MOVUPS X0,(DI) MOVUPS X0,16(DI) MOVUPS X0,32(DI) @@ -103,7 +103,7 @@ TEXT runtime·duffzero(SB), NOSPLIT, $0-0 RET -TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 +TEXT runtime·duffcopy(SB), NOSPLIT, $0-0 MOVUPS (SI), X0 ADDQ $16, SI MOVUPS X0, (DI) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 286f81489a..1d614dd003 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -126,7 +126,8 @@ func header(arch string) { } fmt.Fprintf(out, "#include \"go_asm.h\"\n") fmt.Fprintf(out, "#include \"textflag.h\"\n\n") - fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n") + fmt.Fprintf(out, "// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally.\n") + fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n") } func p(f string, args ...interface{}) { diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s index c3a5fa1f36..a803b24dc6 100644 --- a/src/runtime/preempt_386.s +++ b/src/runtime/preempt_386.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHFL ADJSP $156 NOP SP diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index 4765e9f448..92c664d79a 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHQ BP MOVQ SP, BP // Save flags before clobbering them diff --git a/src/runtime/preempt_arm.s b/src/runtime/preempt_arm.s index 8f243c0dcd..bbc9fbb1ea 100644 --- a/src/runtime/preempt_arm.s +++ b/src/runtime/preempt_arm.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVW.W R14, -188(R13) MOVW R0, 4(R13) MOVW R1, 8(R13) diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index 36ee13282c..2b70a28479 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R30, -496(RSP) SUB $496, RSP #ifdef GOOS_linux diff --git a/src/runtime/preempt_mips64x.s b/src/runtime/preempt_mips64x.s index 1e123e8077..0d0c157c36 100644 --- a/src/runtime/preempt_mips64x.s +++ b/src/runtime/preempt_mips64x.s @@ -5,7 +5,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVV R31, -488(R29) SUBV $488, R29 MOVV R1, 8(R29) diff --git a/src/runtime/preempt_mipsx.s b/src/runtime/preempt_mipsx.s index afac33e0a0..86d3a918d3 100644 --- a/src/runtime/preempt_mipsx.s +++ b/src/runtime/preempt_mipsx.s @@ -5,7 +5,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVW R31, -244(R29) SUB $244, R29 MOVW R1, 4(R29) diff --git a/src/runtime/preempt_ppc64x.s b/src/runtime/preempt_ppc64x.s index b2d7e30ec7..90634386db 100644 --- a/src/runtime/preempt_ppc64x.s +++ b/src/runtime/preempt_ppc64x.s @@ -5,7 +5,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R31, -488(R1) MOVD LR, R31 MOVDU R31, -520(R1) diff --git a/src/runtime/preempt_riscv64.s b/src/runtime/preempt_riscv64.s index eb68dcba2b..d4f9cc277f 100644 --- a/src/runtime/preempt_riscv64.s +++ b/src/runtime/preempt_riscv64.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOV X1, -472(X2) ADD $-472, X2 MOV X3, 8(X2) diff --git a/src/runtime/preempt_s390x.s b/src/runtime/preempt_s390x.s index ca9e47cde1..c6f11571df 100644 --- a/src/runtime/preempt_s390x.s +++ b/src/runtime/preempt_s390x.s @@ -3,7 +3,8 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 IPM R10 MOVD R14, -248(R15) ADD $-248, R15 diff --git a/src/runtime/preempt_wasm.s b/src/runtime/preempt_wasm.s index 0cf57d3d22..da90e8aa6d 100644 --- a/src/runtime/preempt_wasm.s +++ b/src/runtime/preempt_wasm.s @@ -3,6 +3,7 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +// Note: asyncPreempt doesn't use the internal ABI, but we must be able to inject calls to it from the signal handler, so Go code has to see the PC of this function literally. +TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 // No async preemption on wasm UNDEF diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s index 758d543203..4a86b3371a 100644 --- a/src/runtime/race_amd64.s +++ b/src/runtime/race_amd64.s @@ -41,7 +41,9 @@ // func runtime·raceread(addr uintptr) // Called from instrumented code. -TEXT runtime·raceread(SB), NOSPLIT, $0-8 +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would render runtime.getcallerpc ineffective. +TEXT runtime·raceread(SB), NOSPLIT, $0-8 MOVQ addr+0(FP), RARG1 MOVQ (SP), RARG2 // void __tsan_read(ThreadState *thr, void *addr, void *pc); @@ -65,7 +67,9 @@ TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 // func runtime·racewrite(addr uintptr) // Called from instrumented code. -TEXT runtime·racewrite(SB), NOSPLIT, $0-8 +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would render runtime.getcallerpc ineffective. +TEXT runtime·racewrite(SB), NOSPLIT, $0-8 MOVQ addr+0(FP), RARG1 MOVQ (SP), RARG2 // void __tsan_write(ThreadState *thr, void *addr, void *pc); @@ -114,7 +118,9 @@ TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 // func runtime·racewriterange(addr, size uintptr) // Called from instrumented code. -TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would render runtime.getcallerpc ineffective. +TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 MOVQ addr+0(FP), RARG1 MOVQ size+8(FP), RARG2 MOVQ (SP), RARG3 diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index 681cd20274..37cb8dad03 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -380,7 +380,8 @@ TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 POPQ BP RET -TEXT runtime·sigtramp(SB),NOSPLIT,$72 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·sigtramp(SB),NOSPLIT,$72 // Save callee-saved C registers, since the caller may be a C signal handler. MOVQ BX, bx-8(SP) MOVQ BP, bp-16(SP) // save in case GOEXPERIMENT=noframepointer is set @@ -407,7 +408,8 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$72 // Used instead of sigtramp in programs that use cgo. // Arguments from kernel are in DI, SI, DX. -TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 // If no traceback function, do usual sigtramp. MOVQ runtime·cgoTraceback(SB), AX TESTQ AX, AX @@ -450,12 +452,12 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 // The first three arguments, and the fifth, are already in registers. // Set the two remaining arguments now. MOVQ runtime·cgoTraceback(SB), CX - MOVQ $runtime·sigtramp(SB), R9 + MOVQ $runtime·sigtramp(SB), R9 MOVQ _cgo_callers(SB), AX JMP AX sigtramp: - JMP runtime·sigtramp(SB) + JMP runtime·sigtramp(SB) sigtrampnog: // Signal arrived on a non-Go thread. If this is SIGPROF, get a @@ -486,7 +488,8 @@ sigtrampnog: // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/x86_64/sigaction.c // The code that cares about the precise instructions used is: // https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup -TEXT runtime·sigreturn(SB),NOSPLIT,$0 +// Defined as ABIInternal since it does not use the stack-based Go ABI. +TEXT runtime·sigreturn(SB),NOSPLIT,$0 MOVQ $SYS_rt_sigreturn, AX SYSCALL INT $3 // not reached -- cgit v1.3 From 84d7a85089009332756c18e876ec91f96b362ebf Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 21 Oct 2020 20:15:48 -0400 Subject: cmd/compile: delete register maps, completely Remove go115ReduceLiveness feature gating flag, along with code that only needed when go115ReduceLiveness is false. Change-Id: I7571913cc74cbd17b330a0ee0160fefc9eeee66e Reviewed-on: https://go-review.googlesource.com/c/go/+/264338 Trust: Cherry Zhang Run-TryBot: Cherry Zhang Reviewed-by: Austin Clements --- src/cmd/compile/fmtmap_test.go | 2 - src/cmd/compile/internal/gc/gsubr.go | 40 +--- src/cmd/compile/internal/gc/obj.go | 2 +- src/cmd/compile/internal/gc/plive.go | 421 ++++++----------------------------- src/cmd/compile/internal/gc/ssa.go | 2 +- src/cmd/internal/obj/link.go | 1 - src/cmd/internal/obj/plist.go | 22 +- src/cmd/internal/objabi/funcdata.go | 9 +- 8 files changed, 87 insertions(+), 412 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go index 179c60187f..0811df7f7b 100644 --- a/src/cmd/compile/fmtmap_test.go +++ b/src/cmd/compile/fmtmap_test.go @@ -105,10 +105,8 @@ var knownFormats = map[string]string{ "cmd/compile/internal/ssa.GCNode %v": "", "cmd/compile/internal/ssa.ID %d": "", "cmd/compile/internal/ssa.ID %v": "", - "cmd/compile/internal/ssa.LocPair %s": "", "cmd/compile/internal/ssa.LocalSlot %s": "", "cmd/compile/internal/ssa.LocalSlot %v": "", - "cmd/compile/internal/ssa.Location %T": "", "cmd/compile/internal/ssa.Location %s": "", "cmd/compile/internal/ssa.Op %s": "", "cmd/compile/internal/ssa.Op %v": "", diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go index ce5182f203..864ada1d3c 100644 --- a/src/cmd/compile/internal/gc/gsubr.go +++ b/src/cmd/compile/internal/gc/gsubr.go @@ -70,12 +70,8 @@ func newProgs(fn *Node, worker int) *Progs { pp.pos = fn.Pos pp.settext(fn) // PCDATA tables implicitly start with index -1. - pp.prevLive = LivenessIndex{-1, -1, false} - if go115ReduceLiveness { - pp.nextLive = pp.prevLive - } else { - pp.nextLive = LivenessInvalid - } + pp.prevLive = LivenessIndex{-1, false} + pp.nextLive = pp.prevLive return pp } @@ -120,31 +116,15 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog { Addrconst(&p.From, objabi.PCDATA_StackMapIndex) Addrconst(&p.To, int64(idx)) } - if !go115ReduceLiveness { + if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint { + // Emit unsafe-point marker. + pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint + p := pp.Prog(obj.APCDATA) + Addrconst(&p.From, objabi.PCDATA_UnsafePoint) if pp.nextLive.isUnsafePoint { - // Unsafe points are encoded as a special value in the - // register map. - pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe - } - if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex { - // Emit register map index change. - idx := pp.nextLive.regMapIndex - pp.prevLive.regMapIndex = idx - p := pp.Prog(obj.APCDATA) - Addrconst(&p.From, objabi.PCDATA_RegMapIndex) - Addrconst(&p.To, int64(idx)) - } - } else { - if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint { - // Emit unsafe-point marker. - pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint - p := pp.Prog(obj.APCDATA) - Addrconst(&p.From, objabi.PCDATA_UnsafePoint) - if pp.nextLive.isUnsafePoint { - Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe) - } else { - Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe) - } + Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe) + } else { + Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe) } } diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index 226eb45252..32aa7c5bb1 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -312,7 +312,7 @@ func addGCLocals() { if fn == nil { continue } - for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} { + for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals} { if gcsym != nil && !gcsym.OnList() { ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK) } diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go index b471accb65..a48173e0d6 100644 --- a/src/cmd/compile/internal/gc/plive.go +++ b/src/cmd/compile/internal/gc/plive.go @@ -24,16 +24,6 @@ import ( "strings" ) -// go115ReduceLiveness disables register maps and only produces stack -// maps at call sites. -// -// In Go 1.15, we changed debug call injection to use conservative -// scanning instead of precise pointer maps, so these are no longer -// necessary. -// -// Keep in sync with runtime/preempt.go:go115ReduceLiveness. -const go115ReduceLiveness = true - // OpVarDef is an annotation for the liveness analysis, marking a place // where a complete initialization (definition) of a variable begins. // Since the liveness analysis can see initialization of single-word @@ -96,15 +86,15 @@ type BlockEffects struct { // // uevar: upward exposed variables (used before set in block) // varkill: killed variables (set in block) - uevar varRegVec - varkill varRegVec + uevar bvec + varkill bvec // Computed during Liveness.solve using control flow information: // // livein: variables live at block entry // liveout: variables live at block exit - livein varRegVec - liveout varRegVec + livein bvec + liveout bvec } // A collection of global state used by liveness analysis. @@ -128,16 +118,14 @@ type Liveness struct { // current Block during Liveness.epilogue. Indexed in Value // order for that block. Additionally, for the entry block // livevars[0] is the entry bitmap. Liveness.compact moves - // these to stackMaps and regMaps. - livevars []varRegVec + // these to stackMaps. + livevars []bvec // livenessMap maps from safe points (i.e., CALLs) to their // liveness map indexes. livenessMap LivenessMap stackMapSet bvecSet stackMaps []bvec - regMapSet map[liveRegMask]int - regMaps []liveRegMask cache progeffectscache } @@ -158,7 +146,7 @@ func (m *LivenessMap) reset() { delete(m.vals, k) } } - m.deferreturn = LivenessInvalid + m.deferreturn = LivenessDontCare } func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) { @@ -166,27 +154,17 @@ func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) { } func (m LivenessMap) Get(v *ssa.Value) LivenessIndex { - if !go115ReduceLiveness { - // All safe-points are in the map, so if v isn't in - // the map, it's an unsafe-point. - if idx, ok := m.vals[v.ID]; ok { - return idx - } - return LivenessInvalid - } - // If v isn't in the map, then it's a "don't care" and not an // unsafe-point. if idx, ok := m.vals[v.ID]; ok { return idx } - return LivenessIndex{StackMapDontCare, StackMapDontCare, false} + return LivenessIndex{StackMapDontCare, false} } // LivenessIndex stores the liveness map information for a Value. type LivenessIndex struct { stackMapIndex int - regMapIndex int // only for !go115ReduceLiveness // isUnsafePoint indicates that this is an unsafe-point. // @@ -197,8 +175,10 @@ type LivenessIndex struct { isUnsafePoint bool } -// LivenessInvalid indicates an unsafe point with no stack map. -var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness +// LivenessDontCare indicates that the liveness information doesn't +// matter. Currently it is used in deferreturn liveness when we don't +// actually need it. It should never be emitted to the PCDATA stream. +var LivenessDontCare = LivenessIndex{StackMapDontCare, true} // StackMapDontCare indicates that the stack map index at a Value // doesn't matter. @@ -212,46 +192,12 @@ func (idx LivenessIndex) StackMapValid() bool { return idx.stackMapIndex != StackMapDontCare } -func (idx LivenessIndex) RegMapValid() bool { - return idx.regMapIndex != StackMapDontCare -} - type progeffectscache struct { retuevar []int32 tailuevar []int32 initialized bool } -// varRegVec contains liveness bitmaps for variables and registers. -type varRegVec struct { - vars bvec - regs liveRegMask -} - -func (v *varRegVec) Eq(v2 varRegVec) bool { - return v.vars.Eq(v2.vars) && v.regs == v2.regs -} - -func (v *varRegVec) Copy(v2 varRegVec) { - v.vars.Copy(v2.vars) - v.regs = v2.regs -} - -func (v *varRegVec) Clear() { - v.vars.Clear() - v.regs = 0 -} - -func (v *varRegVec) Or(v1, v2 varRegVec) { - v.vars.Or(v1.vars, v2.vars) - v.regs = v1.regs | v2.regs -} - -func (v *varRegVec) AndNot(v1, v2 varRegVec) { - v.vars.AndNot(v1.vars, v2.vars) - v.regs = v1.regs &^ v2.regs -} - // livenessShouldTrack reports whether the liveness analysis // should track the variable n. // We don't care about variables that have no pointers, @@ -400,110 +346,6 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) { } } -// regEffects returns the registers affected by v. -func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) { - if go115ReduceLiveness { - return 0, 0 - } - if v.Op == ssa.OpPhi { - // All phi node arguments must come from the same - // register and the result must also go to that - // register, so there's no overall effect. - return 0, 0 - } - addLocs := func(mask liveRegMask, v *ssa.Value, ptrOnly bool) liveRegMask { - if int(v.ID) >= len(lv.f.RegAlloc) { - // v has no allocated registers. - return mask - } - loc := lv.f.RegAlloc[v.ID] - if loc == nil { - // v has no allocated registers. - return mask - } - if v.Op == ssa.OpGetG { - // GetG represents the G register, which is a - // pointer, but not a valid GC register. The - // current G is always reachable, so it's okay - // to ignore this register. - return mask - } - - // Collect registers and types from v's location. - var regs [2]*ssa.Register - nreg := 0 - switch loc := loc.(type) { - case ssa.LocalSlot: - return mask - case *ssa.Register: - if ptrOnly && !v.Type.HasPointers() { - return mask - } - regs[0] = loc - nreg = 1 - case ssa.LocPair: - // The value will have TTUPLE type, and the - // children are nil or *ssa.Register. - if v.Type.Etype != types.TTUPLE { - v.Fatalf("location pair %s has non-tuple type %v", loc, v.Type) - } - for i, loc1 := range &loc { - if loc1 == nil { - continue - } - if ptrOnly && !v.Type.FieldType(i).HasPointers() { - continue - } - regs[nreg] = loc1.(*ssa.Register) - nreg++ - } - default: - v.Fatalf("weird RegAlloc location: %s (%T)", loc, loc) - } - - // Add register locations to vars. - for _, reg := range regs[:nreg] { - if reg.GCNum() == -1 { - if ptrOnly { - v.Fatalf("pointer in non-pointer register %v", reg) - } else { - continue - } - } - mask |= 1 << uint(reg.GCNum()) - } - return mask - } - - // v clobbers all registers it writes to (whether or not the - // write is pointer-typed). - kill = addLocs(0, v, false) - for _, arg := range v.Args { - // v uses all registers is reads from, but we only - // care about marking those containing pointers. - uevar = addLocs(uevar, arg, true) - } - return uevar, kill -} - -type liveRegMask uint32 // only if !go115ReduceLiveness - -func (m liveRegMask) niceString(config *ssa.Config) string { - if m == 0 { - return "" - } - str := "" - for i, reg := range config.GCRegMap { - if m&(1<= f.NumBlocks() { lv.be = lc.be[:f.NumBlocks()] } - lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessInvalid} + lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessDontCare} lc.livenessMap.vals = nil } if lv.be == nil { @@ -546,10 +386,10 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt for _, b := range f.Blocks { be := lv.blockEffects(b) - be.uevar = varRegVec{vars: bulk.next()} - be.varkill = varRegVec{vars: bulk.next()} - be.livein = varRegVec{vars: bulk.next()} - be.liveout = varRegVec{vars: bulk.next()} + be.uevar = bulk.next() + be.varkill = bulk.next() + be.livein = bulk.next() + be.liveout = bulk.next() } lv.livenessMap.reset() @@ -637,20 +477,6 @@ func onebitwalktype1(t *types.Type, off int64, bv bvec) { } } -// usedRegs returns the maximum width of the live register map. -func (lv *Liveness) usedRegs() int32 { - var any liveRegMask - for _, live := range lv.regMaps { - any |= live - } - i := int32(0) - for any != 0 { - any >>= 1 - i++ - } - return i -} - // Generates live pointer value maps for arguments and local variables. The // this argument and the in arguments are always assumed live. The vars // argument is a slice of *Nodes. @@ -851,31 +677,16 @@ func (lv *Liveness) markUnsafePoints() { // particular, call Values can have a stack map in case the callee // grows the stack, but not themselves be a safe-point. func (lv *Liveness) hasStackMap(v *ssa.Value) bool { - // The runtime only has safe-points in function prologues, so - // we only need stack maps at call sites. go:nosplit functions - // are similar. - if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit { - if !v.Op.IsCall() { - return false - } - // typedmemclr and typedmemmove are write barriers and - // deeply non-preemptible. They are unsafe points and - // hence should not have liveness maps. - if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) { - return false - } - return true + if !v.Op.IsCall() { + return false } - - switch v.Op { - case ssa.OpInitMem, ssa.OpArg, ssa.OpSP, ssa.OpSB, - ssa.OpSelect0, ssa.OpSelect1, ssa.OpGetG, - ssa.OpVarDef, ssa.OpVarLive, ssa.OpKeepAlive, - ssa.OpPhi: - // These don't produce code (see genssa). + // typedmemclr and typedmemmove are write barriers and + // deeply non-preemptible. They are unsafe points and + // hence should not have liveness maps. + if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) { return false } - return !lv.unsafePoints.Get(int32(v.ID)) + return true } // Initializes the sets for solving the live variables. Visits all the @@ -891,17 +702,13 @@ func (lv *Liveness) prologue() { // effects with the each prog effects. for j := len(b.Values) - 1; j >= 0; j-- { pos, e := lv.valueEffects(b.Values[j]) - regUevar, regKill := lv.regEffects(b.Values[j]) if e&varkill != 0 { - be.varkill.vars.Set(pos) - be.uevar.vars.Unset(pos) + be.varkill.Set(pos) + be.uevar.Unset(pos) } - be.varkill.regs |= regKill - be.uevar.regs &^= regKill if e&uevar != 0 { - be.uevar.vars.Set(pos) + be.uevar.Set(pos) } - be.uevar.regs |= regUevar } } } @@ -911,8 +718,8 @@ func (lv *Liveness) solve() { // These temporary bitvectors exist to avoid successive allocations and // frees within the loop. nvars := int32(len(lv.vars)) - newlivein := varRegVec{vars: bvalloc(nvars)} - newliveout := varRegVec{vars: bvalloc(nvars)} + newlivein := bvalloc(nvars) + newliveout := bvalloc(nvars) // Walk blocks in postorder ordering. This improves convergence. po := lv.f.Postorder() @@ -930,11 +737,11 @@ func (lv *Liveness) solve() { switch b.Kind { case ssa.BlockRet: for _, pos := range lv.cache.retuevar { - newliveout.vars.Set(pos) + newliveout.Set(pos) } case ssa.BlockRetJmp: for _, pos := range lv.cache.tailuevar { - newliveout.vars.Set(pos) + newliveout.Set(pos) } case ssa.BlockExit: // panic exit - nothing to do @@ -969,7 +776,7 @@ func (lv *Liveness) solve() { // variables at each safe point locations. func (lv *Liveness) epilogue() { nvars := int32(len(lv.vars)) - liveout := varRegVec{vars: bvalloc(nvars)} + liveout := bvalloc(nvars) livedefer := bvalloc(nvars) // always-live variables // If there is a defer (that could recover), then all output @@ -1025,12 +832,11 @@ func (lv *Liveness) epilogue() { { // Reserve an entry for function entry. live := bvalloc(nvars) - lv.livevars = append(lv.livevars, varRegVec{vars: live}) + lv.livevars = append(lv.livevars, live) } for _, b := range lv.f.Blocks { be := lv.blockEffects(b) - firstBitmapIndex := len(lv.livevars) // Walk forward through the basic block instructions and // allocate liveness maps for those instructions that need them. @@ -1040,7 +846,7 @@ func (lv *Liveness) epilogue() { } live := bvalloc(nvars) - lv.livevars = append(lv.livevars, varRegVec{vars: live}) + lv.livevars = append(lv.livevars, live) } // walk backward, construct maps at each safe point @@ -1056,21 +862,18 @@ func (lv *Liveness) epilogue() { live := &lv.livevars[index] live.Or(*live, liveout) - live.vars.Or(live.vars, livedefer) // only for non-entry safe points + live.Or(*live, livedefer) // only for non-entry safe points index-- } // Update liveness information. pos, e := lv.valueEffects(v) - regUevar, regKill := lv.regEffects(v) if e&varkill != 0 { - liveout.vars.Unset(pos) + liveout.Unset(pos) } - liveout.regs &^= regKill if e&uevar != 0 { - liveout.vars.Set(pos) + liveout.Set(pos) } - liveout.regs |= regUevar } if b == lv.f.Entry { @@ -1080,7 +883,7 @@ func (lv *Liveness) epilogue() { // Check to make sure only input variables are live. for i, n := range lv.vars { - if !liveout.vars.Get(int32(i)) { + if !liveout.Get(int32(i)) { continue } if n.Class() == PPARAM { @@ -1094,32 +897,16 @@ func (lv *Liveness) epilogue() { live.Or(*live, liveout) } - // Check that no registers are live across calls. - // For closure calls, the CALLclosure is the last use - // of the context register, so it's dead after the call. - index = int32(firstBitmapIndex) - for _, v := range b.Values { - if lv.hasStackMap(v) { - live := lv.livevars[index] - if v.Op.IsCall() && live.regs != 0 { - lv.printDebug() - v.Fatalf("%v register %s recorded as live at call", lv.fn.Func.Nname, live.regs.niceString(lv.f.Config)) - } - index++ - } - } - // The liveness maps for this block are now complete. Compact them. lv.compact(b) } // If we have an open-coded deferreturn call, make a liveness map for it. if lv.fn.Func.OpenCodedDeferDisallowed() { - lv.livenessMap.deferreturn = LivenessInvalid + lv.livenessMap.deferreturn = LivenessDontCare } else { lv.livenessMap.deferreturn = LivenessIndex{ stackMapIndex: lv.stackMapSet.add(livedefer), - regMapIndex: 0, // entry regMap, containing no live registers isUnsafePoint: false, } } @@ -1136,20 +923,10 @@ func (lv *Liveness) epilogue() { lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n) } } - if !go115ReduceLiveness { - // Check that no registers are live at function entry. - // The context register, if any, comes from a - // LoweredGetClosurePtr operation first thing in the function, - // so it doesn't appear live at entry. - if regs := lv.regMaps[0]; regs != 0 { - lv.printDebug() - lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config)) - } - } } // Compact coalesces identical bitmaps from lv.livevars into the sets -// lv.stackMapSet and lv.regMaps. +// lv.stackMapSet. // // Compact clears lv.livevars. // @@ -1165,45 +942,23 @@ func (lv *Liveness) epilogue() { // PCDATA tables cost about 100k. So for now we keep using a single index for // both bitmap lists. func (lv *Liveness) compact(b *ssa.Block) { - add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness - // Deduplicate the stack map. - stackIndex := lv.stackMapSet.add(live.vars) - // Deduplicate the register map. - regIndex, ok := lv.regMapSet[live.regs] - if !ok { - regIndex = len(lv.regMapSet) - lv.regMapSet[live.regs] = regIndex - lv.regMaps = append(lv.regMaps, live.regs) - } - return LivenessIndex{stackIndex, regIndex, isUnsafePoint} - } pos := 0 if b == lv.f.Entry { // Handle entry stack map. - if !go115ReduceLiveness { - add(lv.livevars[0], false) - } else { - lv.stackMapSet.add(lv.livevars[0].vars) - } + lv.stackMapSet.add(lv.livevars[0]) pos++ } for _, v := range b.Values { - if go115ReduceLiveness { - hasStackMap := lv.hasStackMap(v) - isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID)) - idx := LivenessIndex{StackMapDontCare, StackMapDontCare, isUnsafePoint} - if hasStackMap { - idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars) - pos++ - } - if hasStackMap || isUnsafePoint { - lv.livenessMap.set(v, idx) - } - } else if lv.hasStackMap(v) { - isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID)) - lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint)) + hasStackMap := lv.hasStackMap(v) + isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID)) + idx := LivenessIndex{StackMapDontCare, isUnsafePoint} + if hasStackMap { + idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos]) pos++ } + if hasStackMap || isUnsafePoint { + lv.livenessMap.set(v, idx) + } } // Reset livevars. @@ -1250,8 +1005,8 @@ func (lv *Liveness) showlive(v *ssa.Value, live bvec) { Warnl(pos, s) } -func (lv *Liveness) printbvec(printed bool, name string, live varRegVec) bool { - if live.vars.IsEmpty() && live.regs == 0 { +func (lv *Liveness) printbvec(printed bool, name string, live bvec) bool { + if live.IsEmpty() { return printed } @@ -1264,19 +1019,18 @@ func (lv *Liveness) printbvec(printed bool, name string, live varRegVec) bool { comma := "" for i, n := range lv.vars { - if !live.vars.Get(int32(i)) { + if !live.Get(int32(i)) { continue } fmt.Printf("%s%s", comma, n.Sym.Name) comma = "," } - fmt.Printf("%s%s", comma, live.regs.niceString(lv.f.Config)) return true } -// printeffect is like printbvec, but for valueEffects and regEffects. -func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, regMask liveRegMask) bool { - if !x && regMask == 0 { +// printeffect is like printbvec, but for valueEffects. +func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool) bool { + if !x { return printed } if !printed { @@ -1288,15 +1042,7 @@ func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, re if x { fmt.Printf("%s", lv.vars[pos].Sym.Name) } - for j, reg := range lv.f.Config.GCRegMap { - if regMask&(1< 32 { - // Our uint32 conversion below won't work. - Fatalf("GP registers overflow uint32") - } - - if regs.n > 0 { - for _, live := range lv.regMaps { - regs.Clear() - regs.b[0] = uint32(live) - roff = dbvec(®sSymTmp, roff, regs) - } - } - } - // Give these LSyms content-addressable names, // so that they can be de-duplicated. // This provides significant binary size savings. @@ -1502,11 +1219,7 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) { lsym.Set(obj.AttrContentAddressable, true) }) } - if !go115ReduceLiveness { - return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(®sSymTmp) - } - // TODO(go115ReduceLiveness): Remove regsSym result - return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil + return makeSym(&argsSymTmp), makeSym(&liveSymTmp) } // Entry pointer for liveness analysis. Solves for the liveness of @@ -1553,7 +1266,7 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { // Emit the live pointer map data structures ls := e.curfn.Func.lsym fninfo := ls.Func() - fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit() + fninfo.GCArgs, fninfo.GCLocals = lv.emit() p := pp.Prog(obj.AFUNCDATA) Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps) @@ -1567,14 +1280,6 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { p.To.Name = obj.NAME_EXTERN p.To.Sym = fninfo.GCLocals - if !go115ReduceLiveness { - p = pp.Prog(obj.AFUNCDATA) - Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) - p.To.Type = obj.TYPE_MEM - p.To.Name = obj.NAME_EXTERN - p.To.Sym = fninfo.GCRegs - } - return lv.livenessMap } diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 7388e4e3e8..67484904a9 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -6265,7 +6265,7 @@ func genssa(f *ssa.Func, pp *Progs) { // instruction. We won't use the actual liveness map on a // control instruction. Just mark it something that is // preemptible, unless this function is "all unsafe". - s.pp.nextLive = LivenessIndex{-1, -1, allUnsafe(f)} + s.pp.nextLive = LivenessIndex{-1, allUnsafe(f)} // Emit values in block thearch.SSAMarkMoves(&s, b) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index c652e3adbb..8c8ff587ff 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -460,7 +460,6 @@ type FuncInfo struct { GCArgs *LSym GCLocals *LSym - GCRegs *LSym // Only if !go115ReduceLiveness StackObjects *LSym OpenCodedDeferInfo *LSym diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go index eb54c67f6a..2b096996f7 100644 --- a/src/cmd/internal/obj/plist.go +++ b/src/cmd/internal/obj/plist.go @@ -178,7 +178,7 @@ func (ctxt *Link) Globl(s *LSym, size int64, flag int) { // Prog generated. func (ctxt *Link) EmitEntryLiveness(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := ctxt.EmitEntryStackMap(s, p, newprog) - pcdata = ctxt.EmitEntryRegMap(s, pcdata, newprog) + pcdata = ctxt.EmitEntryUnsafePoint(s, pcdata, newprog) return pcdata } @@ -195,13 +195,13 @@ func (ctxt *Link) EmitEntryStackMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { return pcdata } -// Similar to EmitEntryLiveness, but just emit register map. -func (ctxt *Link) EmitEntryRegMap(s *LSym, p *Prog, newprog ProgAlloc) *Prog { +// Similar to EmitEntryLiveness, but just emit unsafe point map. +func (ctxt *Link) EmitEntryUnsafePoint(s *LSym, p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) pcdata.Pos = s.Func().Text.Pos pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST - pcdata.From.Offset = objabi.PCDATA_RegMapIndex + pcdata.From.Offset = objabi.PCDATA_UnsafePoint pcdata.To.Type = TYPE_CONST pcdata.To.Offset = -1 @@ -216,9 +216,9 @@ func (ctxt *Link) StartUnsafePoint(p *Prog, newprog ProgAlloc) *Prog { pcdata := Appendp(p, newprog) pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST - pcdata.From.Offset = objabi.PCDATA_RegMapIndex + pcdata.From.Offset = objabi.PCDATA_UnsafePoint pcdata.To.Type = TYPE_CONST - pcdata.To.Offset = objabi.PCDATA_RegMapUnsafe + pcdata.To.Offset = objabi.PCDATA_UnsafePointUnsafe return pcdata } @@ -231,7 +231,7 @@ func (ctxt *Link) EndUnsafePoint(p *Prog, newprog ProgAlloc, oldval int64) *Prog pcdata := Appendp(p, newprog) pcdata.As = APCDATA pcdata.From.Type = TYPE_CONST - pcdata.From.Offset = objabi.PCDATA_RegMapIndex + pcdata.From.Offset = objabi.PCDATA_UnsafePoint pcdata.To.Type = TYPE_CONST pcdata.To.Offset = oldval @@ -257,11 +257,11 @@ func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, is prevPcdata := int64(-1) // entry PC data value prevRestart := int64(0) for p := prev.Link; p != nil; p, prev = p.Link, p { - if p.As == APCDATA && p.From.Offset == objabi.PCDATA_RegMapIndex { + if p.As == APCDATA && p.From.Offset == objabi.PCDATA_UnsafePoint { prevPcdata = p.To.Offset continue } - if prevPcdata == objabi.PCDATA_RegMapUnsafe { + if prevPcdata == objabi.PCDATA_UnsafePointUnsafe { continue // already unsafe } if isUnsafePoint(p) { @@ -288,7 +288,7 @@ func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, is q := Appendp(prev, newprog) q.As = APCDATA q.From.Type = TYPE_CONST - q.From.Offset = objabi.PCDATA_RegMapIndex + q.From.Offset = objabi.PCDATA_UnsafePoint q.To.Type = TYPE_CONST q.To.Offset = val q.Pc = p.Pc @@ -305,7 +305,7 @@ func MarkUnsafePoints(ctxt *Link, p0 *Prog, newprog ProgAlloc, isUnsafePoint, is p = Appendp(p, newprog) p.As = APCDATA p.From.Type = TYPE_CONST - p.From.Offset = objabi.PCDATA_RegMapIndex + p.From.Offset = objabi.PCDATA_UnsafePoint p.To.Type = TYPE_CONST p.To.Offset = prevPcdata p.Pc = p.Link.Pc diff --git a/src/cmd/internal/objabi/funcdata.go b/src/cmd/internal/objabi/funcdata.go index c9480bf2f0..1c5e5e1c8c 100644 --- a/src/cmd/internal/objabi/funcdata.go +++ b/src/cmd/internal/objabi/funcdata.go @@ -11,14 +11,12 @@ package objabi // ../../../runtime/symtab.go. const ( - PCDATA_RegMapIndex = 0 // if !go115ReduceLiveness - PCDATA_UnsafePoint = 0 // if go115ReduceLiveness + PCDATA_UnsafePoint = 0 PCDATA_StackMapIndex = 1 PCDATA_InlTreeIndex = 2 FUNCDATA_ArgsPointerMaps = 0 FUNCDATA_LocalsPointerMaps = 1 - FUNCDATA_RegPointerMaps = 2 // if !go115ReduceLiveness FUNCDATA_StackObjects = 3 FUNCDATA_InlTree = 4 FUNCDATA_OpenCodedDeferInfo = 5 @@ -32,11 +30,6 @@ const ( // Special PCDATA values. const ( - // PCDATA_RegMapIndex values. - // - // Only if !go115ReduceLiveness. - PCDATA_RegMapUnsafe = PCDATA_UnsafePointUnsafe // Unsafe for async preemption - // PCDATA_UnsafePoint values. PCDATA_UnsafePointSafe = -1 // Safe for async preemption PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption -- cgit v1.3 From fdba080220f0642b2b3926cbc062c775f6224e5d Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Fri, 30 Oct 2020 18:36:41 -0400 Subject: cmd: remove Go115AMD64 Always do aligned jumps now. Change-Id: If68a16fe93c9173c83323a9063465c9bd166eeb8 Reviewed-on: https://go-review.googlesource.com/c/go/+/266857 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: David Chase --- src/cmd/asm/internal/asm/endtoend_test.go | 7 +------ src/cmd/internal/obj/x86/asm6.go | 11 +++-------- src/cmd/internal/objabi/util.go | 9 +-------- src/cmd/link/internal/amd64/l.go | 2 +- src/cmd/link/internal/amd64/obj.go | 7 +------ 5 files changed, 7 insertions(+), 29 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 989b7a5405..7472507caf 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -390,12 +390,7 @@ func TestARM64Errors(t *testing.T) { } func TestAMD64EndToEnd(t *testing.T) { - defer func(old string) { objabi.GOAMD64 = old }(objabi.GOAMD64) - for _, goamd64 := range []string{"normaljumps", "alignedjumps"} { - t.Logf("GOAMD64=%s", goamd64) - objabi.GOAMD64 = goamd64 - testEndToEnd(t, "amd64", "amd64") - } + testEndToEnd(t, "amd64", "amd64") } func Test386Encoder(t *testing.T) { diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 94aed44871..a6b85ac4a0 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -1851,9 +1851,9 @@ func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { return q } -// If the environment variable GOAMD64=alignedjumps the assembler will ensure that -// no standalone or macro-fused jump will straddle or end on a 32 byte boundary -// by inserting NOPs before the jumps +// isJump returns whether p is a jump instruction. +// It is used to ensure that no standalone or macro-fused jump will straddle +// or end on a 32 byte boundary by inserting NOPs before the jumps. func isJump(p *obj.Prog) bool { return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO @@ -1987,11 +1987,6 @@ func makePjcCtx(ctxt *obj.Link) padJumpsCtx { return padJumpsCtx(0) } - if objabi.GOAMD64 != "alignedjumps" { - return padJumpsCtx(0) - - } - return padJumpsCtx(32) } diff --git a/src/cmd/internal/objabi/util.go b/src/cmd/internal/objabi/util.go index b81b73a022..9479ab2cd9 100644 --- a/src/cmd/internal/objabi/util.go +++ b/src/cmd/internal/objabi/util.go @@ -25,7 +25,6 @@ var ( GOARCH = envOr("GOARCH", defaultGOARCH) GOOS = envOr("GOOS", defaultGOOS) GO386 = envOr("GO386", defaultGO386) - GOAMD64 = goamd64() GOARM = goarm() GOMIPS = gomips() GOMIPS64 = gomips64() @@ -37,15 +36,9 @@ var ( const ( ElfRelocOffset = 256 - MachoRelocOffset = 2048 // reserve enough space for ELF relocations - Go115AMD64 = "alignedjumps" // Should be "alignedjumps" or "normaljumps"; this replaces environment variable introduced in CL 219357. + MachoRelocOffset = 2048 // reserve enough space for ELF relocations ) -// TODO(1.16): assuming no issues in 1.15 release, remove this and related constant. -func goamd64() string { - return Go115AMD64 -} - func goarm() int { switch v := envOr("GOARM", defaultGOARM); v { case "5": diff --git a/src/cmd/link/internal/amd64/l.go b/src/cmd/link/internal/amd64/l.go index a9afb3a39f..c9ea90a9c8 100644 --- a/src/cmd/link/internal/amd64/l.go +++ b/src/cmd/link/internal/amd64/l.go @@ -33,7 +33,7 @@ package amd64 const ( maxAlign = 32 // max data alignment minAlign = 1 // min data alignment - funcAlign = 16 + funcAlign = 32 ) /* Used by ../internal/ld/dwarf.go */ diff --git a/src/cmd/link/internal/amd64/obj.go b/src/cmd/link/internal/amd64/obj.go index 777f99dbe2..d09c90ea28 100644 --- a/src/cmd/link/internal/amd64/obj.go +++ b/src/cmd/link/internal/amd64/obj.go @@ -39,13 +39,8 @@ import ( func Init() (*sys.Arch, ld.Arch) { arch := sys.ArchAMD64 - fa := funcAlign - if objabi.GOAMD64 == "alignedjumps" { - fa = 32 - } - theArch := ld.Arch{ - Funcalign: fa, + Funcalign: funcAlign, Maxalign: maxAlign, Minalign: minAlign, Dwarfregsp: dwarfRegSP, -- cgit v1.3 From 5f0fca1475e042a6e85f81ff6db676ec18805041 Mon Sep 17 00:00:00 2001 From: Jonathan Swinney Date: Mon, 2 Nov 2020 16:36:10 +0000 Subject: cmd/asm: rename arm64 instructions LDANDx to LDCLRx The LDANDx instructions were misleading because they correspond to the mnemonic LDCLRx as defined in the Arm Architecture Reference Manual for Armv8. This changes the assembler to use the same mnemonic as the GNU assembler and the manual. The instruction has the form: LDCLRx Rs, (Rb), Rt: *Rb -> Rt, Rs AND NOT(*Rb) -> *Rb Change-Id: I94ae003e99e817209bba1afe960e612bf3a0b410 Reviewed-on: https://go-review.googlesource.com/c/go/+/267138 Reviewed-by: Cherry Zhang Reviewed-by: fannie zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Trust: fannie zhang --- src/cmd/asm/internal/asm/testdata/arm64.s | 64 +++++++++++++------------- src/cmd/asm/internal/asm/testdata/arm64error.s | 48 +++++++++---------- src/cmd/internal/obj/arm64/a.out.go | 32 ++++++------- src/cmd/internal/obj/arm64/anames.go | 32 ++++++------- src/cmd/internal/obj/arm64/asm7.go | 34 +++++++------- 5 files changed, 105 insertions(+), 105 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 5547cf634c..91e3a0ca0a 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -681,38 +681,38 @@ again: LDADDLH R5, (RSP), R7 // e7036578 LDADDLB R5, (R6), R7 // c7006538 LDADDLB R5, (RSP), R7 // e7036538 - LDANDAD R5, (R6), R7 // c710a5f8 - LDANDAD R5, (RSP), R7 // e713a5f8 - LDANDAW R5, (R6), R7 // c710a5b8 - LDANDAW R5, (RSP), R7 // e713a5b8 - LDANDAH R5, (R6), R7 // c710a578 - LDANDAH R5, (RSP), R7 // e713a578 - LDANDAB R5, (R6), R7 // c710a538 - LDANDAB R5, (RSP), R7 // e713a538 - LDANDALD R5, (R6), R7 // c710e5f8 - LDANDALD R5, (RSP), R7 // e713e5f8 - LDANDALW R5, (R6), R7 // c710e5b8 - LDANDALW R5, (RSP), R7 // e713e5b8 - LDANDALH R5, (R6), R7 // c710e578 - LDANDALH R5, (RSP), R7 // e713e578 - LDANDALB R5, (R6), R7 // c710e538 - LDANDALB R5, (RSP), R7 // e713e538 - LDANDD R5, (R6), R7 // c71025f8 - LDANDD R5, (RSP), R7 // e71325f8 - LDANDW R5, (R6), R7 // c71025b8 - LDANDW R5, (RSP), R7 // e71325b8 - LDANDH R5, (R6), R7 // c7102578 - LDANDH R5, (RSP), R7 // e7132578 - LDANDB R5, (R6), R7 // c7102538 - LDANDB R5, (RSP), R7 // e7132538 - LDANDLD R5, (R6), R7 // c71065f8 - LDANDLD R5, (RSP), R7 // e71365f8 - LDANDLW R5, (R6), R7 // c71065b8 - LDANDLW R5, (RSP), R7 // e71365b8 - LDANDLH R5, (R6), R7 // c7106578 - LDANDLH R5, (RSP), R7 // e7136578 - LDANDLB R5, (R6), R7 // c7106538 - LDANDLB R5, (RSP), R7 // e7136538 + LDCLRAD R5, (R6), R7 // c710a5f8 + LDCLRAD R5, (RSP), R7 // e713a5f8 + LDCLRAW R5, (R6), R7 // c710a5b8 + LDCLRAW R5, (RSP), R7 // e713a5b8 + LDCLRAH R5, (R6), R7 // c710a578 + LDCLRAH R5, (RSP), R7 // e713a578 + LDCLRAB R5, (R6), R7 // c710a538 + LDCLRAB R5, (RSP), R7 // e713a538 + LDCLRALD R5, (R6), R7 // c710e5f8 + LDCLRALD R5, (RSP), R7 // e713e5f8 + LDCLRALW R5, (R6), R7 // c710e5b8 + LDCLRALW R5, (RSP), R7 // e713e5b8 + LDCLRALH R5, (R6), R7 // c710e578 + LDCLRALH R5, (RSP), R7 // e713e578 + LDCLRALB R5, (R6), R7 // c710e538 + LDCLRALB R5, (RSP), R7 // e713e538 + LDCLRD R5, (R6), R7 // c71025f8 + LDCLRD R5, (RSP), R7 // e71325f8 + LDCLRW R5, (R6), R7 // c71025b8 + LDCLRW R5, (RSP), R7 // e71325b8 + LDCLRH R5, (R6), R7 // c7102578 + LDCLRH R5, (RSP), R7 // e7132578 + LDCLRB R5, (R6), R7 // c7102538 + LDCLRB R5, (RSP), R7 // e7132538 + LDCLRLD R5, (R6), R7 // c71065f8 + LDCLRLD R5, (RSP), R7 // e71365f8 + LDCLRLW R5, (R6), R7 // c71065b8 + LDCLRLW R5, (RSP), R7 // e71365b8 + LDCLRLH R5, (R6), R7 // c7106578 + LDCLRLH R5, (RSP), R7 // e7136578 + LDCLRLB R5, (R6), R7 // c7106538 + LDCLRLB R5, (RSP), R7 // e7136538 LDEORAD R5, (R6), R7 // c720a5f8 LDEORAD R5, (RSP), R7 // e723a5f8 LDEORAW R5, (R6), R7 // c720a5b8 diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s index 99e4d62d25..e579f20836 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64error.s +++ b/src/cmd/asm/internal/asm/testdata/arm64error.s @@ -123,14 +123,14 @@ TEXT errors(SB),$0 LDADDLW R5, (R6), ZR // ERROR "illegal destination register" LDADDLH R5, (R6), ZR // ERROR "illegal destination register" LDADDLB R5, (R6), ZR // ERROR "illegal destination register" - LDANDD R5, (R6), ZR // ERROR "illegal destination register" - LDANDW R5, (R6), ZR // ERROR "illegal destination register" - LDANDH R5, (R6), ZR // ERROR "illegal destination register" - LDANDB R5, (R6), ZR // ERROR "illegal destination register" - LDANDLD R5, (R6), ZR // ERROR "illegal destination register" - LDANDLW R5, (R6), ZR // ERROR "illegal destination register" - LDANDLH R5, (R6), ZR // ERROR "illegal destination register" - LDANDLB R5, (R6), ZR // ERROR "illegal destination register" + LDCLRD R5, (R6), ZR // ERROR "illegal destination register" + LDCLRW R5, (R6), ZR // ERROR "illegal destination register" + LDCLRH R5, (R6), ZR // ERROR "illegal destination register" + LDCLRB R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLD R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLW R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLH R5, (R6), ZR // ERROR "illegal destination register" + LDCLRLB R5, (R6), ZR // ERROR "illegal destination register" LDEORD R5, (R6), ZR // ERROR "illegal destination register" LDEORW R5, (R6), ZR // ERROR "illegal destination register" LDEORH R5, (R6), ZR // ERROR "illegal destination register" @@ -163,22 +163,22 @@ TEXT errors(SB),$0 LDADDLW R5, (R6), RSP // ERROR "illegal destination register" LDADDLH R5, (R6), RSP // ERROR "illegal destination register" LDADDLB R5, (R6), RSP // ERROR "illegal destination register" - LDANDAD R5, (R6), RSP // ERROR "illegal destination register" - LDANDAW R5, (R6), RSP // ERROR "illegal destination register" - LDANDAH R5, (R6), RSP // ERROR "illegal destination register" - LDANDAB R5, (R6), RSP // ERROR "illegal destination register" - LDANDALD R5, (R6), RSP // ERROR "illegal destination register" - LDANDALW R5, (R6), RSP // ERROR "illegal destination register" - LDANDALH R5, (R6), RSP // ERROR "illegal destination register" - LDANDALB R5, (R6), RSP // ERROR "illegal destination register" - LDANDD R5, (R6), RSP // ERROR "illegal destination register" - LDANDW R5, (R6), RSP // ERROR "illegal destination register" - LDANDH R5, (R6), RSP // ERROR "illegal destination register" - LDANDB R5, (R6), RSP // ERROR "illegal destination register" - LDANDLD R5, (R6), RSP // ERROR "illegal destination register" - LDANDLW R5, (R6), RSP // ERROR "illegal destination register" - LDANDLH R5, (R6), RSP // ERROR "illegal destination register" - LDANDLB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRAB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRALB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRB R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLD R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLW R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLH R5, (R6), RSP // ERROR "illegal destination register" + LDCLRLB R5, (R6), RSP // ERROR "illegal destination register" LDEORAD R5, (R6), RSP // ERROR "illegal destination register" LDEORAW R5, (R6), RSP // ERROR "illegal destination register" LDEORAH R5, (R6), RSP // ERROR "illegal destination register" diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 5844b71ca7..1d1bea505c 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -617,22 +617,6 @@ const ( ALDADDLD ALDADDLH ALDADDLW - ALDANDAB - ALDANDAD - ALDANDAH - ALDANDAW - ALDANDALB - ALDANDALD - ALDANDALH - ALDANDALW - ALDANDB - ALDANDD - ALDANDH - ALDANDW - ALDANDLB - ALDANDLD - ALDANDLH - ALDANDLW ALDAR ALDARB ALDARH @@ -643,6 +627,22 @@ const ( ALDAXRB ALDAXRH ALDAXRW + ALDCLRAB + ALDCLRAD + ALDCLRAH + ALDCLRAW + ALDCLRALB + ALDCLRALD + ALDCLRALH + ALDCLRALW + ALDCLRB + ALDCLRD + ALDCLRH + ALDCLRW + ALDCLRLB + ALDCLRLD + ALDCLRLH + ALDCLRLW ALDEORAB ALDEORAD ALDEORAH diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index fb216f9a94..a98f8c7ed5 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -111,22 +111,6 @@ var Anames = []string{ "LDADDLD", "LDADDLH", "LDADDLW", - "LDANDAB", - "LDANDAD", - "LDANDAH", - "LDANDAW", - "LDANDALB", - "LDANDALD", - "LDANDALH", - "LDANDALW", - "LDANDB", - "LDANDD", - "LDANDH", - "LDANDW", - "LDANDLB", - "LDANDLD", - "LDANDLH", - "LDANDLW", "LDAR", "LDARB", "LDARH", @@ -137,6 +121,22 @@ var Anames = []string{ "LDAXRB", "LDAXRH", "LDAXRW", + "LDCLRAB", + "LDCLRAD", + "LDCLRAH", + "LDCLRAW", + "LDCLRALB", + "LDCLRALD", + "LDCLRALH", + "LDCLRALW", + "LDCLRB", + "LDCLRD", + "LDCLRH", + "LDCLRW", + "LDCLRLB", + "LDCLRLD", + "LDCLRLH", + "LDCLRLW", "LDEORAB", "LDEORAD", "LDEORAH", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 4fc62d5c7f..1a359f1921 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -107,22 +107,22 @@ var atomicLDADD = map[obj.As]uint32{ ALDADDLW: 2<<30 | 0x1c3<<21 | 0x00<<10, ALDADDLH: 1<<30 | 0x1c3<<21 | 0x00<<10, ALDADDLB: 0<<30 | 0x1c3<<21 | 0x00<<10, - ALDANDAD: 3<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAW: 2<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAH: 1<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDAB: 0<<30 | 0x1c5<<21 | 0x04<<10, - ALDANDALD: 3<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALW: 2<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALH: 1<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDALB: 0<<30 | 0x1c7<<21 | 0x04<<10, - ALDANDD: 3<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDW: 2<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDH: 1<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDB: 0<<30 | 0x1c1<<21 | 0x04<<10, - ALDANDLD: 3<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLW: 2<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLH: 1<<30 | 0x1c3<<21 | 0x04<<10, - ALDANDLB: 0<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRAD: 3<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAW: 2<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAH: 1<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRAB: 0<<30 | 0x1c5<<21 | 0x04<<10, + ALDCLRALD: 3<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALW: 2<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALH: 1<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRALB: 0<<30 | 0x1c7<<21 | 0x04<<10, + ALDCLRD: 3<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRW: 2<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRH: 1<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRB: 0<<30 | 0x1c1<<21 | 0x04<<10, + ALDCLRLD: 3<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLW: 2<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLH: 1<<30 | 0x1c3<<21 | 0x04<<10, + ALDCLRLB: 0<<30 | 0x1c3<<21 | 0x04<<10, ALDEORAD: 3<<30 | 0x1c5<<21 | 0x08<<10, ALDEORAW: 2<<30 | 0x1c5<<21 | 0x08<<10, ALDEORAH: 1<<30 | 0x1c5<<21 | 0x08<<10, @@ -4028,7 +4028,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 |= uint32(p.From.Reg&31) << 5 o1 |= uint32(p.To.Reg & 31) - case 47: // SWPx/LDADDx/LDANDx/LDEORx/LDORx/CASx Rs, (Rb), Rt + case 47: // SWPx/LDADDx/LDCLRx/LDEORx/LDORx/CASx Rs, (Rb), Rt rs := p.From.Reg rt := p.RegTo2 rb := p.To.Reg -- cgit v1.3 From 5ed81a3d14aa4eda5de87d7fe074b4c913b58511 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 3 Nov 2020 16:59:25 -0600 Subject: cmd/asm: fix rlwnm reg,reg,const,reg encoding on ppc64 The wrong value for the first reg parameter was selected. Likewise the wrong opcode was selected. This should match rlwnm (rrr type), not rlwinm (irr type). Similarly, fix the optab matching rules so clrlslwi does not match reg,reg,const,reg arguments. This is not a valid operand combination for clrlslwi. Fixes #42368 Change-Id: I4eb16d45a760b9fd3f497ef9863f82465351d39f Reviewed-on: https://go-review.googlesource.com/c/go/+/267421 Reviewed-by: Cherry Zhang Trust: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64.s | 2 ++ src/cmd/internal/obj/ppc64/asm9.go | 26 ++++++-------------------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index 2b1191c44b..8f6eb14f73 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -282,7 +282,9 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 RLWMI $7, R3, $65535, R6 // 50663c3e RLWMICC $7, R3, $65535, R6 // 50663c3f RLWNM $3, R4, $7, R6 // 54861f7e + RLWNM R3, R4, $7, R6 // 5c861f7e RLWNMCC $3, R4, $7, R6 // 54861f7f + RLWNMCC R3, R4, $7, R6 // 5c861f7f RLDMI $0, R4, $7, R6 // 7886076c RLDMICC $0, R4, $7, R6 // 7886076d RLDIMI $0, R4, $7, R6 // 788601cc diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 090fefb4d8..775d27d8e8 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -174,6 +174,7 @@ var optab = []Optab{ {ASRAD, C_SCON, C_NONE, C_NONE, C_REG, 56, 4, 0}, {ARLWMI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0}, {ARLWMI, C_REG, C_REG, C_LCON, C_REG, 63, 4, 0}, + {ACLRLSLWI, C_SCON, C_REG, C_LCON, C_REG, 62, 4, 0}, {ARLDMI, C_SCON, C_REG, C_LCON, C_REG, 30, 4, 0}, {ARLDC, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0}, {ARLDCL, C_SCON, C_REG, C_LCON, C_REG, 29, 4, 0}, @@ -1911,7 +1912,6 @@ func buildop(ctxt *obj.Link) { opset(ARLWMICC, r0) opset(ARLWNM, r0) opset(ARLWNMCC, r0) - opset(ACLRLSLWI, r0) case ARLDMI: opset(ARLDMICC, r0) @@ -2010,6 +2010,7 @@ func buildop(ctxt *obj.Link) { AADDEX, ACMPEQB, AECIWX, + ACLRLSLWI, obj.ANOP, obj.ATEXT, obj.AUNDEF, @@ -3413,25 +3414,10 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { } case 63: /* rlwmi b,s,$mask,a */ - v := c.regoff(&p.From) - switch p.As { - case ACLRLSLWI: - n := c.regoff(p.GetFrom3()) - if n > v || v >= 32 { - // Message will match operands from the ISA even though in the - // code it uses 'v' - c.ctxt.Diag("Invalid n or b for CLRLSLWI: %x %x\n%v", v, n, p) - } - // This is an extended mnemonic described in the ISA C.8.2 - // clrlslwi ra,rs,b,n -> rlwinm ra,rs,n,b-n,31-n - // It generates the rlwinm directly here. - o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(p.Reg), uint32(n), uint32(v-n), uint32(31-n)) - default: - var mask [2]uint8 - c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) - o1 = AOP_RRR(c.opirr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(v)) - o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 - } + var mask [2]uint8 + c.maskgen(p, mask[:], uint32(c.regoff(p.GetFrom3()))) + o1 = AOP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg)) + o1 |= (uint32(mask[0])&31)<<6 | (uint32(mask[1])&31)<<1 case 64: /* mtfsf fr[, $m] {,fpcsr} */ var v int32 -- cgit v1.3 From 63fd764502e08d067293a93d6d1a566951255ce5 Mon Sep 17 00:00:00 2001 From: Derek Parker Date: Wed, 28 Oct 2020 20:54:27 +0000 Subject: cmd/internal/obj: add prologue_end DWARF stmt for ppc64 This patch adds a prologue_end statement to the DWARF information for the ppc64 arch. Prologue end is used by the Delve debugger in order to determine where to set a breakpoint to avoid the stacksplit prologue. Updates #36612 Change-Id: Ifb16c1476fe716a0bf493c5486d1d88ebe8d0253 GitHub-Last-Rev: 77a217206d529df8bf8d4ef10a5347b6ae524612 GitHub-Pull-Request: golang/go#42261 Reviewed-on: https://go-review.googlesource.com/c/go/+/266019 Run-TryBot: David Chase TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: Alessandro Arzilli Trust: Dmitri Shuralyov --- src/cmd/internal/obj/ppc64/obj9.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index 3ab19de602..fddf552156 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -32,6 +32,7 @@ package ppc64 import ( "cmd/internal/obj" "cmd/internal/objabi" + "cmd/internal/src" "cmd/internal/sys" ) @@ -672,6 +673,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // save the link register and update the stack, since that code is // called directly from C/C++ and can't clobber REGTMP (R31). if autosize != 0 && c.cursym.Name != "runtime.racecallbackthunk" { + var prologueEnd *obj.Prog // Save the link register and update the SP. MOVDU is used unless // the frame size is too large. The link register must be saved // even for non-empty leaf functions so that traceback works. @@ -685,6 +687,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Type = obj.TYPE_REG q.To.Reg = REGTMP + prologueEnd = q + q = obj.Appendp(q, c.newprog) q.As = AMOVDU q.Pos = p.Pos @@ -720,6 +724,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.To.Offset = int64(-autosize) q.To.Reg = REGSP + prologueEnd = q + q = obj.Appendp(q, c.newprog) q.As = AADD q.Pos = p.Pos @@ -730,8 +736,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { q.Spadj = +autosize q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) - } + prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) } else if c.cursym.Func().Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have -- cgit v1.3 From 854e892ce17e2555c59fce5b92f64bc505ba5d8c Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Mon, 11 May 2020 09:44:48 -0700 Subject: cmd/compile: optimize shift pairs and masks on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize combinations of left and right shifts by a constant value into a 'rotate then insert selected bits [into zero]' instruction. Use the same instruction for contiguous masks since it has some benefits over 'and immediate' (not restricted to 32-bits, does not overwrite source register). To keep the complexity of this change under control I've only implemented 64 bit operations for now. There are a lot more optimizations that can be done with this instruction family. However, since their function overlaps with other instructions we need to be somewhat careful not to break existing optimization rules by creating optimization dead ends. This is particularly true of the load/store merging rules which contain lots of zero extensions and shifts. This CL does interfere with the store merging rules when an operand is shifted left before it is stored: binary.BigEndian.PutUint64(b, x << 1) This is unfortunate but it's not critical and somewhat complex so I plan to fix that in a follow up CL. file before after Δ % addr2line 4117446 4117282 -164 -0.004% api 4945184 4942752 -2432 -0.049% asm 4998079 4991891 -6188 -0.124% buildid 2685158 2684074 -1084 -0.040% cgo 4553732 4553394 -338 -0.007% compile 19294446 19245070 -49376 -0.256% cover 4897105 4891319 -5786 -0.118% dist 3544389 3542785 -1604 -0.045% doc 3926795 3927617 +822 +0.021% fix 3302958 3293868 -9090 -0.275% link 6546274 6543456 -2818 -0.043% nm 4102021 4100825 -1196 -0.029% objdump 4542431 4548483 +6052 +0.133% pack 2482465 2416389 -66076 -2.662% pprof 13366541 13363915 -2626 -0.020% test2json 2829007 2761515 -67492 -2.386% trace 10216164 10219684 +3520 +0.034% vet 6773956 6773572 -384 -0.006% total 107124151 106917891 -206260 -0.193% Change-Id: I7591cce41e06867ba10a745daae9333513062746 Reviewed-on: https://go-review.googlesource.com/c/go/+/233317 Run-TryBot: Michael Munday TryBot-Result: Go Bot Reviewed-by: Keith Randall Trust: Michael Munday --- src/cmd/compile/internal/s390x/ssa.go | 14 +- src/cmd/compile/internal/ssa/gen/S390X.rules | 162 ++++- src/cmd/compile/internal/ssa/gen/S390XOps.go | 20 +- src/cmd/compile/internal/ssa/opGen.go | 25 +- src/cmd/compile/internal/ssa/rewriteS390X.go | 844 +++++++++++++++++++-------- src/cmd/internal/obj/s390x/rotate.go | 82 ++- src/cmd/internal/obj/s390x/rotate_test.go | 122 ++++ test/codegen/bitfield.go | 18 +- test/codegen/bits.go | 12 + test/codegen/mathbits.go | 2 +- test/codegen/rotate.go | 6 +- test/codegen/shift.go | 33 +- 12 files changed, 1007 insertions(+), 333 deletions(-) create mode 100644 src/cmd/internal/obj/s390x/rotate_test.go (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index 84b9f491e4..8037357131 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -188,6 +188,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { {Type: obj.TYPE_REG, Reg: r2}, }) p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} + case ssa.OpS390XRISBGZ: + r1 := v.Reg() + r2 := v.Args[0].Reg() + i := v.Aux.(s390x.RotateParams) + p := s.Prog(v.Op.Asm()) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(i.Start)} + p.SetRestArgs([]obj.Addr{ + {Type: obj.TYPE_CONST, Offset: int64(i.End)}, + {Type: obj.TYPE_CONST, Offset: int64(i.Amount)}, + {Type: obj.TYPE_REG, Reg: r2}, + }) + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: r1} case ssa.OpS390XADD, ssa.OpS390XADDW, ssa.OpS390XSUB, ssa.OpS390XSUBW, ssa.OpS390XAND, ssa.OpS390XANDW, @@ -360,7 +372,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { case ssa.OpS390XSLDconst, ssa.OpS390XSLWconst, ssa.OpS390XSRDconst, ssa.OpS390XSRWconst, ssa.OpS390XSRADconst, ssa.OpS390XSRAWconst, - ssa.OpS390XRLLGconst, ssa.OpS390XRLLconst: + ssa.OpS390XRLLconst: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules index 1b56361c00..39949edbc2 100644 --- a/src/cmd/compile/internal/ssa/gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/gen/S390X.rules @@ -643,8 +643,18 @@ // equivalent to the leftmost 32 bits being set. // TODO(mundaym): modify the assembler to accept 64-bit values // and use isU32Bit(^c). -(AND x (MOVDconst [c])) && is32Bit(c) && c < 0 => (ANDconst [c] x) -(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 => (MOVWZreg (ANDWconst [int32(c)] x)) +(AND x (MOVDconst [c])) + && s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil + => (RISBGZ x {*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))}) +(AND x (MOVDconst [c])) + && is32Bit(c) + && c < 0 + => (ANDconst [c] x) +(AND x (MOVDconst [c])) + && is32Bit(c) + && c >= 0 + => (MOVWZreg (ANDWconst [int32(c)] x)) + (ANDW x (MOVDconst [c])) => (ANDWconst [int32(c)] x) ((AND|ANDW)const [c] ((AND|ANDW)const [d] x)) => ((AND|ANDW)const [c&d] x) @@ -653,14 +663,20 @@ ((OR|XOR)W x (MOVDconst [c])) => ((OR|XOR)Wconst [int32(c)] x) // Constant shifts. -(S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c])) - => (S(LD|RD|RAD|LW|RW|RAW)const x [int8(c&63)]) +(S(LD|RD|RAD) x (MOVDconst [c])) => (S(LD|RD|RAD)const x [int8(c&63)]) +(S(LW|RW|RAW) x (MOVDconst [c])) && c&32 == 0 => (S(LW|RW|RAW)const x [int8(c&31)]) +(S(LW|RW) _ (MOVDconst [c])) && c&32 != 0 => (MOVDconst [0]) +(SRAW x (MOVDconst [c])) && c&32 != 0 => (SRAWconst x [31]) // Shifts only use the rightmost 6 bits of the shift value. +(S(LD|RD|RAD|LW|RW|RAW) x (RISBGZ y {r})) + && r.Amount == 0 + && r.OutMask()&63 == 63 + => (S(LD|RD|RAD|LW|RW|RAW) x y) (S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y)) - => (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [int32(c&63)] y)) + => (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [int32(c&63)] y)) (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63 - => (S(LD|RD|RAD|LW|RW|RAW) x y) + => (S(LD|RD|RAD|LW|RW|RAW) x y) (SLD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SLD x y) (SRD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRD x y) (SRAD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAD x y) @@ -668,17 +684,13 @@ (SRW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRW x y) (SRAW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAW x y) -// Constant rotate generation -(RLL x (MOVDconst [c])) => (RLLconst x [int8(c&31)]) -(RLLG x (MOVDconst [c])) => (RLLGconst x [int8(c&63)]) - -(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x) -( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x) -(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x) +// Match rotate by constant. +(RLLG x (MOVDconst [c])) => (RISBGZ x {s390x.NewRotateParams(0, 63, int8(c&63))}) +(RLL x (MOVDconst [c])) => (RLLconst x [int8(c&31)]) -(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x) -( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x) -(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x) +// Match rotate by constant pattern. +((ADD|OR|XOR) (SLDconst x [c]) (SRDconst x [64-c])) => (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) +((ADD|OR|XOR)W (SLWconst x [c]) (SRWconst x [32-c])) => (RLLconst x [c]) // Signed 64-bit comparison with immediate. (CMP x (MOVDconst [c])) && is32Bit(c) => (CMPconst x [int32(c)]) @@ -692,15 +704,97 @@ (CMP(W|WU) x (MOVDconst [c])) => (CMP(W|WU)const x [int32(c)]) (CMP(W|WU) (MOVDconst [c]) x) => (InvertFlags (CMP(W|WU)const x [int32(c)])) +// Match (x >> c) << d to 'rotate then insert selected bits [into zero]'. +(SLDconst (SRDconst x [c]) [d]) => (RISBGZ x {s390x.NewRotateParams(max8(0, c-d), 63-d, (d-c)&63)}) + +// Match (x << c) >> d to 'rotate then insert selected bits [into zero]'. +(SRDconst (SLDconst x [c]) [d]) => (RISBGZ x {s390x.NewRotateParams(d, min8(63, 63-c+d), (c-d)&63)}) + +// Absorb input zero extension into 'rotate then insert selected bits [into zero]'. +(RISBGZ (MOVWZreg x) {r}) && r.InMerge(0xffffffff) != nil => (RISBGZ x {*r.InMerge(0xffffffff)}) +(RISBGZ (MOVHZreg x) {r}) && r.InMerge(0x0000ffff) != nil => (RISBGZ x {*r.InMerge(0x0000ffff)}) +(RISBGZ (MOVBZreg x) {r}) && r.InMerge(0x000000ff) != nil => (RISBGZ x {*r.InMerge(0x000000ff)}) + +// Absorb 'rotate then insert selected bits [into zero]' into zero extension. +(MOVWZreg (RISBGZ x {r})) && r.OutMerge(0xffffffff) != nil => (RISBGZ x {*r.OutMerge(0xffffffff)}) +(MOVHZreg (RISBGZ x {r})) && r.OutMerge(0x0000ffff) != nil => (RISBGZ x {*r.OutMerge(0x0000ffff)}) +(MOVBZreg (RISBGZ x {r})) && r.OutMerge(0x000000ff) != nil => (RISBGZ x {*r.OutMerge(0x000000ff)}) + +// Absorb shift into 'rotate then insert selected bits [into zero]'. +// +// Any unsigned shift can be represented as a rotate and mask operation: +// +// x << c => RotateLeft64(x, c) & (^uint64(0) << c) +// x >> c => RotateLeft64(x, -c) & (^uint64(0) >> c) +// +// Therefore when a shift is used as the input to a rotate then insert +// selected bits instruction we can merge the two together. We just have +// to be careful that the resultant mask is representable (non-zero and +// contiguous). For example, assuming that x is variable and c, y and m +// are constants, a shift followed by a rotate then insert selected bits +// could be represented as: +// +// RotateLeft64(RotateLeft64(x, c) & (^uint64(0) << c), y) & m +// +// We can split the rotation by y into two, one rotate for x and one for +// the mask: +// +// RotateLeft64(RotateLeft64(x, c), y) & (RotateLeft64(^uint64(0) << c, y)) & m +// +// The rotations of x by c followed by y can then be combined: +// +// RotateLeft64(x, c+y) & (RotateLeft64(^uint64(0) << c, y)) & m +// ^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// rotate mask +// +// To perform this optimization we therefore just need to check that it +// is valid to merge the shift mask (^(uint64(0)< (RISBGZ x {(*r.InMerge(^uint64(0)<>c) != nil => (RISBGZ x {(*r.InMerge(^uint64(0)>>c)).RotateLeft(-c)}) + +// Absorb 'rotate then insert selected bits [into zero]' into left shift. +(SLDconst (RISBGZ x {r}) [c]) + && s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil + => (RISBGZ x {(*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount)}) + +// Absorb 'rotate then insert selected bits [into zero]' into right shift. +(SRDconst (RISBGZ x {r}) [c]) + && s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil + => (RISBGZ x {(*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount)}) + +// Merge 'rotate then insert selected bits [into zero]' instructions together. +(RISBGZ (RISBGZ x {y}) {z}) + && z.InMerge(y.OutMask()) != nil + => (RISBGZ x {(*z.InMerge(y.OutMask())).RotateLeft(y.Amount)}) + +// Convert RISBGZ into 64-bit shift (helps CSE). +(RISBGZ x {r}) && r.End == 63 && r.Start == -r.Amount&63 => (SRDconst x [-r.Amount&63]) +(RISBGZ x {r}) && r.Start == 0 && r.End == 63-r.Amount => (SLDconst x [r.Amount]) + +// Optimize single bit isolation when it is known to be equivalent to +// the most significant bit due to mask produced by arithmetic shift. +// Simply isolate the most significant bit itself and place it in the +// correct position. +// +// Example: (int64(x) >> 63) & 0x8 -> RISBGZ $60, $60, $4, Rsrc, Rdst +(RISBGZ (SRADconst x [c]) {r}) + && r.Start == r.End // single bit selected + && (r.Start+r.Amount)&63 <= c // equivalent to most significant bit of x + => (RISBGZ x {s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)}) + // Canonicalize the order of arguments to comparisons - helps with CSE. ((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID => (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x)) -// Using MOV{W,H,B}Zreg instead of AND is cheaper. -(AND x (MOVDconst [0xFF])) => (MOVBZreg x) -(AND x (MOVDconst [0xFFFF])) => (MOVHZreg x) -(AND x (MOVDconst [0xFFFFFFFF])) => (MOVWZreg x) -(ANDWconst [0xFF] x) => (MOVBZreg x) -(ANDWconst [0xFFFF] x) => (MOVHZreg x) +// Use sign/zero extend instead of RISBGZ. +(RISBGZ x {r}) && r == s390x.NewRotateParams(56, 63, 0) => (MOVBZreg x) +(RISBGZ x {r}) && r == s390x.NewRotateParams(48, 63, 0) => (MOVHZreg x) +(RISBGZ x {r}) && r == s390x.NewRotateParams(32, 63, 0) => (MOVWZreg x) + +// Use sign/zero extend instead of ANDW. +(ANDWconst [0x00ff] x) => (MOVBZreg x) +(ANDWconst [0xffff] x) => (MOVHZreg x) // Strength reduce multiplication to the sum (or difference) of two powers of two. // @@ -773,21 +867,22 @@ // detect attempts to set/clear the sign bit // may need to be reworked when NIHH/OIHH are added -(SRDconst [1] (SLDconst [1] (LGDR x))) => (LGDR (LPDFR x)) -(LDGR (SRDconst [1] (SLDconst [1] x))) => (LPDFR (LDGR x)) -(AND (MOVDconst [^(-1<<63)]) (LGDR x)) => (LGDR (LPDFR x)) -(LDGR (AND (MOVDconst [^(-1<<63)]) x)) => (LPDFR (LDGR x)) -(OR (MOVDconst [-1<<63]) (LGDR x)) => (LGDR (LNDFR x)) -(LDGR (OR (MOVDconst [-1<<63]) x)) => (LNDFR (LDGR x)) +(RISBGZ (LGDR x) {r}) && r == s390x.NewRotateParams(1, 63, 0) => (LGDR (LPDFR x)) +(LDGR (RISBGZ x {r})) && r == s390x.NewRotateParams(1, 63, 0) => (LPDFR (LDGR x)) +(OR (MOVDconst [-1<<63]) (LGDR x)) => (LGDR (LNDFR x)) +(LDGR (OR (MOVDconst [-1<<63]) x)) => (LNDFR (LDGR x)) // detect attempts to set the sign bit with load (LDGR x:(ORload [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (LNDFR (LDGR (MOVDload [off] {sym} ptr mem))) // detect copysign -(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) => (LGDR (CPSDR y x)) -(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) -(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR y))) => (LGDR (CPSDR y x)) -(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) +(OR (RISBGZ (LGDR x) {r}) (LGDR (LPDFR y))) + && r == s390x.NewRotateParams(0, 0, 0) + => (LGDR (CPSDR y x)) +(OR (RISBGZ (LGDR x) {r}) (MOVDconst [c])) + && c >= 0 + && r == s390x.NewRotateParams(0, 0, 0) + => (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) (CPSDR y (FMOVDconst [c])) && !math.Signbit(c) => (LPDFR y) (CPSDR y (FMOVDconst [c])) && math.Signbit(c) => (LNDFR y) @@ -966,6 +1061,9 @@ (CMPWconst (ANDWconst _ [m]) [n]) && int32(m) >= 0 && int32(m) < int32(n) => (FlagLT) (CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) => (FlagLT) +(CMPconst (RISBGZ x {r}) [c]) && c > 0 && r.OutMask() < uint64(c) => (FlagLT) +(CMPUconst (RISBGZ x {r}) [c]) && r.OutMask() < uint64(uint32(c)) => (FlagLT) + // Constant compare-and-branch with immediate. (CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Equal != 0 && int64(x) == int64(y) => (First yes no) (CGIJ {c} (MOVDconst [x]) [y] yes no) && c&s390x.Less != 0 && int64(x) < int64(y) => (First yes no) diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go index 728cfb5508..f0cf2f2f6e 100644 --- a/src/cmd/compile/internal/ssa/gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go @@ -331,25 +331,26 @@ func init() { {name: "LTEBR", argLength: 1, reg: fp1flags, asm: "LTEBR", typ: "Flags"}, // arg0 compare to 0, f32 {name: "SLD", argLength: 2, reg: sh21, asm: "SLD"}, // arg0 << arg1, shift amount is mod 64 - {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"}, // arg0 << arg1, shift amount is mod 32 + {name: "SLW", argLength: 2, reg: sh21, asm: "SLW"}, // arg0 << arg1, shift amount is mod 64 {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int8"}, // arg0 << auxint, shift amount 0-63 {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int8"}, // arg0 << auxint, shift amount 0-31 {name: "SRD", argLength: 2, reg: sh21, asm: "SRD"}, // unsigned arg0 >> arg1, shift amount is mod 64 - {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32 + {name: "SRW", argLength: 2, reg: sh21, asm: "SRW"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 64 {name: "SRDconst", argLength: 1, reg: gp11, asm: "SRD", aux: "Int8"}, // unsigned arg0 >> auxint, shift amount 0-63 {name: "SRWconst", argLength: 1, reg: gp11, asm: "SRW", aux: "Int8"}, // unsigned uint32(arg0) >> auxint, shift amount 0-31 // Arithmetic shifts clobber flags. {name: "SRAD", argLength: 2, reg: sh21, asm: "SRAD", clobberFlags: true}, // signed arg0 >> arg1, shift amount is mod 64 - {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true}, // signed int32(arg0) >> arg1, shift amount is mod 32 + {name: "SRAW", argLength: 2, reg: sh21, asm: "SRAW", clobberFlags: true}, // signed int32(arg0) >> arg1, shift amount is mod 64 {name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int8", clobberFlags: true}, // signed arg0 >> auxint, shift amount 0-63 {name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int8", clobberFlags: true}, // signed int32(arg0) >> auxint, shift amount 0-31 - {name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"}, // arg0 rotate left arg1, rotate amount 0-63 - {name: "RLL", argLength: 2, reg: sh21, asm: "RLL"}, // arg0 rotate left arg1, rotate amount 0-31 - {name: "RLLGconst", argLength: 1, reg: gp11, asm: "RLLG", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-63 - {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-31 + // Rotate instructions. + // Note: no RLLGconst - use RISBGZ instead. + {name: "RLLG", argLength: 2, reg: sh21, asm: "RLLG"}, // arg0 rotate left arg1, rotate amount 0-63 + {name: "RLL", argLength: 2, reg: sh21, asm: "RLL"}, // arg0 rotate left arg1, rotate amount 0-31 + {name: "RLLconst", argLength: 1, reg: gp11, asm: "RLL", aux: "Int8"}, // arg0 rotate left auxint, rotate amount 0-31 // Rotate then (and|or|xor|insert) selected bits instructions. // @@ -371,6 +372,7 @@ func init() { // +-------------+-------+-----+--------+-----------------------+-----------------------+-----------------------+ // {name: "RXSBG", argLength: 2, reg: gp21, asm: "RXSBG", resultInArg0: true, aux: "S390XRotateParams", clobberFlags: true}, // rotate then xor selected bits + {name: "RISBGZ", argLength: 1, reg: gp11, asm: "RISBGZ", aux: "S390XRotateParams", clobberFlags: true}, // rotate then insert selected bits [into zero] // unary ops {name: "NEG", argLength: 1, reg: gp11, asm: "NEG", clobberFlags: true}, // -arg0 @@ -547,9 +549,9 @@ func init() { // Atomic bitwise operations. // Note: 'floor' operations round the pointer down to the nearest word boundary // which reflects how they are used in the runtime. - {name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem. + {name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem. {name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem. - {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem. + {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem. {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem. // Compare and swap. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index c0b663cd8f..eceef1d91a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2285,9 +2285,9 @@ const ( OpS390XSRAWconst OpS390XRLLG OpS390XRLL - OpS390XRLLGconst OpS390XRLLconst OpS390XRXSBG + OpS390XRISBGZ OpS390XNEG OpS390XNEGW OpS390XNOT @@ -30740,10 +30740,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "RLLGconst", + name: "RLLconst", auxType: auxInt8, argLen: 1, - asm: s390x.ARLLG, + asm: s390x.ARLL, reg: regInfo{ inputs: []inputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 @@ -30754,13 +30754,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "RLLconst", - auxType: auxInt8, - argLen: 1, - asm: s390x.ARLL, + name: "RXSBG", + auxType: auxS390XRotateParams, + argLen: 2, + resultInArg0: true, + clobberFlags: true, + asm: s390x.ARXSBG, reg: regInfo{ inputs: []inputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 + {1, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 }, outputs: []outputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 @@ -30768,16 +30771,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "RXSBG", + name: "RISBGZ", auxType: auxS390XRotateParams, - argLen: 2, - resultInArg0: true, + argLen: 1, clobberFlags: true, - asm: s390x.ARXSBG, + asm: s390x.ARISBGZ, reg: regInfo{ inputs: []inputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 - {1, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 }, outputs: []outputInfo{ {0, 23551}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14 diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go index 8c3c61d584..d66113d111 100644 --- a/src/cmd/compile/internal/ssa/rewriteS390X.go +++ b/src/cmd/compile/internal/ssa/rewriteS390X.go @@ -699,6 +699,8 @@ func rewriteValueS390X(v *Value) bool { return rewriteValueS390X_OpS390XORconst(v) case OpS390XORload: return rewriteValueS390X_OpS390XORload(v) + case OpS390XRISBGZ: + return rewriteValueS390X_OpS390XRISBGZ(v) case OpS390XRLL: return rewriteValueS390X_OpS390XRLL(v) case OpS390XRLLG: @@ -5272,9 +5274,8 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool { } break } - // match: (ADD (SLDconst x [c]) (SRDconst x [d])) - // cond: d == 64-c - // result: (RLLGconst [c] x) + // match: (ADD (SLDconst x [c]) (SRDconst x [64-c])) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLDconst { @@ -5282,15 +5283,11 @@ func rewriteValueS390X_OpS390XADD(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRDconst { + if v_1.Op != OpS390XSRDconst || auxIntToInt8(v_1.AuxInt) != 64-c || x != v_1.Args[0] { continue } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 64-c) { - continue - } - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, c)) v.AddArg(x) return true } @@ -5470,9 +5467,8 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool { } break } - // match: (ADDW (SLWconst x [c]) (SRWconst x [d])) - // cond: d == 32-c - // result: (RLLconst [c] x) + // match: (ADDW (SLWconst x [c]) (SRWconst x [32-c])) + // result: (RLLconst x [c]) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLWconst { @@ -5480,11 +5476,7 @@ func rewriteValueS390X_OpS390XADDW(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRWconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 32-c) { + if v_1.Op != OpS390XSRWconst || auxIntToInt8(v_1.AuxInt) != 32-c || x != v_1.Args[0] { continue } v.reset(OpS390XRLLconst) @@ -5844,8 +5836,8 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (AND x (MOVDconst [c])) - // cond: is32Bit(c) && c < 0 - // result: (ANDconst [c] x) + // cond: s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil + // result: (RISBGZ x {*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 @@ -5853,19 +5845,19 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool { continue } c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c) && c < 0) { + if !(s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c)) != nil) { continue } - v.reset(OpS390XANDconst) - v.AuxInt = int64ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*s390x.NewRotateParams(0, 63, 0).OutMerge(uint64(c))) v.AddArg(x) return true } break } // match: (AND x (MOVDconst [c])) - // cond: is32Bit(c) && c >= 0 - // result: (MOVWZreg (ANDWconst [int32(c)] x)) + // cond: is32Bit(c) && c < 0 + // result: (ANDconst [c] x) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 @@ -5873,72 +5865,32 @@ func rewriteValueS390X_OpS390XAND(v *Value) bool { continue } c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c) && c >= 0) { - continue - } - v.reset(OpS390XMOVWZreg) - v0 := b.NewValue0(v.Pos, OpS390XANDWconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(int32(c)) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } - // match: (AND x (MOVDconst [0xFF])) - // result: (MOVBZreg x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpS390XMOVDconst || auxIntToInt64(v_1.AuxInt) != 0xFF { + if !(is32Bit(c) && c < 0) { continue } - v.reset(OpS390XMOVBZreg) + v.reset(OpS390XANDconst) + v.AuxInt = int64ToAuxInt(c) v.AddArg(x) return true } break } - // match: (AND x (MOVDconst [0xFFFF])) - // result: (MOVHZreg x) + // match: (AND x (MOVDconst [c])) + // cond: is32Bit(c) && c >= 0 + // result: (MOVWZreg (ANDWconst [int32(c)] x)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpS390XMOVDconst || auxIntToInt64(v_1.AuxInt) != 0xFFFF { + if v_1.Op != OpS390XMOVDconst { continue } - v.reset(OpS390XMOVHZreg) - v.AddArg(x) - return true - } - break - } - // match: (AND x (MOVDconst [0xFFFFFFFF])) - // result: (MOVWZreg x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpS390XMOVDconst || auxIntToInt64(v_1.AuxInt) != 0xFFFFFFFF { + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c) && c >= 0) { continue } v.reset(OpS390XMOVWZreg) - v.AddArg(x) - return true - } - break - } - // match: (AND (MOVDconst [^(-1<<63)]) (LGDR x)) - // result: (LGDR (LPDFR x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0.AuxInt) != ^(-1<<63) || v_1.Op != OpS390XLGDR { - continue - } - t := v_1.Type - x := v_1.Args[0] - v.reset(OpS390XLGDR) - v.Type = t - v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0 := b.NewValue0(v.Pos, OpS390XANDWconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(int32(c)) v0.AddArg(x) v.AddArg(v0) return true @@ -6103,10 +6055,10 @@ func rewriteValueS390X_OpS390XANDWconst(v *Value) bool { v.AddArg(x) return true } - // match: (ANDWconst [0xFF] x) + // match: (ANDWconst [0x00ff] x) // result: (MOVBZreg x) for { - if auxIntToInt32(v.AuxInt) != 0xFF { + if auxIntToInt32(v.AuxInt) != 0x00ff { break } x := v_0 @@ -6114,10 +6066,10 @@ func rewriteValueS390X_OpS390XANDWconst(v *Value) bool { v.AddArg(x) return true } - // match: (ANDWconst [0xFFFF] x) + // match: (ANDWconst [0xffff] x) // result: (MOVHZreg x) for { - if auxIntToInt32(v.AuxInt) != 0xFFFF { + if auxIntToInt32(v.AuxInt) != 0xffff { break } x := v_0 @@ -6515,6 +6467,21 @@ func rewriteValueS390X_OpS390XCMPUconst(v *Value) bool { v.reset(OpS390XFlagLT) return true } + // match: (CMPUconst (RISBGZ x {r}) [c]) + // cond: r.OutMask() < uint64(uint32(c)) + // result: (FlagLT) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + if !(r.OutMask() < uint64(uint32(c))) { + break + } + v.reset(OpS390XFlagLT) + return true + } // match: (CMPUconst (MOVWZreg x) [c]) // result: (CMPWUconst x [c]) for { @@ -7152,6 +7119,21 @@ func rewriteValueS390X_OpS390XCMPconst(v *Value) bool { v.reset(OpS390XFlagGT) return true } + // match: (CMPconst (RISBGZ x {r}) [c]) + // cond: c > 0 && r.OutMask() < uint64(c) + // result: (FlagLT) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + if !(c > 0 && r.OutMask() < uint64(c)) { + break + } + v.reset(OpS390XFlagLT) + return true + } // match: (CMPconst (MOVWreg x) [c]) // result: (CMPWconst x [c]) for { @@ -7684,47 +7666,25 @@ func rewriteValueS390X_OpS390XFNEGS(v *Value) bool { func rewriteValueS390X_OpS390XLDGR(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (LDGR (SRDconst [1] (SLDconst [1] x))) + // match: (LDGR (RISBGZ x {r})) + // cond: r == s390x.NewRotateParams(1, 63, 0) // result: (LPDFR (LDGR x)) for { t := v.Type - if v_0.Op != OpS390XSRDconst || auxIntToInt8(v_0.AuxInt) != 1 { + if v_0.Op != OpS390XRISBGZ { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XSLDconst || auxIntToInt8(v_0_0.AuxInt) != 1 { + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r == s390x.NewRotateParams(1, 63, 0)) { break } - x := v_0_0.Args[0] v.reset(OpS390XLPDFR) v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) v0.AddArg(x) v.AddArg(v0) return true } - // match: (LDGR (AND (MOVDconst [^(-1<<63)]) x)) - // result: (LPDFR (LDGR x)) - for { - t := v.Type - if v_0.Op != OpS390XAND { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0_0.AuxInt) != ^(-1<<63) { - continue - } - x := v_0_1 - v.reset(OpS390XLPDFR) - v0 := b.NewValue0(v.Pos, OpS390XLDGR, t) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } // match: (LDGR (OR (MOVDconst [-1<<63]) x)) // result: (LNDFR (LDGR x)) for { @@ -8309,6 +8269,23 @@ func rewriteValueS390X_OpS390XMOVBZreg(v *Value) bool { v.copyOf(x) return true } + // match: (MOVBZreg (RISBGZ x {r})) + // cond: r.OutMerge(0x000000ff) != nil + // result: (RISBGZ x {*r.OutMerge(0x000000ff)}) + for { + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r.OutMerge(0x000000ff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.OutMerge(0x000000ff)) + v.AddArg(x) + return true + } // match: (MOVBZreg (ANDWconst [m] x)) // result: (MOVWZreg (ANDWconst [int32( uint8(m))] x)) for { @@ -9697,6 +9674,23 @@ func rewriteValueS390X_OpS390XMOVHZreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(uint16(c))) return true } + // match: (MOVHZreg (RISBGZ x {r})) + // cond: r.OutMerge(0x0000ffff) != nil + // result: (RISBGZ x {*r.OutMerge(0x0000ffff)}) + for { + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r.OutMerge(0x0000ffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.OutMerge(0x0000ffff)) + v.AddArg(x) + return true + } // match: (MOVHZreg (ANDWconst [m] x)) // result: (MOVWZreg (ANDWconst [int32(uint16(m))] x)) for { @@ -10547,6 +10541,23 @@ func rewriteValueS390X_OpS390XMOVWZreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(uint32(c))) return true } + // match: (MOVWZreg (RISBGZ x {r})) + // cond: r.OutMerge(0xffffffff) != nil + // result: (RISBGZ x {*r.OutMerge(0xffffffff)}) + for { + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(r.OutMerge(0xffffffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.OutMerge(0xffffffff)) + v.AddArg(x) + return true + } return false } func rewriteValueS390X_OpS390XMOVWload(v *Value) bool { @@ -11622,9 +11633,8 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: ( OR (SLDconst x [c]) (SRDconst x [d])) - // cond: d == 64-c - // result: (RLLGconst [c] x) + // match: (OR (SLDconst x [c]) (SRDconst x [64-c])) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLDconst { @@ -11632,15 +11642,11 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRDconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 64-c) { + if v_1.Op != OpS390XSRDconst || auxIntToInt8(v_1.AuxInt) != 64-c || x != v_1.Args[0] { continue } - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, c)) v.AddArg(x) return true } @@ -11664,22 +11670,20 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR y))) + // match: (OR (RISBGZ (LGDR x) {r}) (LGDR (LPDFR y))) + // cond: r == s390x.NewRotateParams(0, 0, 0) // result: (LGDR (CPSDR y x)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XSLDconst || auxIntToInt8(v_0.AuxInt) != 63 { + if v_0.Op != OpS390XRISBGZ { continue } + r := auxToS390xRotateParams(v_0.Aux) v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XSRDconst || auxIntToInt8(v_0_0.AuxInt) != 63 { - continue - } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpS390XLGDR { + if v_0_0.Op != OpS390XLGDR { continue } - x := v_0_0_0.Args[0] + x := v_0_0.Args[0] if v_1.Op != OpS390XLGDR { continue } @@ -11689,6 +11693,9 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } t := v_1_0.Type y := v_1_0.Args[0] + if !(r == s390x.NewRotateParams(0, 0, 0)) { + continue + } v.reset(OpS390XLGDR) v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) v0.AddArg2(y, x) @@ -11697,28 +11704,25 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) - // cond: c & -1<<63 == 0 + // match: (OR (RISBGZ (LGDR x) {r}) (MOVDconst [c])) + // cond: c >= 0 && r == s390x.NewRotateParams(0, 0, 0) // result: (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XSLDconst || auxIntToInt8(v_0.AuxInt) != 63 { + if v_0.Op != OpS390XRISBGZ { continue } + r := auxToS390xRotateParams(v_0.Aux) v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XSRDconst || auxIntToInt8(v_0_0.AuxInt) != 63 { + if v_0_0.Op != OpS390XLGDR { continue } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpS390XLGDR { - continue - } - x := v_0_0_0.Args[0] + x := v_0_0.Args[0] if v_1.Op != OpS390XMOVDconst { continue } c := auxIntToInt64(v_1.AuxInt) - if !(c&-1<<63 == 0) { + if !(c >= 0 && r == s390x.NewRotateParams(0, 0, 0)) { continue } v.reset(OpS390XLGDR) @@ -11731,73 +11735,6 @@ func rewriteValueS390X_OpS390XOR(v *Value) bool { } break } - // match: (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR y))) - // result: (LGDR (CPSDR y x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XAND { - continue - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { - if v_0_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0_0.AuxInt) != -1<<63 || v_0_1.Op != OpS390XLGDR { - continue - } - x := v_0_1.Args[0] - if v_1.Op != OpS390XLGDR { - continue - } - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpS390XLPDFR { - continue - } - t := v_1_0.Type - y := v_1_0.Args[0] - v.reset(OpS390XLGDR) - v0 := b.NewValue0(v.Pos, OpS390XCPSDR, t) - v0.AddArg2(y, x) - v.AddArg(v0) - return true - } - } - break - } - // match: (OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) - // cond: c & -1<<63 == 0 - // result: (LGDR (CPSDR (FMOVDconst [math.Float64frombits(uint64(c))]) x)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpS390XAND { - continue - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { - if v_0_0.Op != OpS390XMOVDconst || auxIntToInt64(v_0_0.AuxInt) != -1<<63 || v_0_1.Op != OpS390XLGDR { - continue - } - x := v_0_1.Args[0] - if v_1.Op != OpS390XMOVDconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(c&-1<<63 == 0) { - continue - } - v.reset(OpS390XLGDR) - v0 := b.NewValue0(v.Pos, OpS390XCPSDR, x.Type) - v1 := b.NewValue0(v.Pos, OpS390XFMOVDconst, x.Type) - v1.AuxInt = float64ToAuxInt(math.Float64frombits(uint64(c))) - v0.AddArg2(v1, x) - v.AddArg(v0) - return true - } - } - break - } // match: (OR (MOVDconst [c]) (MOVDconst [d])) // result: (MOVDconst [c|d]) for { @@ -12394,9 +12331,8 @@ func rewriteValueS390X_OpS390XORW(v *Value) bool { } break } - // match: ( ORW (SLWconst x [c]) (SRWconst x [d])) - // cond: d == 32-c - // result: (RLLconst [c] x) + // match: (ORW (SLWconst x [c]) (SRWconst x [32-c])) + // result: (RLLconst x [c]) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLWconst { @@ -12404,11 +12340,7 @@ func rewriteValueS390X_OpS390XORW(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRWconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 32-c) { + if v_1.Op != OpS390XSRWconst || auxIntToInt8(v_1.AuxInt) != 32-c || x != v_1.Args[0] { continue } v.reset(OpS390XRLLconst) @@ -12980,6 +12912,221 @@ func rewriteValueS390X_OpS390XORload(v *Value) bool { } return false } +func rewriteValueS390X_OpS390XRISBGZ(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (RISBGZ (MOVWZreg x) {r}) + // cond: r.InMerge(0xffffffff) != nil + // result: (RISBGZ x {*r.InMerge(0xffffffff)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XMOVWZreg { + break + } + x := v_0.Args[0] + if !(r.InMerge(0xffffffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.InMerge(0xffffffff)) + v.AddArg(x) + return true + } + // match: (RISBGZ (MOVHZreg x) {r}) + // cond: r.InMerge(0x0000ffff) != nil + // result: (RISBGZ x {*r.InMerge(0x0000ffff)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XMOVHZreg { + break + } + x := v_0.Args[0] + if !(r.InMerge(0x0000ffff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.InMerge(0x0000ffff)) + v.AddArg(x) + return true + } + // match: (RISBGZ (MOVBZreg x) {r}) + // cond: r.InMerge(0x000000ff) != nil + // result: (RISBGZ x {*r.InMerge(0x000000ff)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XMOVBZreg { + break + } + x := v_0.Args[0] + if !(r.InMerge(0x000000ff) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(*r.InMerge(0x000000ff)) + v.AddArg(x) + return true + } + // match: (RISBGZ (SLDconst x [c]) {r}) + // cond: r.InMerge(^uint64(0)<>c) != nil + // result: (RISBGZ x {(*r.InMerge(^uint64(0)>>c)).RotateLeft(-c)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XSRDconst { + break + } + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + if !(r.InMerge(^uint64(0)>>c) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*r.InMerge(^uint64(0) >> c)).RotateLeft(-c)) + v.AddArg(x) + return true + } + // match: (RISBGZ (RISBGZ x {y}) {z}) + // cond: z.InMerge(y.OutMask()) != nil + // result: (RISBGZ x {(*z.InMerge(y.OutMask())).RotateLeft(y.Amount)}) + for { + z := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XRISBGZ { + break + } + y := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(z.InMerge(y.OutMask()) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*z.InMerge(y.OutMask())).RotateLeft(y.Amount)) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r.End == 63 && r.Start == -r.Amount&63 + // result: (SRDconst x [-r.Amount&63]) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r.End == 63 && r.Start == -r.Amount&63) { + break + } + v.reset(OpS390XSRDconst) + v.AuxInt = int8ToAuxInt(-r.Amount & 63) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r.Start == 0 && r.End == 63-r.Amount + // result: (SLDconst x [r.Amount]) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r.Start == 0 && r.End == 63-r.Amount) { + break + } + v.reset(OpS390XSLDconst) + v.AuxInt = int8ToAuxInt(r.Amount) + v.AddArg(x) + return true + } + // match: (RISBGZ (SRADconst x [c]) {r}) + // cond: r.Start == r.End && (r.Start+r.Amount)&63 <= c + // result: (RISBGZ x {s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)}) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XSRADconst { + break + } + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + if !(r.Start == r.End && (r.Start+r.Amount)&63 <= c) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(r.Start, r.Start, -r.Start&63)) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r == s390x.NewRotateParams(56, 63, 0) + // result: (MOVBZreg x) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r == s390x.NewRotateParams(56, 63, 0)) { + break + } + v.reset(OpS390XMOVBZreg) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r == s390x.NewRotateParams(48, 63, 0) + // result: (MOVHZreg x) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r == s390x.NewRotateParams(48, 63, 0)) { + break + } + v.reset(OpS390XMOVHZreg) + v.AddArg(x) + return true + } + // match: (RISBGZ x {r}) + // cond: r == s390x.NewRotateParams(32, 63, 0) + // result: (MOVWZreg x) + for { + r := auxToS390xRotateParams(v.Aux) + x := v_0 + if !(r == s390x.NewRotateParams(32, 63, 0)) { + break + } + v.reset(OpS390XMOVWZreg) + v.AddArg(x) + return true + } + // match: (RISBGZ (LGDR x) {r}) + // cond: r == s390x.NewRotateParams(1, 63, 0) + // result: (LGDR (LPDFR x)) + for { + r := auxToS390xRotateParams(v.Aux) + if v_0.Op != OpS390XLGDR { + break + } + t := v_0.Type + x := v_0.Args[0] + if !(r == s390x.NewRotateParams(1, 63, 0)) { + break + } + v.reset(OpS390XLGDR) + v.Type = t + v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) + v0.AddArg(x) + v.AddArg(v0) + return true + } + return false +} func rewriteValueS390X_OpS390XRLL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -13002,15 +13149,15 @@ func rewriteValueS390X_OpS390XRLLG(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (RLLG x (MOVDconst [c])) - // result: (RLLGconst x [int8(c&63)]) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, int8(c&63))}) for { x := v_0 if v_1.Op != OpS390XMOVDconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, int8(c&63))) v.AddArg(x) return true } @@ -13034,6 +13181,23 @@ func rewriteValueS390X_OpS390XSLD(v *Value) bool { v.AddArg(x) return true } + // match: (SLD x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SLD x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_1.Aux) + y := v_1.Args[0] + if !(r.Amount == 0 && r.OutMask()&63 == 63) { + break + } + v.reset(OpS390XSLD) + v.AddArg2(x, y) + return true + } // match: (SLD x (AND (MOVDconst [c]) y)) // result: (SLD x (ANDWconst [int32(c&63)] y)) for { @@ -13152,6 +13316,38 @@ func rewriteValueS390X_OpS390XSLD(v *Value) bool { } func rewriteValueS390X_OpS390XSLDconst(v *Value) bool { v_0 := v.Args[0] + // match: (SLDconst (SRDconst x [c]) [d]) + // result: (RISBGZ x {s390x.NewRotateParams(max8(0, c-d), 63-d, (d-c)&63)}) + for { + d := auxIntToInt8(v.AuxInt) + if v_0.Op != OpS390XSRDconst { + break + } + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(max8(0, c-d), 63-d, (d-c)&63)) + v.AddArg(x) + return true + } + // match: (SLDconst (RISBGZ x {r}) [c]) + // cond: s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil + // result: (RISBGZ x {(*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount)}) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask()) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*s390x.NewRotateParams(0, 63-c, c).InMerge(r.OutMask())).RotateLeft(r.Amount)) + v.AddArg(x) + return true + } // match: (SLDconst x [0]) // result: x for { @@ -13170,18 +13366,54 @@ func rewriteValueS390X_OpS390XSLW(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (SLW x (MOVDconst [c])) - // result: (SLWconst x [int8(c&63)]) + // cond: c&32 == 0 + // result: (SLWconst x [int8(c&31)]) for { x := v_0 if v_1.Op != OpS390XMOVDconst { break } c := auxIntToInt64(v_1.AuxInt) + if !(c&32 == 0) { + break + } v.reset(OpS390XSLWconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AuxInt = int8ToAuxInt(int8(c & 31)) v.AddArg(x) return true } + // match: (SLW _ (MOVDconst [c])) + // cond: c&32 != 0 + // result: (MOVDconst [0]) + for { + if v_1.Op != OpS390XMOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(c&32 != 0) { + break + } + v.reset(OpS390XMOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (SLW x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SLW x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_1.Aux) + y := v_1.Args[0] + if !(r.Amount == 0 && r.OutMask()&63 == 63) { + break + } + v.reset(OpS390XSLW) + v.AddArg2(x, y) + return true + } // match: (SLW x (AND (MOVDconst [c]) y)) // result: (SLW x (ANDWconst [int32(c&63)] y)) for { @@ -13330,6 +13562,23 @@ func rewriteValueS390X_OpS390XSRAD(v *Value) bool { v.AddArg(x) return true } + // match: (SRAD x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SRAD x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_1.Aux) + y := v_1.Args[0] + if !(r.Amount == 0 && r.OutMask()&63 == 63) { + break + } + v.reset(OpS390XSRAD) + v.AddArg2(x, y) + return true + } // match: (SRAD x (AND (MOVDconst [c]) y)) // result: (SRAD x (ANDWconst [int32(c&63)] y)) for { @@ -13478,18 +13727,56 @@ func rewriteValueS390X_OpS390XSRAW(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (SRAW x (MOVDconst [c])) - // result: (SRAWconst x [int8(c&63)]) + // cond: c&32 == 0 + // result: (SRAWconst x [int8(c&31)]) for { x := v_0 if v_1.Op != OpS390XMOVDconst { break } c := auxIntToInt64(v_1.AuxInt) + if !(c&32 == 0) { + break + } v.reset(OpS390XSRAWconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AuxInt = int8ToAuxInt(int8(c & 31)) v.AddArg(x) return true } + // match: (SRAW x (MOVDconst [c])) + // cond: c&32 != 0 + // result: (SRAWconst x [31]) + for { + x := v_0 + if v_1.Op != OpS390XMOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(c&32 != 0) { + break + } + v.reset(OpS390XSRAWconst) + v.AuxInt = int8ToAuxInt(31) + v.AddArg(x) + return true + } + // match: (SRAW x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SRAW x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_1.Aux) + y := v_1.Args[0] + if !(r.Amount == 0 && r.OutMask()&63 == 63) { + break + } + v.reset(OpS390XSRAW) + v.AddArg2(x, y) + return true + } // match: (SRAW x (AND (MOVDconst [c]) y)) // result: (SRAW x (ANDWconst [int32(c&63)] y)) for { @@ -13650,6 +13937,23 @@ func rewriteValueS390X_OpS390XSRD(v *Value) bool { v.AddArg(x) return true } + // match: (SRD x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SRD x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_1.Aux) + y := v_1.Args[0] + if !(r.Amount == 0 && r.OutMask()&63 == 63) { + break + } + v.reset(OpS390XSRD) + v.AddArg2(x, y) + return true + } // match: (SRD x (AND (MOVDconst [c]) y)) // result: (SRD x (ANDWconst [int32(c&63)] y)) for { @@ -13768,24 +14072,36 @@ func rewriteValueS390X_OpS390XSRD(v *Value) bool { } func rewriteValueS390X_OpS390XSRDconst(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (SRDconst [1] (SLDconst [1] (LGDR x))) - // result: (LGDR (LPDFR x)) + // match: (SRDconst (SLDconst x [c]) [d]) + // result: (RISBGZ x {s390x.NewRotateParams(d, min8(63, 63-c+d), (c-d)&63)}) for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpS390XSLDconst || auxIntToInt8(v_0.AuxInt) != 1 { + d := auxIntToInt8(v.AuxInt) + if v_0.Op != OpS390XSLDconst { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpS390XLGDR { + c := auxIntToInt8(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(d, min8(63, 63-c+d), (c-d)&63)) + v.AddArg(x) + return true + } + // match: (SRDconst (RISBGZ x {r}) [c]) + // cond: s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil + // result: (RISBGZ x {(*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount)}) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpS390XRISBGZ { break } - t := v_0_0.Type - x := v_0_0.Args[0] - v.reset(OpS390XLGDR) - v.Type = t - v0 := b.NewValue0(v.Pos, OpS390XLPDFR, x.Type) - v0.AddArg(x) - v.AddArg(v0) + r := auxToS390xRotateParams(v_0.Aux) + x := v_0.Args[0] + if !(s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask()) != nil) { + break + } + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux((*s390x.NewRotateParams(c, 63, -c&63).InMerge(r.OutMask())).RotateLeft(r.Amount)) + v.AddArg(x) return true } // match: (SRDconst x [0]) @@ -13806,18 +14122,54 @@ func rewriteValueS390X_OpS390XSRW(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (SRW x (MOVDconst [c])) - // result: (SRWconst x [int8(c&63)]) + // cond: c&32 == 0 + // result: (SRWconst x [int8(c&31)]) for { x := v_0 if v_1.Op != OpS390XMOVDconst { break } c := auxIntToInt64(v_1.AuxInt) + if !(c&32 == 0) { + break + } v.reset(OpS390XSRWconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AuxInt = int8ToAuxInt(int8(c & 31)) v.AddArg(x) return true } + // match: (SRW _ (MOVDconst [c])) + // cond: c&32 != 0 + // result: (MOVDconst [0]) + for { + if v_1.Op != OpS390XMOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + if !(c&32 != 0) { + break + } + v.reset(OpS390XMOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (SRW x (RISBGZ y {r})) + // cond: r.Amount == 0 && r.OutMask()&63 == 63 + // result: (SRW x y) + for { + x := v_0 + if v_1.Op != OpS390XRISBGZ { + break + } + r := auxToS390xRotateParams(v_1.Aux) + y := v_1.Args[0] + if !(r.Amount == 0 && r.OutMask()&63 == 63) { + break + } + v.reset(OpS390XSRW) + v.AddArg2(x, y) + return true + } // match: (SRW x (AND (MOVDconst [c]) y)) // result: (SRW x (ANDWconst [int32(c&63)] y)) for { @@ -14564,9 +14916,8 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool { } break } - // match: (XOR (SLDconst x [c]) (SRDconst x [d])) - // cond: d == 64-c - // result: (RLLGconst [c] x) + // match: (XOR (SLDconst x [c]) (SRDconst x [64-c])) + // result: (RISBGZ x {s390x.NewRotateParams(0, 63, c)}) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLDconst { @@ -14574,15 +14925,11 @@ func rewriteValueS390X_OpS390XXOR(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRDconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 64-c) { + if v_1.Op != OpS390XSRDconst || auxIntToInt8(v_1.AuxInt) != 64-c || x != v_1.Args[0] { continue } - v.reset(OpS390XRLLGconst) - v.AuxInt = int8ToAuxInt(c) + v.reset(OpS390XRISBGZ) + v.Aux = s390xRotateParamsToAux(s390x.NewRotateParams(0, 63, c)) v.AddArg(x) return true } @@ -14665,9 +15012,8 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool { } break } - // match: (XORW (SLWconst x [c]) (SRWconst x [d])) - // cond: d == 32-c - // result: (RLLconst [c] x) + // match: (XORW (SLWconst x [c]) (SRWconst x [32-c])) + // result: (RLLconst x [c]) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpS390XSLWconst { @@ -14675,11 +15021,7 @@ func rewriteValueS390X_OpS390XXORW(v *Value) bool { } c := auxIntToInt8(v_0.AuxInt) x := v_0.Args[0] - if v_1.Op != OpS390XSRWconst { - continue - } - d := auxIntToInt8(v_1.AuxInt) - if x != v_1.Args[0] || !(d == 32-c) { + if v_1.Op != OpS390XSRWconst || auxIntToInt8(v_1.AuxInt) != 32-c || x != v_1.Args[0] { continue } v.reset(OpS390XRLLconst) diff --git a/src/cmd/internal/obj/s390x/rotate.go b/src/cmd/internal/obj/s390x/rotate.go index fd2d5482db..7dbc45e648 100644 --- a/src/cmd/internal/obj/s390x/rotate.go +++ b/src/cmd/internal/obj/s390x/rotate.go @@ -4,6 +4,10 @@ package s390x +import ( + "math/bits" +) + // RotateParams represents the immediates required for a "rotate // then ... selected bits instruction". // @@ -24,12 +28,18 @@ package s390x // input left by. Note that this rotation is performed // before the masked region is used. type RotateParams struct { - Start uint8 // big-endian start bit index [0..63] - End uint8 // big-endian end bit index [0..63] - Amount uint8 // amount to rotate left + Start int8 // big-endian start bit index [0..63] + End int8 // big-endian end bit index [0..63] + Amount int8 // amount to rotate left } -func NewRotateParams(start, end, amount int64) RotateParams { +// NewRotateParams creates a set of parameters representing a +// rotation left by the amount provided and a selection of the bits +// between the provided start and end indexes (inclusive). +// +// The start and end indexes and the rotation amount must all +// be in the range 0-63 inclusive or this function will panic. +func NewRotateParams(start, end, amount int8) RotateParams { if start&^63 != 0 { panic("start out of bounds") } @@ -40,8 +50,66 @@ func NewRotateParams(start, end, amount int64) RotateParams { panic("amount out of bounds") } return RotateParams{ - Start: uint8(start), - End: uint8(end), - Amount: uint8(amount), + Start: start, + End: end, + Amount: amount, } } + +// RotateLeft generates a new set of parameters with the rotation amount +// increased by the given value. The selected bits are left unchanged. +func (r RotateParams) RotateLeft(amount int8) RotateParams { + r.Amount += amount + r.Amount &= 63 + return r +} + +// OutMask provides a mask representing the selected bits. +func (r RotateParams) OutMask() uint64 { + // Note: z must be unsigned for bootstrap compiler + z := uint8(63-r.End+r.Start) & 63 // number of zero bits in mask + return bits.RotateLeft64(^uint64(0)<> 1, outMask: ^uint64(0) >> 1}, + {start: 0, end: 62, amount: 0, inMask: ^uint64(1), outMask: ^uint64(1)}, + {start: 1, end: 62, amount: 0, inMask: ^uint64(3) >> 1, outMask: ^uint64(3) >> 1}, + + // end before start, no rotation + {start: 63, end: 0, amount: 0, inMask: 1<<63 | 1, outMask: 1<<63 | 1}, + {start: 62, end: 0, amount: 0, inMask: 1<<63 | 3, outMask: 1<<63 | 3}, + {start: 63, end: 1, amount: 0, inMask: 3<<62 | 1, outMask: 3<<62 | 1}, + {start: 62, end: 1, amount: 0, inMask: 3<<62 | 3, outMask: 3<<62 | 3}, + + // rotation + {start: 32, end: 63, amount: 32, inMask: 0xffffffff00000000, outMask: 0x00000000ffffffff}, + {start: 48, end: 15, amount: 16, inMask: 0xffffffff00000000, outMask: 0xffff00000000ffff}, + {start: 0, end: 7, amount: -8 & 63, inMask: 0xff, outMask: 0xff << 56}, + } + for i, test := range tests { + r := NewRotateParams(test.start, test.end, test.amount) + if m := r.OutMask(); m != test.outMask { + t.Errorf("out mask %v: want %#x, got %#x", i, test.outMask, m) + } + if m := r.InMask(); m != test.inMask { + t.Errorf("in mask %v: want %#x, got %#x", i, test.inMask, m) + } + } +} + +func TestRotateParamsMerge(t *testing.T) { + tests := []struct { + // inputs + src RotateParams + mask uint64 + + // results + in *RotateParams + out *RotateParams + }{ + { + src: RotateParams{Start: 48, End: 15, Amount: 16}, + mask: 0xffffffffffffffff, + in: &RotateParams{Start: 48, End: 15, Amount: 16}, + out: &RotateParams{Start: 48, End: 15, Amount: 16}, + }, + { + src: RotateParams{Start: 16, End: 47, Amount: 0}, + mask: 0x00000000ffffffff, + in: &RotateParams{Start: 32, End: 47, Amount: 0}, + out: &RotateParams{Start: 32, End: 47, Amount: 0}, + }, + { + src: RotateParams{Start: 16, End: 47, Amount: 0}, + mask: 0xffff00000000ffff, + in: nil, + out: nil, + }, + { + src: RotateParams{Start: 0, End: 63, Amount: 0}, + mask: 0xf7f0000000000000, + in: nil, + out: nil, + }, + { + src: RotateParams{Start: 0, End: 63, Amount: 1}, + mask: 0x000000000000ff00, + in: &RotateParams{Start: 47, End: 54, Amount: 1}, + out: &RotateParams{Start: 48, End: 55, Amount: 1}, + }, + { + src: RotateParams{Start: 32, End: 63, Amount: 32}, + mask: 0xffff00000000ffff, + in: &RotateParams{Start: 32, End: 47, Amount: 32}, + out: &RotateParams{Start: 48, End: 63, Amount: 32}, + }, + { + src: RotateParams{Start: 0, End: 31, Amount: 32}, + mask: 0x8000000000000000, + in: nil, + out: &RotateParams{Start: 0, End: 0, Amount: 32}, + }, + { + src: RotateParams{Start: 0, End: 31, Amount: 32}, + mask: 0x0000000080000000, + in: &RotateParams{Start: 0, End: 0, Amount: 32}, + out: nil, + }, + } + + eq := func(x, y *RotateParams) bool { + if x == nil && y == nil { + return true + } + if x == nil || y == nil { + return false + } + return *x == *y + } + + for _, test := range tests { + if r := test.src.InMerge(test.mask); !eq(r, test.in) { + t.Errorf("%v merged with %#x (input): want %v, got %v", test.src, test.mask, test.in, r) + } + if r := test.src.OutMerge(test.mask); !eq(r, test.out) { + t.Errorf("%v merged with %#x (output): want %v, got %v", test.src, test.mask, test.out, r) + } + } +} diff --git a/test/codegen/bitfield.go b/test/codegen/bitfield.go index 08788f1447..7abc1c2783 100644 --- a/test/codegen/bitfield.go +++ b/test/codegen/bitfield.go @@ -127,11 +127,13 @@ func sbfx6(x int32) int32 { // ubfiz func ubfiz1(x uint64) uint64 { // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND" + // s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND" return (x & 0xfff) << 3 } func ubfiz2(x uint64) uint64 { // arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND" + // s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND" return (x << 4) & 0xfff0 } @@ -149,6 +151,7 @@ func ubfiz5(x uint8) uint64 { func ubfiz6(x uint64) uint64 { // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD" return (x << 4) >> 3 } @@ -159,6 +162,7 @@ func ubfiz7(x uint32) uint32 { func ubfiz8(x uint64) uint64 { // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND" return ((x & 0xfffff) << 4) >> 3 } @@ -169,17 +173,20 @@ func ubfiz9(x uint64) uint64 { func ubfiz10(x uint64) uint64 { // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND" return ((x << 5) & (0xfff << 5)) << 2 } // ubfx func ubfx1(x uint64) uint64 { // arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND" return (x >> 25) & 1023 } func ubfx2(x uint64) uint64 { // arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND" return (x & 0x0ff0) >> 4 } @@ -196,30 +203,37 @@ func ubfx5(x uint8) uint64 { } func ubfx6(x uint64) uint64 { - return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR" + // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD" + return (x << 1) >> 2 } func ubfx7(x uint32) uint32 { - return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR" + // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR" + return (x << 1) >> 2 } func ubfx8(x uint64) uint64 { // arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND" return ((x << 1) >> 2) & 0xfff } func ubfx9(x uint64) uint64 { // arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND" return ((x >> 3) & 0xfff) >> 1 } func ubfx10(x uint64) uint64 { // arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD" return ((x >> 2) << 5) >> 8 } func ubfx11(x uint64) uint64 { // arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND" return ((x & 0xfffff) << 3) >> 4 } diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 398dd84e9e..56e0f3474e 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -340,3 +340,15 @@ func bitSetTest(x int) bool { // amd64:"CMPQ\tAX, [$]9" return x&9 == 9 } + +// mask contiguous one bits +func cont1Mask64U(x uint64) uint64 { + // s390x:"RISBGZ\t[$]16, [$]47, [$]0," + return x&0x0000ffffffff0000 +} + +// mask contiguous zero bits +func cont0Mask64U(x uint64) uint64 { + // s390x:"RISBGZ\t[$]48, [$]15, [$]0," + return x&0xffff00000000ffff +} diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 4c35f26997..fff6639546 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -213,7 +213,7 @@ func RotateLeft64(n uint64) uint64 { // arm64:"ROR" // ppc64:"ROTL" // ppc64le:"ROTL" - // s390x:"RLLG" + // s390x:"RISBGZ\t[$]0, [$]63, [$]37, " // wasm:"I64Rotl" return bits.RotateLeft64(n, 37) } diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index 0c8b030970..e0bcd0abbc 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -17,21 +17,21 @@ func rot64(x uint64) uint64 { // amd64:"ROLQ\t[$]7" // arm64:"ROR\t[$]57" - // s390x:"RLLG\t[$]7" + // s390x:"RISBGZ\t[$]0, [$]63, [$]7, " // ppc64:"ROTL\t[$]7" // ppc64le:"ROTL\t[$]7" a += x<<7 | x>>57 // amd64:"ROLQ\t[$]8" // arm64:"ROR\t[$]56" - // s390x:"RLLG\t[$]8" + // s390x:"RISBGZ\t[$]0, [$]63, [$]8, " // ppc64:"ROTL\t[$]8" // ppc64le:"ROTL\t[$]8" a += x<<8 + x>>56 // amd64:"ROLQ\t[$]9" // arm64:"ROR\t[$]55" - // s390x:"RLLG\t[$]9" + // s390x:"RISBGZ\t[$]0, [$]63, [$]9, " // ppc64:"ROTL\t[$]9" // ppc64le:"ROTL\t[$]9" a += x<<9 ^ x>>55 diff --git a/test/codegen/shift.go b/test/codegen/shift.go index a45f27c9cf..d19a1984c1 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -11,84 +11,84 @@ package codegen // ------------------ // func lshMask64x64(v int64, s uint64) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v << (s & 63) } func rshMask64Ux64(v uint64, s uint64) uint64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> (s & 63) } func rshMask64x64(v int64, s uint64) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> (s & 63) } func lshMask32x64(v int32, s uint64) int32 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ISEL",-"ORN" // ppc64:"ISEL",-"ORN" return v << (s & 63) } func rshMask32Ux64(v uint32, s uint64) uint32 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ISEL",-"ORN" // ppc64:"ISEL",-"ORN" return v >> (s & 63) } func rshMask32x64(v int32, s uint64) int32 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ISEL",-"ORN" // ppc64:"ISEL",-"ORN" return v >> (s & 63) } func lshMask64x32(v int64, s uint32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN" // ppc64:"ANDCC",-"ORN" return v << (s & 63) } func rshMask64Ux32(v uint64, s uint32) uint64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN" // ppc64:"ANDCC",-"ORN" return v >> (s & 63) } func rshMask64x32(v int64, s uint32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> (s & 63) } func lshMask64x32Ext(v int64, s int32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v << uint(s&63) } func rshMask64Ux32Ext(v uint64, s int32) uint64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> uint(s&63) } func rshMask64x32Ext(v int64, s int32) int64 { - // s390x:-".*AND",-".*MOVDGE" + // s390x:-"RISBGZ",-"AND",-"LOCGR" // ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64:"ANDCC",-"ORN",-"ISEL" return v >> uint(s&63) @@ -128,7 +128,8 @@ func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) { func rshGuarded64(v int64, s uint) int64 { if s < 64 { - // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" return v >> s } panic("shift too large") @@ -136,7 +137,8 @@ func rshGuarded64(v int64, s uint) int64 { func rshGuarded64U(v uint64, s uint) uint64 { if s < 64 { - // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" return v >> s } panic("shift too large") @@ -144,7 +146,8 @@ func rshGuarded64U(v uint64, s uint) uint64 { func lshGuarded64(v int64, s uint) int64 { if s < 64 { - // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" return v << s } panic("shift too large") -- cgit v1.3 From 189931296f6b56090d9d7f49b7936b817189d87d Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 10 Nov 2020 02:31:28 -0800 Subject: cmd/internal/obj/s390x: fix SYNC instruction encoding SYNC is supposed to correspond to 'fast-BCR-serialization' which is encoded as 'bcr 14,0'. In CL 197178 I accidentally modified the encoding to 'bcr 7,0' which is a no-op. This CL reverses that change. Fixes #42479. Change-Id: I9918d93d720f5e12acc3014cde20d2d32cc87ee5 Reviewed-on: https://go-review.googlesource.com/c/go/+/268797 Run-TryBot: Michael Munday Trust: Michael Munday TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/asm/internal/asm/testdata/s390x.s | 2 ++ src/cmd/internal/obj/s390x/asmz.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s index 03b84cfa62..7c5d26be33 100644 --- a/src/cmd/asm/internal/asm/testdata/s390x.s +++ b/src/cmd/asm/internal/asm/testdata/s390x.s @@ -412,6 +412,8 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- UNDEF // 00000000 NOPH // 0700 + SYNC // 07e0 + // vector add and sub instructions VAB V3, V4, V4 // e743400000f3 VAH V3, V4, V4 // e743400010f3 diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index f0f9d5cefc..06921085c9 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -3700,7 +3700,7 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { } case 80: // sync - zRR(op_BCR, uint32(NotEqual), 0, asm) + zRR(op_BCR, 14, 0, asm) // fast-BCR-serialization case 81: // float to fixed and fixed to float moves (no conversion) switch p.As { -- cgit v1.3 From 9ef65ff137c17bb7975859017abae4e27600c684 Mon Sep 17 00:00:00 2001 From: Alessandro Arzilli Date: Tue, 10 Nov 2020 17:02:27 +0100 Subject: cmd/compile: do not emit an extra debug_line entry for the end of seq addr Uses DW_LNS_advance_pc directly, instead of calling putpclcdelta because the latter will create a new debug_line entry for the end of sequence address. Fixes #42484 Change-Id: Ib6355605cac101b9bf37a3b4961ab0cee678a839 Reviewed-on: https://go-review.googlesource.com/c/go/+/268937 Trust: Than McIntosh Run-TryBot: Than McIntosh TryBot-Result: Go Bot Reviewed-by: Than McIntosh Reviewed-by: Cherry Zhang --- src/cmd/internal/obj/dwarf.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go index 328fb03b24..87c62e2981 100644 --- a/src/cmd/internal/obj/dwarf.go +++ b/src/cmd/internal/obj/dwarf.go @@ -104,7 +104,8 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { // GDB will assign a line number of zero the last row in the line // table, which we don't want. lastlen := uint64(s.Size - (lastpc - s.Func().Text.Pc)) - putpclcdelta(ctxt, dctxt, lines, lastlen, 0) + dctxt.AddUint8(lines, dwarf.DW_LNS_advance_pc) + dwarf.Uleb128put(dctxt, lines, int64(lastlen)) dctxt.AddUint8(lines, 0) // start extended opcode dwarf.Uleb128put(dctxt, lines, 1) dctxt.AddUint8(lines, dwarf.DW_LNE_end_sequence) -- cgit v1.3 From cb674b5c13d331fd5cef5bae7a2a67e5e0d41f7d Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 13 Nov 2020 14:44:25 -0600 Subject: cmd/compile,cmd/asm: fix function pointer call perf regression on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit by inserting hint when using bclrl. Using this instruction as subroutine call is not the expected default behavior, and as a result confuses the branch predictor. The default expected behavior is a conditional return from a subroutine. We can change this assumption by encoding a hint this is not a subroutine return. The regex benchmarks are a pretty good example of how much this hint can help generic ppc64le code on a power9 machine: name old time/op new time/op delta Find 606ns ± 0% 447ns ± 0% -26.27% FindAllNoMatches 309ns ± 0% 205ns ± 0% -33.72% FindString 609ns ± 0% 451ns ± 0% -26.04% FindSubmatch 734ns ± 0% 594ns ± 0% -19.07% FindStringSubmatch 706ns ± 0% 574ns ± 0% -18.83% Literal 177ns ± 0% 136ns ± 0% -22.89% NotLiteral 4.69µs ± 0% 2.34µs ± 0% -50.14% MatchClass 6.05µs ± 0% 3.26µs ± 0% -46.08% MatchClass_InRange 5.93µs ± 0% 3.15µs ± 0% -46.86% ReplaceAll 3.15µs ± 0% 2.18µs ± 0% -30.77% AnchoredLiteralShortNonMatch 156ns ± 0% 109ns ± 0% -30.61% AnchoredLiteralLongNonMatch 192ns ± 0% 136ns ± 0% -29.34% AnchoredShortMatch 268ns ± 0% 209ns ± 0% -22.00% AnchoredLongMatch 472ns ± 0% 357ns ± 0% -24.30% OnePassShortA 1.16µs ± 0% 0.87µs ± 0% -25.03% NotOnePassShortA 1.34µs ± 0% 1.20µs ± 0% -10.63% OnePassShortB 940ns ± 0% 655ns ± 0% -30.29% NotOnePassShortB 873ns ± 0% 703ns ± 0% -19.52% OnePassLongPrefix 258ns ± 0% 155ns ± 0% -40.13% OnePassLongNotPrefix 943ns ± 0% 529ns ± 0% -43.89% MatchParallelShared 591ns ± 0% 436ns ± 0% -26.31% MatchParallelCopied 596ns ± 0% 435ns ± 0% -27.10% QuoteMetaAll 186ns ± 0% 186ns ± 0% -0.16% QuoteMetaNone 55.9ns ± 0% 55.9ns ± 0% +0.02% Compile/Onepass 9.64µs ± 0% 9.26µs ± 0% -3.97% Compile/Medium 21.7µs ± 0% 20.6µs ± 0% -4.90% Compile/Hard 174µs ± 0% 174µs ± 0% +0.07% Match/Easy0/16 7.35ns ± 0% 7.34ns ± 0% -0.11% Match/Easy0/32 116ns ± 0% 97ns ± 0% -16.27% Match/Easy0/1K 592ns ± 0% 562ns ± 0% -5.04% Match/Easy0/32K 12.6µs ± 0% 12.5µs ± 0% -0.64% Match/Easy0/1M 556µs ± 0% 556µs ± 0% -0.00% Match/Easy0/32M 17.7ms ± 0% 17.7ms ± 0% +0.05% Match/Easy0i/16 7.34ns ± 0% 7.35ns ± 0% +0.10% Match/Easy0i/32 2.82µs ± 0% 1.64µs ± 0% -41.71% Match/Easy0i/1K 83.2µs ± 0% 48.2µs ± 0% -42.06% Match/Easy0i/32K 2.13ms ± 0% 1.80ms ± 0% -15.34% Match/Easy0i/1M 68.1ms ± 0% 57.6ms ± 0% -15.31% Match/Easy0i/32M 2.18s ± 0% 1.80s ± 0% -17.52% Match/Easy1/16 7.36ns ± 0% 7.34ns ± 0% -0.24% Match/Easy1/32 118ns ± 0% 96ns ± 0% -18.72% Match/Easy1/1K 2.46µs ± 0% 1.58µs ± 0% -35.65% Match/Easy1/32K 80.2µs ± 0% 54.6µs ± 0% -31.92% Match/Easy1/1M 2.75ms ± 0% 1.88ms ± 0% -31.66% Match/Easy1/32M 87.5ms ± 0% 59.8ms ± 0% -31.62% Match/Medium/16 7.34ns ± 0% 7.34ns ± 0% +0.01% Match/Medium/32 2.60µs ± 0% 1.50µs ± 0% -42.61% Match/Medium/1K 78.1µs ± 0% 43.7µs ± 0% -44.06% Match/Medium/32K 2.08ms ± 0% 1.52ms ± 0% -27.11% Match/Medium/1M 66.5ms ± 0% 48.6ms ± 0% -26.96% Match/Medium/32M 2.14s ± 0% 1.60s ± 0% -25.18% Match/Hard/16 7.35ns ± 0% 7.35ns ± 0% +0.03% Match/Hard/32 3.58µs ± 0% 2.44µs ± 0% -31.82% Match/Hard/1K 108µs ± 0% 75µs ± 0% -31.04% Match/Hard/32K 2.79ms ± 0% 2.25ms ± 0% -19.30% Match/Hard/1M 89.4ms ± 0% 72.2ms ± 0% -19.26% Match/Hard/32M 2.91s ± 0% 2.37s ± 0% -18.60% Match/Hard1/16 11.1µs ± 0% 8.3µs ± 0% -25.07% Match/Hard1/32 21.4µs ± 0% 16.1µs ± 0% -24.85% Match/Hard1/1K 658µs ± 0% 498µs ± 0% -24.27% Match/Hard1/32K 12.2ms ± 0% 11.7ms ± 0% -4.60% Match/Hard1/1M 391ms ± 0% 374ms ± 0% -4.40% Match/Hard1/32M 12.6s ± 0% 12.0s ± 0% -4.68% Match_onepass_regex/16 870ns ± 0% 611ns ± 0% -29.79% Match_onepass_regex/32 1.58µs ± 0% 1.08µs ± 0% -31.48% Match_onepass_regex/1K 45.7µs ± 0% 30.3µs ± 0% -33.58% Match_onepass_regex/32K 1.45ms ± 0% 0.97ms ± 0% -33.20% Match_onepass_regex/1M 46.2ms ± 0% 30.9ms ± 0% -33.01% Match_onepass_regex/32M 1.46s ± 0% 0.99s ± 0% -32.02% name old alloc/op new alloc/op delta Find 0.00B 0.00B 0.00% FindAllNoMatches 0.00B 0.00B 0.00% FindString 0.00B 0.00B 0.00% FindSubmatch 48.0B ± 0% 48.0B ± 0% 0.00% FindStringSubmatch 32.0B ± 0% 32.0B ± 0% 0.00% Compile/Onepass 4.02kB ± 0% 4.02kB ± 0% 0.00% Compile/Medium 9.39kB ± 0% 9.39kB ± 0% 0.00% Compile/Hard 84.7kB ± 0% 84.7kB ± 0% 0.00% Match_onepass_regex/16 0.00B 0.00B 0.00% Match_onepass_regex/32 0.00B 0.00B 0.00% Match_onepass_regex/1K 0.00B 0.00B 0.00% Match_onepass_regex/32K 0.00B 0.00B 0.00% Match_onepass_regex/1M 5.00B ± 0% 3.00B ± 0% -40.00% Match_onepass_regex/32M 136B ± 0% 68B ± 0% -50.00% name old allocs/op new allocs/op delta Find 0.00 0.00 0.00% FindAllNoMatches 0.00 0.00 0.00% FindString 0.00 0.00 0.00% FindSubmatch 1.00 ± 0% 1.00 ± 0% 0.00% FindStringSubmatch 1.00 ± 0% 1.00 ± 0% 0.00% Compile/Onepass 52.0 ± 0% 52.0 ± 0% 0.00% Compile/Medium 112 ± 0% 112 ± 0% 0.00% Compile/Hard 424 ± 0% 424 ± 0% 0.00% Match_onepass_regex/16 0.00 0.00 0.00% Match_onepass_regex/32 0.00 0.00 0.00% Match_onepass_regex/1K 0.00 0.00 0.00% Match_onepass_regex/32K 0.00 0.00 0.00% Match_onepass_regex/1M 0.00 0.00 0.00% Match_onepass_regex/32M 2.00 ± 0% 1.00 ± 0% -50.00% name old speed new speed delta QuoteMetaAll 75.2MB/s ± 0% 75.3MB/s ± 0% +0.15% QuoteMetaNone 465MB/s ± 0% 465MB/s ± 0% -0.02% Match/Easy0/16 2.18GB/s ± 0% 2.18GB/s ± 0% +0.10% Match/Easy0/32 276MB/s ± 0% 330MB/s ± 0% +19.46% Match/Easy0/1K 1.73GB/s ± 0% 1.82GB/s ± 0% +5.29% Match/Easy0/32K 2.60GB/s ± 0% 2.62GB/s ± 0% +0.64% Match/Easy0/1M 1.89GB/s ± 0% 1.89GB/s ± 0% +0.00% Match/Easy0/32M 1.89GB/s ± 0% 1.89GB/s ± 0% -0.05% Match/Easy0i/16 2.18GB/s ± 0% 2.18GB/s ± 0% -0.10% Match/Easy0i/32 11.4MB/s ± 0% 19.5MB/s ± 0% +71.48% Match/Easy0i/1K 12.3MB/s ± 0% 21.2MB/s ± 0% +72.62% Match/Easy0i/32K 15.4MB/s ± 0% 18.2MB/s ± 0% +18.12% Match/Easy0i/1M 15.4MB/s ± 0% 18.2MB/s ± 0% +18.12% Match/Easy0i/32M 15.4MB/s ± 0% 18.6MB/s ± 0% +21.21% Match/Easy1/16 2.17GB/s ± 0% 2.18GB/s ± 0% +0.24% Match/Easy1/32 271MB/s ± 0% 333MB/s ± 0% +23.07% Match/Easy1/1K 417MB/s ± 0% 648MB/s ± 0% +55.38% Match/Easy1/32K 409MB/s ± 0% 600MB/s ± 0% +46.88% Match/Easy1/1M 381MB/s ± 0% 558MB/s ± 0% +46.33% Match/Easy1/32M 383MB/s ± 0% 561MB/s ± 0% +46.25% Match/Medium/16 2.18GB/s ± 0% 2.18GB/s ± 0% -0.01% Match/Medium/32 12.3MB/s ± 0% 21.4MB/s ± 0% +74.13% Match/Medium/1K 13.1MB/s ± 0% 23.4MB/s ± 0% +78.73% Match/Medium/32K 15.7MB/s ± 0% 21.6MB/s ± 0% +37.23% Match/Medium/1M 15.8MB/s ± 0% 21.6MB/s ± 0% +36.93% Match/Medium/32M 15.7MB/s ± 0% 21.0MB/s ± 0% +33.67% Match/Hard/16 2.18GB/s ± 0% 2.18GB/s ± 0% -0.03% Match/Hard/32 8.93MB/s ± 0% 13.10MB/s ± 0% +46.70% Match/Hard/1K 9.48MB/s ± 0% 13.74MB/s ± 0% +44.94% Match/Hard/32K 11.7MB/s ± 0% 14.5MB/s ± 0% +23.87% Match/Hard/1M 11.7MB/s ± 0% 14.5MB/s ± 0% +23.87% Match/Hard/32M 11.6MB/s ± 0% 14.2MB/s ± 0% +22.86% Match/Hard1/16 1.44MB/s ± 0% 1.93MB/s ± 0% +34.03% Match/Hard1/32 1.49MB/s ± 0% 1.99MB/s ± 0% +33.56% Match/Hard1/1K 1.56MB/s ± 0% 2.05MB/s ± 0% +31.41% Match/Hard1/32K 2.68MB/s ± 0% 2.80MB/s ± 0% +4.48% Match/Hard1/1M 2.68MB/s ± 0% 2.80MB/s ± 0% +4.48% Match/Hard1/32M 2.66MB/s ± 0% 2.79MB/s ± 0% +4.89% Match_onepass_regex/16 18.4MB/s ± 0% 26.2MB/s ± 0% +42.41% Match_onepass_regex/32 20.2MB/s ± 0% 29.5MB/s ± 0% +45.92% Match_onepass_regex/1K 22.4MB/s ± 0% 33.8MB/s ± 0% +50.54% Match_onepass_regex/32K 22.6MB/s ± 0% 33.9MB/s ± 0% +49.67% Match_onepass_regex/1M 22.7MB/s ± 0% 33.9MB/s ± 0% +49.27% Match_onepass_regex/32M 23.0MB/s ± 0% 33.9MB/s ± 0% +47.14% Fixes #42709 Change-Id: Ice07fec2de4c5b1302febf8c2978ae8c1e4fd3e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/271337 Reviewed-by: Cherry Zhang Trust: Lynn Boger Trust: Carlos Eduardo Seo --- src/cmd/compile/internal/ppc64/ssa.go | 3 +++ src/cmd/internal/obj/ppc64/asm9.go | 11 +++++++++++ 2 files changed, 14 insertions(+) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 3888aa6527..3e20c44a4c 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -1781,6 +1781,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { pp := s.Call(v) pp.To.Reg = ppc64.REG_LR + // Insert a hint this is not a subroutine return. + pp.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: 1}) + if gc.Ctxt.Flag_shared { // When compiling Go into PIC, the function we just // called via pointer might have been implemented in diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 775d27d8e8..41e263b2c0 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -334,6 +334,7 @@ var optab = []Optab{ {ABC, C_SCON, C_REG, C_NONE, C_SBRA, 16, 4, 0}, {ABC, C_SCON, C_REG, C_NONE, C_LBRA, 17, 4, 0}, {ABR, C_NONE, C_NONE, C_NONE, C_LR, 18, 4, 0}, + {ABR, C_NONE, C_NONE, C_SCON, C_LR, 18, 4, 0}, {ABR, C_NONE, C_NONE, C_NONE, C_CTR, 18, 4, 0}, {ABR, C_REG, C_NONE, C_NONE, C_CTR, 18, 4, 0}, {ABR, C_NONE, C_NONE, C_NONE, C_ZOREG, 15, 8, 0}, @@ -2844,6 +2845,7 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { case 18: /* br/bl (lr/ctr); bc/bcl bo,bi,(lr/ctr) */ var v int32 + var bh uint32 = 0 if p.As == ABC || p.As == ABCL { v = c.regoff(&p.From) & 31 } else { @@ -2865,6 +2867,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) { v = 0 } + // Insert optional branch hint for bclr[l]/bcctr[l] + if p.From3Type() != obj.TYPE_NONE { + bh = uint32(p.GetFrom3().Offset) + if bh == 2 || bh > 3 { + log.Fatalf("BH must be 0,1,3 for %v", p) + } + o1 |= bh << 11 + } + if p.As == ABL || p.As == ABCL { o1 |= 1 } -- cgit v1.3 From f5978a09589badb927d3aa96998fc785524cae02 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sun, 22 Nov 2020 11:52:55 +1100 Subject: cmd/internal/obj/riscv: add tests for BGE/BGEU/BLT/BLTU Add tests for BGE/BGEU/BLT/BLTU branch instructions. Also add pure Go variants of these to ensure that the test data, Go and assembly all match up. Change-Id: I84c68605e116a4e57f6c5c765bf0aaecab84b675 Reviewed-on: https://go-review.googlesource.com/c/go/+/271913 Trust: Joel Sing Reviewed-by: Quey-Liang Kao Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot --- .../obj/riscv/testdata/testbranch/branch_test.go | 54 +++++++++++++++++++++- .../obj/riscv/testdata/testbranch/branch_test.s | 44 ++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go index 803ba8c77c..5412577a05 100644 --- a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go @@ -11,6 +11,8 @@ import ( ) func testBEQZ(a int64) (r bool) +func testBGE(a, b int64) (r bool) +func testBGEU(a, b int64) (r bool) func testBGEZ(a int64) (r bool) func testBGT(a, b int64) (r bool) func testBGTU(a, b int64) (r bool) @@ -18,6 +20,8 @@ func testBGTZ(a int64) (r bool) func testBLE(a, b int64) (r bool) func testBLEU(a, b int64) (r bool) func testBLEZ(a int64) (r bool) +func testBLT(a, b int64) (r bool) +func testBLTU(a, b int64) (r bool) func testBLTZ(a int64) (r bool) func testBNEZ(a int64) (r bool) @@ -29,6 +33,16 @@ func TestBranchCondition(t *testing.T) { fn func(a, b int64) bool want bool }{ + {"BGE", 0, 1, testBGE, false}, + {"BGE", 0, 0, testBGE, true}, + {"BGE", 0, -1, testBGE, true}, + {"BGE", -1, 0, testBGE, false}, + {"BGE", 1, 0, testBGE, true}, + {"BGEU", 0, 1, testBGEU, false}, + {"BGEU", 0, 0, testBGEU, true}, + {"BGEU", 0, -1, testBGEU, false}, + {"BGEU", -1, 0, testBGEU, true}, + {"BGEU", 1, 0, testBGEU, true}, {"BGT", 0, 1, testBGT, true}, {"BGT", 0, 0, testBGT, false}, {"BGT", 0, -1, testBGT, false}, @@ -48,11 +62,49 @@ func TestBranchCondition(t *testing.T) { {"BLEU", 0, 0, testBLEU, true}, {"BLEU", -1, 0, testBLEU, true}, {"BLEU", 1, 0, testBLEU, true}, + {"BLT", 0, 1, testBLT, true}, + {"BLT", 0, -1, testBLT, false}, + {"BLT", 0, 0, testBLT, false}, + {"BLT", -1, 0, testBLT, true}, + {"BLT", 1, 0, testBLT, false}, + {"BLTU", 0, 1, testBLTU, true}, + {"BLTU", 0, -1, testBLTU, true}, + {"BLTU", 0, 0, testBLTU, false}, + {"BLTU", -1, 0, testBLTU, false}, + {"BLTU", 1, 0, testBLTU, false}, } for _, test := range tests { t.Run(test.ins, func(t *testing.T) { + var fn func(a, b int64) bool + switch test.ins { + case "BGE": + fn = func(a, b int64) bool { return a >= b } + case "BGEU": + fn = func(a, b int64) bool { return uint64(a) >= uint64(b) } + case "BGT": + // TODO: Currently reversed. + fn = func(a, b int64) bool { return b > a } + case "BGTU": + // TODO: Currently reversed. + fn = func(a, b int64) bool { return uint64(b) > uint64(a) } + case "BLE": + // TODO: Currently reversed. + fn = func(a, b int64) bool { return b <= a } + case "BLEU": + // TODO: Currently reversed. + fn = func(a, b int64) bool { return uint64(b) <= uint64(a) } + case "BLT": + fn = func(a, b int64) bool { return a < b } + case "BLTU": + fn = func(a, b int64) bool { return uint64(a) < uint64(b) } + default: + t.Fatalf("Unknown instruction %q", test.ins) + } + if got := fn(test.a, test.b); got != test.want { + t.Errorf("Go %v %v, %v = %v, want %v", test.ins, test.a, test.b, got, test.want) + } if got := test.fn(test.a, test.b); got != test.want { - t.Errorf("%v %v, %v = %v, want %v", test.ins, test.a, test.b, got, test.want) + t.Errorf("Assembly %v %v, %v = %v, want %v", test.ins, test.a, test.b, got, test.want) } }) } diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s index 6cff235848..8dd6f563af 100644 --- a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s @@ -16,6 +16,28 @@ b: MOV X6, r+8(FP) RET +// func testBGE(a, b int64) (r bool) +TEXT ·testBGE(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BGE X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + +// func testBGEU(a, b int64) (r bool) +TEXT ·testBGEU(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BGEU X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + // func testBGEZ(a int64) (r bool) TEXT ·testBGEZ(SB),NOSPLIT,$0-0 MOV a+0(FP), X5 @@ -90,6 +112,28 @@ b: MOV X6, r+8(FP) RET +// func testBLT(a, b int64) (r bool) +TEXT ·testBLT(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BLT X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + +// func testBLTU(a, b int64) (r bool) +TEXT ·testBLTU(SB),NOSPLIT,$0-0 + MOV a+0(FP), X5 + MOV b+8(FP), X6 + MOV $1, X7 + BLTU X5, X6, b + MOV $0, X7 +b: + MOV X7, r+16(FP) + RET + // func testBLTZ(a int64) (r bool) TEXT ·testBLTZ(SB),NOSPLIT,$0-0 MOV a+0(FP), X5 -- cgit v1.3 From 0433845ad18a355413033bb3495ba3195f4c69ec Mon Sep 17 00:00:00 2001 From: Quey-Liang Kao Date: Sat, 21 Nov 2020 22:48:55 +0800 Subject: cmd/asm, cmd/internal/obj/riscv: fix branch pseudo-instructions Pseudo branch instructions BGT, BGTU, BLE, and BLEU implemented In CL 226397 were translated inconsistently compared to other ones due to the inversion of registers. For instance, while "BLT a, b" generates "jump if a < b", "BLE a, b" generates "jump if b <= a." This CL fixes the translation in the assembler and the tests. Change-Id: Ia757be73e848734ca5b3a790e081f7c4f98c30f2 Reviewed-on: https://go-review.googlesource.com/c/go/+/271911 Reviewed-by: Cherry Zhang Reviewed-by: Joel Sing Run-TryBot: Joel Sing --- src/cmd/asm/internal/asm/testdata/riscvenc.s | 8 ++-- src/cmd/internal/obj/riscv/obj.go | 8 ++-- .../obj/riscv/testdata/testbranch/branch_test.go | 49 ++++++++++------------ 3 files changed, 31 insertions(+), 34 deletions(-) (limited to 'src/cmd/internal/obj') diff --git a/src/cmd/asm/internal/asm/testdata/riscvenc.s b/src/cmd/asm/internal/asm/testdata/riscvenc.s index e30a576473..9a49d96ca0 100644 --- a/src/cmd/asm/internal/asm/testdata/riscvenc.s +++ b/src/cmd/asm/internal/asm/testdata/riscvenc.s @@ -340,11 +340,11 @@ start: // Branch pseudo-instructions BEQZ X5, start // BEQZ X5, 2 // e38602c0 BGEZ X5, start // BGEZ X5, 2 // e3d402c0 - BGT X5, X6, start // BGT X5, X6, 2 // e3c262c0 - BGTU X5, X6, start // BGTU X5, X6, 2 // e3e062c0 + BGT X5, X6, start // BGT X5, X6, 2 // e34253c0 + BGTU X5, X6, start // BGTU X5, X6, 2 // e36053c0 BGTZ X5, start // BGTZ X5, 2 // e34e50be - BLE X5, X6, start // BLE X5, X6, 2 // e3dc62be - BLEU X5, X6, start // BLEU X5, X6, 2 // e3fa62be + BLE X5, X6, start // BLE X5, X6, 2 // e35c53be + BLEU X5, X6, start // BLEU X5, X6, 2 // e37a53be BLEZ X5, start // BLEZ X5, 2 // e35850be BLTZ X5, start // BLTZ X5, 2 // e3c602be BNEZ X5, start // BNEZ X5, 2 // e39402be diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 0cffa54fa6..9257a6453a 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -1777,15 +1777,15 @@ func instructionsForProg(p *obj.Prog) []*instruction { case ABGEZ: ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg) case ABGT: - ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg) case ABGTU: - ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg) case ABGTZ: ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO case ABLE: - ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg) case ABLEU: - ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.Reg), uint32(p.From.Reg) + ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg) case ABLEZ: ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO case ABLTZ: diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go index 5412577a05..279aeb2c32 100644 --- a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go +++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go @@ -43,33 +43,34 @@ func TestBranchCondition(t *testing.T) { {"BGEU", 0, -1, testBGEU, false}, {"BGEU", -1, 0, testBGEU, true}, {"BGEU", 1, 0, testBGEU, true}, - {"BGT", 0, 1, testBGT, true}, + {"BGT", 0, 1, testBGT, false}, {"BGT", 0, 0, testBGT, false}, - {"BGT", 0, -1, testBGT, false}, - {"BGT", -1, 0, testBGT, true}, - {"BGT", 1, 0, testBGT, false}, - {"BGTU", 0, 1, testBGTU, true}, - {"BGTU", 0, -1, testBGTU, true}, - {"BGTU", -1, 0, testBGTU, false}, - {"BGTU", 1, 0, testBGTU, false}, - {"BLE", 0, 1, testBLE, false}, - {"BLE", 0, -1, testBLE, true}, + {"BGT", 0, -1, testBGT, true}, + {"BGT", -1, 0, testBGT, false}, + {"BGT", 1, 0, testBGT, true}, + {"BGTU", 0, 1, testBGTU, false}, + {"BGTU", 0, 0, testBGTU, false}, + {"BGTU", 0, -1, testBGTU, false}, + {"BGTU", -1, 0, testBGTU, true}, + {"BGTU", 1, 0, testBGTU, true}, + {"BLE", 0, 1, testBLE, true}, {"BLE", 0, 0, testBLE, true}, - {"BLE", -1, 0, testBLE, false}, - {"BLE", 1, 0, testBLE, true}, - {"BLEU", 0, 1, testBLEU, false}, - {"BLEU", 0, -1, testBLEU, false}, + {"BLE", 0, -1, testBLE, false}, + {"BLE", -1, 0, testBLE, true}, + {"BLE", 1, 0, testBLE, false}, + {"BLEU", 0, 1, testBLEU, true}, {"BLEU", 0, 0, testBLEU, true}, - {"BLEU", -1, 0, testBLEU, true}, - {"BLEU", 1, 0, testBLEU, true}, + {"BLEU", 0, -1, testBLEU, true}, + {"BLEU", -1, 0, testBLEU, false}, + {"BLEU", 1, 0, testBLEU, false}, {"BLT", 0, 1, testBLT, true}, - {"BLT", 0, -1, testBLT, false}, {"BLT", 0, 0, testBLT, false}, + {"BLT", 0, -1, testBLT, false}, {"BLT", -1, 0, testBLT, true}, {"BLT", 1, 0, testBLT, false}, {"BLTU", 0, 1, testBLTU, true}, - {"BLTU", 0, -1, testBLTU, true}, {"BLTU", 0, 0, testBLTU, false}, + {"BLTU", 0, -1, testBLTU, true}, {"BLTU", -1, 0, testBLTU, false}, {"BLTU", 1, 0, testBLTU, false}, } @@ -82,17 +83,13 @@ func TestBranchCondition(t *testing.T) { case "BGEU": fn = func(a, b int64) bool { return uint64(a) >= uint64(b) } case "BGT": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return b > a } + fn = func(a, b int64) bool { return a > b } case "BGTU": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return uint64(b) > uint64(a) } + fn = func(a, b int64) bool { return uint64(a) > uint64(b) } case "BLE": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return b <= a } + fn = func(a, b int64) bool { return a <= b } case "BLEU": - // TODO: Currently reversed. - fn = func(a, b int64) bool { return uint64(b) <= uint64(a) } + fn = func(a, b int64) bool { return uint64(a) <= uint64(b) } case "BLT": fn = func(a, b int64) bool { return a < b } case "BLTU": -- cgit v1.3