From cfc2aa56b0bf6b7dfb8f38cd2cfbe8799fc5a31a Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 18 May 2015 16:44:20 -0700 Subject: [dev.ssa] cmd/internal/ssa: Handle more instructions + some cleanup Add & as an input op. Add several output ops (loads & stores, TESTB, LEAQglobal, branches, memcopy) Some other small things: - Add exprAddr to builder to generate addresses of expressions. Use it in various places that had ad-hoc code. - Separate out nil & bounds check generation to separate functions. - Add explicit FP and SP ops so we don't need specialized *FP and *SP opcodes. - Fix fallthrough at end of functions with no return values. - rematerialization of more opcodes. Change-Id: I781decfcef9770fb15f0cd6b061547f7824a2d5e Reviewed-on: https://go-review.googlesource.com/10213 Reviewed-by: Alan Donovan --- src/cmd/internal/obj/x86/6.out.go | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'src/cmd/internal/obj/x86') diff --git a/src/cmd/internal/obj/x86/6.out.go b/src/cmd/internal/obj/x86/6.out.go index c7f46e1801..e36cb9e7a3 100644 --- a/src/cmd/internal/obj/x86/6.out.go +++ b/src/cmd/internal/obj/x86/6.out.go @@ -110,23 +110,23 @@ const ( AINTO AIRETL AIRETW - AJCC - AJCS + AJCC // >= unsigned + AJCS // < unsigned AJCXZL - AJEQ - AJGE - AJGT - AJHI - AJLE - AJLS - AJLT - AJMI - AJNE - AJOC - AJOS - AJPC - AJPL - AJPS + AJEQ // == (zero) + AJGE // >= signed + AJGT // > signed + AJHI // > unsigned + AJLE // <= signed + AJLS // <= unsigned + AJLT // < signed + AJMI // sign bit set (negative) + AJNE // != (nonzero) + AJOC // overflow clear + AJOS // overflow set + AJPC // parity clear + AJPL // sign bit clear (positive) + AJPS // parity set ALAHF ALARL ALARW -- cgit v1.3 From 3d23afb9133c151404635f2476bf895028b972bc Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 12 Aug 2015 11:22:16 -0700 Subject: [dev.ssa] cmd/compile: implement OGETG Change-Id: I7ecf62cf399c710b4a617803c43e83fce09b8a7d 
Reviewed-on: https://go-review.googlesource.com/13585 Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/ssa.go | 31 ++++++++++++++++++++++++++ src/cmd/compile/internal/ssa/gen/AMD64.rules | 1 + src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 3 ++- src/cmd/compile/internal/ssa/gen/generic.rules | 2 ++ src/cmd/compile/internal/ssa/gen/genericOps.go | 2 ++ src/cmd/compile/internal/ssa/opGen.go | 10 +++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 14 ++++++++++++ src/cmd/compile/internal/ssa/rewritegeneric.go | 24 ++++++++++++++++++++ src/cmd/internal/obj/x86/obj6.go | 4 ++-- 9 files changed, 88 insertions(+), 3 deletions(-) (limited to 'src/cmd/internal/obj/x86') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 0086feceab..c8ec01f5b6 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -1340,6 +1340,10 @@ func (s *state) expr(n *Node) *ssa.Value { } a := s.entryNewValue1I(ssa.OpOffPtr, Ptrto(fp.Type), fp.Width, s.sp) return s.newValue2(ssa.OpLoad, fp.Type, a, call) + + case OGETG: + return s.newValue0(ssa.OpGetG, n.Type) + default: s.Unimplementedf("unhandled expr %s", opnames[n.Op]) return nil @@ -2185,6 +2189,33 @@ func genValue(v *ssa.Value) { q.From.Reg = x86.REG_AX q.To.Type = obj.TYPE_MEM q.To.Reg = x86.REG_AX + case ssa.OpAMD64LoweredGetG: + r := regnum(v) + // See the comments in cmd/internal/obj/x86/obj6.go + // near CanUse1InsnTLS for a detailed explanation of these instructions. 
+ if x86.CanUse1InsnTLS(Ctxt) { + // MOVQ (TLS), r + p := Prog(x86.AMOVQ) + p.From.Type = obj.TYPE_MEM + p.From.Reg = x86.REG_TLS + p.To.Type = obj.TYPE_REG + p.To.Reg = r + } else { + // MOVQ TLS, r + // MOVQ (r)(TLS*1), r + p := Prog(x86.AMOVQ) + p.From.Type = obj.TYPE_REG + p.From.Reg = x86.REG_TLS + p.To.Type = obj.TYPE_REG + p.To.Reg = r + q := Prog(x86.AMOVQ) + q.From.Type = obj.TYPE_MEM + q.From.Reg = r + q.From.Index = x86.REG_TLS + q.From.Scale = 1 + q.To.Type = obj.TYPE_REG + q.To.Reg = r + } case ssa.OpAMD64CALLstatic: p := Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 29f60d9a6b..ab8e44a444 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -217,6 +217,7 @@ (IsInBounds idx len) -> (SETB (CMPQ idx len)) (PanicNilCheck ptr mem) -> (LoweredPanicNilCheck ptr mem) +(GetG) -> (LoweredGetG) (Move [size] dst src mem) -> (REPMOVSB dst src (MOVQconst [size]) mem) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 9808745e35..903eea3057 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -288,8 +288,9 @@ func init() { // InvertFlags is a pseudo-op which can't appear in assembly output. {name: "InvertFlags"}, // reverse direction of arg0 - // LoweredPanicNilCheck is a pseudo-op. 
+ // Pseudo-ops {name: "LoweredPanicNilCheck"}, + {name: "LoweredGetG"}, } var AMD64blocks = []blockData{ diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index 8656b7cc4f..f4f49acb86 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -71,6 +71,8 @@ (StringLen (StringMake _ len)) -> len (Store dst str mem) && str.Type.IsString() -> (Store (OffPtr [config.PtrSize] dst) (StringLen str) (Store dst (StringPtr str) mem)) +(If (IsNonNil (GetG)) yes no) -> (Plain nil yes) + (If (Not cond) yes no) -> (If cond no yes) (If (ConstBool {c}) yes no) && c.(bool) -> (Plain nil yes) (If (ConstBool {c}) yes no) && !c.(bool) -> (Plain nil no) diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 6ff5d1ea1a..ec4f038f43 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -252,7 +252,9 @@ var genericOps = []opData{ {name: "IsNonNil"}, // arg0 != nil {name: "IsInBounds"}, // 0 <= arg0 < arg1 + // Pseudo-ops {name: "PanicNilCheck"}, // trigger a dereference fault; arg0=nil ptr, arg1=mem + {name: "GetG"}, // runtime.getg() (read g pointer) // Indexing operations {name: "ArrayIndex"}, // arg0=array, arg1=index. 
Returns a[i] diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d56a8ba81b..425c7e468c 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -195,6 +195,7 @@ const ( OpAMD64REPMOVSB OpAMD64InvertFlags OpAMD64LoweredPanicNilCheck + OpAMD64LoweredGetG OpAdd8 OpAdd16 @@ -369,6 +370,7 @@ const ( OpIsNonNil OpIsInBounds OpPanicNilCheck + OpGetG OpArrayIndex OpPtrIndex OpOffPtr @@ -2119,6 +2121,10 @@ var opcodeTable = [...]opInfo{ name: "LoweredPanicNilCheck", reg: regInfo{}, }, + { + name: "LoweredGetG", + reg: regInfo{}, + }, { name: "Add8", @@ -2812,6 +2818,10 @@ var opcodeTable = [...]opInfo{ name: "PanicNilCheck", generic: true, }, + { + name: "GetG", + generic: true, + }, { name: "ArrayIndex", generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 2668d570d1..a18097f91e 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1893,6 +1893,20 @@ func rewriteValueAMD64(v *Value, config *Config) bool { goto endd30ee67afc0284c419cef70261f61452 endd30ee67afc0284c419cef70261f61452: ; + case OpGetG: + // match: (GetG) + // cond: + // result: (LoweredGetG) + { + v.Op = OpAMD64LoweredGetG + v.AuxInt = 0 + v.Aux = nil + v.resetArgs() + return true + } + goto endb17140e71dd641aa4d89e14479160260 + endb17140e71dd641aa4d89e14479160260: + ; case OpGreater16: // match: (Greater16 x y) // cond: diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 6371ac2b38..e39305461d 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -782,6 +782,30 @@ func rewriteValuegeneric(v *Value, config *Config) bool { func rewriteBlockgeneric(b *Block) bool { switch b.Kind { case BlockIf: + // match: (If (IsNonNil (GetG)) yes no) + // cond: + // result: (Plain nil yes) 
+ { + v := b.Control + if v.Op != OpIsNonNil { + goto end0f2bb0111a86be0436b44210dbd83a90 + } + if v.Args[0].Op != OpGetG { + goto end0f2bb0111a86be0436b44210dbd83a90 + } + yes := b.Succs[0] + no := b.Succs[1] + b.Func.removePredecessor(b, no) + b.Kind = BlockPlain + b.Control = nil + b.Succs = b.Succs[:1] + b.Succs[0] = yes + b.Likely = BranchUnknown + return true + } + goto end0f2bb0111a86be0436b44210dbd83a90 + end0f2bb0111a86be0436b44210dbd83a90: + ; // match: (If (Not cond) yes no) // cond: // result: (If cond no yes) diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index fa9c474adb..5249ca9581 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -38,7 +38,7 @@ import ( "math" ) -func canuse1insntls(ctxt *obj.Link) bool { +func CanUse1InsnTLS(ctxt *obj.Link) bool { if ctxt.Arch.Regsize == 4 { switch ctxt.Headtype { case obj.Hlinux, @@ -120,7 +120,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog) { // rewriting the instructions more comprehensively, and it only does because // we only support a single TLS variable (g). - if canuse1insntls(ctxt) { + if CanUse1InsnTLS(ctxt) { // Reduce 2-instruction sequence to 1-instruction sequence. // Sequences like // MOVQ TLS, BX -- cgit v1.3 From 5cb352edeba36e862995dd82fe7312368e6e8571 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Tue, 1 Sep 2015 21:25:24 -0500 Subject: [dev.ssa] cmd/compile: fix liblink rewrite of -0.0 liblink was rewriting xor by a negative zero (used by SSA for negation) as XORPS reg,reg. Fixes strconv. 
Change-Id: I627a0a7366618e6b07ba8f0ad0db0e102340c5e3 Reviewed-on: https://go-review.googlesource.com/14200 Reviewed-by: Josh Bleecher Snyder Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/testdata/fp_ssa.go | 14 ++++++++++++++ src/cmd/internal/obj/x86/obj6.go | 6 ++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/cmd/internal/obj/x86') diff --git a/src/cmd/compile/internal/gc/testdata/fp_ssa.go b/src/cmd/compile/internal/gc/testdata/fp_ssa.go index 6193983e4c..ee3163abb3 100644 --- a/src/cmd/compile/internal/gc/testdata/fp_ssa.go +++ b/src/cmd/compile/internal/gc/testdata/fp_ssa.go @@ -105,6 +105,12 @@ func div64_ssa(a, b float64) float64 { return a / b } +func neg64_ssa(a, b float64) float64 { + switch { + } + return -a + -1*b +} + func add32_ssa(a, b float32) float32 { switch { } @@ -128,6 +134,12 @@ func div32_ssa(a, b float32) float32 { return a / b } +func neg32_ssa(a, b float32) float32 { + switch { + } + return -a + -1*b +} + func conv2Float64_ssa(a int8, b uint8, c int16, d uint16, e int32, f uint32, g int64, h uint64, i float32) (aa, bb, cc, dd, ee, ff, gg, hh, ii float64) { switch { @@ -1548,11 +1560,13 @@ func main() { fails += fail64("*", mul64_ssa, a, b, 12.0) fails += fail64("-", sub64_ssa, a, b, -1.0) fails += fail64("/", div64_ssa, a, b, 0.75) + fails += fail64("neg", neg64_ssa, a, b, -7) fails += fail32("+", add32_ssa, c, d, 7.0) fails += fail32("*", mul32_ssa, c, d, 12.0) fails += fail32("-", sub32_ssa, c, d, -1.0) fails += fail32("/", div32_ssa, c, d, 0.75) + fails += fail32("neg", neg32_ssa, c, d, -7) // denorm-squared should underflow to zero. 
fails += fail32("*", mul32_ssa, tiny, tiny, 0) diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index d0efffbc0a..d55dcc7cac 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -221,7 +221,8 @@ func progedit(ctxt *obj.Link, p *obj.Prog) { // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx case AMOVSS: if p.From.Type == obj.TYPE_FCONST { - if p.From.Val.(float64) == 0 { + // f == 0 can't be used here due to -0, so use Float64bits + if f := p.From.Val.(float64); math.Float64bits(f) == 0 { if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { p.As = AXORPS p.From = p.To @@ -261,7 +262,8 @@ func progedit(ctxt *obj.Link, p *obj.Prog) { case AMOVSD: // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx if p.From.Type == obj.TYPE_FCONST { - if p.From.Val.(float64) == 0 { + // f == 0 can't be used here due to -0, so use Float64bits + if f := p.From.Val.(float64); math.Float64bits(f) == 0 { if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { p.As = AXORPS p.From = p.To -- cgit v1.3 From 7b773946c09e075ed50c49e76e08f61c16616ee4 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 22 Jan 2016 13:44:58 -0800 Subject: [dev.ssa] cmd/compile: disable xor clearing when flags must be preserved The x86 backend automatically rewrites MOV $0, AX to XOR AX, AX. That rewrite isn't ok when the flags register is live across the MOV. Keep track of which moves care about preserving flags, then disable this rewrite for them. On x86, Prog.Mark was being used to hold the length of the instruction. We already store that in Prog.Isize, so no need to store it in Prog.Mark also. This frees up Prog.Mark to hold a bitmask on x86 just like all the other architectures. 
Update #12405 Change-Id: Ibad8a8f41fc6222bec1e4904221887d3cc3ca029 Reviewed-on: https://go-review.googlesource.com/18861 Reviewed-by: David Chase Reviewed-by: Russ Cox --- src/cmd/compile/internal/gc/ssa.go | 29 ++++++++++++++++++++++++++++ src/cmd/compile/internal/ssa/block.go | 3 +++ src/cmd/compile/internal/ssa/flagalloc.go | 5 +++++ src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 11 ++++------- src/cmd/compile/internal/ssa/opGen.go | 4 ---- src/cmd/compile/internal/ssa/regalloc.go | 9 --------- src/cmd/internal/obj/link.go | 6 +++--- src/cmd/internal/obj/pass.go | 1 - src/cmd/internal/obj/x86/a.out.go | 6 ++++++ src/cmd/internal/obj/x86/asm6.go | 9 ++++++--- src/cmd/internal/obj/x86/obj6.go | 20 +++++++++---------- 11 files changed, 66 insertions(+), 37 deletions(-) (limited to 'src/cmd/internal/obj/x86') diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 5b8d2423d7..de00fe9651 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3405,6 +3405,7 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) { for i, b := range f.Blocks { s.bstart[b.ID] = Pc // Emit values in block + s.markMoves(b) for _, v := range b.Values { x := Pc s.genValue(v) @@ -3864,6 +3865,11 @@ func (s *genState) genValue(v *ssa.Value) { p.From.Offset = i p.To.Type = obj.TYPE_REG p.To.Reg = x + // If flags are live at this instruction, suppress the + // MOV $0,AX -> XOR AX,AX optimization. + if v.Aux != nil { + p.Mark |= x86.PRESERVEFLAGS + } case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: x := regnum(v) p := Prog(v.Op.Asm()) @@ -4237,6 +4243,29 @@ func (s *genState) genValue(v *ssa.Value) { } } +// markMoves marks any MOVXconst ops that need to avoid clobbering flags. 
+func (s *genState) markMoves(b *ssa.Block) { + flive := b.FlagsLiveAtEnd + if b.Control != nil && b.Control.Type.IsFlags() { + flive = true + } + for i := len(b.Values) - 1; i >= 0; i-- { + v := b.Values[i] + if flive && (v.Op == ssa.OpAMD64MOVWconst || v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) { + // The "mark" is any non-nil Aux value. + v.Aux = v + } + if v.Type.IsFlags() { + flive = false + } + for _, a := range v.Args { + if a.Type.IsFlags() { + flive = true + } + } + } +} + // movZero generates a register indirect move with a 0 immediate and keeps track of bytes left and next offset func movZero(as int, width int64, nbytes int64, offset int64, regnum int16) (nleft int64, noff int64) { p := Prog(as) diff --git a/src/cmd/compile/internal/ssa/block.go b/src/cmd/compile/internal/ssa/block.go index 5fb93cd5a7..02673f0650 100644 --- a/src/cmd/compile/internal/ssa/block.go +++ b/src/cmd/compile/internal/ssa/block.go @@ -50,6 +50,9 @@ type Block struct { // Ignored if len(Succs) < 2. // Fatal if not BranchUnknown and len(Succs) > 2. Likely BranchPrediction + + // After flagalloc, records whether flags are live at the end of the block. + FlagsLiveAtEnd bool } // kind control successors diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index c088158057..f4e289e782 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -120,4 +120,9 @@ func flagalloc(f *Func) { // standard regs, and it runs next.) } } + + // Save live flag state for later. 
+ for _, b := range f.Blocks { + b.FlagsLiveAtEnd = end[b.ID] != nil + } } diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index daee7336b0..dcffb49f63 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -93,7 +93,6 @@ func init() { // Common regInfo var ( gp01 = regInfo{inputs: []regMask{}, outputs: gponly} - gp01flags = regInfo{inputs: []regMask{}, outputs: gponly, clobbers: flags} gp11 = regInfo{inputs: []regMask{gpsp}, outputs: gponly, clobbers: flags} gp11nf = regInfo{inputs: []regMask{gpsp}, outputs: gponly} // nf: no flags clobbered gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} @@ -340,12 +339,10 @@ func init() { {name: "MOVLQSX", reg: gp11nf, asm: "MOVLQSX"}, // sign extend arg0 from int32 to int64 {name: "MOVLQZX", reg: gp11nf, asm: "MOVLQZX"}, // zero extend arg0 from int32 to int64 - // clobbers flags as liblink will rewrite these to XOR reg, reg if the constant is zero - // TODO: revisit when issue 12405 is fixed - {name: "MOVBconst", reg: gp01flags, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint - {name: "MOVWconst", reg: gp01flags, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint - {name: "MOVLconst", reg: gp01flags, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint - {name: "MOVQconst", reg: gp01flags, asm: "MOVQ", typ: "UInt64"}, // auxint + {name: "MOVBconst", reg: gp01, asm: "MOVB", typ: "UInt8"}, // 8 low bits of auxint + {name: "MOVWconst", reg: gp01, asm: "MOVW", typ: "UInt16"}, // 16 low bits of auxint + {name: "MOVLconst", reg: gp01, asm: "MOVL", typ: "UInt32"}, // 32 low bits of auxint + {name: "MOVQconst", reg: gp01, asm: "MOVQ", typ: "UInt64"}, // auxint {name: "CVTTSD2SL", reg: fpgp, asm: "CVTTSD2SL"}, // convert float64 to int32 {name: "CVTTSD2SQ", reg: fpgp, asm: "CVTTSD2SQ"}, // convert float64 to int64 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go 
index 497b690192..d391b2435e 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2694,7 +2694,6 @@ var opcodeTable = [...]opInfo{ name: "MOVBconst", asm: x86.AMOVB, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, @@ -2704,7 +2703,6 @@ var opcodeTable = [...]opInfo{ name: "MOVWconst", asm: x86.AMOVW, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, @@ -2714,7 +2712,6 @@ var opcodeTable = [...]opInfo{ name: "MOVLconst", asm: x86.AMOVL, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, @@ -2724,7 +2721,6 @@ var opcodeTable = [...]opInfo{ name: "MOVQconst", asm: x86.AMOVQ, reg: regInfo{ - clobbers: 8589934592, // .FLAGS outputs: []regMask{ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15 }, diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 27deeba718..7cbd30311f 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -1415,15 +1415,6 @@ func (v *Value) rematerializeable() bool { // We can't rematerialize instructions which // clobber the flags register. if regspec.clobbers&flagRegMask != 0 { - if v.Op == OpAMD64MOVQconst && v.AuxInt != 0 || - v.Op == OpAMD64MOVLconst && int32(v.AuxInt) != 0 || - v.Op == OpAMD64MOVWconst && int16(v.AuxInt) != 0 || - v.Op == OpAMD64MOVBconst && int8(v.AuxInt) != 0 { - // These are marked as clobbering flags, but only - // the 0 versions actually do. TODO: fix MOV->XOR rewrites - // to understand when they are allowed to clobber flags? 
- return true - } return false } diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index bc898235c1..f3d1a9557a 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -214,14 +214,14 @@ type Prog struct { Spadj int32 As int16 Reg int16 - RegTo2 int16 // 2nd register output operand - Mark uint16 + RegTo2 int16 // 2nd register output operand + Mark uint16 // bitmask of arch-specific items Optab uint16 Scond uint8 Back uint8 Ft uint8 Tt uint8 - Isize uint8 + Isize uint8 // size of the instruction in bytes (x86 only) Mode int8 Info ProgInfo diff --git a/src/cmd/internal/obj/pass.go b/src/cmd/internal/obj/pass.go index b92dfe23fb..14c9b6aaba 100644 --- a/src/cmd/internal/obj/pass.go +++ b/src/cmd/internal/obj/pass.go @@ -203,7 +203,6 @@ func linkpatch(ctxt *Link, sym *LSym) { } for p := sym.Text; p != nil; p = p.Link { - p.Mark = 0 /* initialization for follow */ if p.Pcond != nil { p.Pcond = brloop(ctxt, p.Pcond) if p.Pcond != nil { diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go index 4ee8cfbc6c..f163505fd0 100644 --- a/src/cmd/internal/obj/x86/a.out.go +++ b/src/cmd/internal/obj/x86/a.out.go @@ -34,6 +34,12 @@ import "cmd/internal/obj" //go:generate go run ../stringer.go -i $GOFILE -o anames.go -p x86 +const ( + /* mark flags */ + DONE = 1 << iota + PRESERVEFLAGS // not allowed to clobber flags +) + /* * amd64 */ diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 164dbd6064..8d0f86681f 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -1748,7 +1748,7 @@ func span6(ctxt *obj.Link, s *obj.LSym) { // process forward jumps to p for q = p.Rel; q != nil; q = q.Forwd { - v = int32(p.Pc - (q.Pc + int64(q.Mark))) + v = int32(p.Pc - (q.Pc + int64(q.Isize))) if q.Back&2 != 0 { // short if v > 127 { loop++ @@ -1761,7 +1761,7 @@ func span6(ctxt *obj.Link, s *obj.LSym) { s.P[q.Pc+1] = byte(v) } } else { - bp = 
s.P[q.Pc+int64(q.Mark)-4:] + bp = s.P[q.Pc+int64(q.Isize)-4:] bp[0] = byte(v) bp = bp[1:] bp[0] = byte(v >> 8) @@ -1784,7 +1784,6 @@ func span6(ctxt *obj.Link, s *obj.LSym) { obj.Symgrow(ctxt, s, p.Pc+int64(m)) copy(s.P[p.Pc:][:m], ctxt.And[:m]) - p.Mark = uint16(m) c += int32(m) } @@ -2157,6 +2156,10 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { v = int64(int32(v)) } if v == 0 { + if p.Mark&PRESERVEFLAGS != 0 { + // If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX. + return Yu7 + } return Yi0 } if v == 1 { diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index eff6c004c6..e545374828 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -1214,16 +1214,16 @@ loop: q = p.Pcond if q != nil && q.As != obj.ATEXT { /* mark instruction as done and continue layout at target of jump */ - p.Mark = 1 + p.Mark |= DONE p = q - if p.Mark == 0 { + if p.Mark&DONE == 0 { goto loop } } } - if p.Mark != 0 { + if p.Mark&DONE != 0 { /* * p goes here, but already used it elsewhere. * copy up to 4 instructions or else branch to other copy. 
@@ -1246,7 +1246,7 @@ loop: if nofollow(a) || pushpop(a) { break // NOTE(rsc): arm does goto copy } - if q.Pcond == nil || q.Pcond.Mark != 0 { + if q.Pcond == nil || q.Pcond.Mark&DONE != 0 { continue } if a == obj.ACALL || a == ALOOP { @@ -1260,10 +1260,10 @@ loop: q = obj.Copyp(ctxt, p) p = p.Link - q.Mark = 1 + q.Mark |= DONE (*last).Link = q *last = q - if int(q.As) != a || q.Pcond == nil || q.Pcond.Mark != 0 { + if int(q.As) != a || q.Pcond == nil || q.Pcond.Mark&DONE != 0 { continue } @@ -1273,7 +1273,7 @@ loop: q.Link = p xfol(ctxt, q.Link, last) p = q.Link - if p.Mark != 0 { + if p.Mark&DONE != 0 { return } goto loop @@ -1290,7 +1290,7 @@ loop: } /* emit p */ - p.Mark = 1 + p.Mark |= DONE (*last).Link = p *last = p @@ -1328,7 +1328,7 @@ loop: } } else { q = p.Link - if q.Mark != 0 { + if q.Mark&DONE != 0 { if a != ALOOP { p.As = relinv(int16(a)) p.Link = p.Pcond @@ -1338,7 +1338,7 @@ loop: } xfol(ctxt, p.Link, last) - if p.Pcond.Mark != 0 { + if p.Pcond.Mark&DONE != 0 { return } p = p.Pcond -- cgit v1.3 From 7de8cfdf9ce3942a255172e03c8f2ad380a30aca Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 4 Feb 2016 11:21:31 -0800 Subject: [dev.ssa] cmd/internal/obj/x86: don't clobber flags with dynlink rewrite LEAQ symbol+100(SB), AX Under dynamic link, rewrites to MOVQ symbol@GOT(SB), AX ADDQ $100, AX but ADDQ clobbers flags, whereas the original LEAQ (when not dynamic linking) doesn't. Use LEAQ instead of ADDQ to add that constant in so we preserve flags. 
Change-Id: Ibb055403d94a4c5163e1c7d2f45da633ffd0b6a3 Reviewed-on: https://go-review.googlesource.com/19230 Reviewed-by: David Chase Run-TryBot: David Chase Reviewed-by: Ian Lance Taylor --- src/cmd/internal/obj/x86/obj6.go | 7 ++++--- src/cmd/internal/obj/x86/obj6_test.go | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/cmd/internal/obj/x86') diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index e545374828..1955aa560d 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -377,7 +377,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) { } if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local { // $MOV $sym, Rx becomes $MOV sym@GOT, Rx - // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $ADD <off>, Rx + // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx // On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX cmplxdest := false pAs := p.As @@ -399,8 +399,9 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) { q := p if p.From.Offset != 0 { q = obj.Appendp(ctxt, p) - q.As = add - q.From.Type = obj.TYPE_CONST + q.As = lea + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg q.From.Offset = p.From.Offset q.To = p.To p.From.Offset = 0 diff --git a/src/cmd/internal/obj/x86/obj6_test.go b/src/cmd/internal/obj/x86/obj6_test.go index 4387db696d..d83ab24ab8 100644 --- a/src/cmd/internal/obj/x86/obj6_test.go +++ b/src/cmd/internal/obj/x86/obj6_test.go @@ -20,9 +20,9 @@ const testdata = ` MOVQ AX, AX -> MOVQ AX, AX LEAQ name(SB), AX -> MOVQ name@GOT(SB), AX -LEAQ name+10(SB), AX -> MOVQ name@GOT(SB), AX; ADDQ $10, AX +LEAQ name+10(SB), AX -> MOVQ name@GOT(SB), AX; LEAQ 10(AX), AX MOVQ $name(SB), AX -> MOVQ name@GOT(SB), AX -MOVQ $name+10(SB), AX -> MOVQ name@GOT(SB), AX; ADDQ $10, AX +MOVQ $name+10(SB), AX -> MOVQ name@GOT(SB), AX; LEAQ 10(AX), AX MOVQ name(SB), AX -> NOP; MOVQ name@GOT(SB), R15; MOVQ (R15), AX MOVQ 
name+10(SB), AX -> NOP; MOVQ name@GOT(SB), R15; MOVQ 10(R15), AX -- cgit v1.3