diff options
| author | David Chase <drchase@google.com> | 2016-03-11 00:10:52 -0500 |
|---|---|---|
| committer | David Chase <drchase@google.com> | 2016-03-28 16:29:59 +0000 |
| commit | 8eec2bbfbc4f209950f677906c6ce67e01d32930 (patch) | |
| tree | d7262de9875b76be2e91785823375865b230324c /src | |
| parent | 2e90192b0e774f44a2d918509e0bd32823ce5c2c (diff) | |
| download | go-8eec2bbfbc4f209950f677906c6ce67e01d32930.tar.xz | |
cmd/compile: added some intrinsics to SSA back end
One intrinsic was needed to help get the very best
performance out of a future GC; as long as that one was
being added, I also added Bswap since that is sometimes
a handy thing to have. I had intended to fill out the
bit-scan intrinsic family, but the mismatch between the
"bit scan reverse" (BSR) instruction and "count leading zeroes"
was large enough to cause me to leave it out -- it poses
a dilemma that I'd rather dodge right now.
These intrinsics are not exposed for general use.
Exposing them is a separate issue that requires an API change
proposal (https://github.com/golang/proposal).
All intrinsics are tested, both that they are substituted
on the appropriate architecture, and that they produce the
expected result.
Change-Id: I5848037cfd97de4f75bdc33bdd89bba00af4a8ee
Reviewed-on: https://go-review.googlesource.com/20564
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/amd64/prog.go | 58 | ||||
| -rw-r--r-- | src/cmd/compile/internal/amd64/ssa.go | 36 | ||||
| -rw-r--r-- | src/cmd/compile/internal/gc/go.go | 7 | ||||
| -rw-r--r-- | src/cmd/compile/internal/gc/inl.go | 2 | ||||
| -rw-r--r-- | src/cmd/compile/internal/gc/ssa.go | 77 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/compile.go | 18 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64.rules | 32 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 29 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/genericOps.go | 11 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 278 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 383 | ||||
| -rw-r--r-- | src/runtime/internal/sys/intrinsics.go | 105 |
12 files changed, 1006 insertions, 30 deletions
diff --git a/src/cmd/compile/internal/amd64/prog.go b/src/cmd/compile/internal/amd64/prog.go index 55ea7ee82a..91b479be22 100644 --- a/src/cmd/compile/internal/amd64/prog.go +++ b/src/cmd/compile/internal/amd64/prog.go @@ -36,26 +36,44 @@ var progtable = [x86.ALAST & obj.AMask]obj.ProgInfo{ // NOP is an internal no-op that also stands // for USED and SET annotations, not the Intel opcode. - obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite}, - x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, - x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, - x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, - x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr}, - x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr}, - x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, - x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, - obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry}, - x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, - x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, - x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, - x86.ACLD & obj.AMask: {Flags: gc.OK}, - x86.ASTD & obj.AMask: {Flags: gc.OK}, + obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite}, + x86.AADCL & obj.AMask: {Flags: 
gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, + x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, + x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry}, + x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr}, + x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr}, + x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry}, + x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry}, + + x86.ABSFL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSFQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSFW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSRL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSRQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSRW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry}, + x86.ABSWAPL & obj.AMask: {Flags: gc.SizeL | RightRdwr}, + x86.ABSWAPQ & obj.AMask: {Flags: gc.SizeQ | RightRdwr}, + + obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry}, + x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, + x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX}, + x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | 
DX}, + x86.ACLD & obj.AMask: {Flags: gc.OK}, + x86.ASTD & obj.AMask: {Flags: gc.OK}, + + x86.ACMOVLEQ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVLNE & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVQEQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVQNE & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVWEQ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMOVWNE & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry}, + x86.ACMPB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightRead | gc.SetCarry}, x86.ACMPL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.SetCarry}, x86.ACMPQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.SetCarry}, diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 307ba28e5e..dfacff6f40 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -477,6 +477,33 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r + + case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst, + ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst: + r := gc.SSARegNum(v) + x := gc.SSARegNum(v.Args[0]) + // Arg0 is in/out, move in to out if not already same + if r != x { + p := gc.Prog(moveByType(v.Type)) + p.From.Type = obj.TYPE_REG + p.From.Reg = x + p.To.Type = obj.TYPE_REG + p.To.Reg = r + } + + // Constant into AX, after arg0 movement in case arg0 is in AX + p := gc.Prog(moveByType(v.Type)) + p.From.Type = obj.TYPE_CONST + p.From.Offset = v.AuxInt2Int64() + p.To.Type = obj.TYPE_REG + p.To.Reg = x86.REG_AX + + p = gc.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = x86.REG_AX + p.To.Type = obj.TYPE_REG + p.To.Reg = r + case 
ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) @@ -955,6 +982,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.Maxarg = v.AuxInt } case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB, + ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) @@ -968,7 +996,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r - case ssa.OpAMD64SQRTSD: + case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW, + ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW, + ssa.OpAMD64SQRTSD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) @@ -1008,9 +1038,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64InvertFlags: - v.Fatalf("InvertFlags should never make it to codegen %v", v) + v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: - v.Fatalf("Flag* ops should never make it to codegen %v", v) + v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpAMD64REPSTOSQ: gc.Prog(x86.AREP) gc.Prog(x86.ASTOSQ) diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go index 78c177e616..448a0fd322 100644 --- a/src/cmd/compile/internal/gc/go.go +++ b/src/cmd/compile/internal/gc/go.go @@ -55,8 +55,8 @@ func (v Val) Ctype() Ctype { } type Pkg struct { - Name string // package name - Path string // string literal used in import statement + Name string // package name, e.g. "sys" + Path string // string literal used in import statement, e.g. 
"runtime/internal/sys" Pathsym *Sym Prefix string // escaped path for use in symbol table Imported bool // export data of this package was parsed @@ -469,6 +469,9 @@ const ( // Set, use, or kill of carry bit. // Kill means we never look at the carry bit after this kind of instruction. + // Originally for understanding ADC, RCR, and so on, but now also + // tracks set, use, and kill of the zero and overflow bits as well. + // TODO rename to {Set,Use,Kill}Flags SetCarry = 1 << 24 UseCarry = 1 << 25 KillCarry = 1 << 26 diff --git a/src/cmd/compile/internal/gc/inl.go b/src/cmd/compile/internal/gc/inl.go index ff0791c537..e25ce132da 100644 --- a/src/cmd/compile/internal/gc/inl.go +++ b/src/cmd/compile/internal/gc/inl.go @@ -453,7 +453,7 @@ func inlnode(n *Node) *Node { if Debug['m'] > 3 { fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign)) } - if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 { // normal case + if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 && !isIntrinsicCall1(n) { // normal case n = mkinlcall(n, n.Left, n.Isddd) } else if n.Left.Op == ONAME && n.Left.Left != nil && n.Left.Left.Op == OTYPE && n.Left.Right != nil && n.Left.Right.Op == ONAME { // methods called as functions if n.Left.Sym.Def != nil { diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 93b820b17e..9b8ef20fed 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -2052,7 +2052,13 @@ func (s *state) expr(n *Node) *ssa.Value { p, l, c := s.slice(n.Left.Type, v, i, j, k) return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c) - case OCALLFUNC, OCALLINTER, OCALLMETH: + case OCALLFUNC: + if isIntrinsicCall1(n) { + return s.intrinsicCall1(n) + } + fallthrough + + case OCALLINTER, OCALLMETH: a := s.call(n, callNormal) return s.newValue2(ssa.OpLoad, n.Type, a, s.mem()) @@ -2373,6 +2379,75 @@ const ( callGo ) +// isSSAIntrinsic1 returns true if n is a call to a recognized 1-arg intrinsic 
+// that can be handled by the SSA backend. +// SSA uses this, but so does the front end to see if should not +// inline a function because it is a candidate for intrinsic +// substitution. +func isSSAIntrinsic1(s *Sym) bool { + // The test below is not quite accurate -- in the event that + // a function is disabled on a per-function basis, for example + // because of hash-keyed binary failure search, SSA might be + // disabled for that function but it would not be noted here, + // and thus an inlining would not occur (in practice, inlining + // so far has only been noticed for Bswap32 and the 16-bit count + // leading/trailing instructions, but heuristics might change + // in the future or on different architectures). + if !ssaEnabled || ssa.IntrinsicsDisable || Thearch.Thechar != '6' { + return false + } + if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" { + switch s.Name { + case + "Ctz64", "Ctz32", "Ctz16", + "Bswap64", "Bswap32": + return true + } + } + return false +} + +func isIntrinsicCall1(n *Node) bool { + if n == nil || n.Left == nil { + return false + } + return isSSAIntrinsic1(n.Left.Sym) +} + +// intrinsicFirstArg extracts arg from n.List and eval +func (s *state) intrinsicFirstArg(n *Node) *ssa.Value { + x := n.List.First() + if x.Op == OAS { + x = x.Right + } + return s.expr(x) +} + +// intrinsicCall1 converts a call to a recognized 1-arg intrinsic +// into the intrinsic +func (s *state) intrinsicCall1(n *Node) *ssa.Value { + var result *ssa.Value + switch n.Left.Sym.Name { + case "Ctz64": + result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n)) + case "Ctz32": + result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n)) + case "Ctz16": + result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n)) + case "Bswap64": + result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n)) + case "Bswap32": + result = s.newValue1(ssa.OpBswap32, Types[TUINT32], 
s.intrinsicFirstArg(n)) + } + if result == nil { + Fatalf("Unknown special call: %v", n.Left.Sym) + } + if ssa.IntrinsicsDebug > 0 { + Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString()) + } + return result +} + // Calls the function n using the specified call type. // Returns the address of the return value (or nil if none). func (s *state) call(n *Node, k callKind) *ssa.Value { diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index b8e2b42c3e..d6c2bf83ef 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -120,6 +120,10 @@ type pass struct { // Run consistency checker between each phase var checkEnabled = false +// Debug output +var IntrinsicsDebug int +var IntrinsicsDisable bool + // PhaseOption sets the specified flag in the specified ssa phase, // returning empty string if this was successful or a string explaining // the error if it was not. @@ -157,6 +161,20 @@ func PhaseOption(phase, flag string, val int) string { } } + if phase == "intrinsics" { + switch flag { + case "on": + IntrinsicsDisable = val == 0 + case "off": + IntrinsicsDisable = val != 0 + case "debug": + IntrinsicsDebug = val + default: + return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase) + } + return "" + } + underphase := strings.Replace(phase, "_", " ", -1) var re *regexp.Regexp if phase[0] == '~' { diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index b595912cc6..cc210978ef 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -92,6 +92,38 @@ (Com16 x) -> (NOTW x) (Com8 x) -> (NOTB x) +// CMPQconst 0 below is redundant because BSF sets Z but how to remove? 
+(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64]) +(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32]) +(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16]) + +(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c]) +(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c]) +(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c]) + +(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c]) +(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c]) +(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c]) + +(CMOVQEQconst x (FlagLT_ULT)) -> x +(CMOVLEQconst x (FlagLT_ULT)) -> x +(CMOVWEQconst x (FlagLT_ULT)) -> x + +(CMOVQEQconst x (FlagLT_UGT)) -> x +(CMOVLEQconst x (FlagLT_UGT)) -> x +(CMOVWEQconst x (FlagLT_UGT)) -> x + +(CMOVQEQconst x (FlagGT_ULT)) -> x +(CMOVLEQconst x (FlagGT_ULT)) -> x +(CMOVWEQconst x (FlagGT_ULT)) -> x + +(CMOVQEQconst x (FlagGT_UGT)) -> x +(CMOVLEQconst x (FlagGT_UGT)) -> x +(CMOVWEQconst x (FlagGT_UGT)) -> x + +(Bswap64 x) -> (BSWAPQ x) +(Bswap32 x) -> (BSWAPL x) + (Sqrt x) -> (SQRTSD x) // Note: we always extend to 64 bits even though some ops don't need that many result bits. diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 116e3ff9e3..9dc09aab53 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -103,9 +103,13 @@ func init() { gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax | flags} - gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly} - gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} - flagsgp = regInfo{inputs: flagsonly, outputs: gponly} + gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly} + gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly} + flagsgp = regInfo{inputs: flagsonly, outputs: gponly} + + // for CMOVconst -- uses AX to hold constant temporary. 
AX input is moved before temp. + gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} + readflags = regInfo{inputs: flagsonly, outputs: gponly} flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}} @@ -307,6 +311,25 @@ func init() { {name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0 {name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0 + {name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero + {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero + {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero + + {name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // arg0 # of high-order zeroes ; undef if zero + {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // arg0 # of high-order zeroes ; undef if zero + {name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // arg0 # of high-order zeroes ; undef if zero + + // Note ASM for ops moves whole register + {name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set + {name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set + {name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set + {name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set + {name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set + {name: "CMOVWNEconst", argLength: 2, reg: 
gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set + + {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes + {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes + {name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0) {name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear. diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index ab5e335765..6d92926e3a 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -237,6 +237,17 @@ var genericOps = []opData{ {name: "Com32", argLength: 1}, {name: "Com64", argLength: 1}, + {name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16) + {name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32) + {name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64) + + {name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16) + {name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32) + {name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64) + + {name: "Bswap32", argLength: 1}, // Swap bytes + {name: "Bswap64", argLength: 1}, // Swap bytes + {name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only // Data movement, max argument length for Phi is indefinite so just pick diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 3ff2b5ac60..e76efd40ca 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -237,6 +237,20 @@ const ( OpAMD64NOTL OpAMD64NOTW OpAMD64NOTB + OpAMD64BSFQ + OpAMD64BSFL + OpAMD64BSFW + OpAMD64BSRQ + OpAMD64BSRL + OpAMD64BSRW + OpAMD64CMOVQEQconst + OpAMD64CMOVLEQconst + OpAMD64CMOVWEQconst + 
OpAMD64CMOVQNEconst + OpAMD64CMOVLNEconst + OpAMD64CMOVWNEconst + OpAMD64BSWAPQ + OpAMD64BSWAPL OpAMD64SQRTSD OpAMD64SBBQcarrymask OpAMD64SBBLcarrymask @@ -521,6 +535,14 @@ const ( OpCom16 OpCom32 OpCom64 + OpCtz16 + OpCtz32 + OpCtz64 + OpClz16 + OpClz32 + OpClz64 + OpBswap32 + OpBswap64 OpSqrt OpPhi OpCopy @@ -2804,6 +2826,222 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "BSFQ", + argLen: 1, + asm: x86.ABSFQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSFL", + argLen: 1, + asm: x86.ABSFL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSFW", + argLen: 1, + asm: x86.ABSFW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSRQ", + argLen: 1, + asm: x86.ABSRQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSRL", + argLen: 1, + asm: x86.ABSRL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSRW", + argLen: 1, + asm: x86.ABSRW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 
R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVQEQconst", + auxType: auxInt64, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVQEQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVLEQconst", + auxType: auxInt32, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLEQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVWEQconst", + auxType: auxInt16, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLEQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVQNEconst", + auxType: auxInt64, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVQNE, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVLNEconst", + auxType: auxInt32, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLNE, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX 
BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "CMOVWNEconst", + auxType: auxInt16, + argLen: 2, + resultInArg0: true, + asm: x86.ACMOVLNE, + reg: regInfo{ + inputs: []inputInfo{ + {1, 8589934592}, // FLAGS + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934593, // AX FLAGS + outputs: []regMask{ + 65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSWAPQ", + argLen: 1, + resultInArg0: true, + asm: x86.ABSWAPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { + name: "BSWAPL", + argLen: 1, + resultInArg0: true, + asm: x86.ABSWAPL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + clobbers: 8589934592, // FLAGS + outputs: []regMask{ + 65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, + { name: "SQRTSD", argLen: 1, asm: x86.ASQRTSD, @@ -4982,6 +5220,46 @@ var opcodeTable = [...]opInfo{ generic: true, }, { + name: "Ctz16", + argLen: 1, + generic: true, + }, + { + name: "Ctz32", + argLen: 1, + generic: true, + }, + { + name: "Ctz64", + argLen: 1, + generic: true, + }, + { + name: "Clz16", + argLen: 1, + generic: true, + }, + { + name: "Clz32", + argLen: 1, + generic: true, + }, + { + name: "Clz64", + argLen: 1, + generic: true, + }, + { + name: "Bswap32", + argLen: 1, + generic: true, + }, + { + name: "Bswap64", + argLen: 1, + generic: true, + }, + { name: "Sqrt", argLen: 1, generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 046973859a..8dd1b15f13 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -66,6 +66,16 @@ func rewriteValueAMD64(v *Value, config 
*Config) bool { return rewriteValueAMD64_OpAnd8(v, config) case OpAvg64u: return rewriteValueAMD64_OpAvg64u(v, config) + case OpBswap32: + return rewriteValueAMD64_OpBswap32(v, config) + case OpBswap64: + return rewriteValueAMD64_OpBswap64(v, config) + case OpAMD64CMOVLEQconst: + return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config) + case OpAMD64CMOVQEQconst: + return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config) + case OpAMD64CMOVWEQconst: + return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config) case OpAMD64CMPB: return rewriteValueAMD64_OpAMD64CMPB(v, config) case OpAMD64CMPBconst: @@ -110,6 +120,12 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpConstNil(v, config) case OpConvert: return rewriteValueAMD64_OpConvert(v, config) + case OpCtz16: + return rewriteValueAMD64_OpCtz16(v, config) + case OpCtz32: + return rewriteValueAMD64_OpCtz32(v, config) + case OpCtz64: + return rewriteValueAMD64_OpCtz64(v, config) case OpCvt32Fto32: return rewriteValueAMD64_OpCvt32Fto32(v, config) case OpCvt32Fto64: @@ -2119,6 +2135,307 @@ func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpBswap32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Bswap32 x) + // cond: + // result: (BSWAPL x) + for { + x := v.Args[0] + v.reset(OpAMD64BSWAPL) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpBswap64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Bswap64 x) + // cond: + // result: (BSWAPQ x) + for { + x := v.Args[0] + v.reset(OpAMD64BSWAPQ) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (CMOVLEQconst x (InvertFlags y) [c]) + // cond: + // result: (CMOVLNEconst x y [c]) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64InvertFlags { + break + } + y := v_1.Args[0] + c := v.AuxInt + 
v.reset(OpAMD64CMOVLNEconst) + v.AddArg(x) + v.AddArg(y) + v.AuxInt = c + return true + } + // match: (CMOVLEQconst _ (FlagEQ) [c]) + // cond: + // result: (Const32 [c]) + for { + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagEQ { + break + } + c := v.AuxInt + v.reset(OpConst32) + v.AuxInt = c + return true + } + // match: (CMOVLEQconst x (FlagLT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVLEQconst x (FlagLT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVLEQconst x (FlagGT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVLEQconst x (FlagGT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (CMOVQEQconst x (InvertFlags y) [c]) + // cond: + // result: (CMOVQNEconst x y [c]) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64InvertFlags { + break + } + y := v_1.Args[0] + c := v.AuxInt + v.reset(OpAMD64CMOVQNEconst) + v.AddArg(x) + v.AddArg(y) + v.AuxInt = c + return true + } + // match: (CMOVQEQconst _ (FlagEQ) [c]) + // cond: + // result: (Const64 [c]) + for { + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagEQ { + break + } + c := v.AuxInt + v.reset(OpConst64) + v.AuxInt = c + return true + } + // match: (CMOVQEQconst x (FlagLT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != 
OpAMD64FlagLT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVQEQconst x (FlagLT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVQEQconst x (FlagGT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVQEQconst x (FlagGT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (CMOVWEQconst x (InvertFlags y) [c]) + // cond: + // result: (CMOVWNEconst x y [c]) + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64InvertFlags { + break + } + y := v_1.Args[0] + c := v.AuxInt + v.reset(OpAMD64CMOVWNEconst) + v.AddArg(x) + v.AddArg(y) + v.AuxInt = c + return true + } + // match: (CMOVWEQconst _ (FlagEQ) [c]) + // cond: + // result: (Const16 [c]) + for { + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagEQ { + break + } + c := v.AuxInt + v.reset(OpConst16) + v.AuxInt = c + return true + } + // match: (CMOVWEQconst x (FlagLT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVWEQconst x (FlagLT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVWEQconst x (FlagGT_ULT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] 
+ if v_1.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + // match: (CMOVWEQconst x (FlagGT_UGT)) + // cond: + // result: x + for { + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpCopy) + v.Type = x.Type + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool { b := v.Block _ = b @@ -3026,6 +3343,72 @@ func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Ctz16 <t> x) + // cond: + // result: (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16]) + for { + t := v.Type + x := v.Args[0] + v.reset(OpAMD64CMOVWEQconst) + v0 := b.NewValue0(v.Line, OpAMD64BSFW, t) + v0.AddArg(x) + v.AddArg(v0) + v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags) + v1.AddArg(x) + v1.AuxInt = 0 + v.AddArg(v1) + v.AuxInt = 16 + return true + } + return false +} +func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Ctz32 <t> x) + // cond: + // result: (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32]) + for { + t := v.Type + x := v.Args[0] + v.reset(OpAMD64CMOVLEQconst) + v0 := b.NewValue0(v.Line, OpAMD64BSFL, t) + v0.AddArg(x) + v.AddArg(v0) + v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags) + v1.AddArg(x) + v1.AuxInt = 0 + v.AddArg(v1) + v.AuxInt = 32 + return true + } + return false +} +func rewriteValueAMD64_OpCtz64(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Ctz64 <t> x) + // cond: + // result: (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64]) + for { + t := v.Type + x := v.Args[0] + v.reset(OpAMD64CMOVQEQconst) + v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t) + v0.AddArg(x) + v.AddArg(v0) + v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags) + v1.AddArg(x) + v1.AuxInt = 0 + v.AddArg(v1) + v.AuxInt = 64 + return true + } 
+ return false +} func rewriteValueAMD64_OpCvt32Fto32(v *Value, config *Config) bool { b := v.Block _ = b diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go new file mode 100644 index 0000000000..8feb754dbd --- /dev/null +++ b/src/runtime/internal/sys/intrinsics.go @@ -0,0 +1,105 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package sys + +// Ctz64 counts trailing (low-order) zeroes, +// and if all are zero, then 64. +func Ctz64(x uint64) uint64 { + if x&0xffffffff == 0 { + return 32 + uint64(Ctz32(uint32(x>>32))) + } + return uint64(Ctz32(uint32(x))) + +} + +// Ctz32 counts trailing (low-order) zeroes, +// and if all are zero, then 32. +func Ctz32(x uint32) uint32 { + if x&0xffff == 0 { + return 16 + uint32(Ctz16(uint16(x>>16))) + } + return uint32(Ctz16(uint16(x))) +} + +// Ctz16 counts trailing (low-order) zeroes, +// and if all are zero, then 16. +func Ctz16(x uint16) uint16 { + if x&0xff == 0 { + return 8 + uint16(Ctz8(uint8(x>>8))) + } + return uint16(Ctz8(uint8(x))) +} + +// Ctz8 counts trailing (low-order) zeroes, +// and if all are zero, then 8. 
// Ctz8 reports how many trailing (low-order) zero bits x has.
// An input of zero yields 8.
func Ctz8(x uint8) uint8 {
	// Every possible byte value has a precomputed answer in ctzVals.
	n := ctzVals[x]
	return n
}

// ctzVals maps each byte value to its number of trailing zero bits.
// Index 0 (no bit set) holds 8. The table is laid out 16 entries per
// row, so row k starts at index 16*k.
var ctzVals = [256]uint8{
	8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
}

// Bswap64 returns x with the order of its eight bytes reversed:
// 0x0102030405060708 -> 0x0807060504030201.
func Bswap64(x uint64) uint64 {
	const (
		oddBytes  = 0xff00ff00ff00ff00 // upper byte of every 16-bit pair
		highPairs = 0xffff0000ffff0000 // upper 16 bits of every 32-bit group
	)
	// Exchange neighbouring bytes, then neighbouring 16-bit pairs.
	x = (x&oddBytes)>>8 | (x&^oddBytes)<<8
	x = (x&highPairs)>>16 | (x&^highPairs)<<16
	// Exchanging the two 32-bit halves needs no mask: the shifts
	// already discard the bits that a mask would clear.
	return x>>32 | x<<32
}

// Bswap32 returns x with the order of its four bytes reversed:
// 0x01020304 -> 0x04030201.
func Bswap32(x uint32) uint32 {
	const oddBytes = 0xff00ff00 // upper byte of every 16-bit pair
	// Exchange neighbouring bytes first.
	x = (x&oddBytes)>>8 | (x&^oddBytes)<<8
	// Then exchange the two 16-bit halves; the shifts make a mask redundant.
	return x>>16 | x<<16
}
