diff options
| author | isharipo <iskander.sharipov@intel.com> | 2017-09-13 14:32:08 +0300 |
|---|---|---|
| committer | Matthew Dempsky <mdempsky@google.com> | 2017-09-15 21:05:03 +0000 |
| commit | 8c67f210a181f4d3e003e46e544ae03ce231ca42 (patch) | |
| tree | 537c869a16a02ed96ed005b345aeeec040080686 /src/cmd/asm | |
| parent | e1cf2be7a82a2421c69c48cbdf596841f29bff77 (diff) | |
| download | go-8c67f210a181f4d3e003e46e544ae03ce231ca42.tar.xz | |
cmd/internal/obj: change Prog.From3 to RestArgs ([]Addr)
This change makes it easier to express instructions
with an arbitrary number of operands.
Rationale: the previous approach of operand "hiding" does
not scale well; AVX and especially AVX512 have many
instructions with 3+ operands.
The x86 asm backend is updated to handle up to 6 explicit operands.
It also fixes an issue with the 4th immediate operand type checks.
All `ytab` tables are updated accordingly.
Changes to non-x86 backends only include these patterns:
`p.From3 = X` => `p.SetFrom3(X)`
`p.From3.X = Y` => `p.GetFrom3().X = Y`
Over time, other backends can adopt Prog.RestArgs
and reduce the number of workarounds.
-- Performance --
x/benchmark/build:
$ benchstat upstream.bench patched.bench
name old time/op new time/op delta
Build-48 21.7s ± 2% 21.8s ± 2% ~ (p=0.218 n=10+10)
name old binary-size new binary-size delta
Build-48 10.3M ± 0% 10.3M ± 0% ~ (all equal)
name old build-time/op new build-time/op delta
Build-48 21.7s ± 2% 21.8s ± 2% ~ (p=0.218 n=10+10)
name old build-peak-RSS-bytes new build-peak-RSS-bytes delta
Build-48 145MB ± 5% 148MB ± 5% ~ (p=0.218 n=10+10)
name old build-user+sys-time/op new build-user+sys-time/op delta
Build-48 21.0s ± 2% 21.2s ± 2% ~ (p=0.075 n=10+10)
Microbenchmark shows a slight slowdown.
name old time/op new time/op delta
AMD64asm-4 49.5ms ± 1% 49.9ms ± 1% +0.67% (p=0.001 n=23+15)
// BenchmarkAMD64asm is the microbenchmark behind the AMD64asm figures
// reported in this message. Each of the b.N iterations calls
// TestAMD64EndToEnd and then TestAMD64Encoder.
func BenchmarkAMD64asm(b *testing.B) {
for i := 0; i < b.N; i++ {
// NOTE(review): nil is passed where the test functions would normally
// receive their test handle; presumably this relies on both tests
// completing without reporting through it — confirm against their definitions.
TestAMD64EndToEnd(nil)
TestAMD64Encoder(nil)
}
}
Change-Id: I4f1d37b5c2c966da3f2127705ccac9bff0038183
Reviewed-on: https://go-review.googlesource.com/63490
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Diffstat (limited to 'src/cmd/asm')
| -rw-r--r-- | src/cmd/asm/internal/arch/amd64.go | 28 | ||||
| -rw-r--r-- | src/cmd/asm/internal/asm/asm.go | 50 |
2 files changed, 17 insertions, 61 deletions
diff --git a/src/cmd/asm/internal/arch/amd64.go b/src/cmd/asm/internal/arch/amd64.go deleted file mode 100644 index ff20d32daa..0000000000 --- a/src/cmd/asm/internal/arch/amd64.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file encapsulates some of the odd characteristics of the -// AMD64 instruction set, to minimize its interaction -// with the core of the assembler. - -package arch - -import ( - "cmd/internal/obj" - "cmd/internal/obj/x86" -) - -// IsAMD4OP reports whether the op (as defined by an amd64.A* constant) is -// a 4-operand instruction. -func IsAMD4OP(op obj.As) bool { - switch op { - case x86.AVPERM2F128, - x86.AVPALIGNR, - x86.AVPERM2I128, - x86.AVINSERTI128, - x86.AVPBLENDD: - return true - } - return false -} diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 24aa0537ca..b5e4bddb96 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -384,7 +384,7 @@ func (p *Parser) asmJump(op obj.As, cond string, a []obj.Addr) { prog.Reg = p.getRegister(prog, op, &a[1]) } else { // Compare register with immediate and jump. - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) } break } @@ -567,7 +567,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { if arch.IsARMBFX(op) { // a[0] and a[1] must be constants, a[2] must be a register prog.From = a[0] - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) prog.To = a[2] break } @@ -576,13 +576,8 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.Reg = p.getRegister(prog, op, &a[1]) prog.To = a[2] case sys.AMD64: - // Catch missing operand here, because we store immediate as part of From3, and can't distinguish - // missing operand from legal value 0 in obj/x86/asm6. 
- if arch.IsAMD4OP(op) { - p.errorf("4 operands required, but only 3 are provided for %s instruction", op) - } prog.From = a[0] - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) prog.To = a[2] case sys.ARM64: // ARM64 instructions with one input and two outputs. @@ -601,7 +596,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.To = a[2] case sys.I386: prog.From = a[0] - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) prog.To = a[2] case sys.PPC64: if arch.IsPPC64CMP(op) { @@ -623,7 +618,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.To = a[2] case obj.TYPE_CONST: prog.From = a[0] - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) prog.To = a[2] default: p.errorf("invalid addressing modes for %s instruction", op) @@ -634,7 +629,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { if a[1].Type == obj.TYPE_REG { prog.Reg = p.getRegister(prog, op, &a[1]) } else { - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) } prog.To = a[2] default: @@ -646,7 +641,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { if arch.IsARMBFX(op) { // a[0] and a[1] must be constants, a[2] and a[3] must be registers prog.From = a[0] - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) prog.Reg = p.getRegister(prog, op, &a[2]) prog.To = a[3] break @@ -666,26 +661,15 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } } if p.arch.Family == sys.AMD64 { - // 4 operand instruction have form ymm1, ymm2, ymm3/m256, imm8 - // So From3 is always just a register, so we store imm8 in Offset field, - // to avoid increasing size of Prog. 
- prog.From = a[1] - prog.From3 = newAddr(a[2]) - if a[0].Type != obj.TYPE_CONST { - p.errorf("first operand must be an immediate in %s instruction", op) - } - if prog.From3.Type != obj.TYPE_REG { - p.errorf("third operand must be a register in %s instruction", op) - } - prog.From3.Offset = int64(p.getImmediate(prog, op, &a[0])) + prog.From = a[0] + prog.RestArgs = []obj.Addr{a[1], a[2]} prog.To = a[3] - prog.RegTo2 = -1 break } if p.arch.Family == sys.ARM64 { prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) - prog.From3 = newAddr(a[2]) + prog.SetFrom3(a[2]) prog.To = a[3] break } @@ -693,12 +677,12 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { if arch.IsPPC64RLD(op) { prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) - prog.From3 = newAddr(a[2]) + prog.SetFrom3(a[2]) prog.To = a[3] break } else if arch.IsPPC64ISEL(op) { // ISEL BC,RB,RA,RT becomes isel rt,ra,rb,bc - prog.From3 = newAddr(a[2]) // ra + prog.SetFrom3(a[2]) // ra prog.From = a[0] // bc prog.Reg = p.getRegister(prog, op, &a[1]) // rb prog.To = a[3] // rt @@ -712,13 +696,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { if a[1].Type == obj.TYPE_REG { prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) - prog.From3 = newAddr(a[2]) + prog.SetFrom3(a[2]) prog.To = a[3] break } else if a[1].Type == obj.TYPE_CONST { prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[2]) - prog.From3 = newAddr(a[1]) + prog.SetFrom3(a[1]) prog.To = a[3] break } else { @@ -733,7 +717,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } prog.From = a[0] prog.Reg = p.getRegister(prog, op, &a[1]) - prog.From3 = newAddr(a[2]) + prog.SetFrom3(a[2]) prog.To = a[3] break } @@ -752,10 +736,10 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { } else { mask = (^uint32(0) >> uint(mask2+1)) & (^uint32(0) << uint(31-(mask1-1))) } - prog.From3 = &obj.Addr{ + prog.SetFrom3(obj.Addr{ Type: obj.TYPE_CONST, 
Offset: int64(mask), - } + }) prog.To = a[4] break } |
