aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/asm
diff options
context:
space:
mode:
author isharipo <iskander.sharipov@intel.com> 2017-09-13 14:32:08 +0300
committer Matthew Dempsky <mdempsky@google.com> 2017-09-15 21:05:03 +0000
commit 8c67f210a181f4d3e003e46e544ae03ce231ca42 (patch)
tree 537c869a16a02ed96ed005b345aeeec040080686 /src/cmd/asm
parent e1cf2be7a82a2421c69c48cbdf596841f29bff77 (diff)
download go-8c67f210a181f4d3e003e46e544ae03ce231ca42.tar.xz
cmd/internal/obj: change Prog.From3 to RestArgs ([]Addr)
This change makes it easier to express instructions
with arbitrary number of operands.

Rationale: previous approach with operand "hiding" does
not scale well, AVX and especially AVX512 have many
instructions with 3+ operands.

x86 asm backend is updated to handle up to 6 explicit operands.
It also fixes issue with 4-th immediate operand type checks.
All `ytab` tables are updated accordingly.

Changes to non-x86 backends only include these patterns:
`p.From3 = X` => `p.SetFrom3(X)`
`p.From3.X = Y` => `p.GetFrom3().X = Y`

Over time, other backends can adapt Prog.RestArgs
and reduce the amount of workarounds.

-- Performance --

x/benchmark/build:

$ benchstat upstream.bench patched.bench
name      old time/op  new time/op  delta
Build-48  21.7s ± 2%   21.8s ± 2%   ~  (p=0.218 n=10+10)

name      old binary-size  new binary-size  delta
Build-48  10.3M ± 0%       10.3M ± 0%       ~  (all equal)

name      old build-time/op  new build-time/op  delta
Build-48  21.7s ± 2%         21.8s ± 2%         ~  (p=0.218 n=10+10)

name      old build-peak-RSS-bytes  new build-peak-RSS-bytes  delta
Build-48  145MB ± 5%                148MB ± 5%                ~  (p=0.218 n=10+10)

name      old build-user+sys-time/op  new build-user+sys-time/op  delta
Build-48  21.0s ± 2%                  21.2s ± 2%                  ~  (p=0.075 n=10+10)

Microbenchmark shows a slight slowdown.

name        old time/op  new time/op  delta
AMD64asm-4  49.5ms ± 1%  49.9ms ± 1%  +0.67%  (p=0.001 n=23+15)

func BenchmarkAMD64asm(b *testing.B) {
	for i := 0; i < b.N; i++ {
		TestAMD64EndToEnd(nil)
		TestAMD64Encoder(nil)
	}
}

Change-Id: I4f1d37b5c2c966da3f2127705ccac9bff0038183
Reviewed-on: https://go-review.googlesource.com/63490
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Diffstat (limited to 'src/cmd/asm')
-rw-r--r-- src/cmd/asm/internal/arch/amd64.go 28
-rw-r--r-- src/cmd/asm/internal/asm/asm.go 50
2 files changed, 17 insertions, 61 deletions
diff --git a/src/cmd/asm/internal/arch/amd64.go b/src/cmd/asm/internal/arch/amd64.go
deleted file mode 100644
index ff20d32daa..0000000000
--- a/src/cmd/asm/internal/arch/amd64.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file encapsulates some of the odd characteristics of the
-// AMD64 instruction set, to minimize its interaction
-// with the core of the assembler.
-
-package arch
-
-import (
- "cmd/internal/obj"
- "cmd/internal/obj/x86"
-)
-
-// IsAMD4OP reports whether the op (as defined by an amd64.A* constant) is
-// a 4-operand instruction.
-func IsAMD4OP(op obj.As) bool {
- switch op {
- case x86.AVPERM2F128,
- x86.AVPALIGNR,
- x86.AVPERM2I128,
- x86.AVINSERTI128,
- x86.AVPBLENDD:
- return true
- }
- return false
-}
diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go
index 24aa0537ca..b5e4bddb96 100644
--- a/src/cmd/asm/internal/asm/asm.go
+++ b/src/cmd/asm/internal/asm/asm.go
@@ -384,7 +384,7 @@ func (p *Parser) asmJump(op obj.As, cond string, a []obj.Addr) {
prog.Reg = p.getRegister(prog, op, &a[1])
} else {
// Compare register with immediate and jump.
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
}
break
}
@@ -567,7 +567,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
if arch.IsARMBFX(op) {
// a[0] and a[1] must be constants, a[2] must be a register
prog.From = a[0]
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
prog.To = a[2]
break
}
@@ -576,13 +576,8 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
prog.Reg = p.getRegister(prog, op, &a[1])
prog.To = a[2]
case sys.AMD64:
- // Catch missing operand here, because we store immediate as part of From3, and can't distinguish
- // missing operand from legal value 0 in obj/x86/asm6.
- if arch.IsAMD4OP(op) {
- p.errorf("4 operands required, but only 3 are provided for %s instruction", op)
- }
prog.From = a[0]
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
prog.To = a[2]
case sys.ARM64:
// ARM64 instructions with one input and two outputs.
@@ -601,7 +596,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
prog.To = a[2]
case sys.I386:
prog.From = a[0]
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
prog.To = a[2]
case sys.PPC64:
if arch.IsPPC64CMP(op) {
@@ -623,7 +618,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
prog.To = a[2]
case obj.TYPE_CONST:
prog.From = a[0]
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
prog.To = a[2]
default:
p.errorf("invalid addressing modes for %s instruction", op)
@@ -634,7 +629,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
if a[1].Type == obj.TYPE_REG {
prog.Reg = p.getRegister(prog, op, &a[1])
} else {
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
}
prog.To = a[2]
default:
@@ -646,7 +641,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
if arch.IsARMBFX(op) {
// a[0] and a[1] must be constants, a[2] and a[3] must be registers
prog.From = a[0]
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
prog.Reg = p.getRegister(prog, op, &a[2])
prog.To = a[3]
break
@@ -666,26 +661,15 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
}
}
if p.arch.Family == sys.AMD64 {
- // 4 operand instruction have form ymm1, ymm2, ymm3/m256, imm8
- // So From3 is always just a register, so we store imm8 in Offset field,
- // to avoid increasing size of Prog.
- prog.From = a[1]
- prog.From3 = newAddr(a[2])
- if a[0].Type != obj.TYPE_CONST {
- p.errorf("first operand must be an immediate in %s instruction", op)
- }
- if prog.From3.Type != obj.TYPE_REG {
- p.errorf("third operand must be a register in %s instruction", op)
- }
- prog.From3.Offset = int64(p.getImmediate(prog, op, &a[0]))
+ prog.From = a[0]
+ prog.RestArgs = []obj.Addr{a[1], a[2]}
prog.To = a[3]
- prog.RegTo2 = -1
break
}
if p.arch.Family == sys.ARM64 {
prog.From = a[0]
prog.Reg = p.getRegister(prog, op, &a[1])
- prog.From3 = newAddr(a[2])
+ prog.SetFrom3(a[2])
prog.To = a[3]
break
}
@@ -693,12 +677,12 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
if arch.IsPPC64RLD(op) {
prog.From = a[0]
prog.Reg = p.getRegister(prog, op, &a[1])
- prog.From3 = newAddr(a[2])
+ prog.SetFrom3(a[2])
prog.To = a[3]
break
} else if arch.IsPPC64ISEL(op) {
// ISEL BC,RB,RA,RT becomes isel rt,ra,rb,bc
- prog.From3 = newAddr(a[2]) // ra
+ prog.SetFrom3(a[2]) // ra
prog.From = a[0] // bc
prog.Reg = p.getRegister(prog, op, &a[1]) // rb
prog.To = a[3] // rt
@@ -712,13 +696,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
if a[1].Type == obj.TYPE_REG {
prog.From = a[0]
prog.Reg = p.getRegister(prog, op, &a[1])
- prog.From3 = newAddr(a[2])
+ prog.SetFrom3(a[2])
prog.To = a[3]
break
} else if a[1].Type == obj.TYPE_CONST {
prog.From = a[0]
prog.Reg = p.getRegister(prog, op, &a[2])
- prog.From3 = newAddr(a[1])
+ prog.SetFrom3(a[1])
prog.To = a[3]
break
} else {
@@ -733,7 +717,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
}
prog.From = a[0]
prog.Reg = p.getRegister(prog, op, &a[1])
- prog.From3 = newAddr(a[2])
+ prog.SetFrom3(a[2])
prog.To = a[3]
break
}
@@ -752,10 +736,10 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
} else {
mask = (^uint32(0) >> uint(mask2+1)) & (^uint32(0) << uint(31-(mask1-1)))
}
- prog.From3 = &obj.Addr{
+ prog.SetFrom3(obj.Addr{
Type: obj.TYPE_CONST,
Offset: int64(mask),
- }
+ })
prog.To = a[4]
break
}