aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile
diff options
context:
space:
mode:
author: Michael Munday <mndygolang+git@gmail.com> 2025-06-27 21:05:38 +0100
committer: Michael Munday <mndygolang+git@gmail.com> 2025-07-30 12:29:15 -0700
commit: cedf63616a00c8a6a69f6bbe4bc93b6865cec842 (patch)
tree: e94c1a8afe63304f07725460363f7957325566b3 /src/cmd/compile
parent: 82a1921c3b6b493c4ff358f063c2ffffabba9fd5 (diff)
download: go-cedf63616a00c8a6a69f6bbe4bc93b6865cec842.tar.xz
cmd/compile: add floating point min/max intrinsics on s390x
Add the VECTOR FP (MINIMUM|MAXIMUM) instructions to the assembler and
use them in the compiler to implement min and max.

Note: I've allowed floating point registers to be used with the single
element instructions (those with the W instead of V prefix) to allow
easier integration into the compiler.

Change-Id: I5f80a510bd248cf483cce95f1979bf63fbae7de6
Reviewed-on: https://go-review.googlesource.com/c/go/+/684715
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <mark@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/cmd/compile')
-rw-r--r-- src/cmd/compile/internal/s390x/ssa.go 4
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/S390X.rules 3
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/S390XOps.go 6
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go 60
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteS390X.go 12
-rw-r--r-- src/cmd/compile/internal/ssagen/ssa.go 2
6 files changed, 86 insertions, 1 deletion
diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go
index 4d24881dba..ad66bfb5d8 100644
--- a/src/cmd/compile/internal/s390x/ssa.go
+++ b/src/cmd/compile/internal/s390x/ssa.go
@@ -281,6 +281,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpS390XCPSDR:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.Reg = v.Args[0].Reg()
+ case ssa.OpS390XWFMAXDB, ssa.OpS390XWFMAXSB,
+ ssa.OpS390XWFMINDB, ssa.OpS390XWFMINSB:
+ p := opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), 1 /* Java Math.Max() */)
+ p.AddRestSource(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
ssa.OpS390XMODD, ssa.OpS390XMODW,
diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules
index 231ad0615d..80e12f8e29 100644
--- a/src/cmd/compile/internal/ssa/_gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules
@@ -145,6 +145,9 @@
(Sqrt32 ...) => (FSQRTS ...)
+(Max(64|32)F ...) => (WFMAX(D|S)B ...)
+(Min(64|32)F ...) => (WFMIN(D|S)B ...)
+
// Atomic loads and stores.
// The SYNC instruction (fast-BCR-serialization) prevents store-load
// reordering. Other sequences of memory operations (load-load,
diff --git a/src/cmd/compile/internal/ssa/_gen/S390XOps.go b/src/cmd/compile/internal/ssa/_gen/S390XOps.go
index 2f57d12630..38fb3cb074 100644
--- a/src/cmd/compile/internal/ssa/_gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/S390XOps.go
@@ -222,6 +222,12 @@ func init() {
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 clear sign bit
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0
+ // Single element vector floating point min / max instructions
+ {name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
+ {name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
+ {name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
+ {name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
+
// Round to integer, float64 only.
//
// aux | rounding mode
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index e88af66f5f..36c1815ea2 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2655,6 +2655,10 @@ const (
OpS390XLPDFR
OpS390XLNDFR
OpS390XCPSDR
+ OpS390XWFMAXDB
+ OpS390XWFMAXSB
+ OpS390XWFMINDB
+ OpS390XWFMINSB
OpS390XFIDBR
OpS390XFMOVSload
OpS390XFMOVDload
@@ -35776,6 +35780,62 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "WFMAXDB",
+ argLen: 2,
+ asm: s390x.AWFMAXDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ },
+ },
+ {
+ name: "WFMAXSB",
+ argLen: 2,
+ asm: s390x.AWFMAXSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ },
+ },
+ {
+ name: "WFMINDB",
+ argLen: 2,
+ asm: s390x.AWFMINDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ },
+ },
+ {
+ name: "WFMINSB",
+ argLen: 2,
+ asm: s390x.AWFMINSB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ },
+ },
+ {
name: "FIDBR",
auxType: auxInt8,
argLen: 1,
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index 2e7492501a..a7fde81c47 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -368,6 +368,18 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValueS390X_OpLsh8x8(v)
+ case OpMax32F:
+ v.Op = OpS390XWFMAXSB
+ return true
+ case OpMax64F:
+ v.Op = OpS390XWFMAXDB
+ return true
+ case OpMin32F:
+ v.Op = OpS390XWFMINSB
+ return true
+ case OpMin64F:
+ v.Op = OpS390XWFMINDB
+ return true
case OpMod16:
return rewriteValueS390X_OpMod16(v)
case OpMod16u:
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 3deb0ecf23..bce94d35f9 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -3986,7 +3986,7 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
if typ.IsFloat() {
hasIntrinsic := false
switch Arch.LinkArch.Family {
- case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64:
+ case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64, sys.S390X:
hasIntrinsic = true
case sys.PPC64:
hasIntrinsic = buildcfg.GOPPC64 >= 9