From 5ee1b849592787ed050ef3fbd9b2c58aabd20ff3 Mon Sep 17 00:00:00 2001 From: Richard Musiol Date: Tue, 5 Mar 2019 01:56:17 +0100 Subject: math, math/bits: add intrinsics for wasm This commit adds compiler intrinsics for the packages math and math/bits on the wasm architecture for better performance. benchmark old ns/op new ns/op delta BenchmarkCeil 8.31 3.21 -61.37% BenchmarkCopysign 5.24 3.88 -25.95% BenchmarkAbs 5.42 3.34 -38.38% BenchmarkFloor 8.29 3.18 -61.64% BenchmarkRoundToEven 9.76 3.26 -66.60% BenchmarkSqrtLatency 8.13 4.88 -39.98% BenchmarkSqrtPrime 5246 3535 -32.62% BenchmarkTrunc 8.29 3.15 -62.00% BenchmarkLeadingZeros 13.0 4.23 -67.46% BenchmarkLeadingZeros8 4.65 4.42 -4.95% BenchmarkLeadingZeros16 7.60 4.38 -42.37% BenchmarkLeadingZeros32 10.7 4.48 -58.13% BenchmarkLeadingZeros64 12.9 4.31 -66.59% BenchmarkTrailingZeros 6.52 4.04 -38.04% BenchmarkTrailingZeros8 4.57 4.14 -9.41% BenchmarkTrailingZeros16 6.69 4.16 -37.82% BenchmarkTrailingZeros32 6.97 4.23 -39.31% BenchmarkTrailingZeros64 6.59 4.00 -39.30% BenchmarkOnesCount 7.93 3.30 -58.39% BenchmarkOnesCount8 3.56 3.19 -10.39% BenchmarkOnesCount16 4.85 3.19 -34.23% BenchmarkOnesCount32 7.27 3.19 -56.12% BenchmarkOnesCount64 8.08 3.28 -59.41% BenchmarkRotateLeft 4.88 3.80 -22.13% BenchmarkRotateLeft64 5.03 3.63 -27.83% Change-Id: Ic1e0c2984878be8defb6eb7eb6ee63765c793222 Reviewed-on: https://go-review.googlesource.com/c/go/+/165177 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- test/codegen/mathbits.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'test/codegen/mathbits.go') diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index c77b66c3f7..9a4051a0ce 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -17,6 +17,7 @@ func LeadingZeros(n uint) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.LeadingZeros(n) } @@ -25,6 +26,7 @@ func LeadingZeros64(n uint64) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.LeadingZeros64(n) } @@ -33,6 +35,7 @@ func LeadingZeros32(n uint32) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZW" // mips:"CLZ" + // wasm:"I64Clz" return bits.LeadingZeros32(n) } @@ -41,6 +44,7 @@ func LeadingZeros16(n uint16) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.LeadingZeros16(n) } @@ -49,6 +53,7 @@ func LeadingZeros8(n uint8) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.LeadingZeros8(n) } @@ -61,6 +66,7 @@ func Len(n uint) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.Len(n) } @@ -69,6 +75,7 @@ func Len64(n uint64) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.Len64(n) } @@ -77,6 +84,7 @@ func Len32(n uint32) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.Len32(n) } @@ -85,6 +93,7 @@ func Len16(n uint16) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.Len16(n) } @@ -93,6 +102,7 @@ func Len8(n uint8) int { // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" + // wasm:"I64Clz" return bits.Len8(n) } @@ -106,6 +116,7 @@ func OnesCount(n uint) int { // s390x:"POPCNT" // ppc64:"POPCNTD" // ppc64le:"POPCNTD" + // wasm:"I64Popcnt" return bits.OnesCount(n) } @@ -115,6 +126,7 @@ func OnesCount64(n uint64) int { // s390x:"POPCNT" // ppc64:"POPCNTD" // ppc64le:"POPCNTD" + // wasm:"I64Popcnt" return bits.OnesCount64(n) } @@ -124,6 +136,7 @@ func OnesCount32(n uint32) int { // s390x:"POPCNT" // ppc64:"POPCNTW" // ppc64le:"POPCNTW" + // wasm:"I64Popcnt" return bits.OnesCount32(n) } @@ -133,6 +146,7 @@ func OnesCount16(n uint16) int { // s390x:"POPCNT" // ppc64:"POPCNTW" // ppc64le:"POPCNTW" + // wasm:"I64Popcnt" return bits.OnesCount16(n) } @@ -140,6 +154,7 @@ func OnesCount8(n uint8) int { // s390x:"POPCNT" // ppc64:"POPCNTB" // ppc64le:"POPCNTB" + // wasm:"I64Popcnt" return bits.OnesCount8(n) } @@ -187,6 +202,7 @@ func RotateLeft64(n uint64) uint64 { // ppc64:"ROTL" // ppc64le:"ROTL" // s390x:"RLLG" + // wasm:"I64Rotl" return bits.RotateLeft64(n, 37) } @@ -246,6 +262,7 @@ func TrailingZeros(n uint) int { // s390x:"FLOGR" // ppc64:"ANDN","POPCNTD" // ppc64le:"ANDN","POPCNTD" + // wasm:"I64Ctz" return bits.TrailingZeros(n) } @@ -255,6 +272,7 @@ func TrailingZeros64(n uint64) int { // s390x:"FLOGR" // ppc64:"ANDN","POPCNTD" // ppc64le:"ANDN","POPCNTD" + // wasm:"I64Ctz" return bits.TrailingZeros64(n) } @@ -264,6 +282,7 @@ func TrailingZeros32(n uint32) int { // s390x:"FLOGR","MOVWZ" // ppc64:"ANDN","POPCNTW" // ppc64le:"ANDN","POPCNTW" + // wasm:"I64Ctz" return bits.TrailingZeros32(n) } @@ -273,6 +292,7 @@ func TrailingZeros16(n uint16) int { // s390x:"FLOGR","OR\t\\$65536" // ppc64:"POPCNTD","OR\\t\\$65536" // ppc64le:"POPCNTD","OR\\t\\$65536" + // wasm:"I64Ctz" return bits.TrailingZeros16(n) } @@ -280,6 +300,7 @@ func TrailingZeros8(n uint8) int { // amd64:"BSFL","BTSL\\t\\$8" // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" // s390x:"FLOGR","OR\t\\$256" + // wasm:"I64Ctz" return bits.TrailingZeros8(n) } -- cgit v1.3