From e8f0340fa483c6deb0bf7bba9788a2dd0f2f8a59 Mon Sep 17 00:00:00 2001 From: Wayne Zuo Date: Tue, 9 Aug 2022 23:53:37 +0800 Subject: cmd/compile: intrinsify RotateLeft{32,64} on loong64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark on crypto/sha256 (provided by Xiaodong Liu): name old time/op new time/op delta Hash8Bytes/New 1.19µs ± 0% 0.97µs ± 0% -18.75% (p=0.000 n=9+9) Hash8Bytes/Sum224 1.21µs ± 0% 0.97µs ± 0% -20.04% (p=0.000 n=9+10) Hash8Bytes/Sum256 1.21µs ± 0% 0.98µs ± 0% -19.16% (p=0.000 n=10+7) Hash1K/New 15.9µs ± 0% 12.4µs ± 0% -22.10% (p=0.000 n=10+10) Hash1K/Sum224 15.9µs ± 0% 12.4µs ± 0% -22.18% (p=0.000 n=8+10) Hash1K/Sum256 15.9µs ± 0% 12.4µs ± 0% -22.15% (p=0.000 n=10+9) Hash8K/New 119µs ± 0% 92µs ± 0% -22.40% (p=0.000 n=10+9) Hash8K/Sum224 119µs ± 0% 92µs ± 0% -22.41% (p=0.000 n=9+10) Hash8K/Sum256 119µs ± 0% 92µs ± 0% -22.40% (p=0.000 n=9+9) name old speed new speed delta Hash8Bytes/New 6.70MB/s ± 0% 8.25MB/s ± 0% +23.13% (p=0.000 n=10+10) Hash8Bytes/Sum224 6.60MB/s ± 0% 8.26MB/s ± 0% +25.06% (p=0.000 n=10+10) Hash8Bytes/Sum256 6.59MB/s ± 0% 8.15MB/s ± 0% +23.67% (p=0.000 n=10+7) Hash1K/New 64.3MB/s ± 0% 82.5MB/s ± 0% +28.36% (p=0.000 n=10+10) Hash1K/Sum224 64.3MB/s ± 0% 82.6MB/s ± 0% +28.51% (p=0.000 n=10+10) Hash1K/Sum256 64.3MB/s ± 0% 82.6MB/s ± 0% +28.46% (p=0.000 n=9+9) Hash8K/New 69.0MB/s ± 0% 89.0MB/s ± 0% +28.87% (p=0.000 n=10+8) Hash8K/Sum224 69.0MB/s ± 0% 89.0MB/s ± 0% +28.88% (p=0.000 n=9+10) Hash8K/Sum256 69.0MB/s ± 0% 88.9MB/s ± 0% +28.87% (p=0.000 n=8+9) Benchmark on crypto/sha512 (provided by Xiaodong Liu): name old time/op new time/op delta Hash8Bytes/New 1.55µs ± 0% 1.31µs ± 0% -15.67% (p=0.000 n=10+10) Hash8Bytes/Sum384 1.59µs ± 0% 1.35µs ± 0% -14.97% (p=0.000 n=10+10) Hash8Bytes/Sum512 1.62µs ± 0% 1.39µs ± 0% -14.02% (p=0.000 n=10+10) Hash1K/New 10.7µs ± 0% 8.6µs ± 0% -19.60% (p=0.000 n=8+8) Hash1K/Sum384 10.8µs ± 0% 8.7µs ± 0% -19.40% (p=0.000 n=9+9) Hash1K/Sum512 10.8µs ± 
0% 8.7µs ± 0% -19.35% (p=0.000 n=9+10) Hash8K/New 74.6µs ± 0% 59.6µs ± 0% -20.08% (p=0.000 n=10+9) Hash8K/Sum384 74.7µs ± 0% 59.7µs ± 0% -20.04% (p=0.000 n=9+8) Hash8K/Sum512 74.7µs ± 0% 59.7µs ± 0% -20.01% (p=0.000 n=10+10) name old speed new speed delta Hash8Bytes/New 5.16MB/s ± 0% 6.12MB/s ± 0% +18.60% (p=0.000 n=10+8) Hash8Bytes/Sum384 5.02MB/s ± 0% 5.90MB/s ± 0% +17.56% (p=0.000 n=10+10) Hash8Bytes/Sum512 4.94MB/s ± 0% 5.74MB/s ± 0% +16.29% (p=0.000 n=10+9) Hash1K/New 95.4MB/s ± 0% 118.6MB/s ± 0% +24.38% (p=0.000 n=10+10) Hash1K/Sum384 95.0MB/s ± 0% 117.9MB/s ± 0% +24.06% (p=0.000 n=8+9) Hash1K/Sum512 94.8MB/s ± 0% 117.5MB/s ± 0% +23.99% (p=0.000 n=8+9) Hash8K/New 110MB/s ± 0% 137MB/s ± 0% +25.11% (p=0.000 n=9+6) Hash8K/Sum384 110MB/s ± 0% 137MB/s ± 0% +25.07% (p=0.000 n=9+8) Hash8K/Sum512 110MB/s ± 0% 137MB/s ± 0% +25.01% (p=0.000 n=10+10) Change-Id: I28ccfce634659305a336c8e0a3f8589f7361d661 Reviewed-on: https://go-review.googlesource.com/c/go/+/422317 Reviewed-by: Keith Randall Run-TryBot: Wayne Zuo Reviewed-by: Ian Lance Taylor Reviewed-by: David Chase --- test/codegen/rotate.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'test/codegen') diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index 5876050ba0..f42993532d 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -18,6 +18,7 @@ func rot64(x uint64) uint64 { // amd64:"ROLQ\t[$]7" // ppc64:"ROTL\t[$]7" // ppc64le:"ROTL\t[$]7" + // loong64: "ROTRV\t[$]57" a += x<<7 | x>>57 // amd64:"ROLQ\t[$]8" @@ -25,6 +26,7 @@ func rot64(x uint64) uint64 { // s390x:"RISBGZ\t[$]0, [$]63, [$]8, " // ppc64:"ROTL\t[$]8" // ppc64le:"ROTL\t[$]8" + // loong64: "ROTRV\t[$]56" a += x<<8 + x>>56 // amd64:"ROLQ\t[$]9" @@ -32,6 +34,7 @@ func rot64(x uint64) uint64 { // s390x:"RISBGZ\t[$]0, [$]63, [$]9, " // ppc64:"ROTL\t[$]9" // ppc64le:"ROTL\t[$]9" + // loong64: "ROTRV\t[$]55" a += x<<9 ^ x>>55 // amd64:"ROLQ\t[$]10" @@ -41,6 +44,7 @@ func rot64(x uint64) uint64 { // 
ppc64le:"ROTL\t[$]10" // arm64:"ROR\t[$]54" // s390x:"RISBGZ\t[$]0, [$]63, [$]10, " + // loong64: "ROTRV\t[$]54" a += bits.RotateLeft64(x, 10) return a @@ -53,6 +57,7 @@ func rot32(x uint32) uint32 { // arm:"MOVW\tR\\d+@>25" // ppc64:"ROTLW\t[$]7" // ppc64le:"ROTLW\t[$]7" + // loong64: "ROTR\t[$]25" a += x<<7 | x>>25 // amd64:`ROLL\t[$]8` @@ -61,6 +66,7 @@ func rot32(x uint32) uint32 { // s390x:"RLL\t[$]8" // ppc64:"ROTLW\t[$]8" // ppc64le:"ROTLW\t[$]8" + // loong64: "ROTR\t[$]24" a += x<<8 + x>>24 // amd64:"ROLL\t[$]9" @@ -69,6 +75,7 @@ func rot32(x uint32) uint32 { // s390x:"RLL\t[$]9" // ppc64:"ROTLW\t[$]9" // ppc64le:"ROTLW\t[$]9" + // loong64: "ROTR\t[$]23" a += x<<9 ^ x>>23 // amd64:"ROLL\t[$]10" @@ -79,6 +86,7 @@ func rot32(x uint32) uint32 { // ppc64le:"ROTLW\t[$]10" // arm64:"RORW\t[$]22" // s390x:"RLL\t[$]10" + // loong64: "ROTR\t[$]22" a += bits.RotateLeft32(x, 10) return a @@ -127,12 +135,14 @@ func rot64nc(x uint64, z uint) uint64 { // arm64:"ROR","NEG",-"AND" // ppc64:"ROTL",-"NEG",-"AND" // ppc64le:"ROTL",-"NEG",-"AND" + // loong64: "ROTRV", -"AND" a += x<<z | x>>(64-z) // amd64:"RORQ",-"AND" // arm64:"ROR",-"NEG",-"AND" // ppc64:"ROTL","NEG",-"AND" // ppc64le:"ROTL","NEG",-"AND" + // loong64: "ROTRV", -"AND" a += x>>z | x<<(64-z) return a @@ -147,12 +157,14 @@ func rot32nc(x uint32, z uint) uint32 { // arm64:"ROR","NEG",-"AND" // ppc64:"ROTLW",-"NEG",-"AND" // ppc64le:"ROTLW",-"NEG",-"AND" + // loong64: "ROTR", -"AND" a += x<<z | x>>(32-z) // amd64:"RORL",-"AND" // arm64:"ROR",-"NEG",-"AND" // ppc64:"ROTLW","NEG",-"AND" // ppc64le:"ROTLW","NEG",-"AND" + // loong64: "ROTR", -"AND" a += x>>z | x<<(32-z) return a -- cgit v1.3