| author | Michael Pratt <mpratt@google.com> | 2020-10-08 14:38:39 -0400 |
|---|---|---|
| committer | Michael Pratt <mpratt@google.com> | 2020-10-23 14:17:57 +0000 |
| commit | ad61343f886cc5ce677e7bd62385144b2ba7b8f5 (patch) | |
| tree | a48e880416e8464ffa21e700772545d40d87b6c4 /src/runtime/internal/atomic | |
| parent | 7e01b3b3879593828b89f4ff4a04667a547b22d9 (diff) | |
| download | go-ad61343f886cc5ce677e7bd62385144b2ba7b8f5.tar.xz | |
runtime/internal/atomic: add 32-bit And/Or
These will be used in a following CL to perform larger bit clear and bit set operations than And8/Or8.
Change-Id: I60f7b1099e29b69eb64add77564faee862880a8d
Reviewed-on: https://go-review.googlesource.com/c/go/+/260977
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: Michael Pratt <mpratt@google.com>
Diffstat (limited to 'src/runtime/internal/atomic')
20 files changed, 400 insertions, 8 deletions
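Before the raw diff, a quick illustration of what the new primitives are for. The sketch below is not part of the CL: the `and`/`or` helpers emulate the semantics of the new `runtime/internal/atomic.And`/`Or` with a `sync/atomic` CAS loop (the same fallback strategy the arm port uses in this change), and the flag layout is hypothetical. On 386/amd64 the real primitives compile to a single `LOCK ANDL`/`ORL`; other ports use LL/SC loops or, on s390x, the interlocked `LAN`/`LAO` instructions.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// and emulates runtime/internal/atomic.And: *addr &= v, atomically.
// This mirrors the CAS-loop fallback the arm port uses in this CL.
func and(addr *uint32, v uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old&v) {
			return
		}
	}
}

// or emulates runtime/internal/atomic.Or: *addr |= v, atomically.
func or(addr *uint32, v uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old|v) {
			return
		}
	}
}

func main() {
	// Hypothetical flag word. And8/Or8 can only touch a single byte,
	// so a mask spanning bits 4..11 would take two byte operations;
	// the 32-bit And/Or clear or set the whole mask in one atomic op.
	const mask uint32 = 0xff0
	var flags uint32

	or(&flags, mask) // set the whole field at once
	fmt.Printf("after or:  %#x\n", flags)
	and(&flags, ^mask) // clear it again
	fmt.Printf("after and: %#x\n", flags)
}
```

This is the "larger bit clear and bit set" the commit message refers to: one 32-bit atomic read-modify-write instead of several 8-bit ones.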
```diff
diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s
index 7ebf675ac5..d82faef1f0 100644
--- a/src/runtime/internal/atomic/asm_386.s
+++ b/src/runtime/internal/atomic/asm_386.s
@@ -243,3 +243,19 @@ TEXT ·Store8(SB), NOSPLIT, $0-5
 	MOVB	val+4(FP), AX
 	XCHGB	AX, 0(BX)
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+	MOVL	ptr+0(FP), AX
+	MOVL	val+4(FP), BX
+	LOCK
+	ORL	BX, (AX)
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+	MOVL	ptr+0(FP), AX
+	MOVL	val+4(FP), BX
+	LOCK
+	ANDL	BX, (AX)
+	RET
diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s
index 80fb31285d..2cf7c55870 100644
--- a/src/runtime/internal/atomic/asm_amd64.s
+++ b/src/runtime/internal/atomic/asm_amd64.s
@@ -169,3 +169,19 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
 	LOCK
 	ANDB	BX, (AX)
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+	MOVQ	ptr+0(FP), AX
+	MOVL	val+8(FP), BX
+	LOCK
+	ORL	BX, (AX)
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+	MOVQ	ptr+0(FP), AX
+	MOVL	val+8(FP), BX
+	LOCK
+	ANDL	BX, (AX)
+	RET
diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s
index 03fb822929..a515683ebb 100644
--- a/src/runtime/internal/atomic/asm_mips64x.s
+++ b/src/runtime/internal/atomic/asm_mips64x.s
@@ -243,3 +243,29 @@ TEXT ·And8(SB), NOSPLIT, $0-9
 	BEQ	R4, -4(PC)
 	SYNC
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOVV	ptr+0(FP), R1
+	MOVW	val+8(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	OR	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOVV	ptr+0(FP), R1
+	MOVW	val+8(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	AND	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s
index 63bb548825..2b2cfabe08 100644
--- a/src/runtime/internal/atomic/asm_mipsx.s
+++ b/src/runtime/internal/atomic/asm_mipsx.s
@@ -172,3 +172,29 @@ try_and8:
 	BEQ	R4, try_and8
 	SYNC
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-8
+	MOVW	ptr+0(FP), R1
+	MOVW	val+4(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	OR	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-8
+	MOVW	ptr+0(FP), R1
+	MOVW	val+4(FP), R2
+
+	SYNC
+	LL	(R1), R3
+	AND	R2, R3
+	SC	R3, (R1)
+	BEQ	R3, -4(PC)
+	SYNC
+	RET
diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s
index c0237de4d0..bb009ab34d 100644
--- a/src/runtime/internal/atomic/asm_ppc64x.s
+++ b/src/runtime/internal/atomic/asm_ppc64x.s
@@ -222,8 +222,32 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
 	MOVBZ	val+8(FP), R4
 	LWSYNC
 again:
-	LBAR	(R3),R6
-	AND	R4,R6
-	STBCCC	R6,(R3)
+	LBAR	(R3), R6
+	AND	R4, R6
+	STBCCC	R6, (R3)
 	BNE	again
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LWSYNC
+again:
+	LWAR	(R3), R6
+	OR	R4, R6
+	STWCCC	R6, (R3)
+	BNE	again
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LWSYNC
+again:
+	LWAR	(R3),R6
+	AND	R4, R6
+	STWCCC	R6, (R3)
+	BNE	again
+	RET
diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s
index 9a19bc0ece..daf1f3cc9f 100644
--- a/src/runtime/internal/atomic/asm_s390x.s
+++ b/src/runtime/internal/atomic/asm_s390x.s
@@ -174,8 +174,8 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
 
 // func Or8(addr *uint8, v uint8)
 TEXT ·Or8(SB), NOSPLIT, $0-9
-	MOVD   ptr+0(FP), R3
-	MOVBZ  val+8(FP), R4
+	MOVD	ptr+0(FP), R3
+	MOVBZ	val+8(FP), R4
 	// We don't have atomic operations that work on individual bytes so we
 	// need to align addr down to a word boundary and create a mask
 	// containing v to OR with the entire word atomically.
@@ -188,8 +188,8 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 
 // func And8(addr *uint8, v uint8)
 TEXT ·And8(SB), NOSPLIT, $0-9
-	MOVD   ptr+0(FP), R3
-	MOVBZ  val+8(FP), R4
+	MOVD	ptr+0(FP), R3
+	MOVBZ	val+8(FP), R4
 	// We don't have atomic operations that work on individual bytes so we
 	// need to align addr down to a word boundary and create a mask
 	// containing v to AND with the entire word atomically.
@@ -200,3 +200,17 @@ TEXT ·And8(SB), NOSPLIT, $0-9
 	RLL	R5, R4, R4	// R4 = rotl(R4, R5)
 	LAN	R4, R6, 0(R3)	// R6 = *R3; *R3 &= R4; (atomic)
 	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LAO	R4, R6, 0(R3)	// R6 = *R3; *R3 |= R4; (atomic)
+	RET
+
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R3
+	MOVW	val+8(FP), R4
+	LAN	R4, R6, 0(R3)	// R6 = *R3; *R3 &= R4; (atomic)
+	RET
diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go
index 06ce6a5356..1bfcb1143d 100644
--- a/src/runtime/internal/atomic/atomic_386.go
+++ b/src/runtime/internal/atomic/atomic_386.go
@@ -69,6 +69,12 @@ func And8(ptr *uint8, val uint8)
 //go:noescape
 func Or8(ptr *uint8, val uint8)
 
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go
index 1b71a16d94..e36eb83a11 100644
--- a/src/runtime/internal/atomic/atomic_amd64.go
+++ b/src/runtime/internal/atomic/atomic_amd64.go
@@ -77,6 +77,12 @@ func And8(ptr *uint8, val uint8)
 //go:noescape
 func Or8(ptr *uint8, val uint8)
 
+//go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go
index 67d529c1cb..546b3d6120 100644
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -183,6 +183,26 @@ func And8(addr *uint8, v uint8) {
 }
 
 //go:nosplit
+func Or(addr *uint32, v uint32) {
+	for {
+		old := *addr
+		if Cas(addr, old, old|v) {
+			return
+		}
+	}
+}
+
+//go:nosplit
+func And(addr *uint32, v uint32) {
+	for {
+		old := *addr
+		if Cas(addr, old, old&v) {
+			return
+		}
+	}
+}
+
+//go:nosplit
 func armcas(ptr *uint32, old, new uint32) bool
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go
index c9b4322fe9..d49bee8936 100644
--- a/src/runtime/internal/atomic/atomic_arm64.go
+++ b/src/runtime/internal/atomic/atomic_arm64.go
@@ -54,6 +54,12 @@ func Or8(ptr *uint8, val uint8)
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s
index 36c7698b18..0cf3c40223 100644
--- a/src/runtime/internal/atomic/atomic_arm64.s
+++ b/src/runtime/internal/atomic/atomic_arm64.s
@@ -164,3 +164,22 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 	CBNZ	R3, -3(PC)
 	RET
 
+// func And(addr *uint32, v uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R0
+	MOVW	val+8(FP), R1
+	LDAXRW	(R0), R2
+	AND	R1, R2
+	STLXRW	R2, (R0), R3
+	CBNZ	R3, -3(PC)
+	RET
+
+// func Or(addr *uint32, v uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOVD	ptr+0(FP), R0
+	MOVW	val+8(FP), R1
+	LDAXRW	(R0), R2
+	ORR	R1, R2
+	STLXRW	R2, (R0), R3
+	CBNZ	R3, -3(PC)
+	RET
diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go
index fca2242514..b0109d72b0 100644
--- a/src/runtime/internal/atomic/atomic_mips64x.go
+++ b/src/runtime/internal/atomic/atomic_mips64x.go
@@ -56,6 +56,12 @@ func Or8(ptr *uint8, val uint8)
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go
index be1e6a038b..1336b50121 100644
--- a/src/runtime/internal/atomic/atomic_mipsx.go
+++ b/src/runtime/internal/atomic/atomic_mipsx.go
@@ -142,6 +142,12 @@ func And8(ptr *uint8, val uint8)
 func Or8(ptr *uint8, val uint8)
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Store(ptr *uint32, val uint32)
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go
index e759bb27a2..e4b109f0ec 100644
--- a/src/runtime/internal/atomic/atomic_ppc64x.go
+++ b/src/runtime/internal/atomic/atomic_ppc64x.go
@@ -56,6 +56,12 @@ func Or8(ptr *uint8, val uint8)
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go
index 617bc1a3eb..8f24d61625 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.go
+++ b/src/runtime/internal/atomic/atomic_riscv64.go
@@ -52,6 +52,12 @@ func Or8(ptr *uint8, val uint8)
 func And8(ptr *uint8, val uint8)
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Cas64(ptr *uint64, old, new uint64) bool
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s
index db139d690a..74c896cea6 100644
--- a/src/runtime/internal/atomic/atomic_riscv64.s
+++ b/src/runtime/internal/atomic/atomic_riscv64.s
@@ -242,3 +242,17 @@ TEXT ·Or8(SB), NOSPLIT, $0-9
 	SLL	A2, A1
 	AMOORW	A1, (A0), ZERO
 	RET
+
+// func And(ptr *uint32, val uint32)
+TEXT ·And(SB), NOSPLIT, $0-12
+	MOV	ptr+0(FP), A0
+	MOVW	val+8(FP), A1
+	AMOANDW	A1, (A0), ZERO
+	RET
+
+// func Or(ptr *uint32, val uint32)
+TEXT ·Or(SB), NOSPLIT, $0-12
+	MOV	ptr+0(FP), A0
+	MOVW	val+8(FP), A1
+	AMOORW	A1, (A0), ZERO
+	RET
diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go
index b649caa39f..a058d60102 100644
--- a/src/runtime/internal/atomic/atomic_s390x.go
+++ b/src/runtime/internal/atomic/atomic_s390x.go
@@ -92,6 +92,12 @@ func Or8(ptr *uint8, val uint8)
 // NOTE: Do not add atomicxor8 (XOR is not idempotent).
 
 //go:noescape
+func And(ptr *uint32, val uint32)
+
+//go:noescape
+func Or(ptr *uint32, val uint32)
+
+//go:noescape
 func Xadd(ptr *uint32, delta int32) uint32
 
 //go:noescape
diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go
index a9f95077c0..c9c2eba248 100644
--- a/src/runtime/internal/atomic/atomic_test.go
+++ b/src/runtime/internal/atomic/atomic_test.go
@@ -150,6 +150,45 @@ func TestAnd8(t *testing.T) {
 	}
 }
 
+func TestAnd(t *testing.T) {
+	// Basic sanity check.
+	x := uint32(0xffffffff)
+	for i := uint32(0); i < 32; i++ {
+		atomic.And(&x, ^(1 << i))
+		if r := uint32(0xffffffff) << (i + 1); x != r {
+			t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1<<i), r, x)
+		}
+	}
+
+	// Set every bit in array to 1.
+	a := make([]uint32, 1<<12)
+	for i := range a {
+		a[i] = 0xffffffff
+	}
+
+	// Clear array bit-by-bit in different goroutines.
+	done := make(chan bool)
+	for i := 0; i < 32; i++ {
+		m := ^uint32(1 << i)
+		go func() {
+			for i := range a {
+				atomic.And(&a[i], m)
+			}
+			done <- true
+		}()
+	}
+	for i := 0; i < 32; i++ {
+		<-done
+	}
+
+	// Check that the array has been totally cleared.
+	for i, v := range a {
+		if v != 0 {
+			t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+		}
+	}
+}
+
 func TestOr8(t *testing.T) {
 	// Basic sanity check.
 	x := uint8(0)
@@ -186,7 +225,43 @@ func TestOr8(t *testing.T) {
 	}
 }
 
-func TestBitwiseContended(t *testing.T) {
+func TestOr(t *testing.T) {
+	// Basic sanity check.
+	x := uint32(0)
+	for i := uint32(0); i < 32; i++ {
+		atomic.Or(&x, 1<<i)
+		if r := (uint32(1) << (i + 1)) - 1; x != r {
+			t.Fatalf("setting bit %#x: want %#x, got %#x", uint32(1)<<i, r, x)
+		}
+	}
+
+	// Start with every bit in array set to 0.
+	a := make([]uint32, 1<<12)
+
+	// Set every bit in array bit-by-bit in different goroutines.
+	done := make(chan bool)
+	for i := 0; i < 32; i++ {
+		m := uint32(1 << i)
+		go func() {
+			for i := range a {
+				atomic.Or(&a[i], m)
+			}
+			done <- true
+		}()
+	}
+	for i := 0; i < 32; i++ {
+		<-done
+	}
+
+	// Check that the array has been totally set.
+	for i, v := range a {
+		if v != 0xffffffff {
+			t.Fatalf("a[%v] not fully set: want %#x, got %#x", i, uint32(0xffffffff), v)
+		}
+	}
+}
+
+func TestBitwiseContended8(t *testing.T) {
 	// Start with every bit in array set to 0.
 	a := make([]uint8, 16)
 
@@ -228,6 +303,48 @@ func TestBitwiseContended(t *testing.T) {
 	}
 }
 
+func TestBitwiseContended(t *testing.T) {
+	// Start with every bit in array set to 0.
+	a := make([]uint32, 16)
+
+	// Iterations to try.
+	N := 1 << 16
+	if testing.Short() {
+		N = 1 << 10
+	}
+
+	// Set and then clear every bit in the array bit-by-bit in different goroutines.
+	done := make(chan bool)
+	for i := 0; i < 32; i++ {
+		m := uint32(1 << i)
+		go func() {
+			for n := 0; n < N; n++ {
+				for i := range a {
+					atomic.Or(&a[i], m)
+					if atomic.Load(&a[i])&m != m {
+						t.Errorf("a[%v] bit %#x not set", i, m)
+					}
+					atomic.And(&a[i], ^m)
+					if atomic.Load(&a[i])&m != 0 {
+						t.Errorf("a[%v] bit %#x not clear", i, m)
+					}
+				}
+			}
+			done <- true
+		}()
+	}
+	for i := 0; i < 32; i++ {
+		<-done
+	}
+
+	// Check that the array has been totally cleared.
+	for i, v := range a {
+		if v != 0 {
+			t.Fatalf("a[%v] not cleared: want %#x, got %#x", i, uint32(0), v)
+		}
+	}
+}
+
 func TestStorepNoWB(t *testing.T) {
 	var p [2]*int
 	for i := range p {
diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go
index 60a4942884..b05d98ed51 100644
--- a/src/runtime/internal/atomic/atomic_wasm.go
+++ b/src/runtime/internal/atomic/atomic_wasm.go
@@ -133,6 +133,18 @@ func Or8(ptr *uint8, val uint8) {
 
 //go:nosplit
 //go:noinline
+func And(ptr *uint32, val uint32) {
+	*ptr = *ptr & val
+}
+
+//go:nosplit
+//go:noinline
+func Or(ptr *uint32, val uint32) {
+	*ptr = *ptr | val
+}
+
+//go:nosplit
+//go:noinline
 func Cas64(ptr *uint64, old, new uint64) bool {
 	if *ptr == old {
 		*ptr = new
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
index de71b0f2c7..434aa6d434 100644
--- a/src/runtime/internal/atomic/bench_test.go
+++ b/src/runtime/internal/atomic/bench_test.go
@@ -51,6 +51,14 @@ func BenchmarkAnd8(b *testing.B) {
 	}
 }
 
+func BenchmarkAnd(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	for i := 0; i < b.N; i++ {
+		atomic.And(&x[63], uint32(i))
+	}
+}
+
 func BenchmarkAnd8Parallel(b *testing.B) {
 	var x [512]uint8 // give byte its own cache line
 	sink = &x
@@ -63,6 +71,18 @@ func BenchmarkAnd8Parallel(b *testing.B) {
 	})
 }
 
+func BenchmarkAndParallel(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	b.RunParallel(func(pb *testing.PB) {
+		i := uint32(0)
+		for pb.Next() {
+			atomic.And(&x[63], i)
+			i++
+		}
+	})
+}
+
 func BenchmarkOr8(b *testing.B) {
 	var x [512]uint8 // give byte its own cache line
 	sink = &x
@@ -71,6 +91,14 @@ func BenchmarkOr8(b *testing.B) {
 	}
 }
 
+func BenchmarkOr(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	for i := 0; i < b.N; i++ {
+		atomic.Or(&x[63], uint32(i))
+	}
+}
+
 func BenchmarkOr8Parallel(b *testing.B) {
 	var x [512]uint8 // give byte its own cache line
 	sink = &x
@@ -83,6 +111,18 @@ func BenchmarkOr8Parallel(b *testing.B) {
 	})
 }
 
+func BenchmarkOrParallel(b *testing.B) {
+	var x [128]uint32 // give x its own cache line
+	sink = &x
+	b.RunParallel(func(pb *testing.PB) {
+		i := uint32(0)
+		for pb.Next() {
+			atomic.Or(&x[63], i)
+			i++
+		}
+	})
+}
+
 func BenchmarkXadd(b *testing.B) {
 	var x uint32
 	ptr := &x
```
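To exercise the new tests and benchmarks from a built Go development tree, one reasonable invocation (the regexps below are illustrative, not part of the CL):

```
go test -v -run 'TestAnd$|TestOr$|TestBitwiseContended$' runtime/internal/atomic
go test -run '^$' -bench 'BenchmarkAnd$|BenchmarkOr$' runtime/internal/atomic
```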
