cmd/compile,sync/atomic: make Add, And & Or SQCST on PPC64 Fixes #79186 Change-Id: If7e298270ac6252b092371725d6a96aa871bf919 Reviewed-on: https://go-review.googlesource.com/c/go/+/774020 LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Jayanth Krishnamurthy <jayanth.krishnamurthy@ibm.com> Auto-Submit: Jorropo <jorropo.pgm@gmail.com> Reviewed-by: Paul Murphy <paumurph@redhat.com>
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index b2358a9..a0d81d3 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go
@@ -135,6 +135,7 @@ // AND/OR Rarg1, Rtmp // STBCCC/STWCCC Rtmp, (Rarg0) // BNE -3(PC) + // LWSYNC ld := ppc64.ALBAR st := ppc64.ASTBCCC if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 { @@ -170,6 +171,10 @@ p3 := s.Prog(ppc64.ABNE) p3.To.Type = obj.TYPE_BRANCH p3.To.SetTarget(p) + // LWSYNC - Provide acquire ordering to pair with the + // release (pre-LWSYNC) above, making the operation + // sequentially consistent. + s.Prog(ppc64.ALWSYNC) case ssa.OpPPC64LoweredAtomicAdd32, ssa.OpPPC64LoweredAtomicAdd64: @@ -179,6 +184,7 @@ // STDCCC/STWCCC Rout, (Rarg0) // BNE -3(PC) // MOVW Rout,Rout (if Add32) + // LWSYNC ld := ppc64.ALDAR st := ppc64.ASTDCCC if v.Op == ssa.OpPPC64LoweredAtomicAdd32 { @@ -188,10 +194,10 @@ r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() out := v.Reg0() - // LWSYNC - Assuming shared data not write-through-required nor - // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. - plwsync := s.Prog(ppc64.ALWSYNC) - plwsync.To.Type = obj.TYPE_NONE + // LWSYNC - Release barrier issued before the LDAR/LWAR below. + // Assumes shared data is neither write-through-required nor + // caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b. + s.Prog(ppc64.ALWSYNC) // LDAR or LWAR p := s.Prog(ld) p.From.Type = obj.TYPE_MEM @@ -223,6 +229,11 @@ p5.From.Type = obj.TYPE_REG p5.From.Reg = out } + // LWSYNC - Provide acquire ordering to pair with the + // release (pre-LWSYNC) above, making the operation + // sequentially consistent. + plwsync2 := s.Prog(ppc64.ALWSYNC) + plwsync2.To.Type = obj.TYPE_NONE case ssa.OpPPC64LoweredAtomicExchange8, ssa.OpPPC64LoweredAtomicExchange32,
diff --git a/src/internal/runtime/atomic/atomic_ppc64x.s b/src/internal/runtime/atomic/atomic_ppc64x.s index bff7d19..a82a34e 100644 --- a/src/internal/runtime/atomic/atomic_ppc64x.s +++ b/src/internal/runtime/atomic/atomic_ppc64x.s
@@ -220,6 +220,7 @@ ADD R5, R3 STWCCC R3, (R4) BNE -3(PC) + LWSYNC MOVW R3, ret+16(FP) RET @@ -235,6 +236,7 @@ ADD R5, R3 STDCCC R3, (R4) BNE -3(PC) + LWSYNC MOVD R3, ret+16(FP) RET @@ -343,6 +345,7 @@ OR R4, R6 STBCCC R6, (R3) BNE again + LWSYNC RET // void ·And8(byte volatile*, byte); @@ -355,6 +358,7 @@ AND R4, R6 STBCCC R6, (R3) BNE again + LWSYNC RET // func Or(addr *uint32, v uint32) @@ -367,6 +371,7 @@ OR R4, R6 STWCCC R6, (R3) BNE again + LWSYNC RET // func And(addr *uint32, v uint32) @@ -379,6 +384,7 @@ AND R4, R6 STWCCC R6, (R3) BNE again + LWSYNC RET // func Or32(addr *uint32, v uint32) old uint32 @@ -391,6 +397,7 @@ OR R4, R6, R7 STWCCC R7, (R3) BNE again + LWSYNC MOVW R6, ret+16(FP) RET @@ -404,6 +411,7 @@ AND R4, R6, R7 STWCCC R7, (R3) BNE again + LWSYNC MOVW R6, ret+16(FP) RET @@ -417,6 +425,7 @@ OR R4, R6, R7 STDCCC R7, (R3) BNE again + LWSYNC MOVD R6, ret+16(FP) RET @@ -430,6 +439,7 @@ AND R4, R6, R7 STDCCC R7, (R3) BNE again + LWSYNC MOVD R6, ret+16(FP) RET
diff --git a/test/fixedbugs/issue79186.go b/test/fixedbugs/issue79186.go new file mode 100644 index 0000000..21bb6c8 --- /dev/null +++ b/test/fixedbugs/issue79186.go
@@ -0,0 +1,67 @@ +// run + +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Issue 79186: on ppc64le (POWER8/9), atomic add operations lacked a +// post-barrier (acquire ordering), allowing loads after an RWMutex.RLock +// to be speculatively reordered before the lock acquisition, causing +// concurrent map read and map write. + +package main + +import ( + "runtime" + "sync" +) + +type M struct { // M is a map whose accesses go through an RWMutex. + mu sync.RWMutex // guards m + m map[int]int +} + +func NewM() *M { // NewM returns an M with an empty, non-nil map. + return &M{m: make(map[int]int)} +} + +func (x *M) Get(k int) (int, bool) { // Get looks up k under the read lock. + x.mu.RLock() // the map load below must not be reordered before this acquisition + v, ok := x.m[k] + x.mu.RUnlock() + return v, ok +} + +func (x *M) Set(k, v int) { // Set stores v at k under the write lock. + x.mu.Lock() + x.m[k] = v + x.mu.Unlock() +} + +func main() { + runtime.GOMAXPROCS(2) + + x := NewM() + + const goroutines = 256 + const iters = 200000 + + var wg sync.WaitGroup + wg.Add(goroutines) + + for g := 0; g < goroutines; g++ { + go func(id int) { + defer wg.Done() + for i := 0; i < iters; i++ { + k := (id + i) & 15 // 16 keys, shared by every goroutine + if _, ok := x.Get(k); !ok { // key missing: insert it + x.Set(k, i) + } else if i&7 == 0 { // key present: overwrite when i is a multiple of 8 + x.Set(k, i) + } + } + }(g) + } + + wg.Wait() +}