semaphore: cancel acquisition with a done context

When acquiring from a semaphore could proceed without contention, the
previous behavior was to always do so, even when the provided context
was done. This was the documented behavior, but it could lead to
confusion. It isn't much more expensive to check the context error, so
cancel acquisition if it's done.

Fixes golang/go#63615.

goos: linux
goarch: amd64
pkg: golang.org/x/sync/semaphore
cpu: 12th Gen Intel(R) Core(TM) i5-1235U
                                        │  old.bench  │             new.bench              │
                                        │   sec/op    │   sec/op     vs base               │
AcquireSeq/Weighted-acquire-1-1-1-12      26.45n ± 2%   27.25n ± 3%  +3.04% (p=0.001 n=20)
AcquireSeq/Weighted-acquire-2-1-1-12      26.96n ± 1%   27.12n ± 1%       ~ (p=0.104 n=20)
AcquireSeq/Weighted-acquire-16-1-1-12     26.07n ± 3%   27.48n ± 1%  +5.45% (p=0.000 n=20)
AcquireSeq/Weighted-acquire-128-1-1-12    26.19n ± 2%   27.24n ± 1%  +4.01% (p=0.000 n=20)
AcquireSeq/Weighted-acquire-2-2-1-12      25.61n ± 1%   25.99n ± 2%       ~ (p=0.066 n=20)
AcquireSeq/Weighted-acquire-16-2-8-12     209.6n ± 2%   211.0n ± 3%       ~ (p=0.280 n=20)
AcquireSeq/Weighted-acquire-128-2-64-12   1.669µ ± 1%   1.721µ ± 2%  +3.09% (p=0.000 n=20)
AcquireSeq/Weighted-acquire-2-1-2-12      51.08n ± 1%   53.03n ± 2%  +3.82% (p=0.000 n=20)
AcquireSeq/Weighted-acquire-16-8-2-12     52.48n ± 2%   53.66n ± 2%  +2.26% (p=0.028 n=20)
AcquireSeq/Weighted-acquire-128-64-2-12   52.27n ± 1%   53.71n ± 2%  +2.75% (p=0.000 n=20)
geomean                                   60.06n        61.69n       +2.71%

Change-Id: I0ae1a0bb6c027461ac1a9ee71c51efd8427ab308
Reviewed-on: https://go-review.googlesource.com/c/sync/+/536275
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/semaphore/semaphore.go b/semaphore/semaphore.go
index 30f632c..b618162 100644
--- a/semaphore/semaphore.go
+++ b/semaphore/semaphore.go
@@ -35,11 +35,25 @@
 // Acquire acquires the semaphore with a weight of n, blocking until resources
 // are available or ctx is done. On success, returns nil. On failure, returns
 // ctx.Err() and leaves the semaphore unchanged.
-//
-// If ctx is already done, Acquire may still succeed without blocking.
 func (s *Weighted) Acquire(ctx context.Context, n int64) error {
+	done := ctx.Done()
+
 	s.mu.Lock()
+	select {
+	case <-done:
+		// ctx becoming done has "happened before" acquiring the semaphore,
+		// whether it became done before the call began or while we were
+		// waiting for the mutex. We prefer to fail even if we could acquire
+		// the mutex without blocking.
+		s.mu.Unlock()
+		return ctx.Err()
+	default:
+	}
 	if s.size-s.cur >= n && s.waiters.Len() == 0 {
+		// Since we hold s.mu and haven't synchronized since checking done, if
+		// ctx becomes done before we return here, it becoming done must have
+		// "happened concurrently" with this call - it cannot "happen before"
+		// we return in this branch. So, we're ok to always acquire here.
 		s.cur += n
 		s.mu.Unlock()
 		return nil
@@ -48,7 +62,7 @@
 	if n > s.size {
 		// Don't make other Acquire calls block on one that's doomed to fail.
 		s.mu.Unlock()
-		<-ctx.Done()
+		<-done
 		return ctx.Err()
 	}
 
@@ -58,14 +72,14 @@
 	s.mu.Unlock()
 
 	select {
-	case <-ctx.Done():
-		err := ctx.Err()
+	case <-done:
 		s.mu.Lock()
 		select {
 		case <-ready:
-			// Acquired the semaphore after we were canceled.  Rather than trying to
-			// fix up the queue, just pretend we didn't notice the cancelation.
-			err = nil
+			// Acquired the semaphore after we were canceled.
+			// Pretend we didn't and put the tokens back.
+			s.cur -= n
+			s.notifyWaiters()
 		default:
 			isFront := s.waiters.Front() == elem
 			s.waiters.Remove(elem)
@@ -75,9 +89,19 @@
 			}
 		}
 		s.mu.Unlock()
-		return err
+		return ctx.Err()
 
 	case <-ready:
+		// Acquired the semaphore. Check that ctx isn't already done.
+		// We check the done channel instead of calling ctx.Err because we
+		// already have the channel, and ctx.Err is O(n) with the nesting
+		// depth of ctx.
+		select {
+		case <-done:
+			s.Release(n)
+			return ctx.Err()
+		default:
+		}
 		return nil
 	}
 }
diff --git a/semaphore/semaphore_test.go b/semaphore/semaphore_test.go
index 6e8eca2..61012d6 100644
--- a/semaphore/semaphore_test.go
+++ b/semaphore/semaphore_test.go
@@ -200,3 +200,38 @@
 	}
 	sem.Release(1)
 }
+
+func TestWeightedAcquireCanceled(t *testing.T) {
+	// https://go.dev/issue/63615
+	sem := semaphore.NewWeighted(2)
+	ctx, cancel := context.WithCancel(context.Background())
+	sem.Acquire(context.Background(), 1)
+	ch := make(chan struct{})
+	go func() {
+		// Synchronize with the Acquire(2) below.
+		for sem.TryAcquire(1) {
+			sem.Release(1)
+		}
+		// Now cancel ctx, and then release the token.
+		cancel()
+		sem.Release(1)
+		close(ch)
+	}()
+	// Since the context closing happens before enough tokens become available,
+	// this Acquire must fail.
+	if err := sem.Acquire(ctx, 2); err != context.Canceled {
+		t.Errorf("Acquire with canceled context returned wrong error: want context.Canceled, got %v", err)
+	}
+	// There must always be two tokens in the semaphore after the other
+	// goroutine releases the one we held at the start.
+	<-ch
+	if !sem.TryAcquire(2) {
+		t.Fatal("TryAcquire after canceled Acquire failed")
+	}
+	// Additionally verify that we don't acquire with a done context even when
+	// we wouldn't need to block to do so.
+	sem.Release(2)
+	if err := sem.Acquire(ctx, 1); err != context.Canceled {
+		t.Errorf("Acquire with canceled context returned wrong error: want context.Canceled, got %v", err)
+	}
+}