about summary refs log tree commit diff
path: root/src/testing/testing.go
diff options
context:
space:
mode:
authorBryan C. Mills <bcmills@google.com>2023-06-28 09:37:47 -0400
committerGopher Robot <gobot@golang.org>2023-11-21 22:58:46 +0000
commite6b76bfc4675eae8d1e4c7e8dc28897339b13824 (patch)
treedc62e2a94a7162476d9d3417853142f7c3925453 /src/testing/testing.go
parentf5bf9fb278c473104b0b987fc1dd165566cbec71 (diff)
downloadgo-e6b76bfc4675eae8d1e4c7e8dc28897339b13824.tar.xz
testing: simplify concurrency and cleanup logic
While investigating #60083, I found a couple of bugs (notably #61034) that had slipped through code review in part because the concurrency patterns used in the testing package were too complex for me to fully reason about. This change adjusts those patterns to be more in line with current idioms, and to reduce the number of special cases that depend on details that should be orthogonal. (For example: the details of how we invoke the Cleanup functions should not depend on whether the test happened to run any parallel subtests.) In the process, this change fixes a handful of bugs: - Concurrent calls to Run (explicitly allowed by TestParallelSub) could previously drive the testcontext.running count negative, causing the number of running parallel tests to exceed the -parallel flag. - The -failfast flag now takes effect immediately on failure. It no longer delays until the test finishes, and no longer misses failures during cleanup (fixing #61034). - If a Cleanup function calls runtime.Goexit (typically via t.FailNow) during a panic, Cleanup functions from its parent tests are no longer skipped and buffered logs from its parent tests are now flushed. - The time reported for a test with subtests now includes the time spent running those subtests, regardless of whether they are parallel. (Previously, non-parallel subtests were included but parallel subtests were not.) - Calls to (*B).Run in iterations after the first are now diagnosed with a panic. (This diagnoses badly-behaved benchmarks: if Run is called during the first iteration, no subsequent iterations are supposed to occur.) Fixes #61034. 
Change-Id: I3797f6ef5210a3d2d5d6c2710d3f35c0219b02ea Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-amd64-longtest-race,gotip-windows-amd64-longtest Reviewed-on: https://go-review.googlesource.com/c/go/+/506755 Auto-Submit: Bryan Mills <bcmills@google.com> Reviewed-by: Alan Donovan <adonovan@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/testing/testing.go')
-rw-r--r-- src/testing/testing.go | 402
1 files changed, 201 insertions, 201 deletions
diff --git a/src/testing/testing.go b/src/testing/testing.go
index ed8b3630f1..96f71c89b9 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -480,7 +480,7 @@ var (
cpuList []int
testlogFile *os.File
- numFailed atomic.Uint32 // number of test failures
+ anyFailed atomic.Bool
running sync.Map // map[string]time.Time of running, unpaused tests
)
@@ -593,38 +593,40 @@ const maxStackLen = 50
// captures common methods such as Errorf.
type common struct {
mu sync.RWMutex // guards this group of fields
- output []byte // Output generated by test or benchmark.
- w io.Writer // For flushToParent.
- ran bool // Test or benchmark (or one of its subtests) was executed.
+ output []byte // output generated by test or benchmark
+ w io.Writer // output to which flushToParent should write
+ ranAnyLeaf bool // Test or benchmark ran to completion without calling Run itself, or had a subtest that did so.
failed bool // Test or benchmark has failed.
skipped bool // Test or benchmark has been skipped.
+ goexiting bool // Test function is attempting to abort by goexit (implies failed || skipped).
done bool // Test is finished and all subtests have completed.
helperPCs map[uintptr]struct{} // functions to be skipped when writing file/line info
helperNames map[string]struct{} // helperPCs converted to function names
cleanups []func() // optional functions to be called at the end of the test
- cleanupName string // Name of the cleanup function.
- cleanupPc []uintptr // The stack trace at the point where Cleanup was called.
- finished bool // Test function has completed.
- inFuzzFn bool // Whether the fuzz target, if this is one, is running.
+ cleanupName string // name of the cleanup function currently running
+ cleanupPc []uintptr // stack trace at the point where Cleanup was called
- chatty *chattyPrinter // A copy of chattyPrinter, if the chatty flag is set.
- bench bool // Whether the current test is a benchmark.
- hasSub atomic.Bool // whether there are sub-benchmarks.
- cleanupStarted atomic.Bool // Registered cleanup callbacks have started to execute
- runner string // Function name of tRunner running the test.
- isParallel bool // Whether the test is parallel.
+ chatty *chattyPrinter // copy of chattyPrinter, if the chatty flag is set
+ bench bool // Current test is a benchmark.
+ runner string // function name of tRunner running the test
+ isParallel bool // Test is running in parallel.
+ inFuzzFn bool // Fuzz target, if this is one, is running.
+ inCleanup bool // Cleanup callbacks, if any, are running.
- parent *common
- level int // Nesting depth of test or benchmark.
- creator []uintptr // If level > 0, the stack trace at the point where the parent called t.Run.
- name string // Name of test or benchmark.
- start time.Time // Time test or benchmark started
- duration time.Duration
- barrier chan bool // To signal parallel subtests they may start. Nil when T.Parallel is not present (B) or not usable (when fuzzing).
- signal chan bool // To signal a test is done.
- sub []*T // Queue of subtests to be run in parallel.
+ parent *common
+ level int // nesting depth of test or benchmark
+ creator []uintptr // if level > 0, the stack trace at the point where the parent called t.Run
+ name string // name of test or benchmark
+ start time.Time // time test or benchmark started or resumed
+ duration time.Duration // time in the test function, excluding time blocked in t.Parallel
+ runParallel chan struct{} // Closed when parallel subtests may start. Nil when T.Parallel is not present (B) or not usable (when fuzzing).
+ doneOrParallel chan struct{} // Closed when the test is either blocked in Parallel or done running.
- lastRaceErrors atomic.Int64 // Max value of race.Errors seen during the test or its subtests.
+ hasSub atomic.Bool // Test or benchmark has subtests or sub-benchmarks.
+ parallelSubtests sync.WaitGroup
+ runMu sync.Mutex // Held during calls to Run to prevent the total number of active subtests from exceeding the parallelism limit.
+
+ lastRaceErrors atomic.Int64 // max value of race.Errors seen during the test or its subtests
raceErrorLogged atomic.Bool
tempDirMu sync.Mutex
@@ -931,13 +933,13 @@ func (c *common) Name() string {
return c.name
}
-func (c *common) setRan() {
+func (c *common) setRanLeaf() {
if c.parent != nil {
- c.parent.setRan()
+ c.parent.setRanLeaf()
}
c.mu.Lock()
defer c.mu.Unlock()
- c.ran = true
+ c.ranAnyLeaf = true
}
// Fail marks the function as having failed but continues execution.
@@ -952,6 +954,7 @@ func (c *common) Fail() {
panic("Fail in goroutine after " + c.name + " has completed")
}
c.failed = true
+ anyFailed.Store(true)
}
// Failed reports whether the function has failed.
@@ -1000,7 +1003,7 @@ func (c *common) FailNow() {
// a top-of-stack deferred function now, we know that the send
// only happens after any other stacked defers have completed.
c.mu.Lock()
- c.finished = true
+ c.goexiting = true
c.mu.Unlock()
runtime.Goexit()
}
@@ -1115,7 +1118,7 @@ func (c *common) SkipNow() {
c.checkFuzzFn("SkipNow")
c.mu.Lock()
c.skipped = true
- c.finished = true
+ c.goexiting = true
c.mu.Unlock()
runtime.Goexit()
}
@@ -1318,8 +1321,8 @@ const (
// If ph is recoverAndReturnPanic, it will catch panics, and return the
// recovered value if any.
func (c *common) runCleanup(ph panicHandling) (panicVal any) {
- c.cleanupStarted.Store(true)
- defer c.cleanupStarted.Store(false)
+ c.inCleanup = true
+ defer func() { c.inCleanup = false }()
if ph == recoverAndReturnPanic {
defer func() {
@@ -1446,8 +1449,7 @@ func (t *T) Parallel() {
if t.isEnvSet {
panic("testing: t.Parallel called after t.Setenv; cannot set environment variables in parallel tests")
}
- t.isParallel = true
- if t.parent.barrier == nil {
+ if t.parent.runParallel == nil {
// T.Parallel has no effect when fuzzing.
// Multiple processes may run in parallel, but only one input can run at a
// time per process so we can attribute crashes to specific inputs.
@@ -1460,7 +1462,7 @@ func (t *T) Parallel() {
t.duration += time.Since(t.start)
// Add to the list of tests to be released by the parent.
- t.parent.sub = append(t.parent.sub, t)
+ t.parent.parallelSubtests.Add(1)
// Report any races during execution of this test up to this point.
//
@@ -1479,9 +1481,19 @@ func (t *T) Parallel() {
}
running.Delete(t.name)
- t.signal <- true // Release calling test.
- <-t.parent.barrier // Wait for the parent test to complete.
- t.context.waitParallel()
+ t.isParallel = true
+
+ // Release the parent test to run. We can't just use parallelSem tokens for
+ // this because some callers (notably TestParallelSub) expect to be able to
+ // call Run from multiple goroutines and have those calls succeed.
+ //
+ // Instead, we close a channel to unblock the parent's call to Run, allowing
+ // it to resume. Then, we wait for it to finish and unblock its parallel
+ // subtests, and acquire a parallel run token so that we don't run too many of
+ // the subtests together all at once.
+ close(t.doneOrParallel)
+ <-t.parent.runParallel
+ t.context.acquireParallel()
if t.chatty != nil {
t.chatty.Updatef(t.name, "=== CONT %s\n", t.name)
@@ -1538,19 +1550,13 @@ var errNilPanicOrGoexit = errors.New("test executed panic(nil) or runtime.Goexit
func tRunner(t *T, fn func(t *T)) {
t.runner = callerName(0)
+ returned := false
// When this goroutine is done, either because fn(t)
// returned normally or because a test failure triggered
// a call to runtime.Goexit, record the duration and send
// a signal saying that the test is done.
defer func() {
- t.checkRaces()
-
- // TODO(#61034): This is the wrong place for this check.
- if t.Failed() {
- numFailed.Add(1)
- }
-
// Check if the test panicked or Goexited inappropriately.
//
// If this happens in a normal test, print output but continue panicking.
@@ -1559,132 +1565,120 @@ func tRunner(t *T, fn func(t *T)) {
// If this happens while fuzzing, recover from the panic and treat it like a
// normal failure. It's important that the process keeps running in order to
// find short inputs that cause panics.
- err := recover()
- signal := true
-
- t.mu.RLock()
- finished := t.finished
- t.mu.RUnlock()
- if !finished && err == nil {
- err = errNilPanicOrGoexit
+ panicVal := recover()
+ if !returned && !t.goexiting && panicVal == nil {
+ panicVal = errNilPanicOrGoexit
for p := t.parent; p != nil; p = p.parent {
p.mu.RLock()
- finished = p.finished
+ pGoexiting := p.goexiting
p.mu.RUnlock()
- if finished {
- if !t.isParallel {
- t.Errorf("%v: subtest may have called FailNow on a parent test", err)
- err = nil
- }
- signal = false
+ if pGoexiting {
+ t.Errorf("%v: subtest may have called FailNow on a parent test", panicVal)
+ panicVal = nil
break
}
}
}
- if err != nil && t.context.isFuzzing {
+ if panicVal != nil && t.context.isFuzzing {
prefix := "panic: "
- if err == errNilPanicOrGoexit {
+ if panicVal == errNilPanicOrGoexit {
prefix = ""
}
- t.Errorf("%s%s\n%s\n", prefix, err, string(debug.Stack()))
- t.mu.Lock()
- t.finished = true
- t.mu.Unlock()
- err = nil
+ t.Errorf("%s%s\n%s\n", prefix, panicVal, string(debug.Stack()))
+ panicVal = nil
+ }
+
+ if panicVal != nil {
+ // Mark the test as failed so that Cleanup functions can see its correct status.
+ t.Fail()
+ } else if t.runParallel != nil {
+ // Run parallel subtests.
+
+ // Check for races before starting parallel subtests, so that if a
+ // parallel subtest *also* triggers a data race we will report the two
+ // races to the two tests and not attribute all of them to the subtest.
+ t.checkRaces()
+
+ if t.isParallel {
+ // Release our own parallel token first, so that it is available for
+ // subtests to acquire.
+ t.context.releaseParallel()
+ }
+ close(t.runParallel)
+ t.parallelSubtests.Wait()
+ if t.isParallel {
+ // Re-acquire a parallel token to limit concurrent cleanup.
+ t.context.acquireParallel()
+ }
}
// Use a deferred call to ensure that we report that the test is
// complete even if a cleanup function calls t.FailNow. See issue 41355.
- didPanic := false
defer func() {
+ cleanupPanic := recover()
+ if panicVal == nil {
+ panicVal = cleanupPanic
+ }
+
// Only report that the test is complete if it doesn't panic,
// as otherwise the test binary can exit before the panic is
// reported to the user. See issue 41479.
- if didPanic {
- return
- }
- if err != nil {
- panic(err)
- }
- running.Delete(t.name)
- t.signal <- signal
- }()
+ if panicVal != nil {
+ // Flush the output log up to the root before dying.
+ for root := &t.common; root.parent != nil; root = root.parent {
+ root.mu.Lock()
+ root.duration += time.Since(root.start)
+ d := root.duration
+ root.mu.Unlock()
+ root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
- doPanic := func(err any) {
- t.Fail()
- if r := t.runCleanup(recoverAndReturnPanic); r != nil {
- t.Logf("cleanup panicked with %v", r)
- }
- // Flush the output log up to the root before dying.
- for root := &t.common; root.parent != nil; root = root.parent {
- root.mu.Lock()
- root.duration += time.Since(root.start)
- d := root.duration
- root.mu.Unlock()
- root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d))
- if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil {
- fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r)
+ // Since the parent will never finish running, do its cleanup now.
+ // Run the cleanup in a fresh goroutine in case it calls runtime.Goexit,
+ // which we cannot recover.
+ cleanupDone := make(chan struct{})
+ go func() {
+ defer close(cleanupDone)
+ if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil {
+ fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r)
+ }
+ }()
+ <-cleanupDone
}
+ panic(panicVal)
}
- didPanic = true
- panic(err)
- }
- if err != nil {
- doPanic(err)
- }
- t.duration += time.Since(t.start)
+ t.checkRaces()
+ t.duration += time.Since(t.start)
+ t.report()
- if len(t.sub) > 0 {
- // Run parallel subtests.
- // Decrease the running count for this test.
- t.context.release()
- // Release the parallel subtests.
- close(t.barrier)
- // Wait for subtests to complete.
- for _, sub := range t.sub {
- <-sub.signal
- }
- cleanupStart := time.Now()
- err := t.runCleanup(recoverAndReturnPanic)
- t.duration += time.Since(cleanupStart)
- if err != nil {
- doPanic(err)
+ // Do not lock t.done to allow race detector to detect race in case
+ // the user does not appropriately synchronize a goroutine.
+ t.done = true
+ if t.parent != nil && !t.hasSub.Load() {
+ t.setRanLeaf()
}
- t.checkRaces()
- if !t.isParallel {
- // Reacquire the count for sequential tests. See comment in Run.
- t.context.waitParallel()
+
+ running.Delete(t.name)
+ if t.isParallel {
+ t.context.releaseParallel()
+ t.parent.parallelSubtests.Done()
+ } else {
+ close(t.doneOrParallel)
}
- } else if t.isParallel {
- // Only release the count for this test if it was run as a parallel
- // test. See comment in Run method.
- t.context.release()
- }
- t.report() // Report after all subtests have finished.
+ }()
- // Do not lock t.done to allow race detector to detect race in case
- // the user does not appropriately synchronize a goroutine.
- t.done = true
- if t.parent != nil && !t.hasSub.Load() {
- t.setRan()
- }
- }()
- defer func() {
- if len(t.sub) == 0 {
- t.runCleanup(normalPanic)
- }
+ t.runCleanup(normalPanic)
}()
+ // Run the actual test function.
t.start = time.Now()
t.resetRaces()
fn(t)
- // code beyond here will not be executed when FailNow is invoked
- t.mu.Lock()
- t.finished = true
- t.mu.Unlock()
+ // Code beyond this point will not be executed when FailNow or SkipNow
+ // is invoked.
+ returned = true
}
// Run runs f as a subtest of t called name. It runs f in a separate goroutine
@@ -1694,7 +1688,7 @@ func tRunner(t *T, fn func(t *T)) {
// Run may be called simultaneously from multiple goroutines, but all such calls
// must return before the outer test function for t returns.
func (t *T) Run(name string, f func(t *T)) bool {
- if t.cleanupStarted.Load() {
+ if t.inCleanup {
panic("testing: t.Run called during t.Cleanup")
}
@@ -1708,40 +1702,56 @@ func (t *T) Run(name string, f func(t *T)) bool {
// continue walking the stack into the parent test.
var pc [maxStackLen]uintptr
n := runtime.Callers(2, pc[:])
- t = &T{
+ sub := &T{
common: common{
- barrier: make(chan bool),
- signal: make(chan bool, 1),
- name: testName,
- parent: &t.common,
- level: t.level + 1,
- creator: pc[:n],
- chatty: t.chatty,
+ runParallel: make(chan struct{}),
+ doneOrParallel: make(chan struct{}),
+ name: testName,
+ parent: &t.common,
+ level: t.level + 1,
+ creator: pc[:n],
+ chatty: t.chatty,
},
context: t.context,
}
- t.w = indenter{&t.common}
+ sub.w = indenter{&sub.common}
- if t.chatty != nil {
- t.chatty.Updatef(t.name, "=== RUN %s\n", t.name)
+ // Ensure that only one non-parallel subtest runs at a time so that we don't
+ // exceed the -parallel setting due to concurrent calls.
+ // (Run may be called concurrently even if the test is not marked parallel —
+ // see TestParallelSub.)
+ t.runMu.Lock()
+ defer t.runMu.Unlock()
+
+ if t.isParallel {
+ // Release our parallelism token for the subtest to use
+ // for its own parallel subtests.
+ t.context.releaseParallel()
+ defer t.context.acquireParallel()
}
- running.Store(t.name, time.Now())
+
+ if sub.chatty != nil {
+ sub.chatty.Updatef(sub.name, "=== RUN %s\n", sub.name)
+ }
+ running.Store(sub.name, time.Now())
// Instead of reducing the running count of this test before calling the
// tRunner and increasing it afterwards, we rely on tRunner keeping the
// count correct. This ensures that a sequence of sequential tests runs
// without being preempted, even when their parent is a parallel test. This
// may especially reduce surprises if *parallel == 1.
- go tRunner(t, f)
- if !<-t.signal {
- // At this point, it is likely that FailNow was called on one of the
- // parent tests by one of the subtests. Continue aborting up the chain.
+ go tRunner(sub, f)
+ <-sub.doneOrParallel
+ if t.goexiting {
+ // The parent test (t) thinks it called runtime.Goexit, but here we are
+ // still running. It is likely that this subtest called FailNow or SkipNow
+ // on the t instead of sub, so propagate the Goexit to the parent goroutine.
runtime.Goexit()
}
if t.chatty != nil && t.chatty.json {
- t.chatty.Updatef(t.parent.name, "=== NAME %s\n", t.parent.name)
+ t.chatty.Updatef(t.name, "=== NAME %s\n", t.name)
}
- return !t.failed
+ return !sub.failed
}
// Deadline reports the time at which the test binary will have
@@ -1765,53 +1775,43 @@ type testContext struct {
// does not match).
isFuzzing bool
- mu sync.Mutex
-
- // Channel used to signal tests that are ready to be run in parallel.
- startParallel chan bool
-
- // running is the number of tests currently running in parallel.
- // This does not include tests that are waiting for subtests to complete.
- running int
-
- // numWaiting is the number tests waiting to be run in parallel.
- numWaiting int
-
- // maxParallel is a copy of the parallel flag.
- maxParallel int
+ // parallelSem is a counting semaphore to limit concurrency of Parallel tests.
+ // It has a capacity equal to the parallel flag.
+ // Send a token to acquire; receive to release.
+ // Non-parallel tests do not require a token.
+ parallelSem chan token
}
+// A token is a semaphore token.
+type token struct{}
+
+// newTestContext returns a new testContext with the given parallelism and matcher.
func newTestContext(maxParallel int, m *matcher) *testContext {
- return &testContext{
- match: m,
- startParallel: make(chan bool),
- maxParallel: maxParallel,
- running: 1, // Set the count to 1 for the main (sequential) test.
+ tc := &testContext{
+ match: m,
+ parallelSem: make(chan token, maxParallel),
}
+ return tc
}
-func (c *testContext) waitParallel() {
- c.mu.Lock()
- if c.running < c.maxParallel {
- c.running++
- c.mu.Unlock()
- return
- }
- c.numWaiting++
- c.mu.Unlock()
- <-c.startParallel
+// acquireParallel blocks until it can obtain a semaphore token for running a
+// parallel test.
+func (c *testContext) acquireParallel() {
+ c.parallelSem <- token{}
}
-func (c *testContext) release() {
- c.mu.Lock()
- if c.numWaiting == 0 {
- c.running--
- c.mu.Unlock()
- return
+// releaseParallel returns a semaphore token obtained by acquireParallel.
+func (c *testContext) releaseParallel() {
+ select {
+ case <-c.parallelSem:
+ default:
+ panic("testing: internal error: no parallel token to release")
}
- c.numWaiting--
- c.mu.Unlock()
- c.startParallel <- true // Pick a waiting test to be run.
+}
+
+// running returns the number of semaphore tokens outstanding.
+func (c *testContext) running() int {
+ return len(c.parallelSem)
}
// No one should be using func Main anymore.
@@ -2132,9 +2132,9 @@ func runTests(matchString func(pat, str string) (bool, error), tests []InternalT
ctx.deadline = deadline
t := &T{
common: common{
- signal: make(chan bool, 1),
- barrier: make(chan bool),
- w: os.Stdout,
+ doneOrParallel: make(chan struct{}),
+ runParallel: make(chan struct{}),
+ w: os.Stdout,
},
context: ctx,
}
@@ -2147,12 +2147,12 @@ func runTests(matchString func(pat, str string) (bool, error), tests []InternalT
}
})
select {
- case <-t.signal:
+ case <-t.doneOrParallel:
default:
- panic("internal error: tRunner exited without sending on t.signal")
+ panic("internal error: tRunner exited without closing t.doneOrParallel")
}
ok = ok && !t.Failed()
- ran = ran || t.ran
+ ran = ran || t.ranAnyLeaf
}
}
return ran, ok
@@ -2390,5 +2390,5 @@ func parseCpuList() {
}
func shouldFailFast() bool {
- return *failFast && numFailed.Load() > 0
+ return *failFast && anyFailed.Load()
}