diff options
| author | Lasse Folger <lassefolger@google.com> | 2023-11-24 15:30:07 +0000 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2023-11-27 16:49:24 +0000 |
| commit | 6317fb41e920cc593620d4b9a55376472fc02c07 (patch) | |
| tree | 5008e5b12850909a494a2bc9e186fa78d85a620a /src/testing/testing.go | |
| parent | d961b12be9001cf8dbf8f52847607dbf84d94f8d (diff) | |
| download | go-6317fb41e920cc593620d4b9a55376472fc02c07.tar.xz | |
Revert "testing: simplify concurrency and cleanup logic"
reverts CL 506755
Reason for revert: This leads to deadlocks of tests in Google internal testing.
For #64402
Change-Id: I78329fc9dcc2377e7e880b264ac1d18d577ef99a
Reviewed-on: https://go-review.googlesource.com/c/go/+/544895
Auto-Submit: Bryan Mills <bcmills@google.com>
Auto-Submit: Lasse Folger <lassefolger@google.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Bryan Mills <bcmills@google.com>
Diffstat (limited to 'src/testing/testing.go')
| -rw-r--r-- | src/testing/testing.go | 402 |
1 file changed, 201 insertions(+), 201 deletions(-)
diff --git a/src/testing/testing.go b/src/testing/testing.go index 96f71c89b9..ed8b3630f1 100644 --- a/src/testing/testing.go +++ b/src/testing/testing.go @@ -480,7 +480,7 @@ var ( cpuList []int testlogFile *os.File - anyFailed atomic.Bool + numFailed atomic.Uint32 // number of test failures running sync.Map // map[string]time.Time of running, unpaused tests ) @@ -593,40 +593,38 @@ const maxStackLen = 50 // captures common methods such as Errorf. type common struct { mu sync.RWMutex // guards this group of fields - output []byte // output generated by test or benchmark - w io.Writer // output to which flushToParent should write - ranAnyLeaf bool // Test or benchmark ran to completion without calling Run itself, or had a subtest that did so. + output []byte // Output generated by test or benchmark. + w io.Writer // For flushToParent. + ran bool // Test or benchmark (or one of its subtests) was executed. failed bool // Test or benchmark has failed. skipped bool // Test or benchmark has been skipped. - goexiting bool // Test function is attempting to abort by goexit (implies failed || skipped). done bool // Test is finished and all subtests have completed. helperPCs map[uintptr]struct{} // functions to be skipped when writing file/line info helperNames map[string]struct{} // helperPCs converted to function names cleanups []func() // optional functions to be called at the end of the test - cleanupName string // name of the cleanup function currently running - cleanupPc []uintptr // stack trace at the point where Cleanup was called + cleanupName string // Name of the cleanup function. + cleanupPc []uintptr // The stack trace at the point where Cleanup was called. + finished bool // Test function has completed. + inFuzzFn bool // Whether the fuzz target, if this is one, is running. - chatty *chattyPrinter // copy of chattyPrinter, if the chatty flag is set - bench bool // Current test is a benchmark. 
- runner string // function name of tRunner running the test - isParallel bool // Test is running in parallel. - inFuzzFn bool // Fuzz target, if this is one, is running. - inCleanup bool // Cleanup callbacks, if any, are running. + chatty *chattyPrinter // A copy of chattyPrinter, if the chatty flag is set. + bench bool // Whether the current test is a benchmark. + hasSub atomic.Bool // whether there are sub-benchmarks. + cleanupStarted atomic.Bool // Registered cleanup callbacks have started to execute + runner string // Function name of tRunner running the test. + isParallel bool // Whether the test is parallel. - parent *common - level int // nesting depth of test or benchmark - creator []uintptr // if level > 0, the stack trace at the point where the parent called t.Run - name string // name of test or benchmark - start time.Time // time test or benchmark started or resumed - duration time.Duration // time in the test function, excluding time blocked in t.Parallel - runParallel chan struct{} // Closed when parallel subtests may start. Nil when T.Parallel is not present (B) or not usable (when fuzzing). - doneOrParallel chan struct{} // Closed when the test is either blocked in Parallel or done running. + parent *common + level int // Nesting depth of test or benchmark. + creator []uintptr // If level > 0, the stack trace at the point where the parent called t.Run. + name string // Name of test or benchmark. + start time.Time // Time test or benchmark started + duration time.Duration + barrier chan bool // To signal parallel subtests they may start. Nil when T.Parallel is not present (B) or not usable (when fuzzing). + signal chan bool // To signal a test is done. + sub []*T // Queue of subtests to be run in parallel. - hasSub atomic.Bool // Test or benchmark has subtests or sub-benchmarks. - parallelSubtests sync.WaitGroup - runMu sync.Mutex // Held during calls to Run to prevent the total number of active subtests from exceeding the parallelism limit. 
- - lastRaceErrors atomic.Int64 // max value of race.Errors seen during the test or its subtests + lastRaceErrors atomic.Int64 // Max value of race.Errors seen during the test or its subtests. raceErrorLogged atomic.Bool tempDirMu sync.Mutex @@ -933,13 +931,13 @@ func (c *common) Name() string { return c.name } -func (c *common) setRanLeaf() { +func (c *common) setRan() { if c.parent != nil { - c.parent.setRanLeaf() + c.parent.setRan() } c.mu.Lock() defer c.mu.Unlock() - c.ranAnyLeaf = true + c.ran = true } // Fail marks the function as having failed but continues execution. @@ -954,7 +952,6 @@ func (c *common) Fail() { panic("Fail in goroutine after " + c.name + " has completed") } c.failed = true - anyFailed.Store(true) } // Failed reports whether the function has failed. @@ -1003,7 +1000,7 @@ func (c *common) FailNow() { // a top-of-stack deferred function now, we know that the send // only happens after any other stacked defers have completed. c.mu.Lock() - c.goexiting = true + c.finished = true c.mu.Unlock() runtime.Goexit() } @@ -1118,7 +1115,7 @@ func (c *common) SkipNow() { c.checkFuzzFn("SkipNow") c.mu.Lock() c.skipped = true - c.goexiting = true + c.finished = true c.mu.Unlock() runtime.Goexit() } @@ -1321,8 +1318,8 @@ const ( // If ph is recoverAndReturnPanic, it will catch panics, and return the // recovered value if any. func (c *common) runCleanup(ph panicHandling) (panicVal any) { - c.inCleanup = true - defer func() { c.inCleanup = false }() + c.cleanupStarted.Store(true) + defer c.cleanupStarted.Store(false) if ph == recoverAndReturnPanic { defer func() { @@ -1449,7 +1446,8 @@ func (t *T) Parallel() { if t.isEnvSet { panic("testing: t.Parallel called after t.Setenv; cannot set environment variables in parallel tests") } - if t.parent.runParallel == nil { + t.isParallel = true + if t.parent.barrier == nil { // T.Parallel has no effect when fuzzing. 
// Multiple processes may run in parallel, but only one input can run at a // time per process so we can attribute crashes to specific inputs. @@ -1462,7 +1460,7 @@ func (t *T) Parallel() { t.duration += time.Since(t.start) // Add to the list of tests to be released by the parent. - t.parent.parallelSubtests.Add(1) + t.parent.sub = append(t.parent.sub, t) // Report any races during execution of this test up to this point. // @@ -1481,19 +1479,9 @@ func (t *T) Parallel() { } running.Delete(t.name) - t.isParallel = true - - // Release the parent test to run. We can't just use parallelSem tokens for - // this because some callers (notably TestParallelSub) expect to be able to - // call Run from multiple goroutines and have those calls succeed. - // - // Instead, we close a channel to unblock the parent's call to Run, allowing - // it to resume. Then, we wait for it to finish and unblock its parallel - // subtests, and acquire a parallel run token so that we don't run too many of - // the subtests together all at once. - close(t.doneOrParallel) - <-t.parent.runParallel - t.context.acquireParallel() + t.signal <- true // Release calling test. + <-t.parent.barrier // Wait for the parent test to complete. + t.context.waitParallel() if t.chatty != nil { t.chatty.Updatef(t.name, "=== CONT %s\n", t.name) @@ -1550,13 +1538,19 @@ var errNilPanicOrGoexit = errors.New("test executed panic(nil) or runtime.Goexit func tRunner(t *T, fn func(t *T)) { t.runner = callerName(0) - returned := false // When this goroutine is done, either because fn(t) // returned normally or because a test failure triggered // a call to runtime.Goexit, record the duration and send // a signal saying that the test is done. defer func() { + t.checkRaces() + + // TODO(#61034): This is the wrong place for this check. + if t.Failed() { + numFailed.Add(1) + } + // Check if the test panicked or Goexited inappropriately. // // If this happens in a normal test, print output but continue panicking. 
@@ -1565,120 +1559,132 @@ func tRunner(t *T, fn func(t *T)) { // If this happens while fuzzing, recover from the panic and treat it like a // normal failure. It's important that the process keeps running in order to // find short inputs that cause panics. - panicVal := recover() - if !returned && !t.goexiting && panicVal == nil { - panicVal = errNilPanicOrGoexit + err := recover() + signal := true + + t.mu.RLock() + finished := t.finished + t.mu.RUnlock() + if !finished && err == nil { + err = errNilPanicOrGoexit for p := t.parent; p != nil; p = p.parent { p.mu.RLock() - pGoexiting := p.goexiting + finished = p.finished p.mu.RUnlock() - if pGoexiting { - t.Errorf("%v: subtest may have called FailNow on a parent test", panicVal) - panicVal = nil + if finished { + if !t.isParallel { + t.Errorf("%v: subtest may have called FailNow on a parent test", err) + err = nil + } + signal = false break } } } - if panicVal != nil && t.context.isFuzzing { + if err != nil && t.context.isFuzzing { prefix := "panic: " - if panicVal == errNilPanicOrGoexit { + if err == errNilPanicOrGoexit { prefix = "" } - t.Errorf("%s%s\n%s\n", prefix, panicVal, string(debug.Stack())) - panicVal = nil - } - - if panicVal != nil { - // Mark the test as failed so that Cleanup functions can see its correct status. - t.Fail() - } else if t.runParallel != nil { - // Run parallel subtests. - - // Check for races before starting parallel subtests, so that if a - // parallel subtest *also* triggers a data race we will report the two - // races to the two tests and not attribute all of them to the subtest. - t.checkRaces() - - if t.isParallel { - // Release our own parallel token first, so that it is available for - // subtests to acquire. - t.context.releaseParallel() - } - close(t.runParallel) - t.parallelSubtests.Wait() - if t.isParallel { - // Re-acquire a parallel token to limit concurrent cleanup. 
- t.context.acquireParallel() - } + t.Errorf("%s%s\n%s\n", prefix, err, string(debug.Stack())) + t.mu.Lock() + t.finished = true + t.mu.Unlock() + err = nil } // Use a deferred call to ensure that we report that the test is // complete even if a cleanup function calls t.FailNow. See issue 41355. + didPanic := false defer func() { - cleanupPanic := recover() - if panicVal == nil { - panicVal = cleanupPanic - } - // Only report that the test is complete if it doesn't panic, // as otherwise the test binary can exit before the panic is // reported to the user. See issue 41479. - if panicVal != nil { - // Flush the output log up to the root before dying. - for root := &t.common; root.parent != nil; root = root.parent { - root.mu.Lock() - root.duration += time.Since(root.start) - d := root.duration - root.mu.Unlock() - root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d)) + if didPanic { + return + } + if err != nil { + panic(err) + } + running.Delete(t.name) + t.signal <- signal + }() - // Since the parent will never finish running, do its cleanup now. - // Run the cleanup in a fresh goroutine in case it calls runtime.Goexit, - // which we cannot recover. - cleanupDone := make(chan struct{}) - go func() { - defer close(cleanupDone) - if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil { - fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r) - } - }() - <-cleanupDone + doPanic := func(err any) { + t.Fail() + if r := t.runCleanup(recoverAndReturnPanic); r != nil { + t.Logf("cleanup panicked with %v", r) + } + // Flush the output log up to the root before dying. 
+ for root := &t.common; root.parent != nil; root = root.parent { + root.mu.Lock() + root.duration += time.Since(root.start) + d := root.duration + root.mu.Unlock() + root.flushToParent(root.name, "--- FAIL: %s (%s)\n", root.name, fmtDuration(d)) + if r := root.parent.runCleanup(recoverAndReturnPanic); r != nil { + fmt.Fprintf(root.parent.w, "cleanup panicked with %v", r) } - panic(panicVal) } + didPanic = true + panic(err) + } + if err != nil { + doPanic(err) + } - t.checkRaces() - t.duration += time.Since(t.start) - t.report() + t.duration += time.Since(t.start) - // Do not lock t.done to allow race detector to detect race in case - // the user does not appropriately synchronize a goroutine. - t.done = true - if t.parent != nil && !t.hasSub.Load() { - t.setRanLeaf() + if len(t.sub) > 0 { + // Run parallel subtests. + // Decrease the running count for this test. + t.context.release() + // Release the parallel subtests. + close(t.barrier) + // Wait for subtests to complete. + for _, sub := range t.sub { + <-sub.signal } - - running.Delete(t.name) - if t.isParallel { - t.context.releaseParallel() - t.parent.parallelSubtests.Done() - } else { - close(t.doneOrParallel) + cleanupStart := time.Now() + err := t.runCleanup(recoverAndReturnPanic) + t.duration += time.Since(cleanupStart) + if err != nil { + doPanic(err) } - }() + t.checkRaces() + if !t.isParallel { + // Reacquire the count for sequential tests. See comment in Run. + t.context.waitParallel() + } + } else if t.isParallel { + // Only release the count for this test if it was run as a parallel + // test. See comment in Run method. + t.context.release() + } + t.report() // Report after all subtests have finished. - t.runCleanup(normalPanic) + // Do not lock t.done to allow race detector to detect race in case + // the user does not appropriately synchronize a goroutine. 
+ t.done = true + if t.parent != nil && !t.hasSub.Load() { + t.setRan() + } + }() + defer func() { + if len(t.sub) == 0 { + t.runCleanup(normalPanic) + } }() - // Run the actual test function. t.start = time.Now() t.resetRaces() fn(t) - // Code beyond this point will not be executed when FailNow or SkipNow - // is invoked. - returned = true + // code beyond here will not be executed when FailNow is invoked + t.mu.Lock() + t.finished = true + t.mu.Unlock() } // Run runs f as a subtest of t called name. It runs f in a separate goroutine @@ -1688,7 +1694,7 @@ func tRunner(t *T, fn func(t *T)) { // Run may be called simultaneously from multiple goroutines, but all such calls // must return before the outer test function for t returns. func (t *T) Run(name string, f func(t *T)) bool { - if t.inCleanup { + if t.cleanupStarted.Load() { panic("testing: t.Run called during t.Cleanup") } @@ -1702,56 +1708,40 @@ func (t *T) Run(name string, f func(t *T)) bool { // continue walking the stack into the parent test. var pc [maxStackLen]uintptr n := runtime.Callers(2, pc[:]) - sub := &T{ + t = &T{ common: common{ - runParallel: make(chan struct{}), - doneOrParallel: make(chan struct{}), - name: testName, - parent: &t.common, - level: t.level + 1, - creator: pc[:n], - chatty: t.chatty, + barrier: make(chan bool), + signal: make(chan bool, 1), + name: testName, + parent: &t.common, + level: t.level + 1, + creator: pc[:n], + chatty: t.chatty, }, context: t.context, } - sub.w = indenter{&sub.common} - - // Ensure that only one non-parallel subtest runs at a time so that we don't - // exceed the -parallel setting due to concurrent calls. - // (Run may be called concurrently even if the test is not marked parallel — - // see TestParallelSub.) - t.runMu.Lock() - defer t.runMu.Unlock() - - if t.isParallel { - // Release our parallelism token for the subtest to use - // for its own parallel subtests. 
- t.context.releaseParallel() - defer t.context.acquireParallel() - } + t.w = indenter{&t.common} - if sub.chatty != nil { - sub.chatty.Updatef(sub.name, "=== RUN %s\n", sub.name) + if t.chatty != nil { + t.chatty.Updatef(t.name, "=== RUN %s\n", t.name) } - running.Store(sub.name, time.Now()) + running.Store(t.name, time.Now()) // Instead of reducing the running count of this test before calling the // tRunner and increasing it afterwards, we rely on tRunner keeping the // count correct. This ensures that a sequence of sequential tests runs // without being preempted, even when their parent is a parallel test. This // may especially reduce surprises if *parallel == 1. - go tRunner(sub, f) - <-sub.doneOrParallel - if t.goexiting { - // The parent test (t) thinks it called runtime.Goexit, but here we are - // still running. It is likely that this subtest called FailNow or SkipNow - // on the t instead of sub, so propagate the Goexit to the parent goroutine. + go tRunner(t, f) + if !<-t.signal { + // At this point, it is likely that FailNow was called on one of the + // parent tests by one of the subtests. Continue aborting up the chain. runtime.Goexit() } if t.chatty != nil && t.chatty.json { - t.chatty.Updatef(t.name, "=== NAME %s\n", t.name) + t.chatty.Updatef(t.parent.name, "=== NAME %s\n", t.parent.name) } - return !sub.failed + return !t.failed } // Deadline reports the time at which the test binary will have @@ -1775,43 +1765,53 @@ type testContext struct { // does not match). isFuzzing bool - // parallelSem is a counting semaphore to limit concurrency of Parallel tests. - // It has a capacity equal to the parallel flag. - // Send a token to acquire; receive to release. - // Non-parallel tests do not require a token. - parallelSem chan token -} + mu sync.Mutex -// A token is a semaphore token. -type token struct{} + // Channel used to signal tests that are ready to be run in parallel. 
+ startParallel chan bool -// newTestContext returns a new testContext with the given parallelism and matcher. -func newTestContext(maxParallel int, m *matcher) *testContext { - tc := &testContext{ - match: m, - parallelSem: make(chan token, maxParallel), - } - return tc + // running is the number of tests currently running in parallel. + // This does not include tests that are waiting for subtests to complete. + running int + + // numWaiting is the number tests waiting to be run in parallel. + numWaiting int + + // maxParallel is a copy of the parallel flag. + maxParallel int } -// acquireParallel blocks until it can obtain a semaphore token for running a -// parallel test. -func (c *testContext) acquireParallel() { - c.parallelSem <- token{} +func newTestContext(maxParallel int, m *matcher) *testContext { + return &testContext{ + match: m, + startParallel: make(chan bool), + maxParallel: maxParallel, + running: 1, // Set the count to 1 for the main (sequential) test. + } } -// releaseParallel returns a semaphore token obtained by acquireParallel. -func (c *testContext) releaseParallel() { - select { - case <-c.parallelSem: - default: - panic("testing: internal error: no parallel token to release") +func (c *testContext) waitParallel() { + c.mu.Lock() + if c.running < c.maxParallel { + c.running++ + c.mu.Unlock() + return } + c.numWaiting++ + c.mu.Unlock() + <-c.startParallel } -// running returns the number of semaphore tokens outstanding. -func (c *testContext) running() int { - return len(c.parallelSem) +func (c *testContext) release() { + c.mu.Lock() + if c.numWaiting == 0 { + c.running-- + c.mu.Unlock() + return + } + c.numWaiting-- + c.mu.Unlock() + c.startParallel <- true // Pick a waiting test to be run. } // No one should be using func Main anymore. 
@@ -2132,9 +2132,9 @@ func runTests(matchString func(pat, str string) (bool, error), tests []InternalT ctx.deadline = deadline t := &T{ common: common{ - doneOrParallel: make(chan struct{}), - runParallel: make(chan struct{}), - w: os.Stdout, + signal: make(chan bool, 1), + barrier: make(chan bool), + w: os.Stdout, }, context: ctx, } @@ -2147,12 +2147,12 @@ func runTests(matchString func(pat, str string) (bool, error), tests []InternalT } }) select { - case <-t.doneOrParallel: + case <-t.signal: default: - panic("internal error: tRunner exited without closing t.doneOrParallel") + panic("internal error: tRunner exited without sending on t.signal") } ok = ok && !t.Failed() - ran = ran || t.ranAnyLeaf + ran = ran || t.ran } } return ran, ok @@ -2390,5 +2390,5 @@ func parseCpuList() { } func shouldFailFast() bool { - return *failFast && anyFailed.Load() + return *failFast && numFailed.Load() > 0 } |
