runtime/trace: record event sequence numbers explicitly

Nearly all the flaky failures we've seen in trace tests have been due to the use of time stamps to determine relative event ordering. This is tricky for many reasons, including: - different cores might not have exactly synchronized clocks - VMs are worse than real hardware - non-x86 chips have different timer resolution than x86 chips - on fast systems two events can end up with the same time stamp Stop trying to make time reliable. It's clearly not going to be for Go 1.5. Instead, record an explicit event sequence number for ordering. Using our own counter solves all of the above problems. The trace still contains time stamps, of course. The sequence number is just used for ordering. Should alleviate #10554 somewhat. Then tickDiv can be chosen to be a useful time unit instead of having to be exact for ordering. Separating ordering and time stamps lets the trace parser diagnose systems where the time stamp order and actual order do not match for one reason or another. This CL adds that check to the end of trace.Parse, after all other sequence order-based checking. If that error is found, we skip the test instead of failing it. Putting the check in trace.Parse means that cmd/trace will pick up the same check, refusing to display a trace where the time stamps do not match actual ordering. Using net/http's BenchmarkClientServerParallel4 on various CPU counts, not tracing vs tracing: name old time/op new time/op delta ClientServerParallel4 50.4µs ± 4% 80.2µs ± 4% +59.06% (p=0.000 n=10+10) ClientServerParallel4-2 33.1µs ± 7% 57.8µs ± 5% +74.53% (p=0.000 n=10+10) ClientServerParallel4-4 18.5µs ± 4% 32.6µs ± 3% +75.77% (p=0.000 n=10+10) ClientServerParallel4-6 12.9µs ± 5% 24.4µs ± 2% +89.33% (p=0.000 n=10+10) ClientServerParallel4-8 11.4µs ± 6% 21.0µs ± 3% +83.40% (p=0.000 n=10+10) ClientServerParallel4-12 14.4µs ± 4% 23.8µs ± 4% +65.67% (p=0.000 n=10+10) Fixes #10512. Change-Id: I173eecf8191e86feefd728a5aad25bf1bc094b12 Reviewed-on: https://go-review.googlesource.com/12579 Reviewed-by: Austin Clements <austin@google.com>
author: Russ Cox <rsc@golang.org> 2015-07-23 14:01:03 -0400
committer: Russ Cox <rsc@golang.org> 2015-07-29 22:32:14 +0000
commit: 80c98fa901b2f393ef013ec9074630c948e3f8d4 (patch)
tree: 9c7cafa18a7b2344a6041c7cc9b2b80fc663255d /src/runtime
parent: fde392623a18ecedd7434db1cf40e82bdd482df9 (diff)
download: go-80c98fa901b2f393ef013ec9074630c948e3f8d4.tar.xz
5 files changed, 65 insertions, 26 deletions
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 788f4fd3b4..23beaf537c 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -1348,7 +1348,7 @@ func execute(gp *g, inheritTime bool) {
 		// GoSysExit has to happen when we have a P, but before GoStart.
 		// So we emit it here.
 		if gp.syscallsp != 0 && gp.sysblocktraced {
-			traceGoSysExit(gp.sysexitticks)
+			traceGoSysExit(gp.sysexitseq, gp.sysexitticks)
 		}
 		traceGoStart()
 	}
@@ -1986,6 +1986,7 @@ func exitsyscall(dummy int32) {
 	}
 
 	_g_.sysexitticks = 0
+	_g_.sysexitseq = 0
 	if trace.enabled {
 		// Wait till traceGoSysBlock event is emitted.
 		// This ensures consistency of the trace (the goroutine is started after it is blocked).
@@ -1996,7 +1997,7 @@ func exitsyscall(dummy int32) {
 		// Tracing code can invoke write barriers that cannot run without a P.
 		// So instead we remember the syscall exit time and emit the event
 		// in execute when we have a P.
-		_g_.sysexitticks = cputicks()
+		_g_.sysexitseq, _g_.sysexitticks = tracestamp()
 	}
 
 	_g_.m.locks--
@@ -2044,7 +2045,7 @@ func exitsyscallfast() bool {
 					// Denote blocking of the new syscall.
 					traceGoSysBlock(_g_.m.p.ptr())
 					// Denote completion of the current syscall.
-					traceGoSysExit(0)
+					traceGoSysExit(tracestamp())
 				})
 			}
 			_g_.m.p.ptr().syscalltick++
@@ -2068,7 +2069,7 @@ func exitsyscallfast() bool {
 						osyield()
 					}
 				}
-				traceGoSysExit(0)
+				traceGoSysExit(tracestamp())
 			}
 		})
 		if ok {
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index dc600ae578..a1c790fa85 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -235,16 +235,17 @@ type g struct {
 	waitsince      int64  // approx time when the g become blocked
 	waitreason     string // if status==Gwaiting
 	schedlink      guintptr
-	preempt        bool  // preemption signal, duplicates stackguard0 = stackpreempt
-	paniconfault   bool  // panic (instead of crash) on unexpected fault address
-	preemptscan    bool  // preempted g does scan for gc
-	gcscandone     bool  // g has scanned stack; protected by _Gscan bit in status
-	gcscanvalid    bool  // false at start of gc cycle, true if G has not run since last scan
-	throwsplit     bool  // must not split stack
-	raceignore     int8  // ignore race detection events
-	sysblocktraced bool  // StartTrace has emitted EvGoInSyscall about this goroutine
-	sysexitticks   int64 // cputicks when syscall has returned (for tracing)
-	m              *m    // for debuggers, but offset not hard-coded
+	preempt        bool   // preemption signal, duplicates stackguard0 = stackpreempt
+	paniconfault   bool   // panic (instead of crash) on unexpected fault address
+	preemptscan    bool   // preempted g does scan for gc
+	gcscandone     bool   // g has scanned stack; protected by _Gscan bit in status
+	gcscanvalid    bool   // false at start of gc cycle, true if G has not run since last scan
+	throwsplit     bool   // must not split stack
+	raceignore     int8   // ignore race detection events
+	sysblocktraced bool   // StartTrace has emitted EvGoInSyscall about this goroutine
+	sysexitticks   int64  // cputicks when syscall has returned (for tracing)
+	sysexitseq     uint64 // trace seq when syscall has returned (for tracing)
+	m              *m     // for debuggers, but offset not hard-coded
 	lockedm        *m
 	sig            uint32
 	writebuf       []byte
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 8cd95893a0..c81846221a 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -95,6 +95,7 @@ var trace struct {
 	headerWritten bool      // whether ReadTrace has emitted trace header
 	footerWritten bool      // whether ReadTrace has emitted trace footer
 	shutdownSema  uint32    // used to wait for ReadTrace completion
+	seqStart      uint64    // sequence number when tracing was started
 	ticksStart    int64     // cputicks when tracing was started
 	ticksEnd      int64     // cputicks when tracing was stopped
 	timeStart     int64     // nanotime when tracing was started
@@ -110,9 +111,31 @@ var trace struct {
 	buf     *traceBuf // global trace buffer, used when running without a p
 }
 
+var traceseq uint64 // global trace sequence number
+
+// tracestamp returns a consistent sequence number, time stamp pair
+// for use in a trace. We need to make sure that time stamp ordering
+// (assuming synchronized CPUs) and sequence ordering match.
+// To do that, we increment traceseq, grab ticks, and increment traceseq again.
+// We treat odd traceseq as a sign that another thread is in the middle
+// of the sequence and spin until it is done.
+// Not splitting stack to avoid preemption, just in case the call sites
+// that used to call xadd64 and cputicks are sensitive to that.
+//go:nosplit
+func tracestamp() (seq uint64, ts int64) {
+	seq = atomicload64(&traceseq)
+	for seq&1 != 0 || !cas64(&traceseq, seq, seq+1) {
+		seq = atomicload64(&traceseq)
+	}
+	ts = cputicks()
+	atomicstore64(&traceseq, seq+2)
+	return seq >> 1, ts
+}
+
 // traceBufHeader is per-P tracing buffer.
 type traceBufHeader struct {
 	link      *traceBuf               // in trace.empty/full
+	lastSeq   uint64                  // sequence number of last event
 	lastTicks uint64                  // when we wrote the last event
 	buf       []byte                  // trace data, always points to traceBuf.arr
 	stk       [traceStackSize]uintptr // scratch buffer for traceback
@@ -147,7 +170,7 @@ func StartTrace() error {
 		return errorString("tracing is already enabled")
 	}
 
-	trace.ticksStart = cputicks()
+	trace.seqStart, trace.ticksStart = tracestamp()
 	trace.timeStart = nanotime()
 	trace.headerWritten = false
 	trace.footerWritten = false
@@ -308,7 +331,7 @@ func ReadTrace() []byte {
 		trace.headerWritten = true
 		trace.lockOwner = nil
 		unlock(&trace.lock)
-		return []byte("gotrace\x00")
+		return []byte("go 1.5 trace\x00\x00\x00\x00")
 	}
 	// Wait for new data.
 	if trace.fullHead == nil && !trace.shutdown {
@@ -334,9 +357,11 @@ func ReadTrace() []byte {
 		var data []byte
 		data = append(data, traceEvFrequency|0<<traceArgCountShift)
 		data = traceAppend(data, uint64(freq))
+		data = traceAppend(data, 0)
 		if timers.gp != nil {
 			data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
 			data = traceAppend(data, uint64(timers.gp.goid))
+			data = traceAppend(data, 0)
 		}
 		return data
 	}
@@ -435,22 +460,27 @@ func traceEvent(ev byte, skip int, args ...uint64) {
 		return
 	}
 	buf := *bufp
-	const maxSize = 2 + 4*traceBytesPerNumber // event type, length, timestamp, stack id and two add params
+	const maxSize = 2 + 5*traceBytesPerNumber // event type, length, sequence, timestamp, stack id and two add params
 	if buf == nil || cap(buf.buf)-len(buf.buf) < maxSize {
 		buf = traceFlush(buf)
 		*bufp = buf
 	}
 
-	ticks := uint64(cputicks()) / traceTickDiv
+	seq, ticksraw := tracestamp()
+	seqDiff := seq - buf.lastSeq
+	ticks := uint64(ticksraw) / traceTickDiv
 	tickDiff := ticks - buf.lastTicks
 	if len(buf.buf) == 0 {
 		data := buf.buf
 		data = append(data, traceEvBatch|1<<traceArgCountShift)
 		data = traceAppend(data, uint64(pid))
+		data = traceAppend(data, seq)
 		data = traceAppend(data, ticks)
 		buf.buf = data
+		seqDiff = 0
 		tickDiff = 0
 	}
+	buf.lastSeq = seq
 	buf.lastTicks = ticks
 	narg := byte(len(args))
 	if skip >= 0 {
@@ -469,6 +499,7 @@ func traceEvent(ev byte, skip int, args ...uint64) {
 		data = append(data, 0)
 		lenp = &data[len(data)-1]
 	}
+	data = traceAppend(data, seqDiff)
 	data = traceAppend(data, tickDiff)
 	for _, a := range args {
 		data = traceAppend(data, a)
@@ -800,12 +831,12 @@ func traceGoSysCall() {
 	traceEvent(traceEvGoSysCall, 4)
 }
 
-func traceGoSysExit(ts int64) {
-	if ts != 0 && ts < trace.ticksStart {
+func traceGoSysExit(seq uint64, ts int64) {
+	if int64(seq)-int64(trace.seqStart) < 0 {
 		// The timestamp was obtained during a previous tracing session, ignore.
 		return
 	}
-	traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), uint64(ts)/traceTickDiv)
+	traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
 }
 
 func traceGoSysBlock(pp *p) {
diff --git a/src/runtime/trace/trace_stack_test.go b/src/runtime/trace/trace_stack_test.go
index 1fca454ed0..58139ba144 100644
--- a/src/runtime/trace/trace_stack_test.go
+++ b/src/runtime/trace/trace_stack_test.go
@@ -125,7 +125,7 @@ func TestTraceSymbolize(t *testing.T) {
 	<-pipeReadDone
 
 	Stop()
-	events, _, err := parseTrace(buf)
+	events, _, err := parseTrace(t, buf)
 	if err != nil {
 		t.Fatalf("failed to parse trace: %v", err)
 	}
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index e987564d8b..76d3d44ae7 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -75,13 +75,19 @@ func TestTrace(t *testing.T) {
 	}
 	Stop()
 	_, err := trace.Parse(buf)
+	if err == trace.ErrTimeOrder {
+		t.Skipf("skipping trace: %v", err)
+	}
 	if err != nil {
 		t.Fatalf("failed to parse trace: %v", err)
 	}
 }
 
-func parseTrace(r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
+func parseTrace(t *testing.T, r io.Reader) ([]*trace.Event, map[uint64]*trace.GDesc, error) {
 	events, err := trace.Parse(r)
+	if err == trace.ErrTimeOrder {
+		t.Skipf("skipping trace: %v", err)
+	}
 	if err != nil {
 		return nil, nil, err
 	}
@@ -221,7 +227,7 @@ func TestTraceStress(t *testing.T) {
 	runtime.GOMAXPROCS(procs)
 
 	Stop()
-	_, _, err = parseTrace(buf)
+	_, _, err = parseTrace(t, buf)
 	if err != nil {
 		t.Fatalf("failed to parse trace: %v", err)
 	}
@@ -361,7 +367,7 @@ func TestTraceStressStartStop(t *testing.T) {
 		}
 		time.Sleep(time.Millisecond)
 		Stop()
-		if _, _, err := parseTrace(buf); err != nil {
+		if _, _, err := parseTrace(t, buf); err != nil {
 			t.Fatalf("failed to parse trace: %v", err)
 		}
 	}
@@ -428,7 +434,7 @@ func TestTraceFutileWakeup(t *testing.T) {
 	done.Wait()
 
 	Stop()
-	events, _, err := parseTrace(buf)
+	events, _, err := parseTrace(t, buf)
 	if err != nil {
 		t.Fatalf("failed to parse trace: %v", err)
 	}
author	Russ Cox <rsc@golang.org>	2015-07-23 14:01:03 -0400
committer	Russ Cox <rsc@golang.org>	2015-07-29 22:32:14 +0000
commit	80c98fa901b2f393ef013ec9074630c948e3f8d4 (patch)
tree	9c7cafa18a7b2344a6041c7cc9b2b80fc663255d /src/runtime
parent	fde392623a18ecedd7434db1cf40e82bdd482df9 (diff)
download	go-80c98fa901b2f393ef013ec9074630c948e3f8d4.tar.xz