runtime/debug: SetCrashOutput sets the FD for fatal panics

This feature makes it possible to record unhandled panics in any goroutine through a watchdog process (e.g. the same application forked+exec'd as a child in a special mode) that can process the panic report, for example by sending it to a crash-reporting system such as Go telemetry or Sentry.

Fixes #42888

Change-Id: I5aa7be8f726bbc70fc650540bd1a14ab60c62ecb
Reviewed-on: https://go-review.googlesource.com/c/go/+/547978
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Alan Donovan <adonovan@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
author: Alan Donovan <adonovan@google.com> 2023-12-07 18:02:40 -0500
committer: Gopher Robot <gobot@golang.org> 2024-01-31 16:50:42 +0000
commit: 1bb947b2eb62f54dd7ff621f7e1c768c00edd1a7 (patch)
tree: 58f821389f1a025f8106b05856b942aa2fc4fa94 /src/runtime/debug
parent: 13766fe7d89c44bb45cfb33591b137553bda7c7d (diff)
download: go-1bb947b2eb62f54dd7ff621f7e1c768c00edd1a7.tar.xz
2 files changed, 131 insertions, 2 deletions
diff --git a/src/runtime/debug/stack.go b/src/runtime/debug/stack.go
index 3999840d3c..508afe1f97 100644
--- a/src/runtime/debug/stack.go
+++ b/src/runtime/debug/stack.go
@@ -7,8 +7,10 @@
 package debug
 
 import (
+	"internal/poll"
 	"os"
 	"runtime"
+	_ "unsafe" // for linkname
 )
 
 // PrintStack prints to standard error the stack trace returned by runtime.Stack.
@@ -28,3 +30,54 @@ func Stack() []byte {
 		buf = make([]byte, 2*len(buf))
 	}
 }
+
+// SetCrashOutput configures a single additional file where unhandled
+// panics and other fatal errors are printed, in addition to standard error.
+// There is only one additional file: calling SetCrashOutput again
+// overrides any earlier call; it does not close the previous file.
+// SetCrashOutput(nil) disables the use of any additional file.
+func SetCrashOutput(f *os.File) error {
+	fd := ^uintptr(0)
+	if f != nil {
+		// The runtime will write to this file descriptor from
+		// low-level routines during a panic, possibly without
+		// a G, so we must call f.Fd() eagerly. This creates a
+		// danger that the file descriptor is no longer
+		// valid at the time of the write, because the caller
+		// (incorrectly) called f.Close() and the kernel
+		// reissued the fd in a later call to open(2), leading
+		// to crashes being written to the wrong file.
+		//
+		// So, we duplicate the fd to obtain a private one
+		// that cannot be closed by the user.
+		// This also relieves us of concerns about the
+		// lifetime and finalization of f.
+		// (DupCloseOnExec returns an fd, not a *File, so
+		// there is no finalizer, and we are responsible for
+		// closing it.)
+		//
+		// The new fd must be close-on-exec, otherwise if the
+		// crash monitor is a child process, it may inherit
+		// it, so it will never see EOF from the pipe even
+		// when this process crashes.
+		//
+		// A side effect of Fd() is that it calls SetBlocking,
+		// which is important so that writes of a crash report
+		// to a full pipe buffer don't get lost.
+		fd2, _, err := poll.DupCloseOnExec(int(f.Fd()))
+		if err != nil {
+			return err
+		}
+		runtime.KeepAlive(f) // prevent finalization before dup
+		fd = uintptr(fd2)
+	}
+	if prev := runtime_setCrashFD(fd); prev != ^uintptr(0) {
+		// We use NewFile+Close because it is portable,
+		// unlike syscall.Close, whose parameter type varies.
+		os.NewFile(prev, "").Close() // ignore error
+	}
+	return nil
+}
+
+//go:linkname runtime_setCrashFD runtime.setCrashFD
+func runtime_setCrashFD(uintptr) uintptr
diff --git a/src/runtime/debug/stack_test.go b/src/runtime/debug/stack_test.go
index 671057c3a0..289749ccb4 100644
--- a/src/runtime/debug/stack_test.go
+++ b/src/runtime/debug/stack_test.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"fmt"
 	"internal/testenv"
+	"log"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -18,10 +19,24 @@ import (
 )
 
 func TestMain(m *testing.M) {
-	if os.Getenv("GO_RUNTIME_DEBUG_TEST_DUMP_GOROOT") != "" {
+	switch os.Getenv("GO_RUNTIME_DEBUG_TEST_ENTRYPOINT") {
+	case "dumpgoroot":
 		fmt.Println(runtime.GOROOT())
 		os.Exit(0)
+
+	case "setcrashoutput":
+		f, err := os.Create(os.Getenv("CRASHOUTPUT"))
+		if err != nil {
+			log.Fatal(err)
+		}
+		if err := SetCrashOutput(f); err != nil {
+			log.Fatal(err) // e.g. EMFILE
+		}
+		println("hello")
+		panic("oops")
 	}
+
+	// default: run the tests.
 	os.Exit(m.Run())
 }
 
@@ -77,7 +92,7 @@ func TestStack(t *testing.T) {
 			t.Fatal(err)
 		}
 		cmd := exec.Command(exe)
-		cmd.Env = append(os.Environ(), "GOROOT=", "GO_RUNTIME_DEBUG_TEST_DUMP_GOROOT=1")
+		cmd.Env = append(os.Environ(), "GOROOT=", "GO_RUNTIME_DEBUG_TEST_ENTRYPOINT=dumpgoroot")
 		out, err := cmd.Output()
 		if err != nil {
 			t.Fatal(err)
@@ -119,3 +134,64 @@ func TestStack(t *testing.T) {
 	frame("runtime/debug/stack_test.go", "runtime/debug_test.TestStack")
 	frame("testing/testing.go", "")
 }
+
+func TestSetCrashOutput(t *testing.T) {
+	testenv.MustHaveExec(t)
+	exe, err := os.Executable()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	crashOutput := filepath.Join(t.TempDir(), "crash.out")
+
+	cmd := exec.Command(exe)
+	cmd.Stderr = new(strings.Builder)
+	cmd.Env = append(os.Environ(), "GO_RUNTIME_DEBUG_TEST_ENTRYPOINT=setcrashoutput", "CRASHOUTPUT="+crashOutput)
+	err = cmd.Run()
+	stderr := fmt.Sprint(cmd.Stderr)
+	if err == nil {
+		t.Fatalf("child process succeeded unexpectedly (stderr: %s)", stderr)
+	}
+	t.Logf("child process finished with error %v and stderr <<%s>>", err, stderr)
+
+	// Read the file the child process should have written.
+	// It should contain a crash report such as this:
+	//
+	// panic: oops
+	//
+	// goroutine 1 [running]:
+	// runtime/debug_test.TestMain(0x1400007e0a0)
+	// 	GOROOT/src/runtime/debug/stack_test.go:33 +0x18c
+	// main.main()
+	// 	_testmain.go:71 +0x170
+	data, err := os.ReadFile(crashOutput)
+	if err != nil {
+		t.Fatalf("child process failed to write crash report: %v", err)
+	}
+	crash := string(data)
+	t.Logf("crash = <<%s>>", crash)
+	t.Logf("stderr = <<%s>>", stderr)
+
+	// Check that the crash file and the stderr both contain the panic and stack trace.
+	for _, want := range []string{
+		"panic: oops",
+		"goroutine 1",
+		"debug_test.TestMain",
+	} {
+		if !strings.Contains(crash, want) {
+			t.Errorf("crash output does not contain %q", want)
+		}
+		if !strings.Contains(stderr, want) {
+			t.Errorf("stderr output does not contain %q", want)
+		}
+	}
+
+	// Check that stderr, but not crash, contains the output of println().
+	printlnOnly := "hello"
+	if strings.Contains(crash, printlnOnly) {
+		t.Errorf("crash output contains %q, but should not", printlnOnly)
+	}
+	if !strings.Contains(stderr, printlnOnly) {
+		t.Errorf("stderr output does not contain %q, but should", printlnOnly)
+	}
+}
author	Alan Donovan <adonovan@google.com>	2023-12-07 18:02:40 -0500
committer	Gopher Robot <gobot@golang.org>	2024-01-31 16:50:42 +0000
commit	1bb947b2eb62f54dd7ff621f7e1c768c00edd1a7 (patch)
tree	58f821389f1a025f8106b05856b942aa2fc4fa94 /src/runtime/debug
parent	13766fe7d89c44bb45cfb33591b137553bda7c7d (diff)
download	go-1bb947b2eb62f54dd7ff621f7e1c768c00edd1a7.tar.xz