From 515e6a9b12dfe654c86cfd070ee5d6ac144fe116 Mon Sep 17 00:00:00 2001 From: Alex Brainman Date: Sun, 19 Jul 2020 16:06:48 +1000 Subject: runtime: use CreateWaitableTimerEx to implement usleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @jstarks suggested that recent versions of Windows provide access to high resolution timers. See https://github.com/golang/go/issues/8687#issuecomment-656259353 for details. I tried to run this C program on my Windows 10 computer ``` #include #include #pragma comment(lib, "Winmm.lib") // Apparently this is already defined when I use msvc cl. //#define CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002; int usleep(HANDLE timer, LONGLONG d) { LARGE_INTEGER liDueTime; DWORD ret; LARGE_INTEGER StartingTime, EndingTime, ElapsedMicroseconds; LARGE_INTEGER Frequency; QueryPerformanceFrequency(&Frequency); QueryPerformanceCounter(&StartingTime); liDueTime.QuadPart = d; liDueTime.QuadPart = liDueTime.QuadPart * 10; // us into 100 of ns units liDueTime.QuadPart = -liDueTime.QuadPart; // negative for relative dure time if (!SetWaitableTimer(timer, &liDueTime, 0, NULL, NULL, 0)) { printf("SetWaitableTimer failed: errno=%d\n", GetLastError()); return 1; } ret = WaitForSingleObject(timer, INFINITE); if (ret != WAIT_OBJECT_0) { printf("WaitForSingleObject failed: ret=%d errno=%d\n", ret, GetLastError()); return 1; } QueryPerformanceCounter(&EndingTime); ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart; ElapsedMicroseconds.QuadPart *= 1000000; ElapsedMicroseconds.QuadPart /= Frequency.QuadPart; printf("delay is %lld us - slept for %lld us\n", d, ElapsedMicroseconds.QuadPart); return 0; } int testTimer(DWORD createFlag) { HANDLE timer; timer = CreateWaitableTimerEx(NULL, NULL, createFlag, TIMER_ALL_ACCESS); if (timer == NULL) { printf("CreateWaitableTimerEx failed: errno=%d\n", GetLastError()); return 1; } usleep(timer, 1000LL); usleep(timer, 100LL); usleep(timer, 10LL); usleep(timer, 1LL); CloseHandle(timer); return 0; } int main() { printf("\n1. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is off\n"); testTimer(0); printf("\n2. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is off\n"); testTimer(CREATE_WAITABLE_TIMER_HIGH_RESOLUTION); timeBeginPeriod(1); printf("\n3. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is on\n"); testTimer(0); printf("\n4. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is on\n"); testTimer(CREATE_WAITABLE_TIMER_HIGH_RESOLUTION); } ``` and I see this output ``` 1. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is off delay is 1000 us - slept for 4045 us delay is 100 us - slept for 3915 us delay is 10 us - slept for 3291 us delay is 1 us - slept for 2234 us 2. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is off delay is 1000 us - slept for 1076 us delay is 100 us - slept for 569 us delay is 10 us - slept for 585 us delay is 1 us - slept for 17 us 3. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is on delay is 1000 us - slept for 742 us delay is 100 us - slept for 893 us delay is 10 us - slept for 414 us delay is 1 us - slept for 920 us 4. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is on delay is 1000 us - slept for 1466 us delay is 100 us - slept for 559 us delay is 10 us - slept for 535 us delay is 1 us - slept for 5 us ``` That shows, that indeed using CREATE_WAITABLE_TIMER_HIGH_RESOLUTION will provide sleeps as low as about 500 microseconds, while our current approach provides about 1 millisecond sleep. New approach also does not require for timeBeginPeriod to be on, so this change solves long standing problem with go programs draining laptop battery, because it calls timeBeginPeriod. This change will only run on systems where CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available. If not available, the runtime will fallback to original code that uses timeBeginPeriod. This is how this change affects benchmark reported in issue #14790 name               old time/op  new time/op  delta ChanToSyscallPing  1.05ms ± 2%  0.68ms ±11%  -35.43%  (p=0.000 n=10+10) The benchmark was run with GOMAXPROCS set to 1. Fixes #8687 Updates #14790 Change-Id: I5b97ba58289c088c17c05292e12e45285c467eae Reviewed-on: https://go-review.googlesource.com/c/go/+/248699 Run-TryBot: Alex Brainman TryBot-Result: Go Bot Trust: Alex Brainman Reviewed-by: Austin Clements --- src/runtime/os_windows.go | 73 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) (limited to 'src/runtime/os_windows.go') diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index a62e941229..9dd140c952 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -21,6 +21,7 @@ const ( //go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort%4 "kernel32.dll" //go:cgo_import_dynamic runtime._CreateThread CreateThread%6 "kernel32.dll" //go:cgo_import_dynamic runtime._CreateWaitableTimerA CreateWaitableTimerA%3 "kernel32.dll" +//go:cgo_import_dynamic runtime._CreateWaitableTimerExW CreateWaitableTimerExW%4 "kernel32.dll" //go:cgo_import_dynamic runtime._DuplicateHandle DuplicateHandle%7 "kernel32.dll" //go:cgo_import_dynamic runtime._ExitProcess ExitProcess%1 "kernel32.dll" //go:cgo_import_dynamic runtime._FreeEnvironmentStringsW FreeEnvironmentStringsW%1 "kernel32.dll" @@ -68,6 +69,7 @@ var ( _CreateIoCompletionPort, _CreateThread, _CreateWaitableTimerA, + _CreateWaitableTimerExW, _DuplicateHandle, _ExitProcess, _FreeEnvironmentStringsW, @@ -151,6 +153,8 @@ type mOS struct { waitsema uintptr // semaphore for parking on locks resumesema uintptr // semaphore to indicate suspend/resume + highResTimer uintptr // high resolution timer handle used in usleep + // preemptExtLock synchronizes preemptM with entry/exit from // external C code. // @@ -402,11 +406,21 @@ const osRelaxMinNS = 60 * 1e6 // osRelax is called by the scheduler when transitioning to and from // all Ps being idle. // -// On Windows, it adjusts the system-wide timer resolution. Go needs a +// Some versions of Windows have high resolution timer. For those +// versions osRelax is noop. +// For Windows versions without high resolution timer, osRelax +// adjusts the system-wide timer resolution. Go needs a // high resolution timer while running and there's little extra cost // if we're already using the CPU, but if all Ps are idle there's no // need to consume extra power to drive the high-res timer. func osRelax(relax bool) uint32 { + if haveHighResTimer { + // If the high resolution timer is available, the runtime uses the timer + // to sleep for short durations. This means there's no need to adjust + // the global clock frequency. + return 0 + } + if relax { return uint32(stdcall1(_timeEndPeriod, 1)) } else { @@ -414,6 +428,42 @@ func osRelax(relax bool) uint32 { } } +// haveHighResTimer indicates that the CreateWaitableTimerEx +// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available. +var haveHighResTimer = false + +// createHighResTimer calls CreateWaitableTimerEx with +// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag to create high +// resolution timer. createHighResTimer returns new timer +// handle or 0, if CreateWaitableTimerEx failed. +func createHighResTimer() uintptr { + const ( + // As per @jstarks, see + // https://github.com/golang/go/issues/8687#issuecomment-656259353 + _CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002 + + _SYNCHRONIZE = 0x00100000 + _TIMER_QUERY_STATE = 0x0001 + _TIMER_MODIFY_STATE = 0x0002 + ) + return stdcall4(_CreateWaitableTimerExW, 0, 0, + _CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, + _SYNCHRONIZE|_TIMER_QUERY_STATE|_TIMER_MODIFY_STATE) +} + +func initHighResTimer() { + if GOARCH == "arm" { + // TODO: Not yet implemented. + return + } + h := createHighResTimer() + if h != 0 { + haveHighResTimer = true + usleep2Addr = unsafe.Pointer(funcPC(usleep2HighRes)) + stdcall1(_CloseHandle, h) + } +} + func osinit() { asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall)) usleep2Addr = unsafe.Pointer(funcPC(usleep2)) @@ -429,6 +479,7 @@ func osinit() { stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1) + initHighResTimer() timeBeginPeriodRetValue = osRelax(false) ncpu = getproccount() @@ -844,9 +895,20 @@ func minit() { var thandle uintptr stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS) + // Configure usleep timer, if possible. + var timer uintptr + if haveHighResTimer { + timer = createHighResTimer() + if timer == 0 { + print("runtime: CreateWaitableTimerEx failed; errno=", getlasterror(), "\n") + throw("CreateWaitableTimerEx when creating timer failed") + } + } + mp := getg().m lock(&mp.threadLock) mp.thread = thandle + mp.highResTimer = timer unlock(&mp.threadLock) // Query the true stack base from the OS. Currently we're @@ -884,6 +946,10 @@ func unminit() { lock(&mp.threadLock) stdcall1(_CloseHandle, mp.thread) mp.thread = 0 + if mp.highResTimer != 0 { + stdcall1(_CloseHandle, mp.highResTimer) + mp.highResTimer = 0 + } unlock(&mp.threadLock) } @@ -976,9 +1042,12 @@ func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr { return stdcall(fn) } -// in sys_windows_386.s and sys_windows_amd64.s +// In sys_windows_386.s and sys_windows_amd64.s. func onosstack(fn unsafe.Pointer, arg uint32) + +// These are not callable functions. They should only be called via onosstack. func usleep2(usec uint32) +func usleep2HighRes(usec uint32) func switchtothread() var usleep2Addr unsafe.Pointer -- cgit v1.3 From 368c40116434532dc0b53b72fa04788ca6742898 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 27 Oct 2020 16:09:40 -0700 Subject: runtime: block signals in needm before allocating M Otherwise, if a signal occurs just after we allocated the M, we can deadlock if the signal handler needs to allocate an M itself. Fixes #42207 Change-Id: I76f44547f419e8b1c14cbf49bf602c6e645d8c14 Reviewed-on: https://go-review.googlesource.com/c/go/+/265759 Trust: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Bryan C. Mills --- src/runtime/crash_unix_test.go | 9 +++ src/runtime/os_js.go | 2 +- src/runtime/os_plan9.go | 2 +- src/runtime/os_windows.go | 2 +- src/runtime/proc.go | 26 ++++--- src/runtime/signal_unix.go | 8 +- src/runtime/testdata/testprogcgo/needmdeadlock.go | 95 +++++++++++++++++++++++ 7 files changed, 127 insertions(+), 17 deletions(-) create mode 100644 src/runtime/testdata/testprogcgo/needmdeadlock.go (limited to 'src/runtime/os_windows.go') diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go index fc87f37408..7aba3d4846 100644 --- a/src/runtime/crash_unix_test.go +++ b/src/runtime/crash_unix_test.go @@ -358,3 +358,12 @@ func TestSignalM(t *testing.T) { t.Fatalf("signal sent to M %d, but received on M %d", want, got) } } + +// Issue #42207. +func TestNeedmDeadlock(t *testing.T) { + output := runTestProg(t, "testprogcgo", "NeedmDeadlock") + want := "OK\n" + if output != want { + t.Fatalf("want %s, got %s\n", want, output) + } +} diff --git a/src/runtime/os_js.go b/src/runtime/os_js.go index ff0ee3aa6b..94983b358d 100644 --- a/src/runtime/os_js.go +++ b/src/runtime/os_js.go @@ -59,7 +59,7 @@ func mpreinit(mp *m) { } //go:nosplit -func msigsave(mp *m) { +func sigsave(p *sigset) { } //go:nosplit diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go index f3037a7508..62aecea060 100644 --- a/src/runtime/os_plan9.go +++ b/src/runtime/os_plan9.go @@ -184,7 +184,7 @@ func mpreinit(mp *m) { mp.errstr = (*byte)(mallocgc(_ERRMAX, nil, true)) } -func msigsave(mp *m) { +func sigsave(p *sigset) { } func msigrestore(sigmask sigset) { diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index 9dd140c952..ffb087f9db 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -873,7 +873,7 @@ func mpreinit(mp *m) { } //go:nosplit -func msigsave(mp *m) { +func sigsave(p *sigset) { } //go:nosplit diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 87d4b6e568..b335e1184d 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -598,7 +598,7 @@ func schedinit() { typelinksinit() // uses maps, activeModules itabsinit() // uses activeModules - msigsave(_g_.m) + sigsave(&_g_.m.sigmask) initSigmask = _g_.m.sigmask goargs() @@ -1707,6 +1707,18 @@ func needm() { exit(1) } + // Save and block signals before getting an M. + // The signal handler may call needm itself, + // and we must avoid a deadlock. Also, once g is installed, + // any incoming signals will try to execute, + // but we won't have the sigaltstack settings and other data + // set up appropriately until the end of minit, which will + // unblock the signals. This is the same dance as when + // starting a new m to run Go code via newosproc. + var sigmask sigset + sigsave(&sigmask) + sigblock() + // Lock extra list, take head, unlock popped list. // nilokay=false is safe here because of the invariant above, // that the extra list always contains or will soon contain @@ -1724,14 +1736,8 @@ func needm() { extraMCount-- unlockextra(mp.schedlink.ptr()) - // Save and block signals before installing g. - // Once g is installed, any incoming signals will try to execute, - // but we won't have the sigaltstack settings and other data - // set up appropriately until the end of minit, which will - // unblock the signals. This is the same dance as when - // starting a new m to run Go code via newosproc. - msigsave(mp) - sigblock() + // Store the original signal mask for use by minit. + mp.sigmask = sigmask // Install g (= m->g0) and set the stack bounds // to match the current stack. We don't actually know @@ -3676,7 +3682,7 @@ func beforefork() { // a signal handler before exec if a signal is sent to the process // group. See issue #18600. gp.m.locks++ - msigsave(gp.m) + sigsave(&gp.m.sigmask) sigblock() // This function is called before fork in syscall package. diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index 9318a9b8bc..bf4a319b37 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -1031,15 +1031,15 @@ func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool { return true } -// msigsave saves the current thread's signal mask into mp.sigmask. +// sigsave saves the current thread's signal mask into *p. // This is used to preserve the non-Go signal mask when a non-Go // thread calls a Go function. // This is nosplit and nowritebarrierrec because it is called by needm // which may be called on a non-Go thread with no g available. //go:nosplit //go:nowritebarrierrec -func msigsave(mp *m) { - sigprocmask(_SIG_SETMASK, nil, &mp.sigmask) +func sigsave(p *sigset) { + sigprocmask(_SIG_SETMASK, nil, p) } // msigrestore sets the current thread's signal mask to sigmask. @@ -1111,7 +1111,7 @@ func minitSignalStack() { // thread's signal mask. When this is called all signals have been // blocked for the thread. This starts with m.sigmask, which was set // either from initSigmask for a newly created thread or by calling -// msigsave if this is a non-Go thread calling a Go function. It +// sigsave if this is a non-Go thread calling a Go function. It // removes all essential signals from the mask, thus causing those // signals to not be blocked. Then it sets the thread's signal mask. // After this is called the thread can receive signals. diff --git a/src/runtime/testdata/testprogcgo/needmdeadlock.go b/src/runtime/testdata/testprogcgo/needmdeadlock.go new file mode 100644 index 0000000000..5a9c359006 --- /dev/null +++ b/src/runtime/testdata/testprogcgo/needmdeadlock.go @@ -0,0 +1,95 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +package main + +// This is for issue #42207. +// During a call to needm we could get a SIGCHLD signal +// which would itself call needm, causing a deadlock. + +/* +#include +#include +#include +#include + +extern void GoNeedM(); + +#define SIGNALERS 10 + +static void* needmSignalThread(void* p) { + pthread_t* pt = (pthread_t*)(p); + int i; + + for (i = 0; i < 100; i++) { + if (pthread_kill(*pt, SIGCHLD) < 0) { + return NULL; + } + usleep(1); + } + return NULL; +} + +// We don't need many calls, as the deadlock is only likely +// to occur the first couple of times that needm is called. +// After that there will likely be an extra M available. +#define CALLS 10 + +static void* needmCallbackThread(void* p) { + int i; + + for (i = 0; i < SIGNALERS; i++) { + sched_yield(); // Help the signal threads get started. + } + for (i = 0; i < CALLS; i++) { + GoNeedM(); + } + return NULL; +} + +static void runNeedmSignalThread() { + int i; + pthread_t caller; + pthread_t s[SIGNALERS]; + + pthread_create(&caller, NULL, needmCallbackThread, NULL); + for (i = 0; i < SIGNALERS; i++) { + pthread_create(&s[i], NULL, needmSignalThread, &caller); + } + for (i = 0; i < SIGNALERS; i++) { + pthread_join(s[i], NULL); + } + pthread_join(caller, NULL); +} +*/ +import "C" + +import ( + "fmt" + "os" + "time" +) + +func init() { + register("NeedmDeadlock", NeedmDeadlock) +} + +//export GoNeedM +func GoNeedM() { +} + +func NeedmDeadlock() { + // The failure symptom is that the program hangs because of a + // deadlock in needm, so set an alarm. + go func() { + time.Sleep(5 * time.Second) + fmt.Println("Hung for 5 seconds") + os.Exit(1) + }() + + C.runNeedmSignalThread() + fmt.Println("OK") +} -- cgit v1.3