From 49e0300854dabc8d3c2e91d26897a998345f2447 Mon Sep 17 00:00:00 2001 From: Dmitriy Vyukov Date: Thu, 14 Mar 2013 19:06:35 +0400 Subject: runtime: integrated network poller for linux vs tip: BenchmarkTCP4OneShot 172994 40485 -76.60% BenchmarkTCP4OneShot-2 96581 30028 -68.91% BenchmarkTCP4OneShot-4 52615 18454 -64.93% BenchmarkTCP4OneShot-8 26351 12289 -53.36% BenchmarkTCP4OneShot-16 12258 16093 +31.29% BenchmarkTCP4OneShot-32 13200 17045 +29.13% BenchmarkTCP4OneShotTimeout 124814 42932 -65.60% BenchmarkTCP4OneShotTimeout-2 99090 29040 -70.69% BenchmarkTCP4OneShotTimeout-4 51860 18455 -64.41% BenchmarkTCP4OneShotTimeout-8 26100 12073 -53.74% BenchmarkTCP4OneShotTimeout-16 12198 16654 +36.53% BenchmarkTCP4OneShotTimeout-32 13438 17143 +27.57% BenchmarkTCP4Persistent 115647 7782 -93.27% BenchmarkTCP4Persistent-2 58024 4808 -91.71% BenchmarkTCP4Persistent-4 24715 3674 -85.13% BenchmarkTCP4Persistent-8 16431 2407 -85.35% BenchmarkTCP4Persistent-16 2336 1875 -19.73% BenchmarkTCP4Persistent-32 1689 1637 -3.08% BenchmarkTCP4PersistentTimeout 79754 7859 -90.15% BenchmarkTCP4PersistentTimeout-2 57708 5952 -89.69% BenchmarkTCP4PersistentTimeout-4 26907 3823 -85.79% BenchmarkTCP4PersistentTimeout-8 15036 2567 -82.93% BenchmarkTCP4PersistentTimeout-16 2507 1903 -24.09% BenchmarkTCP4PersistentTimeout-32 1717 1627 -5.24% vs old scheduler: benchmark old ns/op new ns/op delta BenchmarkTCPOneShot 192244 40485 -78.94% BenchmarkTCPOneShot-2 63835 30028 -52.96% BenchmarkTCPOneShot-4 35443 18454 -47.93% BenchmarkTCPOneShot-8 22140 12289 -44.49% BenchmarkTCPOneShot-16 16930 16093 -4.94% BenchmarkTCPOneShot-32 16719 17045 +1.95% BenchmarkTCPOneShotTimeout 190495 42932 -77.46% BenchmarkTCPOneShotTimeout-2 64828 29040 -55.20% BenchmarkTCPOneShotTimeout-4 34591 18455 -46.65% BenchmarkTCPOneShotTimeout-8 21989 12073 -45.10% BenchmarkTCPOneShotTimeout-16 16848 16654 -1.15% BenchmarkTCPOneShotTimeout-32 16796 17143 +2.07% BenchmarkTCPPersistent 81670 7782 -90.47% BenchmarkTCPPersistent-2 26598 4808 -81.92% BenchmarkTCPPersistent-4 15633 3674 -76.50% BenchmarkTCPPersistent-8 18093 2407 -86.70% BenchmarkTCPPersistent-16 17472 1875 -89.27% BenchmarkTCPPersistent-32 7679 1637 -78.68% BenchmarkTCPPersistentTimeout 83186 7859 -90.55% BenchmarkTCPPersistentTimeout-2 26883 5952 -77.86% BenchmarkTCPPersistentTimeout-4 15776 3823 -75.77% BenchmarkTCPPersistentTimeout-8 18180 2567 -85.88% BenchmarkTCPPersistentTimeout-16 17454 1903 -89.10% BenchmarkTCPPersistentTimeout-32 7798 1627 -79.14% R=golang-dev, iant, bradfitz, dave, rsc CC=golang-dev https://golang.org/cl/7579044 --- src/pkg/runtime/defs2_linux.go | 26 +++++++++++-- src/pkg/runtime/defs_linux.go | 21 +++++++++- src/pkg/runtime/defs_linux_386.h | 22 ++++++++++- src/pkg/runtime/defs_linux_amd64.h | 24 +++++++++++- src/pkg/runtime/mem_linux.c | 2 - src/pkg/runtime/netpoll.goc | 2 +- src/pkg/runtime/netpoll_epoll.c | 80 ++++++++++++++++++++++++++++++++++++++ src/pkg/runtime/netpoll_stub.c | 2 +- src/pkg/runtime/sys_linux_386.s | 43 ++++++++++++++++++++ src/pkg/runtime/sys_linux_amd64.s | 43 ++++++++++++++++++++ src/pkg/runtime/thread_linux.c | 3 -- 11 files changed, 254 insertions(+), 14 deletions(-) create mode 100644 src/pkg/runtime/netpoll_epoll.c (limited to 'src/pkg/runtime') diff --git a/src/pkg/runtime/defs2_linux.go b/src/pkg/runtime/defs2_linux.go index 9b07029556..60ecc69bb9 100644 --- a/src/pkg/runtime/defs2_linux.go +++ b/src/pkg/runtime/defs2_linux.go @@ -7,7 +7,7 @@ /* * Input to cgo -cdefs -GOARCH=386 cgo -cdefs defs2.go >386/defs.h +GOARCH=386 go tool cgo -cdefs defs2_linux.go >defs_linux_386.h The asm header tricks we have to use for Linux on amd64 (see defs.c and defs1.c) don't work here, so this is yet another @@ -17,15 +17,19 @@ file. Sigh. package runtime /* -#cgo CFLAGS: -I/home/rsc/pub/linux-2.6/arch/x86/include -I/home/rsc/pub/linux-2.6/include -D_LOOSE_KERNEL_NAMES -D__ARCH_SI_UID_T=__kernel_uid32_t +#cgo CFLAGS: -I/tmp/linux/arch/x86/include -I/tmp/linux/include -D_LOOSE_KERNEL_NAMES -D__ARCH_SI_UID_T=__kernel_uid32_t #define size_t __kernel_size_t +#define pid_t int #include #include #include #include #include +#include #include +#include +#include // This is the sigaction structure from the Linux 2.1.68 kernel which // is used with the rt_sigaction system call. For 386 this is not @@ -35,12 +39,16 @@ struct kernel_sigaction { __sighandler_t k_sa_handler; unsigned long sa_flags; void (*sa_restorer) (void); - sigset_t sa_mask; + unsigned long long sa_mask; }; */ import "C" const ( + EINTR = C.EINTR + EAGAIN = C.EAGAIN + ENOMEM = C.ENOMEM + PROT_NONE = C.PROT_NONE PROT_READ = C.PROT_READ PROT_WRITE = C.PROT_WRITE @@ -110,6 +118,17 @@ const ( O_RDONLY = C.O_RDONLY O_CLOEXEC = C.O_CLOEXEC + + EPOLLIN = C.POLLIN + EPOLLOUT = C.POLLOUT + EPOLLERR = C.POLLERR + EPOLLHUP = C.POLLHUP + EPOLLRDHUP = C.POLLRDHUP + EPOLLET = C.EPOLLET + EPOLL_CLOEXEC = C.EPOLL_CLOEXEC + EPOLL_CTL_ADD = C.EPOLL_CTL_ADD + EPOLL_CTL_DEL = C.EPOLL_CTL_DEL + EPOLL_CTL_MOD = C.EPOLL_CTL_MOD ) type Fpreg C.struct__fpreg @@ -124,3 +143,4 @@ type Sigaltstack C.struct_sigaltstack type Sigcontext C.struct_sigcontext type Ucontext C.struct_ucontext type Itimerval C.struct_itimerval +type EpollEvent C.struct_epoll_event diff --git a/src/pkg/runtime/defs_linux.go b/src/pkg/runtime/defs_linux.go index c0275e1114..2f4e03a016 100644 --- a/src/pkg/runtime/defs_linux.go +++ b/src/pkg/runtime/defs_linux.go @@ -7,7 +7,7 @@ /* Input to cgo -cdefs -GOARCH=amd64 cgo -cdefs defs.go defs1.go >amd64/defs.h +GOARCH=amd64 go tool cgo -cdefs defs_linux.go defs1_linux.go >defs_linux_amd64.h */ package runtime @@ -25,10 +25,17 @@ package runtime #include #include #include +#include +#include +#include */ import "C" const ( + EINTR = C.EINTR + EAGAIN = C.EAGAIN + ENOMEM = C.ENOMEM + PROT_NONE = C.PROT_NONE PROT_READ = C.PROT_READ PROT_WRITE = C.PROT_WRITE @@ -95,6 +102,17 @@ const ( ITIMER_REAL = C.ITIMER_REAL ITIMER_VIRTUAL = C.ITIMER_VIRTUAL ITIMER_PROF = C.ITIMER_PROF + + EPOLLIN = C.POLLIN + EPOLLOUT = C.POLLOUT + EPOLLERR = C.POLLERR + EPOLLHUP = C.POLLHUP + EPOLLRDHUP = C.POLLRDHUP + EPOLLET = C.EPOLLET + EPOLL_CLOEXEC = C.EPOLL_CLOEXEC + EPOLL_CTL_ADD = C.EPOLL_CTL_ADD + EPOLL_CTL_DEL = C.EPOLL_CTL_DEL + EPOLL_CTL_MOD = C.EPOLL_CTL_MOD ) type Timespec C.struct_timespec @@ -102,3 +120,4 @@ type Timeval C.struct_timeval type Sigaction C.struct_sigaction type Siginfo C.siginfo_t type Itimerval C.struct_itimerval +type EpollEvent C.struct_epoll_event diff --git a/src/pkg/runtime/defs_linux_386.h b/src/pkg/runtime/defs_linux_386.h index e257a6f85f..27dae9e82b 100644 --- a/src/pkg/runtime/defs_linux_386.h +++ b/src/pkg/runtime/defs_linux_386.h @@ -1,8 +1,12 @@ // Created by cgo -cdefs - DO NOT EDIT -// cgo -cdefs defs2.go +// cgo -cdefs defs2_linux.go enum { + EINTR = 0x4, + EAGAIN = 0xb, + ENOMEM = 0xc, + PROT_NONE = 0x0, PROT_READ = 0x1, PROT_WRITE = 0x2, @@ -72,6 +76,17 @@ enum { O_RDONLY = 0x0, O_CLOEXEC = 0x80000, + + EPOLLIN = 0x1, + EPOLLOUT = 0x4, + EPOLLERR = 0x8, + EPOLLHUP = 0x10, + EPOLLRDHUP = 0x2000, + EPOLLET = -0x80000000, + EPOLL_CLOEXEC = 0x80000, + EPOLL_CTL_ADD = 0x1, + EPOLL_CTL_DEL = 0x2, + EPOLL_CTL_MOD = 0x3, }; typedef struct Fpreg Fpreg; @@ -86,6 +101,7 @@ typedef struct Sigaltstack Sigaltstack; typedef struct Sigcontext Sigcontext; typedef struct Ucontext Ucontext; typedef struct Itimerval Itimerval; +typedef struct EpollEvent EpollEvent; #pragma pack on @@ -186,6 +202,10 @@ struct Itimerval { Timeval it_interval; Timeval it_value; }; +struct EpollEvent { + uint32 events; + uint64 data; +}; #pragma pack off diff --git a/src/pkg/runtime/defs_linux_amd64.h b/src/pkg/runtime/defs_linux_amd64.h index bf5f79b0e4..3e87df68a1 100644 --- a/src/pkg/runtime/defs_linux_amd64.h +++ b/src/pkg/runtime/defs_linux_amd64.h @@ -1,8 +1,12 @@ // Created by cgo -cdefs - DO NOT EDIT -// cgo -cdefs defs.go defs1.go +// cgo -cdefs defs_linux.go defs1_linux.go enum { + EINTR = 0x4, + EAGAIN = 0xb, + ENOMEM = 0xc, + PROT_NONE = 0x0, PROT_READ = 0x1, PROT_WRITE = 0x2, @@ -69,6 +73,17 @@ enum { ITIMER_REAL = 0x0, ITIMER_VIRTUAL = 0x1, ITIMER_PROF = 0x2, + + EPOLLIN = 0x1, + EPOLLOUT = 0x4, + EPOLLERR = 0x8, + EPOLLHUP = 0x10, + EPOLLRDHUP = 0x2000, + EPOLLET = -0x80000000, + EPOLL_CLOEXEC = 0x80000, + EPOLL_CTL_ADD = 0x1, + EPOLL_CTL_DEL = 0x2, + EPOLL_CTL_MOD = 0x3, }; typedef struct Timespec Timespec; @@ -76,6 +91,7 @@ typedef struct Timeval Timeval; typedef struct Sigaction Sigaction; typedef struct Siginfo Siginfo; typedef struct Itimerval Itimerval; +typedef struct EpollEvent EpollEvent; #pragma pack on @@ -104,11 +120,15 @@ struct Itimerval { Timeval it_interval; Timeval it_value; }; +struct EpollEvent { + uint32 events; + uint64 data; +}; #pragma pack off // Created by cgo -cdefs - DO NOT EDIT -// cgo -cdefs defs.go defs1.go +// cgo -cdefs defs_linux.go defs1_linux.go enum { diff --git a/src/pkg/runtime/mem_linux.c b/src/pkg/runtime/mem_linux.c index ebcec1e863..1bae755faf 100644 --- a/src/pkg/runtime/mem_linux.c +++ b/src/pkg/runtime/mem_linux.c @@ -10,8 +10,6 @@ enum { - EAGAIN = 11, - ENOMEM = 12, _PAGE_SIZE = 4096, }; diff --git a/src/pkg/runtime/netpoll.goc b/src/pkg/runtime/netpoll.goc index 84810003ea..152fa174e9 100644 --- a/src/pkg/runtime/netpoll.goc +++ b/src/pkg/runtime/netpoll.goc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin +// +build darwin linux,386 linux,amd64 package net diff --git a/src/pkg/runtime/netpoll_epoll.c b/src/pkg/runtime/netpoll_epoll.c new file mode 100644 index 0000000000..53916e1d2e --- /dev/null +++ b/src/pkg/runtime/netpoll_epoll.c @@ -0,0 +1,80 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux,386 linux,amd64 + +#include "runtime.h" +#include "defs_GOOS_GOARCH.h" + +int32 runtime·epollcreate(int32 size); +int32 runtime·epollcreate1(int32 flags); +int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev); +int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout); +void runtime·closeonexec(int32 fd); + +static int32 epfd = -1; // epoll descriptor + +void +runtime·netpollinit(void) +{ + epfd = runtime·epollcreate1(EPOLL_CLOEXEC); + if(epfd >= 0) + return; + epfd = runtime·epollcreate(1024); + if(epfd >= 0) { + runtime·closeonexec(epfd); + return; + } + runtime·printf("netpollinit: failed to create descriptor (%d)\n", -epfd); + runtime·throw("netpollinit: failed to create descriptor"); +} + +int32 +runtime·netpollopen(int32 fd, PollDesc *pd) +{ + EpollEvent ev; + + ev.events = EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET; + ev.data = (uint64)pd; + return runtime·epollctl(epfd, EPOLL_CTL_ADD, fd, &ev); +} + +// polls for ready network connections +// returns list of goroutines that become runnable +G* +runtime·netpoll(bool block) +{ + EpollEvent events[128], *ev; + int32 n, i, waitms, mode; + G *gp; + + if(epfd == -1) + return nil; + waitms = -1; + if(!block) + waitms = 0; +retry: + n = runtime·epollwait(epfd, events, nelem(events), waitms); + if(n < 0) { + if(n != -EINTR) + runtime·printf("epollwait failed with %d\n", -n); + goto retry; + } + gp = nil; + for(i = 0; i < n; i++) { + ev = &events[i]; + if(ev->events == 0) + continue; + mode = 0; + if(ev->events & (EPOLLIN|EPOLLRDHUP|EPOLLHUP|EPOLLERR)) + mode += 'r'; + if(ev->events & (EPOLLOUT|EPOLLHUP|EPOLLERR)) + mode += 'w'; + if(mode) + runtime·netpollready(&gp, (void*)ev->data, mode); + } + if(block && gp == nil) + goto retry; + return gp; +} diff --git a/src/pkg/runtime/netpoll_stub.c b/src/pkg/runtime/netpoll_stub.c index db2b1ee13f..a70b2f772f 100644 --- a/src/pkg/runtime/netpoll_stub.c +++ b/src/pkg/runtime/netpoll_stub.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build freebsd linux netbsd openbsd plan9 windows +// +build freebsd linux,arm netbsd openbsd plan9 windows #include "runtime.h" diff --git a/src/pkg/runtime/sys_linux_386.s b/src/pkg/runtime/sys_linux_386.s index f27fd47130..19dfbf3847 100644 --- a/src/pkg/runtime/sys_linux_386.s +++ b/src/pkg/runtime/sys_linux_386.s @@ -430,3 +430,46 @@ TEXT runtime·sched_getaffinity(SB),7,$0 MOVL 12(SP), DX CALL *runtime·_vdso(SB) RET + +// int32 runtime·epollcreate(int32 size); +TEXT runtime·epollcreate(SB),7,$0 + MOVL $254, AX + MOVL 4(SP), BX + CALL *runtime·_vdso(SB) + RET + +// int32 runtime·epollcreate1(int32 flags); +TEXT runtime·epollcreate1(SB),7,$0 + MOVL $329, AX + MOVL 4(SP), BX + CALL *runtime·_vdso(SB) + RET + +// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev); +TEXT runtime·epollctl(SB),7,$0 + MOVL $255, AX + MOVL 4(SP), BX + MOVL 8(SP), CX + MOVL 12(SP), DX + MOVL 16(SP), SI + CALL *runtime·_vdso(SB) + RET + +// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout); +TEXT runtime·epollwait(SB),7,$0 + MOVL $256, AX + MOVL 4(SP), BX + MOVL 8(SP), CX + MOVL 12(SP), DX + MOVL 16(SP), SI + CALL *runtime·_vdso(SB) + RET + +// void runtime·closeonexec(int32 fd); +TEXT runtime·closeonexec(SB),7,$0 + MOVL $55, AX // fcntl + MOVL 4(SP), BX // fd + MOVL $2, CX // F_SETFD + MOVL $1, DX // FD_CLOEXEC + CALL *runtime·_vdso(SB) + RET diff --git a/src/pkg/runtime/sys_linux_amd64.s b/src/pkg/runtime/sys_linux_amd64.s index e459437582..f1591b8e7a 100644 --- a/src/pkg/runtime/sys_linux_amd64.s +++ b/src/pkg/runtime/sys_linux_amd64.s @@ -347,3 +347,46 @@ TEXT runtime·sched_getaffinity(SB),7,$0 MOVL $204, AX // syscall entry SYSCALL RET + +// int32 runtime·epollcreate(int32 size); +TEXT runtime·epollcreate(SB),7,$0 + MOVL 8(SP), DI + MOVL $213, AX // syscall entry + SYSCALL + RET + +// int32 runtime·epollcreate1(int32 flags); +TEXT runtime·epollcreate1(SB),7,$0 + MOVL 8(SP), DI + MOVL $291, AX // syscall entry + SYSCALL + RET + +// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev); +TEXT runtime·epollctl(SB),7,$0 + MOVL 8(SP), DI + MOVL 12(SP), SI + MOVL 16(SP), DX + MOVQ 24(SP), R10 + MOVL $233, AX // syscall entry + SYSCALL + RET + +// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout); +TEXT runtime·epollwait(SB),7,$0 + MOVL 8(SP), DI + MOVQ 16(SP), SI + MOVL 24(SP), DX + MOVL 28(SP), R10 + MOVL $232, AX // syscall entry + SYSCALL + RET + +// void runtime·closeonexec(int32 fd); +TEXT runtime·closeonexec(SB),7,$0 + MOVL 8(SP), DI // fd + MOVQ $2, SI // F_SETFD + MOVQ $1, DX // FD_CLOEXEC + MOVL $72, AX // fcntl + SYSCALL + RET diff --git a/src/pkg/runtime/thread_linux.c b/src/pkg/runtime/thread_linux.c index fe924b264a..7fdc757dfc 100644 --- a/src/pkg/runtime/thread_linux.c +++ b/src/pkg/runtime/thread_linux.c @@ -25,9 +25,6 @@ enum { FUTEX_WAIT = 0, FUTEX_WAKE = 1, - - EINTR = 4, - EAGAIN = 11, }; // Atomically, -- cgit v1.3-5-g9baa