runtime: inline several float64 routines to speed up complex128 division

Depends on CL 6197045.

Result obtained on Core i7 620M, Darwin/amd64:

benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           57           28  -50.78%
BenchmarkComplex128DivNisNaN           49           15  -68.90%
BenchmarkComplex128DivDisNaN           49           15  -67.88%
BenchmarkComplex128DivNisInf           40           12  -68.50%
BenchmarkComplex128DivDisInf           33           13  -61.06%

Result obtained on Core i7 620M, Darwin/386:

benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           89           50  -44.05%
BenchmarkComplex128DivNisNaN          307          802 +161.24%
BenchmarkComplex128DivDisNaN          309          788 +155.02%
BenchmarkComplex128DivNisInf          278          237  -14.75%
BenchmarkComplex128DivDisInf           46           22  -52.46%

Result obtained on 700MHz OMAP4460, Linux/ARM:

benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal         1557          465  -70.13%
BenchmarkComplex128DivNisNaN         1443          220  -84.75%
BenchmarkComplex128DivDisNaN         1481          218  -85.28%
BenchmarkComplex128DivNisInf          952          216  -77.31%
BenchmarkComplex128DivDisInf          861          231  -73.17%

The 386 version has a performance regression, but as we have decided to
use SSE2 instead of x87 FPU for 386 too (issue 3912), I won't address
this issue.

R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
author: Shenghou Ma <minux.ma@gmail.com> 2012-08-07 23:45:50 +0800
committer: Shenghou Ma <minux.ma@gmail.com> 2012-08-07 23:45:50 +0800
commit: 0157c72d133471631c13419f61117b75dcd7c255 (patch)
tree: e9ff8dc487ad88e0fd438d76170e507ae97890d1 /src/pkg/runtime/float.c
parent: c8423f90029e24b9b714678f9d23f7497f97a1e6 (diff)
download: go-0157c72d133471631c13419f61117b75dcd7c255.tar.xz
1 files changed, 4 insertions, 167 deletions
diff --git a/src/pkg/runtime/float.c b/src/pkg/runtime/float.c
index 4d9f125977..42082e4347 100644
--- a/src/pkg/runtime/float.c
+++ b/src/pkg/runtime/float.c
@@ -4,170 +4,7 @@
 
 #include "runtime.h"
 
-static	uint64	uvnan		= 0x7FF8000000000001ULL;
-static	uint64	uvinf		= 0x7FF0000000000000ULL;
-static	uint64	uvneginf	= 0xFFF0000000000000ULL;
-
-uint32
-runtime·float32tobits(float32 f)
-{
-	// The obvious cast-and-pointer code is technically
-	// not valid, and gcc miscompiles it.  Use a union instead.
-	union {
-		float32 f;
-		uint32 i;
-	} u;
-	u.f = f;
-	return u.i;
-}
-
-uint64
-runtime·float64tobits(float64 f)
-{
-	// The obvious cast-and-pointer code is technically
-	// not valid, and gcc miscompiles it.  Use a union instead.
-	union {
-		float64 f;
-		uint64 i;
-	} u;
-	u.f = f;
-	return u.i;
-}
-
-float64
-runtime·float64frombits(uint64 i)
-{
-	// The obvious cast-and-pointer code is technically
-	// not valid, and gcc miscompiles it.  Use a union instead.
-	union {
-		float64 f;
-		uint64 i;
-	} u;
-	u.i = i;
-	return u.f;
-}
-
-float32
-runtime·float32frombits(uint32 i)
-{
-	// The obvious cast-and-pointer code is technically
-	// not valid, and gcc miscompiles it.  Use a union instead.
-	union {
-		float32 f;
-		uint32 i;
-	} u;
-	u.i = i;
-	return u.f;
-}
-
-bool
-runtime·isInf(float64 f, int32 sign)
-{
-	uint64 x;
-
-	x = runtime·float64tobits(f);
-	if(sign == 0)
-		return x == uvinf || x == uvneginf;
-	if(sign > 0)
-		return x == uvinf;
-	return x == uvneginf;
-}
-
-float64
-runtime·NaN(void)
-{
-	return runtime·float64frombits(uvnan);
-}
-
-bool
-runtime·isNaN(float64 f)
-{
-	uint64 x;
-
-	x = runtime·float64tobits(f);
-	return ((uint32)(x>>52) & 0x7FF) == 0x7FF && !runtime·isInf(f, 0);
-}
-
-float64
-runtime·Inf(int32 sign)
-{
-	if(sign >= 0)
-		return runtime·float64frombits(uvinf);
-	else
-		return runtime·float64frombits(uvneginf);
-}
-
-enum
-{
-	MASK	= 0x7ffL,
-	SHIFT	= 64-11-1,
-	BIAS	= 1022L,
-};
-
-float64
-runtime·frexp(float64 d, int32 *ep)
-{
-	uint64 x;
-
-	if(d == 0) {
-		*ep = 0;
-		return 0;
-	}
-	x = runtime·float64tobits(d);
-	*ep = (int32)((x >> SHIFT) & MASK) - BIAS;
-	x &= ~((uint64)MASK << SHIFT);
-	x |= (uint64)BIAS << SHIFT;
-	return runtime·float64frombits(x);
-}
-
-float64
-runtime·ldexp(float64 d, int32 e)
-{
-	uint64 x;
-
-	if(d == 0)
-		return 0;
-	x = runtime·float64tobits(d);
-	e += (int32)(x >> SHIFT) & MASK;
-	if(e <= 0)
-		return 0;	/* underflow */
-	if(e >= MASK){		/* overflow */
-		if(d < 0)
-			return runtime·Inf(-1);
-		return runtime·Inf(1);
-	}
-	x &= ~((uint64)MASK << SHIFT);
-	x |= (uint64)e << SHIFT;
-	return runtime·float64frombits(x);
-}
-
-float64
-runtime·modf(float64 d, float64 *ip)
-{
-	float64 dd;
-	uint64 x;
-	int32 e;
-
-	if(d < 1) {
-		if(d < 0) {
-			d = runtime·modf(-d, ip);
-			*ip = -*ip;
-			return -d;
-		}
-		*ip = 0;
-		return d;
-	}
-
-	x = runtime·float64tobits(d);
-	e = (int32)((x >> SHIFT) & MASK) - BIAS;
-
-	/*
-	 * Keep the top 11+e bits; clear the rest.
-	 */
-	if(e <= 64-11)
-		x &= ~(((uint64)1 << (64LL-11LL-e))-1);
-	dd = runtime·float64frombits(x);
-	*ip = dd;
-	return d - dd;
-}
-
+// used as float64 via runtime· names
+uint64	·nan		= 0x7FF8000000000001ULL;
+uint64	·posinf	= 0x7FF0000000000000ULL;
+uint64	·neginf	= 0xFFF0000000000000ULL;
author	Shenghou Ma <minux.ma@gmail.com>	2012-08-07 23:45:50 +0800
committer	Shenghou Ma <minux.ma@gmail.com>	2012-08-07 23:45:50 +0800
commit	0157c72d133471631c13419f61117b75dcd7c255 (patch)
tree	e9ff8dc487ad88e0fd438d76170e507ae97890d1 /src/pkg/runtime/float.c
parent	c8423f90029e24b9b714678f9d23f7497f97a1e6 (diff)
download	go-0157c72d133471631c13419f61117b75dcd7c255.tar.xz