diff options
| author | Russ Cox <rsc@golang.org> | 2014-06-26 11:54:39 -0400 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2014-06-26 11:54:39 -0400 |
| commit | 89f185fe8a036b0fabce30b20c480cf1c832bdd7 (patch) | |
| tree | cd8c84fca5164747bebd852da7edfed132ce4e70 /src/pkg/runtime/malloc.goc | |
| parent | 2565b5c06086488b2b23d48929803c8c3cec4400 (diff) | |
| download | go-89f185fe8a036b0fabce30b20c480cf1c832bdd7.tar.xz | |
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
Diffstat (limited to 'src/pkg/runtime/malloc.goc')
| -rw-r--r-- | src/pkg/runtime/malloc.goc | 38 |
1 files changed, 19 insertions, 19 deletions
diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc index 7b7e350d8d..0b56d1fdb0 100644 --- a/src/pkg/runtime/malloc.goc +++ b/src/pkg/runtime/malloc.goc @@ -53,17 +53,17 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag) // have distinct values. return &runtime·zerobase; } - if(m->mallocing) + if(g->m->mallocing) runtime·throw("malloc/free - deadlock"); // Disable preemption during settype. // We can not use m->mallocing for this, because settype calls mallocgc. - m->locks++; - m->mallocing = 1; + g->m->locks++; + g->m->mallocing = 1; if(DebugTypeAtBlockEnd) size += sizeof(uintptr); - c = m->mcache; + c = g->m->mcache; if(!runtime·debug.efence && size <= MaxSmallSize) { if((flag&(FlagNoScan|FlagNoGC)) == FlagNoScan && size < TinySize) { // Tiny allocator. @@ -112,9 +112,9 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag) v = (MLink*)tiny; c->tiny += size1; c->tinysize -= size1; - m->mallocing = 0; - m->locks--; - if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->m->mallocing = 0; + g->m->locks--; + if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack g->stackguard0 = StackPreempt; return v; } @@ -178,7 +178,7 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag) if(DebugTypeAtBlockEnd) *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = typ; - m->mallocing = 0; + g->m->mallocing = 0; // TODO: save type even if FlagNoScan? Potentially expensive but might help // heap profiling/tracing. 
if(UseSpanType && !(flag & FlagNoScan) && typ != 0) @@ -197,8 +197,8 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag) profilealloc(v, size); } - m->locks--; - if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->m->locks--; + if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack g->stackguard0 = StackPreempt; if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc) @@ -239,7 +239,7 @@ profilealloc(void *v, uintptr size) int32 next; MCache *c; - c = m->mcache; + c = g->m->mcache; rate = runtime·MemProfileRate; if(size < rate) { // pick next profile time @@ -279,9 +279,9 @@ runtime·free(void *v) // If you change this also change mgc0.c:/^sweep, // which has a copy of the guts of free. - if(m->mallocing) + if(g->m->mallocing) runtime·throw("malloc/free - deadlock"); - m->mallocing = 1; + g->m->mallocing = 1; if(!runtime·mlookup(v, nil, nil, &s)) { runtime·printf("free %p: not an allocated block\n", v); @@ -304,7 +304,7 @@ runtime·free(void *v) if(s->specials != nil) runtime·freeallspecials(s, v, size); - c = m->mcache; + c = g->m->mcache; if(sizeclass == 0) { // Large object. s->needzero = 1; @@ -354,7 +354,7 @@ runtime·free(void *v) runtime·MCache_Free(c, v, sizeclass, size); } } - m->mallocing = 0; + g->m->mallocing = 0; } int32 @@ -364,11 +364,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) byte *p; MSpan *s; - m->mcache->local_nlookup++; - if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) { + g->m->mcache->local_nlookup++; + if (sizeof(void*) == 4 && g->m->mcache->local_nlookup >= (1<<30)) { // purge cache stats to prevent overflow runtime·lock(&runtime·mheap); - runtime·purgecachedstats(m->mcache); + runtime·purgecachedstats(g->m->mcache); runtime·unlock(&runtime·mheap); } @@ -569,7 +569,7 @@ runtime·mallocinit(void) // Initialize the rest of the allocator. 
runtime·MHeap_Init(&runtime·mheap); - m->mcache = runtime·allocmcache(); + g->m->mcache = runtime·allocmcache(); // See if it works. runtime·free(runtime·malloc(TinySize)); |
