diff options
| author | Russ Cox <rsc@golang.org> | 2013-02-26 22:38:14 -0500 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2013-02-26 22:38:14 -0500 |
| commit | c8dcaeb25deddac52cfca6ae6882ce94780582d3 (patch) | |
| tree | d12f5b99253e3e8a8f1606afebc8d5908470da88 /src/pkg | |
| parent | 15cce227c78ac2b499e9ad3c49ccb67d2f870169 (diff) | |
| download | go-c8dcaeb25deddac52cfca6ae6882ce94780582d3.tar.xz | |
cmd/ld, runtime: adjust symbol table representation
This CL changes the encoding used for the Go symbol table,
stored in the binary and used at run time. It does not change
any of the semantics or structure: the bits are just packed
a little differently.
The comment at the top of runtime/symtab.c describes the new format.
Compared to the Go 1.0 format, the main changes are:
* Store symbol addresses as full-pointer-sized host-endian values.
(For 6g, this means addresses are 64-bit little-endian.)
* Store other values (frame sizes and so on) varint-encoded.
The second change more than compensates for the first:
for the godoc binary on OS X/amd64, the new symbol table
is 8% smaller than the old symbol table (1,425,668 down from 1,546,276).
This is a required step for allowing the host linker (gcc) to write
the final Go binary, since it will have to fill in the symbol address slots
(so the slots must be host-endian) and on 64-bit systems it may
choose addresses above 4 GB.
R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7403054
Diffstat (limited to 'src/pkg')
| -rw-r--r-- | src/pkg/debug/gosym/symtab.go | 123 | ||||
| -rw-r--r-- | src/pkg/runtime/arch_386.h | 1 | ||||
| -rw-r--r-- | src/pkg/runtime/arch_amd64.h | 1 | ||||
| -rw-r--r-- | src/pkg/runtime/arch_arm.h | 1 | ||||
| -rw-r--r-- | src/pkg/runtime/symtab.c | 151 |
5 files changed, 234 insertions, 43 deletions
diff --git a/src/pkg/debug/gosym/symtab.go b/src/pkg/debug/gosym/symtab.go index cc01e0b9d6..81ed4fb27d 100644 --- a/src/pkg/debug/gosym/symtab.go +++ b/src/pkg/debug/gosym/symtab.go @@ -99,31 +99,116 @@ type Table struct { } type sym struct { - value uint32 - gotype uint32 + value uint64 + gotype uint64 typ byte name []byte } -var littleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00} +var littleEndianSymtab = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00} +var bigEndianSymtab = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00} + +var oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00} func walksymtab(data []byte, fn func(sym) error) error { var order binary.ByteOrder = binary.BigEndian - if bytes.HasPrefix(data, littleEndianSymtab) { + newTable := false + switch { + case bytes.HasPrefix(data, oldLittleEndianSymtab): + // Same as Go 1.0, but little endian. + // Format was used during interim development between Go 1.0 and Go 1.1. + // Should not be widespread, but easy to support. data = data[6:] order = binary.LittleEndian + case bytes.HasPrefix(data, bigEndianSymtab): + newTable = true + case bytes.HasPrefix(data, littleEndianSymtab): + newTable = true + order = binary.LittleEndian + } + var ptrsz int + if newTable { + if len(data) < 8 { + return &DecodingError{len(data), "unexpected EOF", nil} + } + ptrsz = int(data[7]) + if ptrsz != 4 && ptrsz != 8 { + return &DecodingError{7, "invalid pointer size", ptrsz} + } + data = data[8:] } var s sym p := data - for len(p) >= 6 { - s.value = order.Uint32(p[0:4]) - typ := p[4] - if typ&0x80 == 0 { - return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ} + for len(p) >= 4 { + var typ byte + if newTable { + // Symbol type, value, Go type. + typ = p[0] & 0x3F + wideValue := p[0]&0x40 != 0 + goType := p[0]&0x80 != 0 + if typ < 26 { + typ += 'A' + } else { + typ += 'a' - 26 + } + s.typ = typ + p = p[1:] + if wideValue { + if len(p) < ptrsz { + return &DecodingError{len(data), "unexpected EOF", nil} + } + // fixed-width value + if ptrsz == 8 { + s.value = order.Uint64(p[0:8]) + p = p[8:] + } else { + s.value = uint64(order.Uint32(p[0:4])) + p = p[4:] + } + } else { + // varint value + s.value = 0 + shift := uint(0) + for len(p) > 0 && p[0]&0x80 != 0 { + s.value |= uint64(p[0]&0x7F) << shift + shift += 7 + p = p[1:] + } + if len(p) == 0 { + return &DecodingError{len(data), "unexpected EOF", nil} + } + s.value |= uint64(p[0]) << shift + p = p[1:] + } + if goType { + if len(p) < ptrsz { + return &DecodingError{len(data), "unexpected EOF", nil} + } + // fixed-width go type + if ptrsz == 8 { + s.gotype = order.Uint64(p[0:8]) + p = p[8:] + } else { + s.gotype = uint64(order.Uint32(p[0:4])) + p = p[4:] + } + } + } else { + // Value, symbol type. + s.value = uint64(order.Uint32(p[0:4])) + if len(p) < 5 { + return &DecodingError{len(data), "unexpected EOF", nil} + } + typ = p[4] + if typ&0x80 == 0 { + return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ} + } + typ &^= 0x80 + s.typ = typ + p = p[5:] } - typ &^= 0x80 - s.typ = typ - p = p[5:] + + // Name. var i int var nnul int for i = 0; i < len(p); i++ { @@ -142,13 +227,21 @@ func walksymtab(data []byte, fn func(sym) error) error { } } } - if i+nnul+4 > len(p) { + if len(p) < i+nnul { return &DecodingError{len(data), "unexpected EOF", nil} } s.name = p[0:i] i += nnul - s.gotype = order.Uint32(p[i : i+4]) - p = p[i+4:] + p = p[i:] + + if !newTable { + if len(p) < 4 { + return &DecodingError{len(data), "unexpected EOF", nil} + } + // Go type. + s.gotype = uint64(order.Uint32(p[:4])) + p = p[4:] + } fn(s) } return nil diff --git a/src/pkg/runtime/arch_386.h b/src/pkg/runtime/arch_386.h index a0798f99e9..cb9d64a70c 100644 --- a/src/pkg/runtime/arch_386.h +++ b/src/pkg/runtime/arch_386.h @@ -1,4 +1,5 @@ enum { thechar = '8', + BigEndian = 0, CacheLineSize = 64 }; diff --git a/src/pkg/runtime/arch_amd64.h b/src/pkg/runtime/arch_amd64.h index dd1cfc18d1..35ed1560a2 100644 --- a/src/pkg/runtime/arch_amd64.h +++ b/src/pkg/runtime/arch_amd64.h @@ -1,4 +1,5 @@ enum { thechar = '6', + BigEndian = 0, CacheLineSize = 64 }; diff --git a/src/pkg/runtime/arch_arm.h b/src/pkg/runtime/arch_arm.h index c1a7a0f379..21dc1a692c 100644 --- a/src/pkg/runtime/arch_arm.h +++ b/src/pkg/runtime/arch_arm.h @@ -1,4 +1,5 @@ enum { thechar = '5', + BigEndian = 0, CacheLineSize = 32 }; diff --git a/src/pkg/runtime/symtab.c b/src/pkg/runtime/symtab.c index 85a1096d10..2485586855 100644 --- a/src/pkg/runtime/symtab.c +++ b/src/pkg/runtime/symtab.c @@ -2,15 +2,53 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Runtime symbol table access. Work in progress. -// The Plan 9 symbol table is not in a particularly convenient form. -// The routines here massage it into a more usable form; eventually -// we'll change 6l to do this for us, but it is easier to experiment -// here than to change 6l and all the other tools. +// Runtime symbol table parsing. // -// The symbol table also needs to be better integrated with the type -// strings table in the future. This is just a quick way to get started -// and figure out exactly what we want. +// The Go tools use a symbol table derived from the Plan 9 symbol table +// format. The symbol table is kept in its own section treated as +// read-only memory when the binary is running: the binary consults the +// table. +// +// The format used by Go 1.0 was basically the Plan 9 format. Each entry +// is variable sized but had this format: +// +// 4-byte value, big endian +// 1-byte type ([A-Za-z] + 0x80) +// name, NUL terminated (or for 'z' and 'Z' entries, double-NUL terminated) +// 4-byte Go type address, big endian (new in Go) +// +// In order to support greater interoperation with standard toolchains, +// Go 1.1 uses a more flexible yet smaller encoding of the entries. +// The overall structure is unchanged from Go 1.0 and, for that matter, +// from Plan 9. +// +// The Go 1.1 table is a re-encoding of the data in a Go 1.0 table. +// To identify a new table as new, it begins one of two eight-byte +// sequences: +// +// FF FF FF FD 00 00 00 xx - big endian new table +// FD FF FF FF 00 00 00 xx - little endian new table +// +// This sequence was chosen because old tables stop at an entry with type +// 0, so old code reading a new table will see only an empty table. The +// first four bytes are the target-endian encoding of 0xfffffffd. The +// final xx gives AddrSize, the width of a full-width address. +// +// After that header, each entry is encoded as follows. +// +// 1-byte type (0-51 + two flag bits) +// AddrSize-byte value, host-endian OR varint-encoded value +// AddrSize-byte Go type address OR nothing +// [n] name, terminated as before +// +// The type byte comes first, but 'A' encodes as 0 and 'a' as 26, so that +// the type itself is only in the low 6 bits. The upper two bits specify +// the format of the next two fields. If the 0x40 bit is set, the value +// is encoded as an full-width 4- or 8-byte target-endian word. Otherwise +// the value is a varint-encoded number. If the 0x80 bit is set, the Go +// type is present, again as a 4- or 8-byte target-endian word. If not, +// there is no Go type in this entry. The NUL-terminated name ends the +// entry. #include "runtime.h" #include "defs_GOOS_GOARCH.h" @@ -29,42 +67,100 @@ struct Sym // byte *gotype; }; +static uintptr mainoffset; + // A dynamically allocated string containing multiple substrings. // Individual strings are slices of hugestring. static String hugestring; static int32 hugestring_len; +extern void main·main(void); + +static uintptr +readword(byte **pp, byte *ep) +{ + byte *p; + + p = *pp; + if(ep - p < sizeof(void*)) { + *pp = ep; + return 0; + } + *pp = p + sizeof(void*); + + // Hairy, but only one of these four cases gets compiled. + if(sizeof(void*) == 8) { + if(BigEndian) { + return ((uint64)p[0]<<56) | ((uint64)p[1]<<48) | ((uint64)p[2]<<40) | ((uint64)p[3]<<32) | + ((uint64)p[4]<<24) | ((uint64)p[5]<<16) | ((uint64)p[6]<<8) | ((uint64)p[7]); + } + return ((uint64)p[7]<<56) | ((uint64)p[6]<<48) | ((uint64)p[5]<<40) | ((uint64)p[4]<<32) | + ((uint64)p[3]<<24) | ((uint64)p[2]<<16) | ((uint64)p[1]<<8) | ((uint64)p[0]); + } + if(BigEndian) { + return ((uint32)p[0]<<24) | ((uint32)p[1]<<16) | ((uint32)p[2]<<8) | ((uint32)p[3]); + } + return ((uint32)p[3]<<24) | ((uint32)p[2]<<16) | ((uint32)p[1]<<8) | ((uint32)p[0]); +} + // Walk over symtab, calling fn(&s) for each symbol. static void walksymtab(void (*fn)(Sym*)) { byte *p, *ep, *q; Sym s; - int32 bigend; + int32 widevalue, havetype, shift; p = symtab; ep = esymtab; - // Default is big-endian value encoding. - // If table begins fe ff ff ff 00 00, little-endian. - bigend = 1; - if(symtab[0] == 0xfe && symtab[1] == 0xff && symtab[2] == 0xff && symtab[3] == 0xff && symtab[4] == 0x00 && symtab[5] == 0x00) { - p += 6; - bigend = 0; + // Table must begin with correct magic number. + if(ep - p < 8 || p[4] != 0x00 || p[5] != 0x00 || p[6] != 0x00 || p[7] != sizeof(void*)) + return; + if(BigEndian) { + if(p[0] != 0xff || p[1] != 0xff || p[2] != 0xff || p[3] != 0xfd) + return; + } else { + if(p[0] != 0xfd || p[1] != 0xff || p[2] != 0xff || p[3] != 0xff) + return; } - while(p < ep) { - if(p + 7 > ep) - break; + p += 8; - if(bigend) - s.value = ((uint32)p[0]<<24) | ((uint32)p[1]<<16) | ((uint32)p[2]<<8) | ((uint32)p[3]); + while(p < ep) { + s.symtype = p[0]&0x3F; + widevalue = p[0]&0x40; + havetype = p[0]&0x80; + if(s.symtype < 26) + s.symtype += 'A'; else - s.value = ((uint32)p[3]<<24) | ((uint32)p[2]<<16) | ((uint32)p[1]<<8) | ((uint32)p[0]); + s.symtype += 'a' - 26; + p++; - if(!(p[4]&0x80)) + // Value, either full-width or varint-encoded. + if(widevalue) { + s.value = readword(&p, ep); + } else { + s.value = 0; + shift = 0; + while(p < ep && (p[0]&0x80) != 0) { + s.value |= (uintptr)(p[0]&0x7F)<<shift; + shift += 7; + p++; + } + if(p >= ep) + break; + s.value |= (uintptr)p[0]<<shift; + p++; + } + + // Go type, if present. Ignored but must skip over. + if(havetype) + readword(&p, ep); + + // Name. + if(ep - p < 2) break; - s.symtype = p[4] & ~0x80; - p += 5; + s.name = p; if(s.symtype == 'z' || s.symtype == 'Z') { // path reference string - skip first byte, @@ -84,7 +180,7 @@ walksymtab(void (*fn)(Sym*)) break; p = q+1; } - p += 4; // go type + fn(&s); } } @@ -93,7 +189,6 @@ walksymtab(void (*fn)(Sym*)) static Func *func; static int32 nfunc; -extern byte reloffset[]; static byte **fname; static int32 nfname; @@ -119,7 +214,7 @@ dofunc(Sym *sym) } f = &func[nfunc++]; f->name = runtime·gostringnocopy(sym->name); - f->entry = sym->value + (uint64)reloffset; + f->entry = sym->value; if(sym->symtype == 'L' || sym->symtype == 'l') f->frame = -sizeof(uintptr); break; @@ -141,7 +236,7 @@ dofunc(Sym *sym) if(fname == nil) { if(sym->value >= nfname) { if(sym->value >= 0x10000) { - runtime·printf("invalid symbol file index %p\n", sym->value); + runtime·printf("runtime: invalid symbol file index %p\n", sym->value); runtime·throw("mangled symbol table"); } nfname = sym->value+1; |
