cmd/buildid: add new tool factoring out code needed by go command

This CL does a few things. 1. It moves the existing "read a build ID" code out of the go command and into cmd/internal/buildid. 2. It adds new code there to "write a build ID". 3. It adds better tests. 4. It encapsulates cmd/internal/buildid into a new standalone program "go tool buildid". The go command is going to use the new "write a build ID" functionality in a future CL. Adding the separate "go tool buildid" gives "go build -x" a printable command to explain what it is doing in that new step. (This is similar to the go command printing "go tool pack" commands equivalent to the actions it is taking, even though it's not invoking pack directly.) Keeping go build -x honest means that other build systems can potentially keep up with the go command. Change-Id: I01c0a66e30a80fa7254e3f2879283d3cd7aa03b4 Reviewed-on: https://go-review.googlesource.com/69053 Reviewed-by: David Crawshaw <crawshaw@golang.org>
author: Russ Cox <rsc@golang.org> 2017-10-06 14:03:55 -0400
committer: Russ Cox <rsc@golang.org> 2017-10-11 18:16:02 +0000
commit: 9ad2319bbca45b0750366e99b79db8889f0dfc5b (patch)
tree: 2e8b606a92b962dce1d397cf9eb5600b24965eb0 /src/cmd/internal/buildid
parent: 0bede7f34e20a77052b433a3c8ee402aa731183e (diff)
download: go-9ad2319bbca45b0750366e99b79db8889f0dfc5b.tar.xz
8 files changed, 596 insertions, 0 deletions
diff --git a/src/cmd/internal/buildid/buildid.go b/src/cmd/internal/buildid/buildid.go
new file mode 100644
index 0000000000..883790e41b
--- /dev/null
+++ b/src/cmd/internal/buildid/buildid.go
@@ -0,0 +1,181 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package buildid
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+)
+
+var (
+	errBuildIDToolchain = fmt.Errorf("build ID only supported in gc toolchain")
+	errBuildIDMalformed = fmt.Errorf("malformed object file") // returned wrapped in *os.PathError by ReadFile and readRaw
+	errBuildIDUnknown   = fmt.Errorf("lost build ID")
+)
+
+var (
+	bangArch = []byte("!<arch>")    // first archive line, without its newline
+	pkgdef   = []byte("__.PKGDEF")  // required prefix of the second archive line
+	goobject = []byte("go object ") // required prefix of the third archive line
+	buildid  = []byte("build id ")  // prefix of the optional fourth line; the quoted ID follows
+)
+
+// ReadFile reads the build ID from an archive or executable file.
+// It only supports archives from the gc toolchain.
+// A well-formed file that carries no build ID yields an empty ID and a nil error.
+// TODO(rsc): Figure out what gccgo and llvm are going to do for archives.
+func ReadFile(name string) (id string, err error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close() // covers every return path, including the readBinary one
+
+	buf := make([]byte, 8)
+	if _, err := f.ReadAt(buf, 0); err != nil {
+		return "", err
+	}
+	if string(buf) != "!<arch>\n" {
+		return readBinary(name, f)
+	}
+
+	// Read just enough of the target to fetch the build ID.
+	// The archive is expected to look like:
+	//
+	//	!<arch>
+	//	__.PKGDEF       0           0     0     644     7955      `
+	//	go object darwin amd64 devel X:none
+	//	build id "b41e5c45250e25c9fd5e9f9a1de7857ea0d41224"
+	//
+	// The variable-sized strings are GOOS, GOARCH, and the experiment list (X:none).
+	// Reading the first 1024 bytes should be plenty.
+	data := make([]byte, 1024)
+	n, err := io.ReadFull(f, data)
+	if err != nil && n == 0 {
+		return "", err
+	}
+
+	bad := func() (string, error) {
+		return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
+	}
+
+	// Archive header.
+	for i := 0; ; i++ { // returns during i==3
+		j := bytes.IndexByte(data, '\n')
+		if j < 0 {
+			return bad()
+		}
+		line := data[:j]
+		data = data[j+1:]
+		switch i {
+		case 0:
+			if !bytes.Equal(line, bangArch) {
+				return bad()
+			}
+		case 1:
+			if !bytes.HasPrefix(line, pkgdef) {
+				return bad()
+			}
+		case 2:
+			if !bytes.HasPrefix(line, goobject) {
+				return bad()
+			}
+		case 3:
+			if !bytes.HasPrefix(line, buildid) {
+				// Found the object header, just doesn't have a build id line.
+				// Treat as successful, with empty build id.
+				return "", nil
+			}
+			id, err := strconv.Unquote(string(line[len(buildid):]))
+			if err != nil {
+				return bad()
+			}
+			return id, nil
+		}
+	}
+}
+
+var (
+	goBuildPrefix = []byte("\xff Go build ID: \"") // marker before a raw build ID; ends with the opening quote
+	goBuildEnd    = []byte("\"\n \xff")            // marker after a raw build ID; begins with the closing quote
+
+	elfPrefix = []byte("\x7fELF") // leading bytes that make readBinary use readELF
+
+	machoPrefixes = [][]byte{ // leading bytes that make readBinary use readMacho
+		{0xfe, 0xed, 0xfa, 0xce},
+		{0xfe, 0xed, 0xfa, 0xcf},
+		{0xce, 0xfa, 0xed, 0xfe},
+		{0xcf, 0xfa, 0xed, 0xfe},
+	}
+)
+
+var readSize = 32 * 1024 // bytes read up front from a binary; changed for testing
+
+// readBinary reads the build ID from a binary, choosing the
+// format-specific reader from the file's leading bytes.
+//
+// ELF binaries store the build ID in a proper PT_NOTE segment.
+// Other binary formats are not so flexible. For those, the linker
+// stores the build ID as non-instruction bytes at the very beginning
+// of the text segment, which should appear near the beginning
+// of the file. This is clumsy but fairly portable. Custom locations
+// can be added for other binary types as needed, like we did for ELF.
+func readBinary(name string, f *os.File) (id string, err error) {
+	// Read the first readSize bytes of the file (32 kB unless a test
+	// has changed it). That should be enough to find the build ID.
+	// In ELF files, the build ID is in the leading headers,
+	// which are typically less than 4 kB, not to mention 32 kB.
+	// In Mach-O files, there's no limit, so readMacho may have to parse the file.
+	// On other systems, the read is meant to reach the beginning
+	// of the text segment. The offset where the text segment begins in
+	// a hello world compiled for each different object format today:
+	//
+	//	Plan 9: 0x20
+	//	Windows: 0x600
+	//
+	// A file shorter than readSize is not an error: the short read is
+	// accepted and the unread tail of data is left as zero bytes.
+	data := make([]byte, readSize)
+	if _, err := io.ReadFull(f, data); err != nil && err != io.ErrUnexpectedEOF {
+		return "", err
+	}
+
+	if bytes.HasPrefix(data, elfPrefix) {
+		return readELF(name, f, data)
+	}
+	for _, magic := range machoPrefixes {
+		if bytes.HasPrefix(data, magic) {
+			return readMacho(name, f, data)
+		}
+	}
+
+	// No recognized magic: search the bytes read so far for a raw build ID,
+	// as needed for formats such as the Plan 9 and Windows ones listed above.
+	return readRaw(name, data)
+}
+
+// readRaw finds the build ID that sits between the goBuildPrefix and
+// goBuildEnd markers in data. A missing prefix means an empty ID, not an error.
+func readRaw(name string, data []byte) (id string, err error) {
+	start := bytes.Index(data, goBuildPrefix)
+	if start < 0 {
+		return "", nil
+	}
+	start += len(goBuildPrefix) // first byte after the opening quote
+	end := bytes.Index(data[start:], goBuildEnd)
+	if end < 0 {
+		return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
+	}
+	// The prefix ends with a double quote and the end marker begins with one;
+	// widen the span by a byte on each side so Unquote sees both.
+	quoted := string(data[start-1 : start+end+1])
+	if id, err = strconv.Unquote(quoted); err != nil {
+		return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
+	}
+	return id, nil
+}
diff --git a/src/cmd/internal/buildid/buildid_test.go b/src/cmd/internal/buildid/buildid_test.go
new file mode 100644
index 0000000000..15481dd762
--- /dev/null
+++ b/src/cmd/internal/buildid/buildid_test.go
@@ -0,0 +1,137 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package buildid
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"io/ioutil"
+	"os"
+	"reflect"
+	"testing"
+)
+
+const (
+	expectedID = "abcdefghijklmnopqrstuvwxyz.1234567890123456789012345678901234567890123456789012345678901234" // ID that TestReadFile expects in every testdata file
+	newID      = "bcdefghijklmnopqrstuvwxyza.2345678901234567890123456789012345678901234567890123456789012341" // same length as expectedID; TestReadFile rewrites the ID to this
+)
+
+func TestReadFile(t *testing.T) {
+	var files = []string{
+		"p.a",
+		"a.elf",
+		"a.macho",
+		"a.pe",
+	}
+
+	f, err := ioutil.TempFile("", "buildid-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	tmp := f.Name()
+	defer os.Remove(tmp)
+	f.Close()
+
+	for _, f := range files {
+		id, err := ReadFile("testdata/" + f)
+		if id != expectedID || err != nil {
+			t.Errorf("ReadFile(testdata/%s) = %q, %v, want %q, nil", f, id, err, expectedID)
+		}
+		old := readSize
+		readSize = 2048
+		id, err = ReadFile("testdata/" + f)
+		readSize = old
+		if id != expectedID || err != nil {
+			t.Errorf("ReadFile(testdata/%s) [readSize=2k] = %q, %v, want %q, nil", f, id, err, expectedID)
+		}
+
+		data, err := ioutil.ReadFile("testdata/" + f)
+		if err != nil {
+			t.Fatal(err)
+		}
+		m, _, err := FindAndHash(bytes.NewReader(data), expectedID, 1024)
+		if err != nil {
+			t.Errorf("FindAndHash(testdata/%s): %v", f, err)
+			continue
+		}
+		if err := ioutil.WriteFile(tmp, data, 0666); err != nil {
+			t.Error(err)
+			continue
+		}
+		tf, err := os.OpenFile(tmp, os.O_WRONLY, 0)
+		if err != nil {
+			t.Error(err)
+			continue
+		}
+		err = Rewrite(tf, m, newID)
+		err2 := tf.Close()
+		if err != nil {
+			t.Errorf("Rewrite(testdata/%s): %v", f, err)
+			continue
+		}
+		if err2 != nil {
+			t.Fatal(err2)
+		}
+
+		id, err = ReadFile(tmp)
+		if id != newID || err != nil {
+			t.Errorf("ReadFile(testdata/%s after Rewrite) = %q, %v, want %q, nil", f, id, err, newID)
+		}
+	}
+}
+
+// TestFindAndHash plants one or two copies of an 8-byte id at every offset of
+// a 64-byte buffer and checks, for every read size from len(id) to 64, that
+// FindAndHash reports the intact copies and hashes the buffer with them zeroed.
+func TestFindAndHash(t *testing.T) {
+	buf, buf2 := make([]byte, 64), make([]byte, 64)
+	id := []byte{0, 1, 2, 3, 4, 5, 6, 7}
+	zero := make([]byte, len(id))
+	numError := 0
+	errorf := func(msg string, args ...interface{}) {
+		t.Errorf(msg, args...)
+		if numError++; numError > 20 {
+			t.Logf("stopping after too many errors")
+			t.FailNow()
+		}
+	}
+	for bufSize := len(id); bufSize <= len(buf); bufSize++ {
+		for j := range buf {
+			for k := 0; k < 2*len(id) && j+k < len(buf); k++ {
+				for i := range buf {
+					buf[i] = 1
+				}
+				copy(buf[j:], id)
+				copy(buf[j+k:], id)
+				// The copy at j survives only if the copy at j+k starts past its end.
+				var want []int64
+				if j+len(id) <= j+k {
+					want = append(want, int64(j))
+				}
+				if j+k+len(id) <= len(buf) {
+					want = append(want, int64(j+k))
+				}
+				copy(buf2, buf)
+				for _, p := range want {
+					copy(buf2[p:], zero)
+				}
+				wantHash := sha256.Sum256(buf2)
+
+				matches, hash, err := FindAndHash(bytes.NewReader(buf), string(id), bufSize)
+				if err != nil {
+					errorf("bufSize=%d j=%d k=%d: findAndHash: %v", bufSize, j, k, err)
+					continue
+				}
+				if !reflect.DeepEqual(matches, want) {
+					errorf("bufSize=%d j=%d k=%d: findAndHash: matches=%v, want %v", bufSize, j, k, matches, want)
+					continue
+				}
+				if hash != wantHash {
+					errorf("bufSize=%d j=%d k=%d: findAndHash: matches correct, but hash=%x, want %x", bufSize, j, k, hash, wantHash)
+				}
+			}
+		}
+	}
+}
diff --git a/src/cmd/internal/buildid/note.go b/src/cmd/internal/buildid/note.go
new file mode 100644
index 0000000000..5156cbd88c
--- /dev/null
+++ b/src/cmd/internal/buildid/note.go
@@ -0,0 +1,187 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package buildid
+
+import (
+	"bytes"
+	"debug/elf"
+	"debug/macho"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+)
+
+func readAligned4(r io.Reader, sz int32) ([]byte, error) {
+	if sz < 0 { // a corrupt size; allocating it would panic instead of failing cleanly
+		return nil, fmt.Errorf("invalid note size %d", sz)
+	}
+	data := make([]byte, (int64(sz)+3)&^3) // sz rounded up to a multiple of 4, so the padding is consumed too
+	if _, err := io.ReadFull(r, data); err != nil {
+		return nil, err
+	}
+	return data[:sz], nil // drop the padding again
+}
+
+// ReadELFNote scans the SHT_NOTE sections of the ELF file filename and returns
+// the descriptor of the first note whose type is typ and whose stored name
+// bytes equal name exactly. It returns nil, nil if no note matches.
+func ReadELFNote(filename, name string, typ int32) ([]byte, error) {
+	f, err := elf.Open(filename)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close() // elf.Open opened the file; release it on every return path
+	for _, sect := range f.Sections {
+		if sect.Type != elf.SHT_NOTE {
+			continue
+		}
+		r := sect.Open()
+		for {
+			var namesize, descsize, noteType int32
+			if err := binary.Read(r, f.ByteOrder, &namesize); err == io.EOF {
+				break // clean end of this section's notes
+			} else if err != nil {
+				return nil, fmt.Errorf("read namesize failed: %v", err)
+			}
+			if err := binary.Read(r, f.ByteOrder, &descsize); err != nil {
+				return nil, fmt.Errorf("read descsize failed: %v", err)
+			}
+			if err := binary.Read(r, f.ByteOrder, &noteType); err != nil {
+				return nil, fmt.Errorf("read type failed: %v", err)
+			}
+			noteName, err := readAligned4(r, namesize)
+			if err != nil {
+				return nil, fmt.Errorf("read name failed: %v", err)
+			}
+			desc, err := readAligned4(r, descsize)
+			if err != nil {
+				return nil, fmt.Errorf("read desc failed: %v", err)
+			}
+			if name == string(noteName) && typ == noteType {
+				return desc, nil
+			}
+		}
+	}
+	return nil, nil
+}
+
+var elfGoNote = []byte("Go\x00\x00") // note name readELF looks for: "Go" followed by two NUL bytes
+
+// readELF returns the Go build ID stored in a note described by an ELF
+// PT_NOTE prog header. The caller has already opened the file, to get f,
+// and read its leading bytes (readSize of them in readBinary) into data.
+func readELF(name string, f *os.File, data []byte) (buildid string, err error) {
+	// Assume the note content is in the data, already read.
+	// Rewrite the ELF header to set shnum to 0, so that we can pass
+	// the data to elf.NewFile and it will decode the Prog list but not
+	// try to read the section headers and the string table from disk.
+	// That's a waste of I/O when all we care about is the Prog list
+	// and the one ELF note.
+	switch elf.Class(data[elf.EI_CLASS]) {
+	case elf.ELFCLASS32: // 32-bit header: shnum is the two bytes at offset 48
+		data[48] = 0
+		data[49] = 0
+	case elf.ELFCLASS64: // 64-bit header: shnum is the two bytes at offset 60
+		data[60] = 0
+		data[61] = 0
+	}
+
+	const elfGoBuildIDTag = 4 // note type that identifies the Go build ID note
+
+	ef, err := elf.NewFile(bytes.NewReader(data))
+	if err != nil {
+		return "", &os.PathError{Path: name, Op: "parse", Err: err}
+	}
+	for _, p := range ef.Progs {
+		if p.Type != elf.PT_NOTE || p.Filesz < 16 { // 16 = three 4-byte header words plus a 4-byte name
+			continue
+		}
+
+		var note []byte
+		if p.Off+p.Filesz < uint64(len(data)) {
+			note = data[p.Off : p.Off+p.Filesz]
+		} else {
+			// For some linkers, such as the Solaris linker,
+			// the buildid may not be found in data (which
+			// holds only the leading readSize bytes of the file)
+			// or even the first few megabytes of the file
+			// due to differences in note segment placement;
+			// in that case, extract the note data manually.
+			_, err = f.Seek(int64(p.Off), io.SeekStart)
+			if err != nil {
+				return "", err
+			}
+
+			note = make([]byte, p.Filesz)
+			_, err = io.ReadFull(f, note)
+			if err != nil {
+				return "", err
+			}
+		}
+
+		filesz := p.Filesz
+		for filesz >= 16 {
+			nameSize := ef.ByteOrder.Uint32(note)
+			valSize := ef.ByteOrder.Uint32(note[4:])
+			tag := ef.ByteOrder.Uint32(note[8:])
+			name := note[12:16]
+			if nameSize == 4 && 16+valSize <= uint32(len(note)) && tag == elfGoBuildIDTag && bytes.Equal(name, elfGoNote) {
+				return string(note[16 : 16+valSize]), nil
+			}
+
+			nameSize = (nameSize + 3) &^ 3
+			valSize = (valSize + 3) &^ 3
+			notesz := uint64(12 + nameSize + valSize)
+			if filesz <= notesz {
+				break
+			}
+			filesz -= notesz
+			note = note[notesz:]
+		}
+	}
+
+	// No note. Treat as successful but build ID empty.
+	return "", nil
+}
+
+// readMacho returns the Go build ID stored at the start of the __text section
+// of a Mach-O binary. The caller has already opened the file as f and read its
+// first bytes into data, but an arbitrary amount of other material may precede
+// the main text, so data is not guaranteed to contain the ID.
+func readMacho(name string, f *os.File, data []byte) (buildid string, err error) {
+	// Fast path: the ID is already in data, so no Mach-O parsing is needed.
+	// Besides saving work, this hedges against the parsing below breaking
+	// in the future, for example if the __text section is ever renamed.
+	if id, err := readRaw(name, data); err == nil && id != "" {
+		return id, nil
+	}
+
+	mf, err := macho.NewFile(f)
+	if err != nil {
+		return "", &os.PathError{Path: name, Op: "parse", Err: err}
+	}
+
+	text := mf.Section("__text")
+	if text == nil {
+		// Every binary has a __text section. Something is wrong.
+		return "", &os.PathError{Path: name, Op: "parse", Err: fmt.Errorf("cannot find __text section")}
+	}
+
+	// The ID should be within the first few bytes of the section, but read
+	// up to readSize bytes just in case, given past problems on OS X with
+	// the build ID moving. Reading more costs little:
+	// the hard part is getting to the data, not transferring it.
+	size := text.Size
+	if limit := uint64(readSize); size > limit {
+		size = limit
+	}
+	buf := make([]byte, size)
+	if _, err := f.ReadAt(buf, int64(text.Offset)); err != nil {
+		return "", err
+	}
+
+	return readRaw(name, buf)
+}
diff --git a/src/cmd/internal/buildid/rewrite.go b/src/cmd/internal/buildid/rewrite.go
new file mode 100644
index 0000000000..5be54552a6
--- /dev/null
+++ b/src/cmd/internal/buildid/rewrite.go
@@ -0,0 +1,91 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package buildid
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"fmt"
+	"io"
+)
+
+// FindAndHash reads all of r and returns the offsets of occurrences of id.
+// While reading, FindAndHash also computes and returns
+// a hash of the content of r, but with occurrences of id replaced by zeros.
+// FindAndHash reads bufSize bytes from r at a time; bufSize == 0 selects a default.
+// It is an error for id to be empty or longer than bufSize.
+func FindAndHash(r io.Reader, id string, bufSize int) (matches []int64, hash [32]byte, err error) {
+	if bufSize == 0 {
+		bufSize = 31 * 1024 // bufSize+little will likely fit in 32 kB
+	}
+	if len(id) == 0 { // an empty id matches at every offset, so the search loop below would never advance
+		return nil, [32]byte{}, fmt.Errorf("buildid.FindAndHash: no id specified")
+	}
+	if len(id) > bufSize {
+		return nil, [32]byte{}, fmt.Errorf("buildid.FindAndHash: buffer too small")
+	}
+	zeros := make([]byte, len(id))
+	idBytes := []byte(id)
+
+	// The strategy is to read the file through buf, looking for id, but we
+	// need to worry about id being split across two different reads.
+	// We allocate a tiny buffer (at least len(id)) and a big buffer (bufSize bytes)
+	// next to each other in memory and then copy the tail of
+	// one read into the tiny buffer before reading new data into the big buffer.
+	// The search for id is over the entire tiny+big buffer.
+	tiny := (len(id) + 127) &^ 127 // round up to 128-aligned
+	buf := make([]byte, tiny+bufSize)
+	h := sha256.New()
+	start := tiny
+	for offset := int64(0); ; {
+		// The file offset maintained by the loop corresponds to &buf[tiny]; buf[start:tiny] is
+		// left over from the previous iteration. We read n bytes and process buf[start:tiny+n].
+		n, err := io.ReadFull(r, buf[tiny:])
+		if err != io.ErrUnexpectedEOF && err != io.EOF && err != nil {
+			return nil, [32]byte{}, err
+		}
+
+		// Process any matches.
+		for {
+			i := bytes.Index(buf[start:tiny+n], idBytes)
+			if i < 0 {
+				break
+			}
+			matches = append(matches, offset+int64(start+i-tiny))
+			h.Write(buf[start : start+i])
+			h.Write(zeros)
+			start += i + len(id)
+		}
+		if n < bufSize {
+			// Did not fill buffer, must be at end of file.
+			h.Write(buf[start : tiny+n])
+			break
+		}
+
+		// Process all but final tiny bytes of buf (bufSize = len(buf)-tiny). start > len(buf)-tiny
+		// is possible, if the search above found an id ending in the final tiny fringe. That's OK.
+		if start < len(buf)-tiny {
+			h.Write(buf[start : len(buf)-tiny])
+			start = len(buf) - tiny
+		}
+
+		// Slide ending tiny-sized fringe to beginning of buffer.
+		copy(buf[0:], buf[bufSize:])
+		start -= bufSize
+		offset += int64(bufSize)
+	}
+	h.Sum(hash[:0])
+	return matches, hash, nil
+}
+
+// Rewrite writes id to w at each offset in pos, stopping at the first write error.
+func Rewrite(w io.WriterAt, pos []int64, id string) error {
+	for b, i := []byte(id), 0; i < len(pos); i++ {
+		if _, err := w.WriteAt(b, pos[i]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/src/cmd/internal/buildid/testdata/a.elf b/src/cmd/internal/buildid/testdata/a.elf
new file mode 100755
index 0000000000..f63128921a
--- /dev/null
+++ b/src/cmd/internal/buildid/testdata/a.elf
diff --git a/src/cmd/internal/buildid/testdata/a.macho b/src/cmd/internal/buildid/testdata/a.macho
new file mode 100755
index 0000000000..fbbd57c1fe
--- /dev/null
+++ b/src/cmd/internal/buildid/testdata/a.macho
diff --git a/src/cmd/internal/buildid/testdata/a.pe b/src/cmd/internal/buildid/testdata/a.pe
new file mode 100755
index 0000000000..91202728c3
--- /dev/null
+++ b/src/cmd/internal/buildid/testdata/a.pe
diff --git a/src/cmd/internal/buildid/testdata/p.a b/src/cmd/internal/buildid/testdata/p.a
new file mode 100644
index 0000000000..dcc3e76ef8
--- /dev/null
+++ b/src/cmd/internal/buildid/testdata/p.a
author	Russ Cox <rsc@golang.org>	2017-10-06 14:03:55 -0400
committer	Russ Cox <rsc@golang.org>	2017-10-11 18:16:02 +0000
commit	9ad2319bbca45b0750366e99b79db8889f0dfc5b (patch)
tree	2e8b606a92b962dce1d397cf9eb5600b24965eb0 /src/cmd/internal/buildid
parent	0bede7f34e20a77052b433a3c8ee402aa731183e (diff)
download	go-9ad2319bbca45b0750366e99b79db8889f0dfc5b.tar.xz