From 7bb721b9384bdd196befeaed593b185f7f2a5589 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 7 Jul 2020 13:49:21 -0400 Subject: all: update references to symbols moved from os to io/fs The old os references are still valid, but update our code to reflect best practices and get used to the new locations. Code compiled with the bootstrap toolchain (cmd/asm, cmd/dist, cmd/compile, debug/elf) must remain Go 1.4-compatible and is excluded. For #41190. Change-Id: I8f9526977867c10a221e2f392f78d7dec073f1bd Reviewed-on: https://go-review.googlesource.com/c/go/+/243907 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Rob Pike --- src/cmd/internal/buildid/buildid.go | 9 +++++---- src/cmd/internal/buildid/note.go | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src/cmd/internal/buildid') diff --git a/src/cmd/internal/buildid/buildid.go b/src/cmd/internal/buildid/buildid.go index ac238d70ea..1d6563cafc 100644 --- a/src/cmd/internal/buildid/buildid.go +++ b/src/cmd/internal/buildid/buildid.go @@ -10,6 +10,7 @@ import ( "fmt" "internal/xcoff" "io" + "io/fs" "os" "strconv" "strings" @@ -109,7 +110,7 @@ func ReadFile(name string) (id string, err error) { // in cmd/go/internal/work/exec.go. func readGccgoArchive(name string, f *os.File) (string, error) { bad := func() (string, error) { - return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} + return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} } off := int64(8) @@ -167,7 +168,7 @@ func readGccgoArchive(name string, f *os.File) (string, error) { // in cmd/go/internal/work/exec.go. func readGccgoBigArchive(name string, f *os.File) (string, error) { bad := func() (string, error) { - return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} + return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} } // Read fixed-length header. @@ -309,13 +310,13 @@ func readRaw(name string, data []byte) (id string, err error) { j := bytes.Index(data[i+len(goBuildPrefix):], goBuildEnd) if j < 0 { - return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} + return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} } quoted := data[i+len(goBuildPrefix)-1 : i+len(goBuildPrefix)+j+1] id, err = strconv.Unquote(string(quoted)) if err != nil { - return "", &os.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} + return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed} } return id, nil } diff --git a/src/cmd/internal/buildid/note.go b/src/cmd/internal/buildid/note.go index 2d26ea9961..f5b6fc565f 100644 --- a/src/cmd/internal/buildid/note.go +++ b/src/cmd/internal/buildid/note.go @@ -11,6 +11,7 @@ import ( "encoding/binary" "fmt" "io" + "io/fs" "os" ) @@ -96,7 +97,7 @@ func readELF(name string, f *os.File, data []byte) (buildid string, err error) { ef, err := elf.NewFile(bytes.NewReader(data)) if err != nil { - return "", &os.PathError{Path: name, Op: "parse", Err: err} + return "", &fs.PathError{Path: name, Op: "parse", Err: err} } var gnu string for _, p := range ef.Progs { @@ -181,13 +182,13 @@ func readMacho(name string, f *os.File, data []byte) (buildid string, err error) mf, err := macho.NewFile(f) if err != nil { - return "", &os.PathError{Path: name, Op: "parse", Err: err} + return "", &fs.PathError{Path: name, Op: "parse", Err: err} } sect := mf.Section("__text") if sect == nil { // Every binary has a __text section. Something is wrong. - return "", &os.PathError{Path: name, Op: "parse", Err: fmt.Errorf("cannot find __text section")} + return "", &fs.PathError{Path: name, Op: "parse", Err: fmt.Errorf("cannot find __text section")} } // It should be in the first few bytes, but read a lot just in case, -- cgit v1.3 From 615c7c18a70e0d6638accdb0fcc5f60c57a2118b Mon Sep 17 00:00:00 2001 From: Mikhail Fesenko Date: Wed, 28 Oct 2020 20:35:23 +0000 Subject: cmd/buildid: move and reuse duplicated HashToString code to cmd/internal/buildid/buildid Change-Id: I1e1ac770d4aac12d7d7ec57ef95f77a3e14a678c GitHub-Last-Rev: c01db4346eb08ffe0c1953892fb4222764048e30 GitHub-Pull-Request: golang/go#42052 Reviewed-on: https://go-review.googlesource.com/c/go/+/263418 Run-TryBot: Jay Conrod TryBot-Result: Go Bot Reviewed-by: Jay Conrod Trust: Jay Conrod Trust: Michael Matloob --- src/cmd/buildid/buildid.go | 19 ++----------------- src/cmd/go/internal/work/buildid.go | 35 +++++------------------------------ src/cmd/internal/buildid/buildid.go | 29 +++++++++++++++++++++++++---- 3 files changed, 32 insertions(+), 51 deletions(-) (limited to 'src/cmd/internal/buildid') diff --git a/src/cmd/buildid/buildid.go b/src/cmd/buildid/buildid.go index 1c7b228c98..699d977950 100644 --- a/src/cmd/buildid/buildid.go +++ b/src/cmd/buildid/buildid.go @@ -22,21 +22,6 @@ func usage() { var wflag = flag.Bool("w", false, "write build ID") -// taken from cmd/go/internal/work/buildid.go -func hashToString(h [32]byte) string { - const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - const chunks = 5 - var dst [chunks * 4]byte - for i := 0; i < chunks; i++ { - v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2]) - dst[4*i+0] = b64[(v>>18)&0x3F] - dst[4*i+1] = b64[(v>>12)&0x3F] - dst[4*i+2] = b64[(v>>6)&0x3F] - dst[4*i+3] = b64[v&0x3F] - } - return string(dst[:]) -} - func main() { log.SetPrefix("buildid: ") log.SetFlags(0) @@ -63,12 +48,12 @@ func main() { log.Fatal(err) } matches, hash, err := buildid.FindAndHash(f, id, 0) + f.Close() if err != nil { log.Fatal(err) } - f.Close() - newID := id[:strings.LastIndex(id, "/")] + "/" + hashToString(hash) + newID := id[:strings.LastIndex(id, "/")] + "/" + buildid.HashToString(hash) if len(newID) != len(id) { log.Fatalf("%s: build ID length mismatch %q vs %q", file, id, newID) } diff --git a/src/cmd/go/internal/work/buildid.go b/src/cmd/go/internal/work/buildid.go index 5cd3124e54..9ef141c619 100644 --- a/src/cmd/go/internal/work/buildid.go +++ b/src/cmd/go/internal/work/buildid.go @@ -31,7 +31,7 @@ import ( // // actionID/[.../]contentID // -// where the actionID and contentID are prepared by hashToString below. +// where the actionID and contentID are prepared by buildid.HashToString below. // and are found by looking for the first or last slash. // Usually the buildID is simply actionID/contentID, but see below for an // exception. @@ -108,31 +108,6 @@ func contentID(buildID string) string { return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:] } -// hashToString converts the hash h to a string to be recorded -// in package archives and binaries as part of the build ID. -// We use the first 120 bits of the hash (5 chunks of 24 bits each) and encode -// it in base64, resulting in a 20-byte string. Because this is only used for -// detecting the need to rebuild installed files (not for lookups -// in the object file cache), 120 bits are sufficient to drive the -// probability of a false "do not need to rebuild" decision to effectively zero. -// We embed two different hashes in archives and four in binaries, -// so cutting to 20 bytes is a significant savings when build IDs are displayed. -// (20*4+3 = 83 bytes compared to 64*4+3 = 259 bytes for the -// more straightforward option of printing the entire h in base64). -func hashToString(h [cache.HashSize]byte) string { - const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - const chunks = 5 - var dst [chunks * 4]byte - for i := 0; i < chunks; i++ { - v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2]) - dst[4*i+0] = b64[(v>>18)&0x3F] - dst[4*i+1] = b64[(v>>12)&0x3F] - dst[4*i+2] = b64[(v>>6)&0x3F] - dst[4*i+3] = b64[v&0x3F] - } - return string(dst[:]) -} - // toolID returns the unique ID to use for the current copy of the // named tool (asm, compile, cover, link). // @@ -404,7 +379,7 @@ func (b *Builder) fileHash(file string) string { if err != nil { return "" } - return hashToString(sum) + return buildid.HashToString(sum) } // useCache tries to satisfy the action a, which has action ID actionHash, @@ -427,7 +402,7 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string) // the actionID half; if it also appeared in the input that would be like an // engineered 120-bit partial SHA256 collision. a.actionID = actionHash - actionID := hashToString(actionHash) + actionID := buildid.HashToString(actionHash) if a.json != nil { a.json.ActionID = actionID } @@ -480,7 +455,7 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string) // build IDs of completed actions. oldBuildID := a.buildID a.buildID = id[1] + buildIDSeparator + id[2] - linkID := hashToString(b.linkActionID(a.triggers[0])) + linkID := buildid.HashToString(b.linkActionID(a.triggers[0])) if id[0] == linkID { // Best effort attempt to display output from the compile and link steps. // If it doesn't work, it doesn't work: reusing the cached binary is more @@ -654,7 +629,7 @@ func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error { if err != nil { return err } - newID := a.buildID[:strings.LastIndex(a.buildID, buildIDSeparator)] + buildIDSeparator + hashToString(hash) + newID := a.buildID[:strings.LastIndex(a.buildID, buildIDSeparator)] + buildIDSeparator + buildid.HashToString(hash) if len(newID) != len(a.buildID) { return fmt.Errorf("internal error: build ID length mismatch %q vs %q", a.buildID, newID) } diff --git a/src/cmd/internal/buildid/buildid.go b/src/cmd/internal/buildid/buildid.go index 1d6563cafc..1e8855d3ac 100644 --- a/src/cmd/internal/buildid/buildid.go +++ b/src/cmd/internal/buildid/buildid.go @@ -17,12 +17,8 @@ import ( ) var ( - errBuildIDToolchain = fmt.Errorf("build ID only supported in gc toolchain") errBuildIDMalformed = fmt.Errorf("malformed object file") - errBuildIDUnknown = fmt.Errorf("lost build ID") -) -var ( bangArch = []byte("!") pkgdef = []byte("__.PKGDEF") goobject = []byte("go object ") @@ -320,3 +316,28 @@ func readRaw(name string, data []byte) (id string, err error) { } return id, nil } + +// HashToString converts the hash h to a string to be recorded +// in package archives and binaries as part of the build ID. +// We use the first 120 bits of the hash (5 chunks of 24 bits each) and encode +// it in base64, resulting in a 20-byte string. Because this is only used for +// detecting the need to rebuild installed files (not for lookups +// in the object file cache), 120 bits are sufficient to drive the +// probability of a false "do not need to rebuild" decision to effectively zero. +// We embed two different hashes in archives and four in binaries, +// so cutting to 20 bytes is a significant savings when build IDs are displayed. +// (20*4+3 = 83 bytes compared to 64*4+3 = 259 bytes for the +// more straightforward option of printing the entire h in base64). +func HashToString(h [32]byte) string { + const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + const chunks = 5 + var dst [chunks * 4]byte + for i := 0; i < chunks; i++ { + v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2]) + dst[4*i+0] = b64[(v>>18)&0x3F] + dst[4*i+1] = b64[(v>>12)&0x3F] + dst[4*i+2] = b64[(v>>6)&0x3F] + dst[4*i+3] = b64[v&0x3F] + } + return string(dst[:]) +} -- cgit v1.3 From 7fca39aa05ad3c60abac1ae51ae9847dfbe017d6 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 21 Nov 2020 17:43:16 -0500 Subject: cmd/internal/buildid: exclude Mach-O code signature in hash calculation The code signature contains hashes of the entire file (except the signature itself), including the buildid. Therefore, the buildid cannot depend on the signature. Otherwise updating buildid will invalidate the signature, and vice versa. As we cannot change the code-signing algorithm, we can only change buildid calculation. This CL changes the buildid calculation to exclude the Mach-O code signature. So updating code signature after stamping the buildid will not invalidate the buildid. Updates #38485, #42684. Change-Id: I8a9e2e25ca9dc00d9556d13b81652f43bbf6a084 Reviewed-on: https://go-review.googlesource.com/c/go/+/272255 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-by: Than McIntosh --- src/cmd/internal/buildid/buildid_test.go | 31 ++++++++++++++++++++ src/cmd/internal/buildid/rewrite.go | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) (limited to 'src/cmd/internal/buildid') diff --git a/src/cmd/internal/buildid/buildid_test.go b/src/cmd/internal/buildid/buildid_test.go index 904c2c6f37..e832f9987e 100644 --- a/src/cmd/internal/buildid/buildid_test.go +++ b/src/cmd/internal/buildid/buildid_test.go @@ -11,6 +11,7 @@ import ( "io/ioutil" "os" "reflect" + "strings" "testing" ) @@ -146,3 +147,33 @@ func TestFindAndHash(t *testing.T) { } } } + +func TestExcludedReader(t *testing.T) { + const s = "0123456789abcdefghijklmn" + tests := []struct { + start, end int64 // excluded range + results []string // expected results of reads + }{ + {12, 15, []string{"0123456789", "ab\x00\x00\x00fghij", "klmn"}}, // within one read + {8, 21, []string{"01234567\x00\x00", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", "\x00lmn"}}, // across multiple reads + {10, 20, []string{"0123456789", "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", "klmn"}}, // a whole read + {0, 5, []string{"\x00\x00\x00\x00\x0056789", "abcdefghij", "klmn"}}, // start + {12, 24, []string{"0123456789", "ab\x00\x00\x00\x00\x00\x00\x00\x00", "\x00\x00\x00\x00"}}, // end + } + p := make([]byte, 10) + for _, test := range tests { + r := &excludedReader{strings.NewReader(s), 0, test.start, test.end} + for _, res := range test.results { + n, err := r.Read(p) + if err != nil { + t.Errorf("read failed: %v", err) + } + if n != len(res) { + t.Errorf("unexpected number of bytes read: want %d, got %d", len(res), n) + } + if string(p[:n]) != res { + t.Errorf("unexpected bytes: want %q, got %q", res, p[:n]) + } + } + } +} diff --git a/src/cmd/internal/buildid/rewrite.go b/src/cmd/internal/buildid/rewrite.go index 5be54552a6..d3d2009d1c 100644 --- a/src/cmd/internal/buildid/rewrite.go +++ b/src/cmd/internal/buildid/rewrite.go @@ -6,7 +6,9 @@ package buildid import ( "bytes" + "cmd/internal/codesign" "crypto/sha256" + "debug/macho" "fmt" "io" ) @@ -26,6 +28,11 @@ func FindAndHash(r io.Reader, id string, bufSize int) (matches []int64, hash [32 zeros := make([]byte, len(id)) idBytes := []byte(id) + // For Mach-O files, we want to exclude the code signature. + // The code signature contains hashes of the whole file (except the signature + // itself), including the buildid. So the buildid cannot contain the signature. + r = excludeMachoCodeSignature(r) + // The strategy is to read the file through buf, looking for id, // but we need to worry about what happens if id is broken up // and returned in parts by two different reads. @@ -89,3 +96,46 @@ func Rewrite(w io.WriterAt, pos []int64, id string) error { } return nil } + +func excludeMachoCodeSignature(r io.Reader) io.Reader { + ra, ok := r.(io.ReaderAt) + if !ok { + return r + } + f, err := macho.NewFile(ra) + if err != nil { + return r + } + cmd, ok := codesign.FindCodeSigCmd(f) + if !ok { + return r + } + return &excludedReader{r, 0, int64(cmd.Dataoff), int64(cmd.Dataoff + cmd.Datasize)} +} + +// excludedReader wraps an io.Reader. Reading from it returns the bytes from +// the underlying reader, except that when the byte offset is within the +// range between start and end, it returns zero bytes. +type excludedReader struct { + r io.Reader + off int64 // current offset + start, end int64 // the range to be excluded (read as zero) +} + +func (r *excludedReader) Read(p []byte) (int, error) { + n, err := r.r.Read(p) + if n > 0 && r.off+int64(n) > r.start && r.off < r.end { + cstart := r.start - r.off + if cstart < 0 { + cstart = 0 + } + cend := r.end - r.off + if cend > int64(n) { + cend = int64(n) + } + zeros := make([]byte, cend-cstart) + copy(p[cstart:cend], zeros) + } + r.off += int64(n) + return n, err +} -- cgit v1.3 From 8cd35e00bd413e68e6b9ae6403aa4209fc89b90f Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 21 Nov 2020 21:24:57 -0500 Subject: cmd/internal/buildid: update Mach-O code signature when rewriting buildid As the code signature contains hashes of the entire file (except the signature itself), rewriting buildid will invalidate the signature. This CL makes it regenerate the signature when rewriting the buildid. It only does it when the file already has a code signature, with proper size (darwin/arm64 binaries generated by the Go linker should have). Updates #38485, #42684. Change-Id: I082d9e5808b0ee6a35f9c362d7262aadd9113c81 Reviewed-on: https://go-review.googlesource.com/c/go/+/272257 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-by: Than McIntosh --- src/cmd/buildid/buildid.go | 2 +- src/cmd/go/internal/work/buildid.go | 2 +- src/cmd/internal/buildid/rewrite.go | 39 ++++++++++++++++++++++++++++--------- 3 files changed, 32 insertions(+), 11 deletions(-) (limited to 'src/cmd/internal/buildid') diff --git a/src/cmd/buildid/buildid.go b/src/cmd/buildid/buildid.go index 699d977950..8e02a7ae10 100644 --- a/src/cmd/buildid/buildid.go +++ b/src/cmd/buildid/buildid.go @@ -62,7 +62,7 @@ func main() { return } - f, err = os.OpenFile(file, os.O_WRONLY, 0) + f, err = os.OpenFile(file, os.O_RDWR, 0) if err != nil { log.Fatal(err) } diff --git a/src/cmd/go/internal/work/buildid.go b/src/cmd/go/internal/work/buildid.go index a88544e1af..3c7be5a3e3 100644 --- a/src/cmd/go/internal/work/buildid.go +++ b/src/cmd/go/internal/work/buildid.go @@ -647,7 +647,7 @@ func (b *Builder) updateBuildID(a *Action, target string, rewrite bool) error { } if rewrite { - w, err := os.OpenFile(target, os.O_WRONLY, 0) + w, err := os.OpenFile(target, os.O_RDWR, 0) if err != nil { return err } diff --git a/src/cmd/internal/buildid/rewrite.go b/src/cmd/internal/buildid/rewrite.go index d3d2009d1c..a7928959c4 100644 --- a/src/cmd/internal/buildid/rewrite.go +++ b/src/cmd/internal/buildid/rewrite.go @@ -94,19 +94,27 @@ func Rewrite(w io.WriterAt, pos []int64, id string) error { return err } } + + // Update Mach-O code signature, if any. + if f, cmd, ok := findMachoCodeSignature(w); ok { + if codesign.Size(int64(cmd.Dataoff), "a.out") == int64(cmd.Datasize) { + // Update the signature if the size matches, so we don't need to + // fix up headers. Binaries generated by the Go linker should have + // the expected size. Otherwise skip. + text := f.Segment("__TEXT") + cs := make([]byte, cmd.Datasize) + codesign.Sign(cs, w.(io.Reader), "a.out", int64(cmd.Dataoff), int64(text.Offset), int64(text.Filesz), f.Type == macho.TypeExec) + if _, err := w.WriteAt(cs, int64(cmd.Dataoff)); err != nil { + return err + } + } + } + return nil } func excludeMachoCodeSignature(r io.Reader) io.Reader { - ra, ok := r.(io.ReaderAt) - if !ok { - return r - } - f, err := macho.NewFile(ra) - if err != nil { - return r - } - cmd, ok := codesign.FindCodeSigCmd(f) + _, cmd, ok := findMachoCodeSignature(r) if !ok { return r } @@ -139,3 +147,16 @@ func (r *excludedReader) Read(p []byte) (int, error) { r.off += int64(n) return n, err } + +func findMachoCodeSignature(r interface{}) (*macho.File, codesign.CodeSigCmd, bool) { + ra, ok := r.(io.ReaderAt) + if !ok { + return nil, codesign.CodeSigCmd{}, false + } + f, err := macho.NewFile(ra) + if err != nil { + return nil, codesign.CodeSigCmd{}, false + } + cmd, ok := codesign.FindCodeSigCmd(f) + return f, cmd, ok +} -- cgit v1.3