diff options
| author | Tim King <taking@google.com> | 2024-11-14 12:04:39 -0800 |
|---|---|---|
| committer | Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-11-22 00:04:39 +0000 |
| commit | 0edea47f264a4185d78e00e1e9e977d99f5c997b (patch) | |
| tree | d826d7f38902c80dd36ced8d273bde047b737a95 /src/cmd/compile/internal/noder | |
| parent | d306003ff8fa4d5fbbebdd2eb186137b6328dca4 (diff) | |
| download | go-0edea47f264a4185d78e00e1e9e977d99f5c997b.tar.xz | |
internal/exportdata, cmd/compile/internal/noder: merge export data handling
Unify how go/types, types2, and noder read in unified export data from
GC-created files.
This splits FindExportData into smaller pieces for improved code
sharing.
- FindPackageDefinition finds the package definition file in the ar
archive.
- ReadObjectHeaders reads the object headers.
- ReadExportDataHeader reads the export data format header.
There is a new convenience wrapper ReadUnified that combines all of
these. This documents the expected archive contents.
Updates noder and the importers to use these.
This also adjusts when end-of-section marker ("\n$$\n") checking happens.
Change-Id: Iec2179b0a1ae7f69eb12d077018f731116a77f13
Reviewed-on: https://go-review.googlesource.com/c/go/+/628155
Reviewed-by: Robert Griesemer <gri@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Commit-Queue: Tim King <taking@google.com>
Diffstat (limited to 'src/cmd/compile/internal/noder')
| -rw-r--r-- | src/cmd/compile/internal/noder/import.go | 123 |
1 file changed, 42 insertions(+), 81 deletions(-)
diff --git a/src/cmd/compile/internal/noder/import.go b/src/cmd/compile/internal/noder/import.go index 964b01ec42..910988f061 100644 --- a/src/cmd/compile/internal/noder/import.go +++ b/src/cmd/compile/internal/noder/import.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "internal/buildcfg" + "internal/exportdata" "internal/pkgbits" "os" pathpkg "path" @@ -22,7 +23,6 @@ import ( "cmd/compile/internal/typecheck" "cmd/compile/internal/types" "cmd/compile/internal/types2" - "cmd/internal/archive" "cmd/internal/bio" "cmd/internal/goobj" "cmd/internal/objabi" @@ -207,7 +207,7 @@ func readImportFile(path string, target *ir.Package, env *types2.Context, packag } defer f.Close() - r, end, err := findExportData(f) + data, err := readExportData(f) if err != nil { return } @@ -216,94 +216,63 @@ func readImportFile(path string, target *ir.Package, env *types2.Context, packag fmt.Printf("importing %s (%s)\n", path, f.Name()) } - c, err := r.ReadByte() - if err != nil { - return - } + pr := pkgbits.NewPkgDecoder(pkg1.Path, data) - pos := r.Offset() - - // Map export data section into memory as a single large - // string. This reduces heap fragmentation and allows returning - // individual substrings very efficiently. - var data string - data, err = base.MapFile(r.File(), pos, end-pos) - if err != nil { - return - } - - switch c { - case 'u': - // TODO(mdempsky): This seems a bit clunky. - data = strings.TrimSuffix(data, "\n$$\n") - - pr := pkgbits.NewPkgDecoder(pkg1.Path, data) - - // Read package descriptors for both types2 and compiler backend. - readPackage(newPkgReader(pr), pkg1, false) - pkg2 = importer.ReadPackage(env, packages, pr) - - default: - // Indexed format is distinguished by an 'i' byte, - // whereas previous export formats started with 'c', 'd', or 'v'. - err = fmt.Errorf("unexpected package format byte: %v", c) - return - } + // Read package descriptors for both types2 and compiler backend. 
+ readPackage(newPkgReader(pr), pkg1, false) + pkg2 = importer.ReadPackage(env, packages, pr) - err = addFingerprint(path, f, end) + err = addFingerprint(path, data) return } -// findExportData returns a *bio.Reader positioned at the start of the -// binary export data section, and a file offset for where to stop -// reading. -func findExportData(f *os.File) (r *bio.Reader, end int64, err error) { - r = bio.NewReader(f) +// readExportData returns the contents of GC-created unified export data. +func readExportData(f *os.File) (data string, err error) { + r := bio.NewReader(f) - // check object header - line, err := r.ReadString('\n') + sz, err := exportdata.FindPackageDefinition(r.Reader) if err != nil { return } + end := r.Offset() + int64(sz) - // Is the first line an archive file signature? - if line != "!<arch>\n" { - err = fmt.Errorf("not the start of an archive file (%q)", line) + abihdr, _, err := exportdata.ReadObjectHeaders(r.Reader) + if err != nil { return } - // package export block should be first - sz := int64(archive.ReadHeader(r.Reader, "__.PKGDEF")) - if sz <= 0 { - err = errors.New("not a package file") + if expect := objabi.HeaderString(); abihdr != expect { + err = fmt.Errorf("object is [%s] expected [%s]", abihdr, expect) return } - end = r.Offset() + sz - line, err = r.ReadString('\n') + + _, err = exportdata.ReadExportDataHeader(r.Reader) if err != nil { return } - if !strings.HasPrefix(line, "go object ") { - err = fmt.Errorf("not a go object file: %s", line) - return - } - if expect := objabi.HeaderString(); line != expect { - err = fmt.Errorf("object is [%s] expected [%s]", line, expect) + pos := r.Offset() + + // Map export data section (+ end-of-section marker) into memory + // as a single large string. This reduces heap fragmentation and + // allows returning individual substrings very efficiently. 
+ var mapped string + mapped, err = base.MapFile(r.File(), pos, end-pos) + if err != nil { return } - // process header lines - for !strings.HasPrefix(line, "$$") { - line, err = r.ReadString('\n') - if err != nil { - return - } - } + // check for end-of-section marker "\n$$\n" and remove it + const marker = "\n$$\n" - // Expect $$B\n to signal binary import format. - if line != "$$B\n" { - err = errors.New("old export format no longer supported (recompile package)") + var ok bool + data, ok = strings.CutSuffix(mapped, marker) + if !ok { + cutoff := data // include last 10 bytes in error message + if len(cutoff) >= 10 { + cutoff = cutoff[len(cutoff)-10:] + } + err = fmt.Errorf("expected $$ marker, but found %q (recompile package)", cutoff) return } @@ -312,24 +281,16 @@ func findExportData(f *os.File) (r *bio.Reader, end int64, err error) { // addFingerprint reads the linker fingerprint included at the end of // the exportdata. -func addFingerprint(path string, f *os.File, end int64) error { - const eom = "\n$$\n" +func addFingerprint(path string, data string) error { var fingerprint goobj.FingerprintType - var buf [len(fingerprint) + len(eom)]byte - if _, err := f.ReadAt(buf[:], end-int64(len(buf))); err != nil { - return err - } - - // Caller should have given us the end position of the export data, - // which should end with the "\n$$\n" marker. As a consistency check - // to make sure we're reading at the right offset, make sure we - // found the marker. - if s := string(buf[len(fingerprint):]); s != eom { - return fmt.Errorf("expected $$ marker, but found %q", s) + pos := len(data) - len(fingerprint) + if pos < 0 { + return fmt.Errorf("missing linker fingerprint in exportdata, but found %q", data) } + buf := []byte(data[pos:]) - copy(fingerprint[:], buf[:]) + copy(fingerprint[:], buf) base.Ctxt.AddImport(path, fingerprint) return nil |
