aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorunbyte <i@shangyes.net>2021-06-15 17:11:05 +0000
committerBryan C. Mills <bcmills@google.com>2021-06-15 18:42:11 +0000
commit219fe9d547d09d3de1b024c6c8b8314dd0bf12e4 (patch)
treea2df411cf593dc3a165da112c6909417dd32f381 /src
parent723f199edd8619c8eedc1c4e8df1e5f96599d51a (diff)
downloadgo-219fe9d547d09d3de1b024c6c8b8314dd0bf12e4.tar.xz
cmd/go: ignore UTF8 BOM when reading source code
Fix the problem that UTF8 BOM can cause the parsing of import path and directives to fail. Fixes #46198 Fixes #46290 Fixes #35726 Change-Id: I2d9995ee82b094bcfa5583f0cb4e8547cb973077 GitHub-Last-Rev: 98abf91377f155266fa60505c0c12beccad38eeb GitHub-Pull-Request: golang/go#46643 Reviewed-on: https://go-review.googlesource.com/c/go/+/325990 Reviewed-by: Bryan C. Mills <bcmills@google.com> Trust: Bryan C. Mills <bcmills@google.com> Trust: Robert Findley <rfindley@google.com> Run-TryBot: Bryan C. Mills <bcmills@google.com> TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src')
-rw-r--r--src/cmd/go/internal/imports/read.go18
-rw-r--r--src/cmd/go/internal/imports/read_test.go26
-rw-r--r--src/cmd/go/testdata/script/build_ignore_leading_bom.txt27
-rw-r--r--src/go/build/read.go13
-rw-r--r--src/go/build/read_test.go36
5 files changed, 117 insertions, 3 deletions
diff --git a/src/cmd/go/internal/imports/read.go b/src/cmd/go/internal/imports/read.go
index 5e270781d7..70d5190450 100644
--- a/src/cmd/go/internal/imports/read.go
+++ b/src/cmd/go/internal/imports/read.go
@@ -8,6 +8,7 @@ package imports
import (
"bufio"
+ "bytes"
"errors"
"io"
"unicode/utf8"
@@ -22,6 +23,19 @@ type importReader struct {
nerr int
}
+var bom = []byte{0xef, 0xbb, 0xbf}
+
+func newImportReader(b *bufio.Reader) *importReader {
+ // Remove leading UTF-8 BOM.
+ // Per https://golang.org/ref/spec#Source_code_representation:
+ // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
+ // if it is the first Unicode code point in the source text.
+ if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
+ b.Discard(3)
+ }
+ return &importReader{b: b}
+}
+
func isIdent(c byte) bool {
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
}
@@ -201,7 +215,7 @@ func (r *importReader) readImport(imports *[]string) {
// ReadComments is like io.ReadAll, except that it only reads the leading
// block of comments in the file.
func ReadComments(f io.Reader) ([]byte, error) {
- r := &importReader{b: bufio.NewReader(f)}
+ r := newImportReader(bufio.NewReader(f))
r.peekByte(true)
if r.err == nil && !r.eof {
// Didn't reach EOF, so must have found a non-space byte. Remove it.
@@ -213,7 +227,7 @@ func ReadComments(f io.Reader) ([]byte, error) {
// ReadImports is like io.ReadAll, except that it expects a Go file as input
// and stops reading the input once the imports have completed.
func ReadImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) {
- r := &importReader{b: bufio.NewReader(f)}
+ r := newImportReader(bufio.NewReader(f))
r.readKeyword("package")
r.readIdent()
diff --git a/src/cmd/go/internal/imports/read_test.go b/src/cmd/go/internal/imports/read_test.go
index 6ea356f1ff..6a1a6524a1 100644
--- a/src/cmd/go/internal/imports/read_test.go
+++ b/src/cmd/go/internal/imports/read_test.go
@@ -66,6 +66,10 @@ var readImportsTests = []readTest{
`,
"",
},
+ {
+ "\ufeff𝔻" + `package p; import "x";ℙvar x = 1`,
+ "",
+ },
}
var readCommentsTests = []readTest{
@@ -82,6 +86,10 @@ var readCommentsTests = []readTest{
"",
},
{
+ "\ufeff𝔻" + `ℙpackage p; import . "x"`,
+ "",
+ },
+ {
`// foo
/* bar */
@@ -94,6 +102,19 @@ var readCommentsTests = []readTest{
ā„™Hello, world`,
"",
},
+ {
+ "\ufeff𝔻" + `// foo
+
+ /* bar */
+
+ /* quux */ // baz
+
+ /*/ zot */
+
+ // asdf
+ ā„™Hello, world`,
+ "",
+ },
}
func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, error)) {
@@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
in = tt.in[:j] + tt.in[j+len("ā„™"):]
testOut = tt.in[:j]
}
+ d := strings.Index(tt.in, "𝔻")
+ if d >= 0 {
+ in = in[:d] + in[d+len("𝔻"):]
+ testOut = testOut[d+len("𝔻"):]
+ }
r := strings.NewReader(in)
buf, err := read(r)
if err != nil {
diff --git a/src/cmd/go/testdata/script/build_ignore_leading_bom.txt b/src/cmd/go/testdata/script/build_ignore_leading_bom.txt
new file mode 100644
index 0000000000..37141f3466
--- /dev/null
+++ b/src/cmd/go/testdata/script/build_ignore_leading_bom.txt
@@ -0,0 +1,27 @@
+# Per https://golang.org/ref/spec#Source_code_representation:
+# a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
+# if it is the first Unicode code point in the source text.
+
+go list -f 'Imports: {{.Imports}} EmbedFiles: {{.EmbedFiles}}' .
+stdout '^Imports: \[embed m/hello\] EmbedFiles: \[.*file\]$'
+
+-- go.mod --
+module m
+
+go 1.16
+-- m.go --
+package main
+
+import (
+ _ "embed"
+
+ "m/hello"
+)
+
+//go:embed file
+var s string
+
+-- hello/hello.go --
+package hello
+
+-- file --
diff --git a/src/go/build/read.go b/src/go/build/read.go
index aa7c6ee59e..b98c7938a8 100644
--- a/src/go/build/read.go
+++ b/src/go/build/read.go
@@ -6,6 +6,7 @@ package build
import (
"bufio"
+ "bytes"
"errors"
"fmt"
"go/ast"
@@ -28,9 +29,19 @@ type importReader struct {
pos token.Position
}
+var bom = []byte{0xef, 0xbb, 0xbf}
+
func newImportReader(name string, r io.Reader) *importReader {
+ b := bufio.NewReader(r)
+ // Remove leading UTF-8 BOM.
+ // Per https://golang.org/ref/spec#Source_code_representation:
+ // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
+ // if it is the first Unicode code point in the source text.
+ if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
+ b.Discard(3)
+ }
return &importReader{
- b: bufio.NewReader(r),
+ b: b,
pos: token.Position{
Filename: name,
Line: 1,
diff --git a/src/go/build/read_test.go b/src/go/build/read_test.go
index 32e6bae008..1e5e1c2de2 100644
--- a/src/go/build/read_test.go
+++ b/src/go/build/read_test.go
@@ -66,6 +66,10 @@ var readGoInfoTests = []readTest{
`,
"",
},
+ {
+ "\ufeff𝔻" + `package p; import "x";ℙvar x = 1`,
+ "",
+ },
}
var readCommentsTests = []readTest{
@@ -82,6 +86,10 @@ var readCommentsTests = []readTest{
"",
},
{
+ "\ufeff𝔻" + `ℙpackage p; import . "x"`,
+ "",
+ },
+ {
`// foo
/* bar */
@@ -94,6 +102,19 @@ var readCommentsTests = []readTest{
ā„™Hello, world`,
"",
},
+ {
+ "\ufeff𝔻" + `// foo
+
+ /* bar */
+
+ /* quux */ // baz
+
+ /*/ zot */
+
+ // asdf
+ ā„™Hello, world`,
+ "",
+ },
}
func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, error)) {
@@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
in = tt.in[:j] + tt.in[j+len("ā„™"):]
testOut = tt.in[:j]
}
+ d := strings.Index(tt.in, "𝔻")
+ if d >= 0 {
+ in = in[:d] + in[d+len("𝔻"):]
+ testOut = testOut[d+len("𝔻"):]
+ }
r := strings.NewReader(in)
buf, err := read(r)
if err != nil {
@@ -265,6 +291,12 @@ var readEmbedTests = []struct {
test:3:16:z`,
},
{
+ "\ufeffpackage p\nimport \"embed\"\n//go:embed x y z\nvar files embed.FS",
+ `test:3:12:x
+ test:3:14:y
+ test:3:16:z`,
+ },
+ {
"package p\nimport \"embed\"\nvar s = \"/*\"\n//go:embed x\nvar files embed.FS",
`test:4:12:x`,
},
@@ -292,6 +324,10 @@ var readEmbedTests = []struct {
"package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
"",
},
+ {
+ "\ufeffpackage p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
+ "",
+ },
}
func TestReadEmbed(t *testing.T) {