diff options
| author | Michael Podtserkovskii <michaelpo@meta.com> | 2026-01-20 15:40:42 +0000 |
|---|---|---|
| committer | Alex Brainman <alex.brainman@gmail.com> | 2026-03-18 19:58:52 -0700 |
| commit | 4b7b1be731e35c2f9c5f3af6677b164ccb006568 (patch) | |
| tree | 07446a56b350ed67dc603aef199ca65cc5a4e860 /src/cmd/internal | |
| parent | b48b6fa2fa1b0632546bc203035bf63c9d042f02 (diff) | |
| download | go-4b7b1be731e35c2f9c5f3af6677b164ccb006568.tar.xz | |
cmd/internal/objabi: implement GCC-compatible response file parsing
Replace line-based response file parsing with a GCC-compatible parser
that supports:
- Whitespace-delimited arguments (multiple args per line)
- Single quotes for literal content (no escape processing)
- Double quotes with escape sequences: \\, \", \$, \`
- Backslash-newline line continuation (both LF and CRLF)
Update encodeArg in cmd/go/internal/work to produce GCC-compatible
output with proper escaping of special characters.
Add response file support to cmd/pack.
For #77177
Change-Id: Ib8dbe2761ebe139b3949dcaeb1de4bb5b15ddabb
Reviewed-on: https://go-review.googlesource.com/c/go/+/737500
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Mark Freeman <markfreeman@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Matloob <matloob@golang.org>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
Diffstat (limited to 'src/cmd/internal')
| -rw-r--r-- | src/cmd/internal/objabi/flag.go | 172 | ||||
| -rw-r--r-- | src/cmd/internal/objabi/flag_test.go | 60 |
2 files changed, 177 insertions, 55 deletions
diff --git a/src/cmd/internal/objabi/flag.go b/src/cmd/internal/objabi/flag.go index 603bf8746c..ae56e2b0b0 100644 --- a/src/cmd/internal/objabi/flag.go +++ b/src/cmd/internal/objabi/flag.go @@ -40,11 +40,12 @@ func Flagparse(usage func()) { // expandArgs expands "response files" arguments in the provided slice. // // A "response file" argument starts with '@' and the rest of that -// argument is a filename with CR-or-CRLF-separated arguments. Each -// argument in the named files can also contain response file -// arguments. See Issue 18468. +// argument is a filename with arguments. Arguments are separated by +// whitespace, and can use single quotes (literal) or double quotes +// (with escape sequences). Each argument in the named files can also +// contain response file arguments. See Issue 77177. // -// The returned slice 'out' aliases 'in' iff the input did not contain +// The returned slice 'out' aliases 'in' if the input did not contain // any response file arguments. // // TODO: handle relative paths of recursive expansions in different directories? @@ -61,10 +62,7 @@ func expandArgs(in []string) (out []string) { if err != nil { log.Fatal(err) } - args := strings.Split(strings.TrimSpace(strings.ReplaceAll(string(slurp), "\r", "")), "\n") - for i, arg := range args { - args[i] = DecodeArg(arg) - } + args := ParseArgs(slurp) out = append(out, expandArgs(args)...) } else if out != nil { out = append(out, s) @@ -76,6 +74,130 @@ func expandArgs(in []string) (out []string) { return } +// ParseArgs parses response file content into arguments using GCC-compatible rules. +// Arguments are separated by whitespace. Single quotes preserve content literally. +// Double quotes allow escape sequences: \\, \", \$, \`, and backslash-newline +// for line continuation (both LF and CRLF). Outside quotes, backslash escapes the +// next character, backslash-newline is line continuation (both LF and CRLF). +// We aim to follow GCC's buildargv implementation. 
+// Source code: https://github.com/gcc-mirror/gcc/blob/releases/gcc-15.2.0/libiberty/argv.c#L167 +// Known deviations from GCC: +// - CRLF is treated as line continuation to be Windows-friendly; GCC only recognizes LF. +// - Obsolete \f and \v are not treated as whitespaces +// This function is public to test with cmd/go/internal/work.encodeArg +func ParseArgs(s []byte) []string { + var args []string + var arg strings.Builder + hasArg := false // tracks if we've started an argument (for empty quotes) + inSingleQuote := false + inDoubleQuote := false + i := 0 + + for i < len(s) { + c := s[i] + + if inSingleQuote { + if c == '\'' { + inSingleQuote = false + } else { + arg.WriteByte(c) // No escape processing in single quotes + } + i++ + continue + } + + if inDoubleQuote { + if c == '\\' && i+1 < len(s) { + next := s[i+1] + switch next { + case '\\': + arg.WriteByte('\\') + i += 2 + case '"': + arg.WriteByte('"') + i += 2 + case '$': + arg.WriteByte('$') + i += 2 + case '`': + arg.WriteByte('`') + i += 2 + case '\n': + // Line continuation - skip backslash and newline + i += 2 + case '\r': + // Line continuation for CRLF - skip backslash, CR, and LF + if i+2 < len(s) && s[i+2] == '\n' { + i += 3 + } else { + arg.WriteByte(c) + i++ + } + default: + // Unknown escape - keep backslash and char + arg.WriteByte(c) + i++ + } + } else if c == '"' { + inDoubleQuote = false + i++ + } else { + arg.WriteByte(c) + i++ + } + continue + } + + // Normal mode (outside quotes) + switch c { + case ' ', '\t', '\n', '\r': + if arg.Len() > 0 || hasArg { + args = append(args, arg.String()) + arg.Reset() + hasArg = false + } + case '\'': + inSingleQuote = true + hasArg = true // Empty quotes still produce an arg + case '"': + inDoubleQuote = true + hasArg = true // Empty quotes still produce an arg + case '\\': + // Backslash escapes the next character outside quotes. + // Backslash-newline is line continuation (handles both LF and CRLF). 
+ if i+1 < len(s) { + next := s[i+1] + if next == '\n' { + i += 2 + continue + } + if next == '\r' && i+2 < len(s) && s[i+2] == '\n' { + i += 3 + continue + } + // Backslash escapes the next character + arg.WriteByte(next) + hasArg = true + i += 2 + continue + } + // Trailing backslash at end of input — consumed and discarded + i++ + continue + default: + arg.WriteByte(c) + } + i++ + } + + // Don't forget the last argument + if arg.Len() > 0 || hasArg { + args = append(args, arg.String()) + } + + return args +} + func AddVersionFlag() { flag.Var(versionFlag{}, "V", "print version and exit") } @@ -163,40 +285,6 @@ func (f fn1) Set(s string) error { func (f fn1) String() string { return "" } -// DecodeArg decodes an argument. -// -// This function is public for testing with the parallel encoder. -func DecodeArg(arg string) string { - // If no encoding, fastpath out. - if !strings.ContainsAny(arg, "\\\n") { - return arg - } - - var b strings.Builder - var wasBS bool - for _, r := range arg { - if wasBS { - switch r { - case '\\': - b.WriteByte('\\') - case 'n': - b.WriteByte('\n') - default: - // This shouldn't happen. The only backslashes that reach here - // should encode '\n' and '\\' exclusively. 
- panic("badly formatted input") - } - } else if r == '\\' { - wasBS = true - continue - } else { - b.WriteRune(r) - } - wasBS = false - } - return b.String() -} - type debugField struct { name string help string diff --git a/src/cmd/internal/objabi/flag_test.go b/src/cmd/internal/objabi/flag_test.go index 935b9c2193..f9bedbb08e 100644 --- a/src/cmd/internal/objabi/flag_test.go +++ b/src/cmd/internal/objabi/flag_test.go @@ -4,23 +4,57 @@ package objabi -import "testing" +import ( + "slices" + "testing" +) -func TestDecodeArg(t *testing.T) { +func TestParseArgs(t *testing.T) { t.Parallel() tests := []struct { - arg, want string + name string + input string + want []string }{ - {"", ""}, - {"hello", "hello"}, - {"hello\\n", "hello\n"}, - {"hello\\nthere", "hello\nthere"}, - {"hello\\\\there", "hello\\there"}, - {"\\\\\\n", "\\\n"}, + // GCC-compatibility test cases from test-expandargv.c + // Source code: https://github.com/gcc-mirror/gcc/blob/releases/gcc-15.2.0/libiberty/testsuite/test-expandargv.c#L72 + {`crlf`, "a\r\nb", []string{"a", "b"}}, // test 0 + {"newline", "a\nb", []string{"a", "b"}}, // test 1 + {"null byte in arg", "a\x00b", []string{"a\x00b"}}, // test 2: GCC parser gives ["a"] + {"null byte only", "\x00", []string{"\x00"}}, // test 3: GCC parser gives [] + {"leading newline", "\na\nb", []string{"a", "b"}}, // test 4 + {"empty quotes", "a\n''\nb", []string{"a", "", "b"}}, // test 5 + {"quoted newlines", "a\n'a\n\nb'\nb", []string{"a", "a\n\nb", "b"}}, // test 6 + {"single quote no escapes", "'a\\$VAR' '\\\"'", []string{"a\\$VAR", "\\\""}}, // test 7 + {"line continuation", "\"ab\\\ncd\" ef\\\ngh", []string{"abcd", "efgh"}}, // test 8 + // test 8.1 (additional verification for Windows line separators) + {"line continuation crlf", "\"ab\\\r\ncd\" ef\\\r\ngh", []string{"abcd", "efgh"}}, + {"double quote escapes", "\"\\$VAR\" \"\\`\" \"\\\"\" \"\\\\\" \"\\n\" \"\\t\"", + []string{"$VAR", "`", `"`, `\`, `\n`, `\t`}}, // test 9 + {"whitespace only", "\t \n 
\t ", nil}, // test 10 + {"single space", " ", nil}, // test 11 + {"multiple spaces", " ", nil}, // test 12 + + // Additional edge cases for peace of mind + {"basic split", "a b c", []string{"a", "b", "c"}}, + {"tabs", "a\tb\tc", []string{"a", "b", "c"}}, + {"mixed quotes", `a 'b c' "d e"`, []string{"a", "b c", "d e"}}, + {"adjacent quotes", `'a'"b"`, []string{"ab"}}, // no whitespace - no split + {"empty input", "", nil}, + {"empty single quotes", "''", []string{""}}, + {"empty double quotes", `""`, []string{""}}, + {"nested quotes in single", `'"hello"'`, []string{`"hello"`}}, + {"nested quotes in double", `"'hello'"`, []string{"'hello'"}}, + // GCC-specific (differs from LLVM): backslash outside quotes escapes the next character + {"backslash escape outside quotes", `\abc`, []string{"abc"}}, + {"trailing backslash", `abc\`, []string{"abc"}}, } - for _, test := range tests { - if got := DecodeArg(test.arg); got != test.want { - t.Errorf("decodoeArg(%q) = %q, want %q", test.arg, got, test.want) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParseArgs([]byte(tt.input)) + if !slices.Equal(got, tt.want) { + t.Errorf("parseArgs(%q) = %q, want %q", tt.input, got, tt.want) + } + }) } } |
