aboutsummaryrefslogtreecommitdiff
path: root/src/strconv
diff options
context:
space:
mode:
authorJoe Tsai <joetsai@digital-static.net>2021-04-28 17:39:46 -0700
committerJoe Tsai <thebrokentoaster@gmail.com>2021-05-04 00:56:00 +0000
commite3769299cd3484e018e0e2a6e1b95c2b18ce4f41 (patch)
treeef742ff2179e853f4da7026ffc41c98d0f172b91 /src/strconv
parent2422c5eae5dc6539b4b7657ef7bfe58a65acb61d (diff)
downloadgo-e3769299cd3484e018e0e2a6e1b95c2b18ce4f41.tar.xz
strconv: add QuotedPrefix
QuotedPrefix is similar to Unquote, but returns the quoted string verbatim and ignores any data after the quoted string. Fixes #45033 Change-Id: I9f69fe9e3e45cbe9e63581cf1b457facb625045d Reviewed-on: https://go-review.googlesource.com/c/go/+/314775 Trust: Joe Tsai <joetsai@digital-static.net> Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src/strconv')
-rw-r--r--src/strconv/bytealg.go6
-rw-r--r--src/strconv/bytealg_bootstrap.go8
-rw-r--r--src/strconv/quote.go161
-rw-r--r--src/strconv/quote_test.go63
4 files changed, 153 insertions, 85 deletions
diff --git a/src/strconv/bytealg.go b/src/strconv/bytealg.go
index 9780c28ef3..a2bb12c5f2 100644
--- a/src/strconv/bytealg.go
+++ b/src/strconv/bytealg.go
@@ -9,7 +9,7 @@ package strconv
import "internal/bytealg"
-// contains reports whether the string contains the byte c.
-func contains(s string, c byte) bool {
- return bytealg.IndexByteString(s, c) != -1
+// index returns the index of the first instance of c in s, or -1 if missing.
+func index(s string, c byte) int {
+ return bytealg.IndexByteString(s, c)
}
diff --git a/src/strconv/bytealg_bootstrap.go b/src/strconv/bytealg_bootstrap.go
index 875a0eb147..0ed79f4de7 100644
--- a/src/strconv/bytealg_bootstrap.go
+++ b/src/strconv/bytealg_bootstrap.go
@@ -7,12 +7,12 @@
package strconv
-// contains reports whether the string contains the byte c.
-func contains(s string, c byte) bool {
+// index returns the index of the first instance of c in s, or -1 if missing.
+func index(s string, c byte) int {
for i := 0; i < len(s); i++ {
if s[i] == c {
- return true
+ return i
}
}
- return false
+ return -1
}
diff --git a/src/strconv/quote.go b/src/strconv/quote.go
index db0dbb288b..b3bbb1612b 100644
--- a/src/strconv/quote.go
+++ b/src/strconv/quote.go
@@ -15,6 +15,11 @@ const (
upperhex = "0123456789ABCDEF"
)
+// contains reports whether the string contains the byte c.
+func contains(s string, c byte) bool {
+ return index(s, c) != -1
+}
+
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
}
@@ -359,80 +364,132 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
return
}
+// QuotedPrefix returns the quoted string (as understood by Unquote) at the prefix of s.
+// If s does not start with a valid quoted string, QuotedPrefix returns an error.
+func QuotedPrefix(s string) (string, error) {
+ out, _, err := unquote(s, false)
+ return out, err
+}
+
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
func Unquote(s string) (string, error) {
- n := len(s)
- if n < 2 {
+ out, rem, err := unquote(s, true)
+ if len(rem) > 0 {
return "", ErrSyntax
}
- quote := s[0]
- if quote != s[n-1] {
- return "", ErrSyntax
+ return out, err
+}
+
+// unquote parses a quoted string at the start of the input,
+// returning the parsed prefix, the remaining suffix, and any parse errors.
+// If unescape is true, the parsed prefix is unescaped,
+// otherwise the input prefix is provided verbatim.
+func unquote(in string, unescape bool) (out, rem string, err error) {
+ // Determine the quote form and optimistically find the terminating quote.
+ if len(in) < 2 {
+ return "", in, ErrSyntax
+ }
+ quote := in[0]
+ end := index(in[1:], quote)
+ if end < 0 {
+ return "", in, ErrSyntax
}
- s = s[1 : n-1]
+ end += 2 // position after terminating quote; may be wrong if escape sequences are present
- if quote == '`' {
- if contains(s, '`') {
- return "", ErrSyntax
+ switch quote {
+ case '`':
+ switch {
+ case !unescape:
+ out = in[:end] // include quotes
+ case !contains(in[:end], '\r'):
+ out = in[len("`") : end-len("`")] // exclude quotes
+ default:
+ // Carriage return characters ('\r') inside raw string literals
+ // are discarded from the raw string value.
+ buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
+ for i := len("`"); i < end-len("`"); i++ {
+ if in[i] != '\r' {
+ buf = append(buf, in[i])
+ }
+ }
+ out = string(buf)
}
- if contains(s, '\r') {
- // -1 because we know there is at least one \r to remove.
- buf := make([]byte, 0, len(s)-1)
- for i := 0; i < len(s); i++ {
- if s[i] != '\r' {
- buf = append(buf, s[i])
+ // NOTE: Prior implementations did not verify that raw strings consist
+ // of valid UTF-8 characters and we continue to not verify it as such.
+ // The Go specification does not explicitly require valid UTF-8,
+ // but only mention that it is implicitly valid for Go source code
+ // (which must be valid UTF-8).
+ return out, in[end:], nil
+ case '"', '\'':
+ // Handle quoted strings without any escape sequences.
+ if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
+ var valid bool
+ switch quote {
+ case '"':
+ valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
+ case '\'':
+ r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
+ valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
+ }
+ if valid {
+ out = in[:end]
+ if unescape {
+ out = out[1 : end-1] // exclude quotes
}
+ return out, in[end:], nil
}
- return string(buf), nil
}
- return s, nil
- }
- if quote != '"' && quote != '\'' {
- return "", ErrSyntax
- }
- if contains(s, '\n') {
- return "", ErrSyntax
- }
- // Is it trivial? Avoid allocation.
- if !contains(s, '\\') && !contains(s, quote) {
- switch quote {
- case '"':
- if utf8.ValidString(s) {
- return s, nil
+ // Handle quoted strings with escape sequences.
+ var buf []byte
+ in0 := in
+ in = in[1:] // skip starting quote
+ if unescape {
+ buf = make([]byte, 0, 3*end/2) // try to avoid more allocations
+ }
+ for len(in) > 0 && in[0] != quote {
+ // Process the next character,
+ // rejecting any unescaped newline characters which are invalid.
+ r, multibyte, rem, err := UnquoteChar(in, quote)
+ if in[0] == '\n' || err != nil {
+ return "", in0, ErrSyntax
}
- case '\'':
- r, size := utf8.DecodeRuneInString(s)
- if size == len(s) && (r != utf8.RuneError || size != 1) {
- return s, nil
+ in = rem
+
+ // Append the character if unescaping the input.
+ if unescape {
+ if r < utf8.RuneSelf || !multibyte {
+ buf = append(buf, byte(r))
+ } else {
+ var arr [utf8.UTFMax]byte
+ n := utf8.EncodeRune(arr[:], r)
+ buf = append(buf, arr[:n]...)
+ }
}
- }
- }
- var runeTmp [utf8.UTFMax]byte
- buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
- for len(s) > 0 {
- c, multibyte, ss, err := UnquoteChar(s, quote)
- if err != nil {
- return "", err
+ // Single quoted strings must be a single character.
+ if quote == '\'' {
+ break
+ }
}
- s = ss
- if c < utf8.RuneSelf || !multibyte {
- buf = append(buf, byte(c))
- } else {
- n := utf8.EncodeRune(runeTmp[:], c)
- buf = append(buf, runeTmp[:n]...)
+
+ // Verify that the string ends with a terminating quote.
+ if !(len(in) > 0 && in[0] == quote) {
+ return "", in0, ErrSyntax
}
- if quote == '\'' && len(s) != 0 {
- // single-quoted must be single character
- return "", ErrSyntax
+ in = in[1:] // skip terminating quote
+
+ if unescape {
+ return string(buf), in, nil
}
+ return in0[:len(in0)-len(in)], in, nil
+ default:
+ return "", in, ErrSyntax
}
- return string(buf), nil
}
// bsearch16 returns the smallest i such that a[i] >= x.
diff --git a/src/strconv/quote_test.go b/src/strconv/quote_test.go
index f1faf137bd..4750be2740 100644
--- a/src/strconv/quote_test.go
+++ b/src/strconv/quote_test.go
@@ -6,6 +6,7 @@ package strconv_test
import (
. "strconv"
+ "strings"
"testing"
"unicode"
)
@@ -297,6 +298,7 @@ var misquoted = []string{
`"\z"`,
"`",
"`xxx",
+ "``x\r",
"`\"",
`"\'"`,
`'\"'`,
@@ -307,22 +309,13 @@ var misquoted = []string{
func TestUnquote(t *testing.T) {
for _, tt := range unquotetests {
- if out, err := Unquote(tt.in); err != nil || out != tt.out {
- t.Errorf("Unquote(%#q) = %q, %v want %q, nil", tt.in, out, err, tt.out)
- }
+ testUnquote(t, tt.in, tt.out, nil)
}
-
- // run the quote tests too, backward
for _, tt := range quotetests {
- if in, err := Unquote(tt.out); in != tt.in {
- t.Errorf("Unquote(%#q) = %q, %v, want %q, nil", tt.out, in, err, tt.in)
- }
+ testUnquote(t, tt.out, tt.in, nil)
}
-
for _, s := range misquoted {
- if out, err := Unquote(s); out != "" || err != ErrSyntax {
- t.Errorf("Unquote(%#q) = %q, %v want %q, %v", s, out, err, "", ErrSyntax)
- }
+ testUnquote(t, s, "", ErrSyntax)
}
}
@@ -333,26 +326,44 @@ func TestUnquoteInvalidUTF8(t *testing.T) {
// one of:
want string
- wantErr string
+ wantErr error
}{
{in: `"foo"`, want: "foo"},
- {in: `"foo`, wantErr: "invalid syntax"},
+ {in: `"foo`, wantErr: ErrSyntax},
{in: `"` + "\xc0" + `"`, want: "\xef\xbf\xbd"},
{in: `"a` + "\xc0" + `"`, want: "a\xef\xbf\xbd"},
{in: `"\t` + "\xc0" + `"`, want: "\t\xef\xbf\xbd"},
}
- for i, tt := range tests {
- got, err := Unquote(tt.in)
- var gotErr string
- if err != nil {
- gotErr = err.Error()
- }
- if gotErr != tt.wantErr {
- t.Errorf("%d. Unquote(%q) = err %v; want %q", i, tt.in, err, tt.wantErr)
- }
- if tt.wantErr == "" && err == nil && got != tt.want {
- t.Errorf("%d. Unquote(%q) = %02x; want %02x", i, tt.in, []byte(got), []byte(tt.want))
- }
+ for _, tt := range tests {
+ testUnquote(t, tt.in, tt.want, tt.wantErr)
+ }
+}
+
+func testUnquote(t *testing.T, in, want string, wantErr error) {
+ // Test Unquote.
+ got, gotErr := Unquote(in)
+ if got != want || gotErr != wantErr {
+ t.Errorf("Unquote(%q) = (%q, %v), want (%q, %v)", in, got, gotErr, want, wantErr)
+ }
+
+ // Test QuotedPrefix.
+ // Adding an arbitrary suffix should not change the result of QuotedPrefix
+ // assume that the suffix doesn't accidentally terminate a truncated input.
+ if gotErr == nil {
+ want = in
+ }
+ suffix := "\n\r\\\"`'" // special characters for quoted strings
+ if len(in) > 0 {
+ suffix = strings.ReplaceAll(suffix, in[:1], "")
+ }
+ in += suffix
+ got, gotErr = QuotedPrefix(in)
+ if gotErr == nil && wantErr != nil {
+ _, wantErr = Unquote(got) // original input had trailing junk, reparse with only valid prefix
+ want = got
+ }
+ if got != want || gotErr != wantErr {
+ t.Errorf("QuotedPrefix(%q) = (%q, %v), want (%q, %v)", in, got, gotErr, want, wantErr)
}
}