aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Julien Cretel <jub0bsinthecloud@gmail.com> 2025-10-23 16:44:15 +0000
committer Sean Liao <sean@liao.dev> 2025-10-23 11:58:12 -0700
commit c4e910895b3d91e4c7d4d6b5cd0af5e0eb787b72 (patch)
tree 4b35db15cca9f8c5bb855c6122f3ecb3636f46e3 /src
parent 3f6ac3a10fb09db9cf7c55d84fbbf0bd3968d25d (diff)
download go-c4e910895b3d91e4c7d4d6b5cd0af5e0eb787b72.tar.xz
net/url: speed up escape and unescape
This change adds a generated 8-bit bitmask for use in functions shouldEscape and ishex.

Function shouldEscape is now inlineable. Function escape is now much faster; function unescape is a bit faster. Here are some benchmark results (no change to allocations):

goos: darwin
goarch: amd64
pkg: net/url
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                     │     old      │                 new                  │
                     │    sec/op    │    sec/op     vs base                │
QueryEscape/#00-8       58.38n ± 1%    35.98n ± 1%  -38.38% (p=0.000 n=20)
QueryEscape/#01-8      303.50n ± 0%    94.77n ± 0%  -68.77% (p=0.000 n=20)
QueryEscape/#02-8      202.90n ± 0%    78.66n ± 1%  -61.23% (p=0.000 n=20)
QueryEscape/#03-8       444.5n ± 0%    145.9n ± 0%  -67.17% (p=0.000 n=20)
QueryEscape/#04-8      2678.0n ± 0%    913.7n ± 0%  -65.88% (p=0.000 n=20)
PathEscape/#00-8        81.34n ± 0%    44.64n ± 1%  -45.12% (p=0.000 n=20)
PathEscape/#01-8       307.65n ± 0%    96.71n ± 1%  -68.56% (p=0.000 n=20)
PathEscape/#02-8       200.80n ± 1%    78.25n ± 0%  -61.03% (p=0.000 n=20)
PathEscape/#03-8        450.1n ± 1%    145.5n ± 0%  -67.67% (p=0.000 n=20)
PathEscape/#04-8       2663.5n ± 0%    876.5n ± 0%  -67.09% (p=0.000 n=20)
QueryUnescape/#00-8     53.32n ± 1%    51.67n ± 1%   -3.09% (p=0.000 n=20)
QueryUnescape/#01-8     161.0n ± 1%    136.2n ± 1%  -15.40% (p=0.000 n=20)
QueryUnescape/#02-8     126.1n ± 1%    118.3n ± 1%   -6.23% (p=0.000 n=20)
QueryUnescape/#03-8     294.6n ± 0%    273.1n ± 0%   -7.30% (p=0.000 n=20)
QueryUnescape/#04-8     1.511µ ± 0%    1.411µ ± 0%   -6.62% (p=0.000 n=20)
PathUnescape/#00-8      63.84n ± 1%    53.59n ± 1%  -16.05% (p=0.000 n=20)
PathUnescape/#01-8      163.6n ± 3%    137.9n ± 1%  -15.71% (p=0.000 n=20)
PathUnescape/#02-8      126.4n ± 1%    119.1n ± 1%   -5.78% (p=0.000 n=20)
PathUnescape/#03-8      294.2n ± 0%    273.3n ± 0%   -7.12% (p=0.000 n=20)
PathUnescape/#04-8      1.554µ ± 0%    1.417µ ± 0%   -8.78% (p=0.000 n=20)
geomean                 277.8n         162.7n       -41.44%

This change draws heavy inspiration from CL 174998, which showed promise but stalled years ago.
Updates #17860

Change-Id: Idcbb1696608998b9e2fc91e1f2a488d8f1f6028c
GitHub-Last-Rev: ff360c2f1b51b1e725d10c0864a6b698d3a5ffc3
GitHub-Pull-Request: golang/go#75914
Reviewed-on: https://go-review.googlesource.com/c/go/+/712200
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Takuto Nagami <logica0419@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Diffstat (limited to 'src')
-rw-r--r-- src/net/url/encoding_table.go 114
-rw-r--r-- src/net/url/gen_encoding_table.go 234
-rw-r--r-- src/net/url/url.go 106
3 files changed, 354 insertions, 100 deletions
diff --git a/src/net/url/encoding_table.go b/src/net/url/encoding_table.go
new file mode 100644
index 0000000000..60b3564948
--- /dev/null
+++ b/src/net/url/encoding_table.go
@@ -0,0 +1,114 @@
+// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package url
+
+type encoding uint8
+
+const (
+ encodePath encoding = 1 << iota
+ encodePathSegment
+ encodeHost
+ encodeZone
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+
+ // hexChar is actually NOT an encoding mode, but there are only seven
+ // encoding modes. We might as well abuse the otherwise unused most
+ // significant bit in uint8 to indicate whether a character is
+ // hexadecimal.
+ hexChar
+)
+
+var table = [256]encoding{
+ '!': encodeFragment | encodeZone | encodeHost,
+ '"': encodeZone | encodeHost,
+ '$': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '&': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '\'': encodeZone | encodeHost,
+ '(': encodeFragment | encodeZone | encodeHost,
+ ')': encodeFragment | encodeZone | encodeHost,
+ '*': encodeFragment | encodeZone | encodeHost,
+ '+': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ',': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
+ '-': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '.': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '/': encodeFragment | encodePath,
+ '0': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '1': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '2': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '3': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '4': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '5': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '6': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '7': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '8': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '9': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ':': encodeFragment | encodeZone | encodeHost | encodePathSegment | encodePath,
+ ';': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePath,
+ '<': encodeZone | encodeHost,
+ '=': encodeFragment | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '>': encodeZone | encodeHost,
+ '?': encodeFragment,
+ '@': encodeFragment | encodePathSegment | encodePath,
+ 'A': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'B': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'C': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'D': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'E': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'F': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'G': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'H': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'I': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'J': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'K': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'L': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'M': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'N': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'O': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'P': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'R': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'S': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'T': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'U': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'V': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'W': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'X': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'Z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '[': encodeZone | encodeHost,
+ ']': encodeZone | encodeHost,
+ '_': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'a': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'b': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'c': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'd': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'e': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'f': hexChar | encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'g': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'h': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'i': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'j': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'k': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'l': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'm': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'n': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'o': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'p': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'q': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'r': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 's': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 't': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'u': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'v': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'w': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'x': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'y': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ 'z': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+ '~': encodeFragment | encodeQueryComponent | encodeUserPassword | encodeZone | encodeHost | encodePathSegment | encodePath,
+}
diff --git a/src/net/url/gen_encoding_table.go b/src/net/url/gen_encoding_table.go
new file mode 100644
index 0000000000..5defe5046b
--- /dev/null
+++ b/src/net/url/gen_encoding_table.go
@@ -0,0 +1,234 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+package main
+
+import (
+ "bytes"
+ _ "embed"
+ "fmt"
+ "go/format"
+ "io"
+ "log"
+ "maps"
+ "os"
+ "slices"
+ "strconv"
+ "strings"
+)
+
+// We embed this source file in the resulting code-generation program in order
+// to extract the definitions of the encoding type and constants from it and
+// include them in the generated file.
+//
+//go:embed gen_encoding_table.go
+var genSource string
+
+const filename = "encoding_table.go"
+
+func main() {
+ var out bytes.Buffer // the whole generated file is assembled in memory first
+ fmt.Fprintln(&out, "// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.")
+ fmt.Fprintln(&out)
+ fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.")
+ fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style")
+ fmt.Fprintln(&out, "// license that can be found in the LICENSE file.")
+ fmt.Fprintln(&out)
+ fmt.Fprintln(&out, "package url")
+ fmt.Fprintln(&out)
+ generateEnc(&out, genSource) // the encoding type and constants, copied from this file's own source
+ generateTable(&out) // the 256-entry lookup table
+
+ formatted, err := format.Source(out.Bytes()) // gofmt the output before it is written
+ if err != nil {
+ log.Fatal("format: ", err) // trailing space needed: Print-style formatting adds none next to a string operand
+ }
+
+ err = os.WriteFile(filename, formatted, 0644)
+ if err != nil {
+ log.Fatal("WriteFile: ", err) // same spacing rule as above
+ }
+}
+
+func generateEnc(w io.Writer, src string) { // generateEnc copies to w the lines of src found between the START and END encoding markers
+ var writeLine bool // becomes true once the START marker has been seen
+ for line := range strings.Lines(src) {
+ if strings.HasPrefix(line, "// START encoding") {
+ writeLine = true
+ continue // the START marker line itself is not copied
+ }
+ if strings.HasPrefix(line, "// END encoding") {
+ return // stop at the END marker; nothing after it is copied
+ }
+ if writeLine {
+ fmt.Fprint(w, line) // strings.Lines keeps each line's terminating newline, hence Fprint and not Fprintln
+ }
+ }
+}
+
+func generateTable(w io.Writer) { // generateTable writes the declaration of the table lookup array to w
+ fmt.Fprintln(w, "var table = [256]encoding{")
+
+ // encNames is a map, so sort its keys (in decreasing order) to guarantee a stable output.
+ sortedEncs := slices.Sorted(maps.Keys(encNames))
+ slices.Reverse(sortedEncs)
+
+ for i := range 256 {
+ c := byte(i)
+ var lineBuf bytes.Buffer
+
+ // Write key (the byte as a quoted rune literal) to line buffer.
+ lineBuf.WriteString(strconv.QuoteRune(rune(c)))
+
+ lineBuf.WriteByte(':')
+
+ // Write value (the constant names joined by '|') to line buffer.
+ blankVal := true
+ if ishex(c) {
+ // Set the hexChar bit if this char is hexadecimal.
+ lineBuf.WriteString("hexChar")
+ blankVal = false
+ }
+ for _, enc := range sortedEncs {
+ if !shouldEscape(c, enc) {
+ if !blankVal {
+ lineBuf.WriteByte('|')
+ }
+ // Set this encoding mode's bit if this char should NOT be
+ // escaped, so a clear bit means "escape in this mode".
+ name := encNames[enc]
+ lineBuf.WriteString(name)
+ blankVal = false
+ }
+ }
+
+ if !blankVal { // entries with no bits set are omitted; the array literal leaves them zero
+ lineBuf.WriteString(",\n")
+ w.Write(lineBuf.Bytes()) // NOTE(review): Write's error is ignored; main passes a *bytes.Buffer
+ }
+ }
+ fmt.Fprintln(w, "}")
+}
+
+// START encoding (keep this marker comment in sync with generateEnc)
+type encoding uint8
+
+const (
+ encodePath encoding = 1 << iota
+ encodePathSegment
+ encodeHost
+ encodeZone
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+
+ // hexChar is actually NOT an encoding mode, but there are only seven
+ // encoding modes. We might as well abuse the otherwise unused most
+ // significant bit in uint8 to indicate whether a character is
+ // hexadecimal.
+ hexChar
+)
+
+// END encoding (keep this marker comment in sync with generateEnc)
+
+// encNames maps each encoding mode to its identifier in the generated table; keep it in sync with the encoding mode constants (hexChar is absent because generateTable emits it separately via ishex).
+var encNames = map[encoding]string{
+ encodePath: "encodePath",
+ encodePathSegment: "encodePathSegment",
+ encodeHost: "encodeHost",
+ encodeZone: "encodeZone",
+ encodeUserPassword: "encodeUserPassword",
+ encodeQueryComponent: "encodeQueryComponent",
+ encodeFragment: "encodeFragment",
+}
+
+// shouldEscape reports whether the byte c should be escaped when appearing in
+// the part of a URL string selected by mode, according to RFC 3986.
+//
+// Please be informed that for now shouldEscape does not check all
+// reserved characters correctly. See golang.org/issue/5684.
+func shouldEscape(c byte, mode encoding) bool { // mode is compared by equality below, so it is expected to be a single mode constant, not an OR of several
+ // §2.3 Unreserved characters (alphanum)
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+ return false
+ }
+
+ if mode == encodeHost || mode == encodeZone {
+ // §3.2.2 Host allows
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ // as part of reg-name.
+ // We add : because we include :port as part of host.
+ // We add [ ] because we include [ipv6]:port as part of host.
+ // We add < > because they're the only characters left that
+ // we could possibly allow, and Parse will reject them if we
+ // escape them (because hosts can't use %-encoding for
+ // ASCII bytes).
+ switch c {
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
+ return false
+ }
+ }
+
+ switch c {
+ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
+ return false
+
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
+ // Different sections of the URL allow a few of
+ // the reserved characters to appear unescaped.
+ switch mode {
+ case encodePath: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments. This package
+ // only manipulates the path as a whole, so we allow those
+ // last three as well. That leaves only ? to escape.
+ return c == '?'
+
+ case encodePathSegment: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments.
+ return c == '/' || c == ';' || c == ',' || c == '?'
+
+ case encodeUserPassword: // §3.2.1
+ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
+ // userinfo, so we must escape only '@', '/', and '?'.
+ // The parsing of userinfo treats ':' as special so we must escape
+ // that too.
+ return c == '@' || c == '/' || c == '?' || c == ':'
+
+ case encodeQueryComponent: // §3.4
+ // The RFC reserves (so we must escape) everything.
+ return true
+
+ case encodeFragment: // §4.1
+ // The RFC text is silent but the grammar allows
+ // everything, so escape nothing.
+ return false
+ }
+ }
+
+ if mode == encodeFragment {
+ // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
+ // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
+ // need to be escaped. To minimize potential breakage, we apply two restrictions:
+ // (1) we always escape sub-delims outside of the fragment, and (2) we always
+ // escape single quote to avoid breaking callers that had previously assumed that
+ // single quotes would be escaped. See issue #19917.
+ switch c {
+ case '!', '(', ')', '*':
+ return false
+ }
+ }
+
+ // Everything else must be escaped.
+ return true
+}
+
+func ishex(c byte) bool { // ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F)
+ return '0' <= c && c <= '9' ||
+ 'a' <= c && c <= 'f' ||
+ 'A' <= c && c <= 'F'
+}
diff --git a/src/net/url/url.go b/src/net/url/url.go
index 4508f26608..71fd8f59b3 100644
--- a/src/net/url/url.go
+++ b/src/net/url/url.go
@@ -7,6 +7,9 @@
// See RFC 3986. This package generally follows RFC 3986, except where
// it deviates for compatibility reasons.
// RFC 6874 followed for IPv6 zone literals.
+
+//go:generate go run gen_encoding_table.go
+
package url
// When sending changes, first search old issues for history on decisions.
@@ -50,15 +53,7 @@ func (e *Error) Temporary() bool {
const upperhex = "0123456789ABCDEF"
func ishex(c byte) bool {
- switch {
- case '0' <= c && c <= '9':
- return true
- case 'a' <= c && c <= 'f':
- return true
- case 'A' <= c && c <= 'F':
- return true
- }
- return false
+ return table[c]&hexChar != 0
}
func unhex(c byte) byte {
@@ -74,18 +69,6 @@ func unhex(c byte) byte {
}
}
-type encoding int
-
-const (
- encodePath encoding = 1 + iota
- encodePathSegment
- encodeHost
- encodeZone
- encodeUserPassword
- encodeQueryComponent
- encodeFragment
-)
-
type EscapeError string
func (e EscapeError) Error() string {
@@ -98,86 +81,9 @@ func (e InvalidHostError) Error() string {
return "invalid character " + strconv.Quote(string(e)) + " in host name"
}
-// Return true if the specified character should be escaped when
-// appearing in a URL string, according to RFC 3986.
-//
-// Please be informed that for now shouldEscape does not check all
-// reserved characters correctly. See golang.org/issue/5684.
+// See the reference implementation in gen_encoding_table.go.
func shouldEscape(c byte, mode encoding) bool {
- // §2.3 Unreserved characters (alphanum)
- if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
- return false
- }
-
- if mode == encodeHost || mode == encodeZone {
- // §3.2.2 Host allows
- // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
- // as part of reg-name.
- // We add : because we include :port as part of host.
- // We add [ ] because we include [ipv6]:port as part of host.
- // We add < > because they're the only characters left that
- // we could possibly allow, and Parse will reject them if we
- // escape them (because hosts can't use %-encoding for
- // ASCII bytes).
- switch c {
- case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
- return false
- }
- }
-
- switch c {
- case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
- return false
-
- case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
- // Different sections of the URL allow a few of
- // the reserved characters to appear unescaped.
- switch mode {
- case encodePath: // §3.3
- // The RFC allows : @ & = + $ but saves / ; , for assigning
- // meaning to individual path segments. This package
- // only manipulates the path as a whole, so we allow those
- // last three as well. That leaves only ? to escape.
- return c == '?'
-
- case encodePathSegment: // §3.3
- // The RFC allows : @ & = + $ but saves / ; , for assigning
- // meaning to individual path segments.
- return c == '/' || c == ';' || c == ',' || c == '?'
-
- case encodeUserPassword: // §3.2.1
- // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
- // userinfo, so we must escape only '@', '/', and '?'.
- // The parsing of userinfo treats ':' as special so we must escape
- // that too.
- return c == '@' || c == '/' || c == '?' || c == ':'
-
- case encodeQueryComponent: // §3.4
- // The RFC reserves (so we must escape) everything.
- return true
-
- case encodeFragment: // §4.1
- // The RFC text is silent but the grammar allows
- // everything, so escape nothing.
- return false
- }
- }
-
- if mode == encodeFragment {
- // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
- // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
- // need to be escaped. To minimize potential breakage, we apply two restrictions:
- // (1) we always escape sub-delims outside of the fragment, and (2) we always
- // escape single quote to avoid breaking callers that had previously assumed that
- // single quotes would be escaped. See issue #19917.
- switch c {
- case '!', '(', ')', '*':
- return false
- }
- }
-
- // Everything else must be escaped.
- return true
+ return table[c]&mode == 0
}
// QueryUnescape does the inverse transformation of [QueryEscape],