about summary refs log tree commit diff
path: root/src/html/template
diff options
context:
space:
mode:
Diffstat (limited to 'src/html/template')
-rw-r--r-- src/html/template/context.go 14
-rw-r--r-- src/html/template/escape.go 26
-rw-r--r-- src/html/template/escape_test.go 47
-rw-r--r-- src/html/template/transition.go 15
4 files changed, 101 insertions, 1 deletion
diff --git a/src/html/template/context.go b/src/html/template/context.go
index e07a0c4a02..16b5e65317 100644
--- a/src/html/template/context.go
+++ b/src/html/template/context.go
@@ -174,6 +174,20 @@ func isInTag(s state) bool {
return false
}
+// isInScriptLiteral reports whether s is one of the JS string or regexp literal
+// states inside a <script> element. In those states, occurrences of "<!--",
+// "<script", and "</script" in the template text need to be treated specially.
+func isInScriptLiteral(s state) bool {
+ // The comment states (stateJSBlockCmt, stateJSLineCmt, stateJSHTMLOpenCmt,
+ // stateJSHTMLCloseCmt) are deliberately not listed: their content is already
+ // omitted from the output, so there is nothing left to escape.
+ switch s {
+ case stateJSDqStr, stateJSSqStr, stateJSBqStr, stateJSRegexp:
+ return true
+ }
+ return false
+}
+
// delim is the delimiter that will end the current HTML attribute.
// NOTE(review): values look like the delim* constants that index delimEnds
// (e.g. delimSpaceOrTagEnd) — confirm against the full file.
type delim uint8
diff --git a/src/html/template/escape.go b/src/html/template/escape.go
index 473564c37a..01f6303a44 100644
--- a/src/html/template/escape.go
+++ b/src/html/template/escape.go
@@ -10,6 +10,7 @@ import (
"html"
"internal/godebug"
"io"
+ "regexp"
"text/template"
"text/template/parse"
)
@@ -729,6 +730,26 @@ var delimEnds = [...]string{
delimSpaceOrTagEnd: " \t\n\f\r>",
}
+var (
+ // Per WHATWG HTML specification, section 4.12.1.3, there are extremely
+ // complicated rules for how to handle the set of opening tags <!--,
+ // <script, and </script when they appear in JS literals (i.e. strings,
+ // regexps, and comments). The specification suggests a simple solution,
+ // rather than implementing the arcane ABNF: escape the opening bracket as
+ // \x3C. A regexp is used because it makes the case-insensitive find-replace
+ // much simpler; $1 in the replacement re-emits the matched tag text unchanged.
+ specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)")
+ specialScriptTagReplacement = []byte("\\x3C$1")
+)
+// containsSpecialScriptTag reports whether s contains "<!--", "<script", or "</script" in any letter case.
+func containsSpecialScriptTag(s []byte) bool {
+ return specialScriptTagRE.Match(s)
+}
+// escapeSpecialScriptTags returns a copy of s in which the '<' opening each "<!--", "<script", or "</script" (any letter case) is replaced by \x3C.
+func escapeSpecialScriptTags(s []byte) []byte {
+ return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
+}
+
var doctypeBytes = []byte("<!DOCTYPE")
// escapeText escapes a text template node.
@@ -787,6 +808,11 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
b.Write(s[written:cs])
written = i1
}
+ if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
+ b.Write(s[written:i])
+ b.Write(escapeSpecialScriptTags(s[i:i1]))
+ written = i1
+ }
if i == i1 && c.state == c1.state {
panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
}
diff --git a/src/html/template/escape_test.go b/src/html/template/escape_test.go
index f60c875927..8a4f62e92f 100644
--- a/src/html/template/escape_test.go
+++ b/src/html/template/escape_test.go
@@ -514,6 +514,21 @@ func TestEscape(t *testing.T) {
"<script>\n</script>",
},
{
+ "Special tags in <script> string literals",
+ `<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`,
+ `<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`,
+ },
+ {
+ "Special tags in <script> string literals (mixed case)",
+ `<script>var a = "<!-- <ScripT </ScripT"</script>`,
+ `<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`,
+ },
+ {
+ "Special tags in <script> regex literals (mixed case)",
+ `<script>var a = /<!-- <ScripT </ScripT/</script>`,
+ `<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`,
+ },
+ {
"CSS comments",
"<style>p// paragraph\n" +
`{border: 1px/* color */{{"#00f"}}}</style>`,
@@ -1533,8 +1548,38 @@ func TestEscapeText(t *testing.T) {
context{state: stateJS, element: elementScript},
},
{
+ // <script and </script tags are escaped, so </script> should not
+ // cause us to exit the JS state.
`<script>document.write("<script>alert(1)</script>");`,
- context{state: stateText},
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>`,
+ context{state: stateJSDqStr, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)</script>`,
+ context{state: stateJSDqStr, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)<!--`,
+ context{state: stateJSDqStr, element: elementScript},
+ },
+ {
+ `<script>document.write("<script>alert(1)</Script>");`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>document.write("<!--");`,
+ context{state: stateJS, element: elementScript},
+ },
+ {
+ `<script>let a = /</script`,
+ context{state: stateJSRegexp, element: elementScript},
+ },
+ {
+ `<script>let a = /</script/`,
+ context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp},
},
{
`<script type="text/template">`,
diff --git a/src/html/template/transition.go b/src/html/template/transition.go
index d8ff18abb0..432c365d3c 100644
--- a/src/html/template/transition.go
+++ b/src/html/template/transition.go
@@ -214,6 +214,11 @@ var (
// element states.
func tSpecialTagEnd(c context, s []byte) (context, int) {
if c.element != elementNone {
+ // script end tags ("</script") within script literals are ignored, so that
+ // we can properly escape them.
+ if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
+ return c, len(s)
+ }
if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
return context{}, i
}
@@ -353,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) {
inCharset = true
case ']':
inCharset = false
+ case '/':
+ // If "</script" appears in a regex literal, the '/' should not
+ // close the regex literal, and it will later be escaped to
+ // "\x3C/script" in escapeText.
+ if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
+ i++
+ } else if !inCharset {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, i + 1
+ }
default:
// end delimiter
if !inCharset {