aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNigel Tao <nigeltao@golang.org>2012-01-10 11:06:09 +1100
committerNigel Tao <nigeltao@golang.org>2012-01-10 11:06:09 +1100
commit748fab9d11e23b8f8c17cd583f995252ec86bfd3 (patch)
tree69b48c8af6324a79021585a1561f525c8f74c1ca
parent0ad241dd550d224f65c634f767f5e73a025fc2bc (diff)
downloadgo-748fab9d11e23b8f8c17cd583f995252ec86bfd3.tar.xz
html: foreign element HTML integration points, tag name adjustment,
shorten the MathML namespace abbreviation from "mathml" to "math". Python's html5lib uses "mathml", but I think that that is an internal implementation detail; the test cases use "math". Pass tests10.dat, test 30: <div><svg><path><foreignObject><math></div>a | <html> | <head> | <body> | <div> | <svg svg> | <svg path> | <svg foreignObject> | <math math> | "a" R=andybalholm CC=golang-dev https://golang.org/cl/5529044
-rw-r--r--src/pkg/html/foreign.go59
-rw-r--r--src/pkg/html/node.go4
-rw-r--r--src/pkg/html/parse.go222
-rw-r--r--src/pkg/html/parse_test.go2
4 files changed, 196 insertions, 91 deletions
diff --git a/src/pkg/html/foreign.go b/src/pkg/html/foreign.go
index 9a0520398c..3ba81ce4d6 100644
--- a/src/pkg/html/foreign.go
+++ b/src/pkg/html/foreign.go
@@ -23,6 +23,23 @@ func adjustForeignAttributes(aa []Attribute) {
}
}
+func htmlIntegrationPoint(n *Node) bool {
+ if n.Type != ElementNode {
+ return false
+ }
+ switch n.Namespace {
+ case "math":
+ // TODO: annotation-xml elements whose start tags have "text/html" or
+ // "application/xhtml+xml" encodings.
+ case "svg":
+ switch n.Data {
+ case "desc", "foreignObject", "title":
+ return true
+ }
+ }
+ return false
+}
+
// Section 12.2.5.5.
var breakout = map[string]bool{
"b": true,
@@ -72,4 +89,44 @@ var breakout = map[string]bool{
"var": true,
}
-// TODO: add look-up tables for MathML and SVG adjustments.
+// Section 12.2.5.5.
+var svgTagNameAdjustments = map[string]string{
+ "altglyph": "altGlyph",
+ "altglyphdef": "altGlyphDef",
+ "altglyphitem": "altGlyphItem",
+ "animatecolor": "animateColor",
+ "animatemotion": "animateMotion",
+ "animatetransform": "animateTransform",
+ "clippath": "clipPath",
+ "feblend": "feBlend",
+ "fecolormatrix": "feColorMatrix",
+ "fecomponenttransfer": "feComponentTransfer",
+ "fecomposite": "feComposite",
+ "feconvolvematrix": "feConvolveMatrix",
+ "fediffuselighting": "feDiffuseLighting",
+ "fedisplacementmap": "feDisplacementMap",
+ "fedistantlight": "feDistantLight",
+ "feflood": "feFlood",
+ "fefunca": "feFuncA",
+ "fefuncb": "feFuncB",
+ "fefuncg": "feFuncG",
+ "fefuncr": "feFuncR",
+ "fegaussianblur": "feGaussianBlur",
+ "feimage": "feImage",
+ "femerge": "feMerge",
+ "femergenode": "feMergeNode",
+ "femorphology": "feMorphology",
+ "feoffset": "feOffset",
+ "fepointlight": "fePointLight",
+ "fespecularlighting": "feSpecularLighting",
+ "fespotlight": "feSpotLight",
+ "fetile": "feTile",
+ "feturbulence": "feTurbulence",
+ "foreignobject": "foreignObject",
+ "glyphref": "glyphRef",
+ "lineargradient": "linearGradient",
+ "radialgradient": "radialGradient",
+ "textpath": "textPath",
+}
+
+// TODO: add look-up tables for MathML and SVG attribute adjustments.
diff --git a/src/pkg/html/node.go b/src/pkg/html/node.go
index 4ba3f5fb62..83f17308b1 100644
--- a/src/pkg/html/node.go
+++ b/src/pkg/html/node.go
@@ -26,6 +26,10 @@ var scopeMarker = Node{Type: scopeMarkerNode}
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b".
+//
+// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
+// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
+// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
Parent *Node
Child []*Node
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index 380df83427..7077612e7a 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -51,58 +51,87 @@ func (p *parser) top() *Node {
return p.doc
}
-// stopTags for use in popUntil. These come from section 12.2.3.2.
+// Stop tags for use in popUntil. These come from section 12.2.3.2.
var (
- defaultScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}
- listItemScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "ol", "ul"}
- buttonScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "button"}
- tableScopeStopTags = []string{"html", "table"}
+ defaultScopeStopTags = map[string][]string{
+ "": {"applet", "caption", "html", "table", "td", "th", "marquee", "object"},
+ "math": {"annotation-xml", "mi", "mn", "mo", "ms", "mtext"},
+ "svg": {"desc", "foreignObject", "title"},
+ }
)
-// stopTags for use in clearStackToContext.
-var (
- tableRowContextStopTags = []string{"tr", "html"}
+type scope int
+
+const (
+ defaultScope scope = iota
+ listItemScope
+ buttonScope
+ tableScope
+ tableRowScope
)
// popUntil pops the stack of open elements at the highest element whose tag
-// is in matchTags, provided there is no higher element in stopTags. It returns
-// whether or not there was such an element. If there was not, popUntil leaves
-// the stack unchanged.
+// is in matchTags, provided there is no higher element in the scope's stop
+// tags (as defined in section 12.2.3.2). It returns whether or not there was
+// such an element. If there was not, popUntil leaves the stack unchanged.
//
-// For example, if the stack was:
+// For example, the set of stop tags for table scope is: "html", "table". If
+// the stack was:
// ["html", "body", "font", "table", "b", "i", "u"]
-// then popUntil([]string{"html, "table"}, "font") would return false, but
-// popUntil([]string{"html, "table"}, "i") would return true and the resultant
-// stack would be:
+// then popUntil(tableScope, "font") would return false, but
+// popUntil(tableScope, "i") would return true and the stack would become:
// ["html", "body", "font", "table", "b"]
//
-// If an element's tag is in both stopTags and matchTags, then the stack will
-// be popped and the function returns true (provided, of course, there was no
-// higher element in the stack that was also in stopTags). For example,
-// popUntil([]string{"html, "table"}, "table") would return true and leave:
+// If an element's tag is in both the stop tags and matchTags, then the stack
+// will be popped and the function returns true (provided, of course, there was
+// no higher element in the stack that was also in the stop tags). For example,
+// popUntil(tableScope, "table") returns true and leaves:
// ["html", "body", "font"]
-func (p *parser) popUntil(stopTags []string, matchTags ...string) bool {
- if i := p.indexOfElementInScope(stopTags, matchTags...); i != -1 {
+func (p *parser) popUntil(s scope, matchTags ...string) bool {
+ if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
p.oe = p.oe[:i]
return true
}
return false
}
-// indexOfElementInScope returns the index in p.oe of the highest element
-// whose tag is in matchTags that is in scope according to stopTags.
-// If no matching element is in scope, it returns -1.
-func (p *parser) indexOfElementInScope(stopTags []string, matchTags ...string) int {
+// indexOfElementInScope returns the index in p.oe of the highest element whose
+// tag is in matchTags that is in scope. If no matching element is in scope, it
+// returns -1.
+func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
for i := len(p.oe) - 1; i >= 0; i-- {
tag := p.oe[i].Data
- for _, t := range matchTags {
- if t == tag {
- return i
+ if p.oe[i].Namespace == "" {
+ for _, t := range matchTags {
+ if t == tag {
+ return i
+ }
+ }
+ switch s {
+ case defaultScope:
+ // No-op.
+ case listItemScope:
+ if tag == "ol" || tag == "ul" {
+ return -1
+ }
+ case buttonScope:
+ if tag == "button" {
+ return -1
+ }
+ case tableScope:
+ if tag == "html" || tag == "table" {
+ return -1
+ }
+ default:
+ panic("unreachable")
}
}
- for _, t := range stopTags {
- if t == tag {
- return -1
+ switch s {
+ case defaultScope, listItemScope, buttonScope:
+ for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
+ if t == tag {
+ return -1
+ }
}
}
}
@@ -111,8 +140,30 @@ func (p *parser) indexOfElementInScope(stopTags []string, matchTags ...string) i
// elementInScope is like popUntil, except that it doesn't modify the stack of
// open elements.
-func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool {
- return p.indexOfElementInScope(stopTags, matchTags...) != -1
+func (p *parser) elementInScope(s scope, matchTags ...string) bool {
+ return p.indexOfElementInScope(s, matchTags...) != -1
+}
+
+// clearStackToContext pops elements off the stack of open elements until a
+// scope-defined element is found.
+func (p *parser) clearStackToContext(s scope) {
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ tag := p.oe[i].Data
+ switch s {
+ case tableScope:
+ if tag == "html" || tag == "table" {
+ p.oe = p.oe[:i+1]
+ return
+ }
+ case tableRowScope:
+ if tag == "html" || tag == "tr" {
+ p.oe = p.oe[:i+1]
+ return
+ }
+ default:
+ panic("unreachable")
+ }
+ }
}
// addChild adds a child node n to the top element, and pushes n onto the stack
@@ -624,10 +675,10 @@ func inBodyIM(p *parser) bool {
case "html":
copyAttributes(p.oe[0], p.tok)
case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "h1", "h2", "h3", "h4", "h5", "h6":
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
switch n := p.top(); n.Data {
case "h1", "h2", "h3", "h4", "h5", "h6":
p.oe.pop()
@@ -649,7 +700,7 @@ func inBodyIM(p *parser) bool {
p.addFormattingElement(p.tok.Data, p.tok.Attr)
case "nobr":
p.reconstructActiveFormattingElements()
- if p.elementInScope(defaultScopeStopTags, "nobr") {
+ if p.elementInScope(defaultScope, "nobr") {
p.inBodyEndTagFormatting("nobr")
p.reconstructActiveFormattingElements()
}
@@ -667,14 +718,14 @@ func inBodyIM(p *parser) bool {
p.framesetOK = false
case "table":
if !p.quirks {
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
}
p.addElement(p.tok.Data, p.tok.Attr)
p.framesetOK = false
p.im = inTableIM
return true
case "hr":
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
p.oe.pop()
p.acknowledgeSelfClosingTag()
@@ -687,7 +738,7 @@ func inBodyIM(p *parser) bool {
return true
case "form":
if p.form == nil {
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
p.form = p.top()
}
@@ -697,7 +748,7 @@ func inBodyIM(p *parser) bool {
node := p.oe[i]
switch node.Data {
case "li":
- p.popUntil(listItemScopeStopTags, "li")
+ p.popUntil(listItemScope, "li")
case "address", "div", "p":
continue
default:
@@ -707,7 +758,7 @@ func inBodyIM(p *parser) bool {
}
break
}
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "dd", "dt":
p.framesetOK = false
@@ -725,13 +776,13 @@ func inBodyIM(p *parser) bool {
}
break
}
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "plaintext":
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "button":
- p.popUntil(defaultScopeStopTags, "button")
+ p.popUntil(defaultScope, "button")
p.reconstructActiveFormattingElements()
p.addElement(p.tok.Data, p.tok.Attr)
p.framesetOK = false
@@ -788,7 +839,7 @@ func inBodyIM(p *parser) bool {
}
}
p.acknowledgeSelfClosingTag()
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.addElement("form", nil)
p.form = p.top()
if action != "" {
@@ -806,23 +857,20 @@ func inBodyIM(p *parser) bool {
p.oe.pop()
p.form = nil
case "xmp":
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
p.reconstructActiveFormattingElements()
p.framesetOK = false
p.addElement(p.tok.Data, p.tok.Attr)
case "math", "svg":
p.reconstructActiveFormattingElements()
- namespace := ""
if p.tok.Data == "math" {
// TODO: adjust MathML attributes.
- namespace = "mathml"
} else {
// TODO: adjust SVG attributes.
- namespace = "svg"
}
adjustForeignAttributes(p.tok.Attr)
p.addElement(p.tok.Data, p.tok.Attr)
- p.top().Namespace = namespace
+ p.top().Namespace = p.tok.Data
return true
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token.
@@ -837,16 +885,16 @@ func inBodyIM(p *parser) bool {
p.im = afterBodyIM
return true
case "p":
- if !p.elementInScope(buttonScopeStopTags, "p") {
+ if !p.elementInScope(buttonScope, "p") {
p.addElement("p", nil)
}
- p.popUntil(buttonScopeStopTags, "p")
+ p.popUntil(buttonScope, "p")
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
p.inBodyEndTagFormatting(p.tok.Data)
case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
- p.popUntil(defaultScopeStopTags, p.tok.Data)
+ p.popUntil(defaultScope, p.tok.Data)
case "applet", "marquee", "object":
- if p.popUntil(defaultScopeStopTags, p.tok.Data) {
+ if p.popUntil(defaultScope, p.tok.Data) {
p.clearActiveFormattingElements()
}
case "br":
@@ -895,7 +943,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
p.afe.remove(formattingElement)
return
}
- if !p.elementInScope(defaultScopeStopTags, tag) {
+ if !p.elementInScope(defaultScope, tag) {
// Ignore the tag.
return
}
@@ -1029,35 +1077,35 @@ func inTableIM(p *parser) bool {
case StartTagToken:
switch p.tok.Data {
case "caption":
- p.clearStackToContext(tableScopeStopTags)
+ p.clearStackToContext(tableScope)
p.afe = append(p.afe, &scopeMarker)
p.addElement(p.tok.Data, p.tok.Attr)
p.im = inCaptionIM
return true
case "tbody", "tfoot", "thead":
- p.clearStackToContext(tableScopeStopTags)
+ p.clearStackToContext(tableScope)
p.addElement(p.tok.Data, p.tok.Attr)
p.im = inTableBodyIM
return true
case "td", "th", "tr":
- p.clearStackToContext(tableScopeStopTags)
+ p.clearStackToContext(tableScope)
p.addElement("tbody", nil)
p.im = inTableBodyIM
return false
case "table":
- if p.popUntil(tableScopeStopTags, "table") {
+ if p.popUntil(tableScope, "table") {
p.resetInsertionMode()
return false
}
// Ignore the token.
return true
case "colgroup":
- p.clearStackToContext(tableScopeStopTags)
+ p.clearStackToContext(tableScope)
p.addElement(p.tok.Data, p.tok.Attr)
p.im = inColumnGroupIM
return true
case "col":
- p.clearStackToContext(tableScopeStopTags)
+ p.clearStackToContext(tableScope)
p.addElement("colgroup", p.tok.Attr)
p.im = inColumnGroupIM
return false
@@ -1078,7 +1126,7 @@ func inTableIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "table":
- if p.popUntil(tableScopeStopTags, "table") {
+ if p.popUntil(tableScope, "table") {
p.resetInsertionMode()
return true
}
@@ -1105,26 +1153,13 @@ func inTableIM(p *parser) bool {
return inBodyIM(p)
}
-// clearStackToContext pops elements off the stack of open elements
-// until an element listed in stopTags is found.
-func (p *parser) clearStackToContext(stopTags []string) {
- for i := len(p.oe) - 1; i >= 0; i-- {
- for _, tag := range stopTags {
- if p.oe[i].Data == tag {
- p.oe = p.oe[:i+1]
- return
- }
- }
- }
-}
-
// Section 12.2.5.4.11.
func inCaptionIM(p *parser) bool {
switch p.tok.Type {
case StartTagToken:
switch p.tok.Data {
case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr":
- if p.popUntil(tableScopeStopTags, "caption") {
+ if p.popUntil(tableScope, "caption") {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
@@ -1142,13 +1177,13 @@ func inCaptionIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "caption":
- if p.popUntil(tableScopeStopTags, "caption") {
+ if p.popUntil(tableScope, "caption") {
p.clearActiveFormattingElements()
p.im = inTableIM
}
return true
case "table":
- if p.popUntil(tableScopeStopTags, "caption") {
+ if p.popUntil(tableScope, "caption") {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
@@ -1232,7 +1267,7 @@ func inTableBodyIM(p *parser) bool {
data = "tr"
consumed = false
case "caption", "col", "colgroup", "tbody", "tfoot", "thead":
- if !p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
+ if !p.popUntil(tableScope, "tbody", "thead", "tfoot") {
// Ignore the token.
return true
}
@@ -1244,7 +1279,7 @@ func inTableBodyIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "table":
- if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
+ if p.popUntil(tableScope, "tbody", "thead", "tfoot") {
p.im = inTableIM
return false
}
@@ -1280,13 +1315,13 @@ func inRowIM(p *parser) bool {
case StartTagToken:
switch p.tok.Data {
case "td", "th":
- p.clearStackToContext(tableRowContextStopTags)
+ p.clearStackToContext(tableRowScope)
p.addElement(p.tok.Data, p.tok.Attr)
p.afe = append(p.afe, &scopeMarker)
p.im = inCellIM
return true
case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr":
- if p.popUntil(tableScopeStopTags, "tr") {
+ if p.popUntil(tableScope, "tr") {
p.im = inTableBodyIM
return false
}
@@ -1298,14 +1333,14 @@ func inRowIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "tr":
- if p.popUntil(tableScopeStopTags, "tr") {
+ if p.popUntil(tableScope, "tr") {
p.im = inTableBodyIM
return true
}
// Ignore the token.
return true
case "table":
- if p.popUntil(tableScopeStopTags, "tr") {
+ if p.popUntil(tableScope, "tr") {
p.im = inTableBodyIM
return false
}
@@ -1350,7 +1385,7 @@ func inCellIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "td", "th":
- if !p.popUntil(tableScopeStopTags, p.tok.Data) {
+ if !p.popUntil(tableScope, p.tok.Data) {
// Ignore the token.
return true
}
@@ -1371,7 +1406,7 @@ func inCellIM(p *parser) bool {
return true
}
if closeTheCellAndReprocess {
- if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") {
+ if p.popUntil(tableScope, "td") || p.popUntil(tableScope, "th") {
p.clearActiveFormattingElements()
p.im = inRowIM
return false
@@ -1451,7 +1486,7 @@ func inSelectInTableIM(p *parser) bool {
case StartTagToken, EndTagToken:
switch p.tok.Data {
case "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th":
- if p.tok.Type == StartTagToken || p.elementInScope(tableScopeStopTags, p.tok.Data) {
+ if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.Data) {
p.endSelect()
return false
} else {
@@ -1672,6 +1707,11 @@ func parseForeignContent(p *parser) bool {
Data: p.tok.Data,
})
case StartTagToken:
+ if htmlIntegrationPoint(p.top()) {
+ inBodyIM(p)
+ p.resetInsertionMode()
+ return true
+ }
if breakout[p.tok.Data] {
for i := len(p.oe) - 1; i >= 0; i-- {
// TODO: HTML, MathML integration points.
@@ -1683,10 +1723,14 @@ func parseForeignContent(p *parser) bool {
return false
}
switch p.top().Namespace {
- case "mathml":
+ case "math":
// TODO: adjust MathML attributes.
case "svg":
- // TODO: adjust SVG tag names.
+ // Adjust SVG tag names. The tokenizer lower-cases tag names, but
+ // SVG wants e.g. "foreignObject" with a capital second "O".
+ if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
+ p.tok.Data = x
+ }
// TODO: adjust SVG attributes.
default:
panic("html: bad parser state: unexpected namespace")
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
index 2b6a8b5083..91c8388b3a 100644
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -184,7 +184,7 @@ func TestParser(t *testing.T) {
{"tests4.dat", -1},
{"tests5.dat", -1},
{"tests6.dat", -1},
- {"tests10.dat", 30},
+ {"tests10.dat", 31},
}
for _, tf := range testFiles {
f, err := os.Open("testdata/webkit/" + tf.filename)