aboutsummaryrefslogtreecommitdiff
path: root/src/pkg/html/parse.go
diff options
context:
space:
mode:
author Andrew Balholm <andybalholm@gmail.com> 2011-11-24 09:28:58 +1100
committer Nigel Tao <nigeltao@golang.org> 2011-11-24 09:28:58 +1100
commit 77b0ad1e806580e47e4f682dfb912c55e1411b73 (patch)
tree ee5f208102be1f8295b68215ffe2861ca619fa99 /src/pkg/html/parse.go
parent b3923a27dd80592ec4cd21ca04ea2a736578c9ad (diff)
download go-77b0ad1e806580e47e4f682dfb912c55e1411b73.tar.xz
html: parse DOCTYPE into name and public and system identifiers
Pass tests2.dat, test 59:
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->

| <!DOCTYPE <!doctype>
| <html>
| <head>
| <body>
| ">"
| <!-- <!--x -->
| "-->"

Pass all the tests in doctype01.dat.

Also pass tests2.dat, test 60:
<!doctype html><div><form></form><div></div></div>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5437045
Diffstat (limited to 'src/pkg/html/parse.go')
-rw-r--r-- src/pkg/html/parse.go 58
1 files changed, 54 insertions, 4 deletions
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index 36204895b9..041c5390ed 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -321,6 +321,59 @@ func (p *parser) resetInsertionMode() {
const whitespace = " \t\r\n\f"
+// parseDoctype parses the data from a DoctypeToken into a name, a public
+// identifier, and a system identifier. It returns a Node whose Type is
+// DoctypeNode, whose Data is the lower-cased name, and which has attributes
+// named "public" and "system" for whichever of the two identifiers were present.
+func parseDoctype(s string) *Node {
+ n := &Node{Type: DoctypeNode}
+
+ // Find the name: everything before the first whitespace byte, or all of s if there is none.
+ space := strings.IndexAny(s, whitespace)
+ if space == -1 {
+ space = len(s)
+ }
+ n.Data = strings.ToLower(s[:space]) // The name is stored lower-cased.
+ s = strings.TrimLeft(s[space:], whitespace) // Skip the whitespace that follows the name.
+
+ if len(s) < 6 {
+ // It can't start with "PUBLIC" or "SYSTEM".
+ // Ignore the rest of the string.
+ return n
+ }
+
+ key := strings.ToLower(s[:6]) // The keyword is matched case-insensitively.
+ s = s[6:]
+ for key == "public" || key == "system" { // At most two passes: "public" then "system", or "system" alone.
+ s = strings.TrimLeft(s, whitespace)
+ if s == "" {
+ break // Keyword with no identifier after it.
+ }
+ quote := s[0]
+ if quote != '"' && quote != '\'' {
+ break // An identifier must open with a double or single quote.
+ }
+ s = s[1:]
+ q := strings.IndexRune(s, rune(quote)) // Look for the matching closing quote.
+ var id string
+ if q == -1 { // Unterminated: the identifier is the rest of the string.
+ id = s
+ s = ""
+ } else {
+ id = s[:q]
+ s = s[q+1:]
+ }
+ n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) // Key is "public" or "system" here.
+ if key == "public" { // A system identifier may follow a public one.
+ key = "system"
+ } else {
+ key = "" // Ends the loop after the system identifier.
+ }
+ }
+
+ return n
+}
+
// Section 11.2.5.4.1.
func initialIM(p *parser) bool {
switch p.tok.Type {
@@ -337,10 +390,7 @@ func initialIM(p *parser) bool {
})
return true
case DoctypeToken:
- p.doc.Add(&Node{
- Type: DoctypeNode,
- Data: p.tok.Data,
- })
+ p.doc.Add(parseDoctype(p.tok.Data))
p.im = beforeHTMLIM
return true
}