diff options
| author | Andrew Balholm <andybalholm@gmail.com> | 2011-11-24 09:28:58 +1100 |
|---|---|---|
| committer | Nigel Tao <nigeltao@golang.org> | 2011-11-24 09:28:58 +1100 |
| commit | 77b0ad1e806580e47e4f682dfb912c55e1411b73 (patch) | |
| tree | ee5f208102be1f8295b68215ffe2861ca619fa99 /src/pkg/html/parse.go | |
| parent | b3923a27dd80592ec4cd21ca04ea2a736578c9ad (diff) | |
| download | go-77b0ad1e806580e47e4f682dfb912c55e1411b73.tar.xz | |
html: parse DOCTYPE into name and public and system identifiers
Pass tests2.dat, test 59:
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
| <!DOCTYPE <!doctype>
| <html>
| <head>
| <body>
| ">"
| <!-- <!--x -->
| "-->"
Pass all the tests in doctype01.dat.
Also pass tests2.dat, test 60:
<!doctype html><div><form></form><div></div></div>
R=nigeltao
CC=golang-dev
https://golang.org/cl/5437045
Diffstat (limited to 'src/pkg/html/parse.go')
| -rw-r--r-- | src/pkg/html/parse.go | 58 |
1 files changed, 54 insertions, 4 deletions
```diff
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index 36204895b9..041c5390ed 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -321,6 +321,59 @@ func (p *parser) resetInsertionMode() {
 
 const whitespace = " \t\r\n\f"
 
+// parseDoctype parses the data from a DoctypeToken into a name,
+// public identifier, and system identifier. It returns a Node whose Type
+// is DoctypeNode, whose Data is the name, and which has attributes
+// named "system" and "public" for the two identifiers if they were present.
+func parseDoctype(s string) *Node {
+	n := &Node{Type: DoctypeNode}
+
+	// Find the name.
+	space := strings.IndexAny(s, whitespace)
+	if space == -1 {
+		space = len(s)
+	}
+	n.Data = strings.ToLower(s[:space])
+	s = strings.TrimLeft(s[space:], whitespace)
+
+	if len(s) < 6 {
+		// It can't start with "PUBLIC" or "SYSTEM".
+		// Ignore the rest of the string.
+		return n
+	}
+
+	key := strings.ToLower(s[:6])
+	s = s[6:]
+	for key == "public" || key == "system" {
+		s = strings.TrimLeft(s, whitespace)
+		if s == "" {
+			break
+		}
+		quote := s[0]
+		if quote != '"' && quote != '\'' {
+			break
+		}
+		s = s[1:]
+		q := strings.IndexRune(s, rune(quote))
+		var id string
+		if q == -1 {
+			id = s
+			s = ""
+		} else {
+			id = s[:q]
+			s = s[q+1:]
+		}
+		n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
+		if key == "public" {
+			key = "system"
+		} else {
+			key = ""
+		}
+	}
+
+	return n
+}
+
 // Section 11.2.5.4.1.
 func initialIM(p *parser) bool {
 	switch p.tok.Type {
@@ -337,10 +390,7 @@ func initialIM(p *parser) bool {
 		})
 		return true
 	case DoctypeToken:
-		p.doc.Add(&Node{
-			Type: DoctypeNode,
-			Data: p.tok.Data,
-		})
+		p.doc.Add(parseDoctype(p.tok.Data))
 		p.im = beforeHTMLIM
 		return true
 	}
```
