| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package html |
| |
| import ( |
| "strings" |
| ) |
| |
| // parseDoctype parses the data from a DoctypeToken into a name, |
| // public identifier, and system identifier. It returns a Node whose Type |
| // is DoctypeNode, whose Data is the name, and which has attributes |
| // named "system" and "public" for the two identifiers if they were present. |
| // quirks is whether the document should be parsed in "quirks mode". |
| func parseDoctype(s string) (n *Node, quirks bool) { |
| n = &Node{Type: DoctypeNode} |
| |
| // Find the name. |
| space := strings.IndexAny(s, whitespace) |
| if space == -1 { |
| space = len(s) |
| } |
| n.Data = s[:space] |
| // The comparison to "html" is case-sensitive. |
| if n.Data != "html" { |
| quirks = true |
| } |
| n.Data = strings.ToLower(n.Data) |
| s = strings.TrimLeft(s[space:], whitespace) |
| |
| if len(s) < 6 { |
| // It can't start with "PUBLIC" or "SYSTEM". |
| // Ignore the rest of the string. |
| return n, quirks || s != "" |
| } |
| |
| key := strings.ToLower(s[:6]) |
| s = s[6:] |
| for key == "public" || key == "system" { |
| s = strings.TrimLeft(s, whitespace) |
| if s == "" { |
| break |
| } |
| quote := s[0] |
| if quote != '"' && quote != '\'' { |
| break |
| } |
| s = s[1:] |
| q := strings.IndexRune(s, rune(quote)) |
| var id string |
| if q == -1 { |
| id = s |
| s = "" |
| } else { |
| id = s[:q] |
| s = s[q+1:] |
| } |
| n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) |
| if key == "public" { |
| key = "system" |
| } else { |
| key = "" |
| } |
| } |
| |
| if key != "" || s != "" { |
| quirks = true |
| } else if len(n.Attr) > 0 { |
| if n.Attr[0].Key == "public" { |
| public := strings.ToLower(n.Attr[0].Val) |
| switch public { |
| case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": |
| quirks = true |
| default: |
| for _, q := range quirkyIDs { |
| if strings.HasPrefix(public, q) { |
| quirks = true |
| break |
| } |
| } |
| } |
| // The following two public IDs only cause quirks mode if there is no system ID. |
| if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || |
| strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { |
| quirks = true |
| } |
| } |
| if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && |
| strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { |
| quirks = true |
| } |
| } |
| |
| return n, quirks |
| } |
| |
// quirkyIDs holds the public doctype identifier prefixes that put a document
// into quirks mode. parseDoctype lower-cases the public identifier and tests
// it against each entry with strings.HasPrefix, so every entry here must be
// written in lower case.
var quirkyIDs = []string{
	// Silmaril, AdvaSoft and AS.
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",

	// IETF.
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",

	// Metrius, Microsoft and Netscape.
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",

	// O'Reilly, SoftQuad, Spyglass, SQ and Sun.
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",

	// W3C and W3O.
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",

	// WebTechs.
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}