| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build ignore |
| |
| package main |
| |
| import ( |
| "encoding/xml" |
| "fmt" |
| "io" |
| "log" |
| "sort" |
| "strconv" |
| "strings" |
| |
| "golang.org/x/text/encoding/internal/identifier" |
| "golang.org/x/text/internal/gen" |
| ) |
| |
| type registry struct { |
| XMLName xml.Name `xml:"registry"` |
| Updated string `xml:"updated"` |
| Registry []struct { |
| ID string `xml:"id,attr"` |
| Record []struct { |
| Name string `xml:"name"` |
| Xref []struct { |
| Type string `xml:"type,attr"` |
| Data string `xml:"data,attr"` |
| } `xml:"xref"` |
| Desc struct { |
| Data string `xml:",innerxml"` |
| } `xml:"description,"` |
| MIB string `xml:"value"` |
| Alias []string `xml:"alias"` |
| MIME string `xml:"preferred_alias"` |
| } `xml:"record"` |
| } `xml:"registry"` |
| } |
| |
| func main() { |
| r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") |
| reg := ®istry{} |
| if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { |
| log.Fatalf("Error decoding charset registry: %v", err) |
| } |
| if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { |
| log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) |
| } |
| |
| x := &indexInfo{} |
| |
| for _, rec := range reg.Registry[0].Record { |
| mib := identifier.MIB(parseInt(rec.MIB)) |
| x.addEntry(mib, rec.Name) |
| for _, a := range rec.Alias { |
| a = strings.Split(a, " ")[0] // strip comments. |
| x.addAlias(a, mib) |
| // MIB name aliases are prefixed with a "cs" (character set) in the |
| // registry to identify them as display names and to ensure that |
| // the name starts with a lowercase letter in case it is used as |
| // an identifier. We remove it to be left with a nice clean name. |
| if strings.HasPrefix(a, "cs") { |
| x.setName(2, a[2:]) |
| } |
| } |
| if rec.MIME != "" { |
| x.addAlias(rec.MIME, mib) |
| x.setName(1, rec.MIME) |
| } |
| } |
| |
| w := gen.NewCodeWriter() |
| |
| fmt.Fprintln(w, `import "golang.org/x/text/encoding/internal/identifier"`) |
| |
| writeIndex(w, x) |
| |
| w.WriteGoFile("tables.go", "ianaindex") |
| } |
| |
| type alias struct { |
| name string |
| mib identifier.MIB |
| } |
| |
| type indexInfo struct { |
| // compacted index from code to MIB |
| codeToMIB []identifier.MIB |
| alias []alias |
| names [][3]string |
| } |
| |
| func (ii *indexInfo) Len() int { |
| return len(ii.codeToMIB) |
| } |
| |
| func (ii *indexInfo) Less(a, b int) bool { |
| return ii.codeToMIB[a] < ii.codeToMIB[b] |
| } |
| |
| func (ii *indexInfo) Swap(a, b int) { |
| ii.codeToMIB[a], ii.codeToMIB[b] = ii.codeToMIB[b], ii.codeToMIB[a] |
| // Co-sort the names. |
| ii.names[a], ii.names[b] = ii.names[b], ii.names[a] |
| } |
| |
| func (ii *indexInfo) setName(i int, name string) { |
| ii.names[len(ii.names)-1][i] = name |
| } |
| |
| func (ii *indexInfo) addEntry(mib identifier.MIB, name string) { |
| ii.names = append(ii.names, [3]string{name, name, name}) |
| ii.addAlias(name, mib) |
| ii.codeToMIB = append(ii.codeToMIB, mib) |
| } |
| |
| func (ii *indexInfo) addAlias(name string, mib identifier.MIB) { |
| // Don't add duplicates for the same mib. Adding duplicate aliases for |
| // different MIBs will cause the compiler to barf on an invalid map: great!. |
| for i := len(ii.alias) - 1; i >= 0 && ii.alias[i].mib == mib; i-- { |
| if ii.alias[i].name == name { |
| return |
| } |
| } |
| ii.alias = append(ii.alias, alias{name, mib}) |
| lower := strings.ToLower(name) |
| if lower != name { |
| ii.addAlias(lower, mib) |
| } |
| } |
| |
| const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer. |
| |
| func writeIndex(w *gen.CodeWriter, x *indexInfo) { |
| sort.Stable(x) |
| |
| // Write constants. |
| fmt.Fprintln(w, "const (") |
| for i, m := range x.codeToMIB { |
| if i == 0 { |
| fmt.Fprintf(w, "enc%d = iota\n", m) |
| } else { |
| fmt.Fprintf(w, "enc%d\n", m) |
| } |
| } |
| fmt.Fprintln(w, "numIANA") |
| fmt.Fprintln(w, ")") |
| |
| w.WriteVar("ianaToMIB", x.codeToMIB) |
| |
| var ianaNames, mibNames []string |
| for _, names := range x.names { |
| n := names[0] |
| if names[0] != names[1] { |
| // MIME names are mostly identical to IANA names. We share the |
| // tables by setting the first byte of the string to an index into |
| // the string itself (< maxMIMENameLen) to the IANA name. The MIME |
| // name immediately follows the index. |
| x := len(names[1]) + 1 |
| if x > maxMIMENameLen { |
| log.Fatalf("MIME name length (%d) > %d", x, maxMIMENameLen) |
| } |
| n = string(x) + names[1] + names[0] |
| } |
| ianaNames = append(ianaNames, n) |
| mibNames = append(mibNames, names[2]) |
| } |
| |
| w.WriteVar("ianaNames", ianaNames) |
| w.WriteVar("mibNames", mibNames) |
| |
| w.WriteComment(` |
| TODO: Instead of using a map, we could use binary search strings doing |
| on-the fly lower-casing per character. This allows to always avoid |
| allocation and will be considerably more compact.`) |
| fmt.Fprintln(w, "var ianaAliases = map[string]int{") |
| for _, a := range x.alias { |
| fmt.Fprintf(w, "%q: enc%d,\n", a.name, a.mib) |
| } |
| fmt.Fprintln(w, "}") |
| } |
| |
| func parseInt(s string) int { |
| x, err := strconv.ParseInt(s, 10, 64) |
| if err != nil { |
| log.Fatalf("Could not parse integer: %v", err) |
| } |
| return int(x) |
| } |