| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build ignore |
| |
| package main |
| |
| import ( |
| "bytes" |
| "encoding/json" |
| "fmt" |
| "log" |
| "strings" |
| |
| "golang.org/x/text/internal/gen" |
| ) |
| |
| type group struct { |
| Encodings []struct { |
| Labels []string |
| Name string |
| } |
| } |
| |
| func main() { |
| gen.Init() |
| |
| r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json") |
| var groups []group |
| if err := json.NewDecoder(r).Decode(&groups); err != nil { |
| log.Fatalf("Error reading encodings.json: %v", err) |
| } |
| |
| w := &bytes.Buffer{} |
| fmt.Fprintln(w, "type htmlEncoding byte") |
| fmt.Fprintln(w, "const (") |
| for i, g := range groups { |
| for _, e := range g.Encodings { |
| key := strings.ToLower(e.Name) |
| name := consts[key] |
| if name == "" { |
| log.Fatalf("No const defined for %s.", key) |
| } |
| if i == 0 { |
| fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) |
| } else { |
| fmt.Fprintf(w, "%s\n", name) |
| } |
| } |
| } |
| fmt.Fprintln(w, "numEncodings") |
| fmt.Fprint(w, ")\n\n") |
| |
| fmt.Fprintln(w, "var canonical = [numEncodings]string{") |
| for _, g := range groups { |
| for _, e := range g.Encodings { |
| fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name)) |
| } |
| } |
| fmt.Fprint(w, "}\n\n") |
| |
| fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") |
| for _, g := range groups { |
| for _, e := range g.Encodings { |
| for _, l := range e.Labels { |
| key := strings.ToLower(e.Name) |
| name := consts[key] |
| fmt.Fprintf(w, "%q: %s,\n", l, name) |
| } |
| } |
| } |
| fmt.Fprint(w, "}\n\n") |
| |
| var tags []string |
| fmt.Fprintln(w, "var localeMap = []htmlEncoding{") |
| for _, loc := range locales { |
| tags = append(tags, loc.tag) |
| fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) |
| } |
| fmt.Fprint(w, "}\n\n") |
| |
| fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) |
| |
| gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) |
| } |
| |
| // consts maps canonical encoding name to internal constant. |
| var consts = map[string]string{ |
| "utf-8": "utf8", |
| "ibm866": "ibm866", |
| "iso-8859-2": "iso8859_2", |
| "iso-8859-3": "iso8859_3", |
| "iso-8859-4": "iso8859_4", |
| "iso-8859-5": "iso8859_5", |
| "iso-8859-6": "iso8859_6", |
| "iso-8859-7": "iso8859_7", |
| "iso-8859-8": "iso8859_8", |
| "iso-8859-8-i": "iso8859_8I", |
| "iso-8859-10": "iso8859_10", |
| "iso-8859-13": "iso8859_13", |
| "iso-8859-14": "iso8859_14", |
| "iso-8859-15": "iso8859_15", |
| "iso-8859-16": "iso8859_16", |
| "koi8-r": "koi8r", |
| "koi8-u": "koi8u", |
| "macintosh": "macintosh", |
| "windows-874": "windows874", |
| "windows-1250": "windows1250", |
| "windows-1251": "windows1251", |
| "windows-1252": "windows1252", |
| "windows-1253": "windows1253", |
| "windows-1254": "windows1254", |
| "windows-1255": "windows1255", |
| "windows-1256": "windows1256", |
| "windows-1257": "windows1257", |
| "windows-1258": "windows1258", |
| "x-mac-cyrillic": "macintoshCyrillic", |
| "gbk": "gbk", |
| "gb18030": "gb18030", |
| // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG |
| "big5": "big5", |
| "euc-jp": "eucjp", |
| "iso-2022-jp": "iso2022jp", |
| "shift_jis": "shiftJIS", |
| "euc-kr": "euckr", |
| "replacement": "replacement", |
| "utf-16be": "utf16be", |
| "utf-16le": "utf16le", |
| "x-user-defined": "xUserDefined", |
| } |
| |
| // locales is taken from |
| // https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm. |
| var locales = []struct{ tag, name string }{ |
| // The default value. Explicitly state latin to benefit from the exact |
| // script option, while still making 1252 the default encoding for languages |
| // written in Latin script. |
| {"und_Latn", "windows-1252"}, |
| {"ar", "windows-1256"}, |
| {"ba", "windows-1251"}, |
| {"be", "windows-1251"}, |
| {"bg", "windows-1251"}, |
| {"cs", "windows-1250"}, |
| {"el", "iso-8859-7"}, |
| {"et", "windows-1257"}, |
| {"fa", "windows-1256"}, |
| {"he", "windows-1255"}, |
| {"hr", "windows-1250"}, |
| {"hu", "iso-8859-2"}, |
| {"ja", "shift_jis"}, |
| {"kk", "windows-1251"}, |
| {"ko", "euc-kr"}, |
| {"ku", "windows-1254"}, |
| {"ky", "windows-1251"}, |
| {"lt", "windows-1257"}, |
| {"lv", "windows-1257"}, |
| {"mk", "windows-1251"}, |
| {"pl", "iso-8859-2"}, |
| {"ru", "windows-1251"}, |
| {"sah", "windows-1251"}, |
| {"sk", "windows-1250"}, |
| {"sl", "iso-8859-2"}, |
| {"sr", "windows-1251"}, |
| {"tg", "windows-1251"}, |
| {"th", "windows-874"}, |
| {"tr", "windows-1254"}, |
| {"tt", "windows-1251"}, |
| {"uk", "windows-1251"}, |
| {"vi", "windows-1258"}, |
| {"zh-hans", "gb18030"}, |
| {"zh-hant", "big5"}, |
| } |