| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build ignore |
| // +build ignore |
| |
| package main |
| |
| import ( |
| "flag" |
| "fmt" |
| "log" |
| "reflect" |
| "strings" |
| "unicode/utf8" |
| |
| "golang.org/x/text/internal/gen" |
| "golang.org/x/text/internal/language" |
| "golang.org/x/text/internal/language/compact" |
| "golang.org/x/text/internal/number" |
| "golang.org/x/text/internal/stringset" |
| "golang.org/x/text/unicode/cldr" |
| ) |
| |
| var ( |
| test = flag.Bool("test", false, |
| "test existing tables; can be used to compare web data with package data.") |
| outputFile = flag.String("output", "tables.go", "output file") |
| outputTestFile = flag.String("testoutput", "data_test.go", "output file") |
| |
| draft = flag.String("draft", |
| "contributed", |
| `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`) |
| ) |
| |
| func main() { |
| gen.Init() |
| |
| const pkg = "number" |
| |
| gen.Repackage("gen_common.go", "common.go", pkg) |
| // Read the CLDR zip file. |
| r := gen.OpenCLDRCoreZip() |
| defer r.Close() |
| |
| d := &cldr.Decoder{} |
| d.SetDirFilter("supplemental", "main") |
| d.SetSectionFilter("numbers", "numberingSystem") |
| data, err := d.DecodeZip(r) |
| if err != nil { |
| log.Fatalf("DecodeZip: %v", err) |
| } |
| |
| w := gen.NewCodeWriter() |
| defer w.WriteGoFile(*outputFile, pkg) |
| |
| fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`) |
| |
| gen.WriteCLDRVersion(w) |
| |
| genNumSystem(w, data) |
| genSymbols(w, data) |
| genFormats(w, data) |
| } |
| |
| var systemMap = map[string]system{"latn": 0} |
| |
| func getNumberSystem(str string) system { |
| ns, ok := systemMap[str] |
| if !ok { |
| log.Fatalf("No index for numbering system %q", str) |
| } |
| return ns |
| } |
| |
| func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) { |
| numSysData := []systemData{ |
| {digitSize: 1, zero: [4]byte{'0'}}, |
| } |
| |
| for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { |
| if len(ns.Digits) == 0 { |
| continue |
| } |
| switch ns.Id { |
| case "latn": |
| // hard-wired |
| continue |
| case "hanidec": |
| // non-consecutive digits: treat as "algorithmic" |
| continue |
| } |
| |
| zero, sz := utf8.DecodeRuneInString(ns.Digits) |
| if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte |
| log.Fatalf("Last byte of zero value overflows for %s", ns.Id) |
| } |
| |
| i := rune(0) |
| for _, r := range ns.Digits { |
| // Verify that we can do simple math on the UTF-8 byte sequence |
| // of zero to get the digit. |
| if zero+i != r { |
| // Runes not consecutive. |
| log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r) |
| } |
| i++ |
| } |
| var x [utf8.UTFMax]byte |
| utf8.EncodeRune(x[:], zero) |
| id := system(len(numSysData)) |
| systemMap[ns.Id] = id |
| numSysData = append(numSysData, systemData{ |
| id: id, |
| digitSize: byte(sz), |
| zero: x, |
| }) |
| } |
| w.WriteVar("numSysData", numSysData) |
| |
| algoID := system(len(numSysData)) |
| fmt.Fprintln(w, "const (") |
| for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { |
| id, ok := systemMap[ns.Id] |
| if !ok { |
| id = algoID |
| systemMap[ns.Id] = id |
| algoID++ |
| } |
| fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id) |
| } |
| fmt.Fprintln(w, "numNumberSystems") |
| fmt.Fprintln(w, ")") |
| |
| fmt.Fprintln(w, "var systemMap = map[string]system{") |
| for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { |
| fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id)) |
| w.Size += len(ns.Id) + 16 + 1 // very coarse approximation |
| } |
| fmt.Fprintln(w, "}") |
| } |
| |
| func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) { |
| d, err := cldr.ParseDraft(*draft) |
| if err != nil { |
| log.Fatalf("invalid draft level: %v", err) |
| } |
| |
| nNumberSystems := system(len(systemMap)) |
| |
| type symbols [NumSymbolTypes]string |
| |
| type key struct { |
| tag compact.ID |
| system system |
| } |
| symbolMap := map[key]*symbols{} |
| |
| defaults := map[compact.ID]system{} |
| |
| for _, lang := range data.Locales() { |
| ldml := data.RawLDML(lang) |
| if ldml.Numbers == nil { |
| continue |
| } |
| langIndex, ok := compact.FromTag(language.MustParse(lang)) |
| if !ok { |
| log.Fatalf("No compact index for language %s", lang) |
| } |
| if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 { |
| defaults[langIndex] = getNumberSystem(d[0].Data()) |
| } |
| |
| syms := cldr.MakeSlice(&ldml.Numbers.Symbols) |
| syms.SelectDraft(d) |
| |
| getFirst := func(name string, x interface{}) string { |
| v := reflect.ValueOf(x) |
| slice := cldr.MakeSlice(x) |
| slice.SelectAnyOf("alt", "", "alt") |
| if reflect.Indirect(v).Len() == 0 { |
| return "" |
| } else if reflect.Indirect(v).Len() > 1 { |
| log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name) |
| } |
| return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String() |
| } |
| |
| for _, sym := range ldml.Numbers.Symbols { |
| if sym.NumberSystem == "" { |
| // This is just linking the default of root to "latn". |
| continue |
| } |
| symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{ |
| SymDecimal: getFirst("decimal", &sym.Decimal), |
| SymGroup: getFirst("group", &sym.Group), |
| SymList: getFirst("list", &sym.List), |
| SymPercentSign: getFirst("percentSign", &sym.PercentSign), |
| SymPlusSign: getFirst("plusSign", &sym.PlusSign), |
| SymMinusSign: getFirst("minusSign", &sym.MinusSign), |
| SymExponential: getFirst("exponential", &sym.Exponential), |
| SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent), |
| SymPerMille: getFirst("perMille", &sym.PerMille), |
| SymInfinity: getFirst("infinity", &sym.Infinity), |
| SymNan: getFirst("nan", &sym.Nan), |
| SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator), |
| } |
| } |
| } |
| |
| // Expand all values. |
| for k, syms := range symbolMap { |
| for t := SymDecimal; t < NumSymbolTypes; t++ { |
| p := k.tag |
| for syms[t] == "" { |
| p = p.Parent() |
| if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" { |
| syms[t] = (*pSyms)[t] |
| break |
| } |
| if p == 0 /* und */ { |
| // Default to root, latn. |
| syms[t] = (*symbolMap[key{}])[t] |
| } |
| } |
| } |
| } |
| |
| // Unique the symbol sets and write the string data. |
| m := map[symbols]int{} |
| sb := stringset.NewBuilder() |
| |
| symIndex := [][NumSymbolTypes]byte{} |
| |
| for ns := system(0); ns < nNumberSystems; ns++ { |
| for _, l := range data.Locales() { |
| langIndex, _ := compact.FromTag(language.MustParse(l)) |
| s := symbolMap[key{langIndex, ns}] |
| if s == nil { |
| continue |
| } |
| if _, ok := m[*s]; !ok { |
| m[*s] = len(symIndex) |
| sb.Add(s[:]...) |
| var x [NumSymbolTypes]byte |
| for i := SymDecimal; i < NumSymbolTypes; i++ { |
| x[i] = byte(sb.Index((*s)[i])) |
| } |
| symIndex = append(symIndex, x) |
| } |
| } |
| } |
| w.WriteVar("symIndex", symIndex) |
| w.WriteVar("symData", sb.Set()) |
| |
| // resolveSymbolIndex gets the index from the closest matching locale, |
| // including the locale itself. |
| resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset { |
| for { |
| if sym := symbolMap[key{langIndex, ns}]; sym != nil { |
| return symOffset(m[*sym]) |
| } |
| if langIndex == 0 { |
| return 0 // und, latn |
| } |
| langIndex = langIndex.Parent() |
| } |
| } |
| |
| // Create an index with the symbols for each locale for the latn numbering |
| // system. If this is not the default, or the only one, for a locale, we |
| // will overwrite the value later. |
| var langToDefaults [compact.NumCompactTags]symOffset |
| for _, l := range data.Locales() { |
| langIndex, _ := compact.FromTag(language.MustParse(l)) |
| langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0) |
| } |
| |
| // Delete redundant entries. |
| for _, l := range data.Locales() { |
| langIndex, _ := compact.FromTag(language.MustParse(l)) |
| def := defaults[langIndex] |
| syms := symbolMap[key{langIndex, def}] |
| if syms == nil { |
| continue |
| } |
| for ns := system(0); ns < nNumberSystems; ns++ { |
| if ns == def { |
| continue |
| } |
| if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms { |
| delete(symbolMap, key{langIndex, ns}) |
| } |
| } |
| } |
| |
| // Create a sorted list of alternatives per language. This will only need to |
| // be referenced if a user specified an alternative numbering system. |
| var langToAlt []altSymData |
| for _, l := range data.Locales() { |
| langIndex, _ := compact.FromTag(language.MustParse(l)) |
| start := len(langToAlt) |
| if start >= hasNonLatnMask { |
| log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask) |
| } |
| // Create the entry for the default value. |
| def := defaults[langIndex] |
| langToAlt = append(langToAlt, altSymData{ |
| compactTag: langIndex, |
| system: def, |
| symIndex: resolveSymbolIndex(langIndex, def), |
| }) |
| |
| for ns := system(0); ns < nNumberSystems; ns++ { |
| if def == ns { |
| continue |
| } |
| if sym := symbolMap[key{langIndex, ns}]; sym != nil { |
| langToAlt = append(langToAlt, altSymData{ |
| compactTag: langIndex, |
| system: ns, |
| symIndex: resolveSymbolIndex(langIndex, ns), |
| }) |
| } |
| } |
| if def == 0 && len(langToAlt) == start+1 { |
| // No additional data: erase the entry. |
| langToAlt = langToAlt[:start] |
| } else { |
| // Overwrite the entry in langToDefaults. |
| langToDefaults[langIndex] = hasNonLatnMask | symOffset(start) |
| } |
| } |
| w.WriteComment(` |
| langToDefaults maps a compact language index to the default numbering system |
| and default symbol set`) |
| w.WriteVar("langToDefaults", langToDefaults) |
| |
| w.WriteComment(` |
| langToAlt is a list of numbering system and symbol set pairs, sorted and |
| marked by compact language index.`) |
| w.WriteVar("langToAlt", langToAlt) |
| } |
| |
| // genFormats generates the lookup table for decimal, scientific and percent |
| // patterns. |
| // |
| // CLDR allows for patterns to be different per language for different numbering |
| // systems. In practice the patterns are set to be consistent for a language |
| // independent of the numbering system. genFormats verifies that no language |
| // deviates from this. |
| func genFormats(w *gen.CodeWriter, data *cldr.CLDR) { |
| d, err := cldr.ParseDraft(*draft) |
| if err != nil { |
| log.Fatalf("invalid draft level: %v", err) |
| } |
| |
| // Fill the first slot with a dummy so we can identify unspecified tags. |
| formats := []number.Pattern{{}} |
| patterns := map[string]int{} |
| |
| // TODO: It would be possible to eliminate two of these slices by having |
| // another indirection and store a reference to the combination of patterns. |
| decimal := make([]byte, compact.NumCompactTags) |
| scientific := make([]byte, compact.NumCompactTags) |
| percent := make([]byte, compact.NumCompactTags) |
| |
| for _, lang := range data.Locales() { |
| ldml := data.RawLDML(lang) |
| if ldml.Numbers == nil { |
| continue |
| } |
| langIndex, ok := compact.FromTag(language.MustParse(lang)) |
| if !ok { |
| log.Fatalf("No compact index for language %s", lang) |
| } |
| type patternSlice []*struct { |
| cldr.Common |
| Numbers string `xml:"numbers,attr"` |
| Count string `xml:"count,attr"` |
| } |
| |
| add := func(name string, tags []byte, ps patternSlice) { |
| sl := cldr.MakeSlice(&ps) |
| sl.SelectDraft(d) |
| if len(ps) == 0 { |
| return |
| } |
| if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] { |
| log.Fatalf("Inconsistent %d patterns for language %s", name, lang) |
| } |
| s := ps[0].Data() |
| |
| index, ok := patterns[s] |
| if !ok { |
| nf, err := number.ParsePattern(s) |
| if err != nil { |
| log.Fatal(err) |
| } |
| index = len(formats) |
| patterns[s] = index |
| formats = append(formats, *nf) |
| } |
| tags[langIndex] = byte(index) |
| } |
| |
| for _, df := range ldml.Numbers.DecimalFormats { |
| for _, l := range df.DecimalFormatLength { |
| if l.Type != "" { |
| continue |
| } |
| for _, f := range l.DecimalFormat { |
| add("decimal", decimal, f.Pattern) |
| } |
| } |
| } |
| for _, df := range ldml.Numbers.ScientificFormats { |
| for _, l := range df.ScientificFormatLength { |
| if l.Type != "" { |
| continue |
| } |
| for _, f := range l.ScientificFormat { |
| add("scientific", scientific, f.Pattern) |
| } |
| } |
| } |
| for _, df := range ldml.Numbers.PercentFormats { |
| for _, l := range df.PercentFormatLength { |
| if l.Type != "" { |
| continue |
| } |
| for _, f := range l.PercentFormat { |
| add("percent", percent, f.Pattern) |
| } |
| } |
| } |
| } |
| |
| // Complete the parent tag array to reflect inheritance. An index of 0 |
| // indicates an unspecified value. |
| for _, data := range [][]byte{decimal, scientific, percent} { |
| for i := range data { |
| p := compact.ID(i) |
| for ; data[p] == 0; p = p.Parent() { |
| } |
| data[i] = data[p] |
| } |
| } |
| w.WriteVar("tagToDecimal", decimal) |
| w.WriteVar("tagToScientific", scientific) |
| w.WriteVar("tagToPercent", percent) |
| |
| value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1) |
| // Break up the lines. This won't give ideal perfect formatting, but it is |
| // better than one huge line. |
| value = strings.Replace(value, ", ", ",\n", -1) |
| fmt.Fprintf(w, "var formats = %s\n", value) |
| } |