internal/format: add API for symbol and digit info
Also exported SymbolType and related constants.
Change-Id: I0a934d315fe1a1e4ab803ddfa4fdcd1f8ea6c0be
Reviewed-on: https://go-review.googlesource.com/19198
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/internal/format/common.go b/internal/format/common.go
index f458f5f..23c7e65 100644
--- a/internal/format/common.go
+++ b/internal/format/common.go
@@ -13,23 +13,24 @@
zero [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
}
-type symbolType int
+// A SymbolType identifies a symbol of a specific kind.
+type SymbolType int
const (
- symDecimal symbolType = iota
- symGroup
- symList
- symPercentSign
- symPlusSign
- symMinusSign
- symExponential
- symSuperscriptingExponent
- symPerMille
- symInfinity
- symNan
- symTimeSeparator
+ SymDecimal SymbolType = iota
+ SymGroup
+ SymList
+ SymPercentSign
+ SymPlusSign
+ SymMinusSign
+ SymExponential
+ SymSuperscriptingExponent
+ SymPerMille
+ SymInfinity
+ SymNan
+ SymTimeSeparator
- numSymbolTypes
+ NumSymbolTypes
)
type altSymData struct {
diff --git a/internal/format/gen.go b/internal/format/gen.go
index 98ace3d..dbc8445 100644
--- a/internal/format/gen.go
+++ b/internal/format/gen.go
@@ -145,7 +145,7 @@
nNumberSystems := numberSystem(len(numberSystemMap))
- type symbols [numSymbolTypes]string
+ type symbols [NumSymbolTypes]string
type key struct {
tag int // from language.CompactIndex
@@ -177,25 +177,25 @@
continue
}
symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
- symDecimal: getFirst("decimal", sym.Decimal),
- symGroup: getFirst("group", sym.Group),
- symList: getFirst("list", sym.List),
- symPercentSign: getFirst("percentSign", sym.PercentSign),
- symPlusSign: getFirst("plusSign", sym.PlusSign),
- symMinusSign: getFirst("minusSign", sym.MinusSign),
- symExponential: getFirst("exponential", sym.Exponential),
- symSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent),
- symPerMille: getFirst("perMille", sym.PerMille),
- symInfinity: getFirst("infinity", sym.Infinity),
- symNan: getFirst("nan", sym.Nan),
- symTimeSeparator: getFirst("timeSeparator", sym.TimeSeparator),
+ SymDecimal: getFirst("decimal", sym.Decimal),
+ SymGroup: getFirst("group", sym.Group),
+ SymList: getFirst("list", sym.List),
+ SymPercentSign: getFirst("percentSign", sym.PercentSign),
+ SymPlusSign: getFirst("plusSign", sym.PlusSign),
+ SymMinusSign: getFirst("minusSign", sym.MinusSign),
+ SymExponential: getFirst("exponential", sym.Exponential),
+ SymSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent),
+ SymPerMille: getFirst("perMille", sym.PerMille),
+ SymInfinity: getFirst("infinity", sym.Infinity),
+ SymNan: getFirst("nan", sym.Nan),
+ SymTimeSeparator: getFirst("timeSeparator", sym.TimeSeparator),
}
}
}
// Expand all values.
for k, syms := range symbolMap {
- for t := symDecimal; t < numSymbolTypes; t++ {
+ for t := SymDecimal; t < NumSymbolTypes; t++ {
p := k.tag
for syms[t] == "" {
p = int(internal.Parent[p])
@@ -215,7 +215,7 @@
m := map[symbols]int{}
sb := stringset.NewBuilder()
- symIndex := [][numSymbolTypes]byte{}
+ symIndex := [][NumSymbolTypes]byte{}
for ns := numberSystem(0); ns < nNumberSystems; ns++ {
for _, l := range data.Locales() {
@@ -227,8 +227,8 @@
if _, ok := m[*s]; !ok {
m[*s] = len(symIndex)
sb.Add(s[:]...)
- var x [numSymbolTypes]byte
- for i := symDecimal; i < numSymbolTypes; i++ {
+ var x [NumSymbolTypes]byte
+ for i := SymDecimal; i < NumSymbolTypes; i++ {
x[i] = byte(sb.Index((*s)[i]))
}
symIndex = append(symIndex, x)
diff --git a/internal/format/gen_common.go b/internal/format/gen_common.go
index 50f39b9..f095c31 100644
--- a/internal/format/gen_common.go
+++ b/internal/format/gen_common.go
@@ -17,23 +17,24 @@
zero [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
}
-type symbolType int
+// A SymbolType identifies a symbol of a specific kind.
+type SymbolType int
const (
- symDecimal symbolType = iota
- symGroup
- symList
- symPercentSign
- symPlusSign
- symMinusSign
- symExponential
- symSuperscriptingExponent
- symPerMille
- symInfinity
- symNan
- symTimeSeparator
+ SymDecimal SymbolType = iota
+ SymGroup
+ SymList
+ SymPercentSign
+ SymPlusSign
+ SymMinusSign
+ SymExponential
+ SymSuperscriptingExponent
+ SymPerMille
+ SymInfinity
+ SymNan
+ SymTimeSeparator
- numSymbolTypes
+ NumSymbolTypes
)
type altSymData struct {
diff --git a/internal/format/number.go b/internal/format/number.go
new file mode 100644
index 0000000..6971cdd
--- /dev/null
+++ b/internal/format/number.go
@@ -0,0 +1,134 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package format
+
+import (
+ "unicode/utf8"
+
+ "golang.org/x/text/internal"
+ "golang.org/x/text/language"
+)
+
+// NumberInfo holds number formatting configuration data: a numbering system and its symbols.
+type NumberInfo struct {
+ system numberSystemData // numbering system information (id and encoding of its zero digit)
+ symIndex byte // index into the symIndex table of the symbols to use
+}
+
+// NumberInfoFromLangID returns a NumberInfo for the given compact language
+// identifier and numbering system identifier. If numberSystem is the empty
+// string, the default numbering system will be taken for that language.
+// Otherwise its symbols are searched for in the language and then in each of
+// its parents; if none has them, the symbols for Latin are used instead, while
+// the returned numbering system stays the requested one.
+func NumberInfoFromLangID(compactIndex int, numberSystem string) NumberInfo {
+	p := langToDefaults[compactIndex]
+	// Lookup the entry for the language.
+	pSymIndex := byte(0) // Default: Latin, default symbols
+	system, ok := numberSystemMap[numberSystem]
+	if !ok {
+		// Take the value for the default numbering system. This is by far the
+		// most common case as an alternative numbering system is hardly used.
+		if p&0x80 == 0 {
+			pSymIndex = p
+		} else {
+			// Take the first entry from the alternatives list.
+			data := langToAlt[p&^0x80]
+			pSymIndex = data.symIndex
+			system = data.numberSystem
+		}
+	} else {
+		langIndex := compactIndex
+		ns := system
+		// Reload p on every pass: it must describe langIndex, which changes as
+		// the search moves to a parent or restarts for Latin.
+	outerLoop:
+		for ; ; p = langToDefaults[langIndex] {
+			if p&0x80 == 0 {
+				if ns == 0 {
+					// The index directly points to the symbol data.
+					pSymIndex = p
+					break
+				}
+				// Move to the parent and retry.
+				langIndex = int(internal.Parent[langIndex])
+				continue
+			}
+			// The index points to a list of symbol data indexes.
+			for _, e := range langToAlt[p&^0x80:] {
+				if int(e.compactTag) != langIndex {
+					if langIndex == 0 {
+						// The CLDR root defines full symbol information for all
+						// numbering systems (even though mostly by means of
+						// aliases). This means that we will never fall back to
+						// the default of the language. Also, the loop is
+						// guaranteed to terminate as a consequence.
+						ns = numLatn
+						// Fall back to Latin and start from the original
+						// language. See
+						// http://unicode.org/reports/tr35/#Locale_Inheritance.
+						langIndex = compactIndex
+					} else {
+						// Fall back to parent.
+						langIndex = int(internal.Parent[langIndex])
+					}
+					break
+				}
+				if e.numberSystem == ns {
+					pSymIndex = e.symIndex
+					break outerLoop
+				}
+			}
+		}
+	}
+	if int(system) < len(numSysData) {
+		return NumberInfo{system: numSysData[system], symIndex: pSymIndex}
+	}
+	// Algorithmic system: numSysData[0] makes WriteDigit and Digit generate
+	// ASCII digits in case the user inadvertently calls them.
+	d := numSysData[0]
+	d.id = system
+	return NumberInfo{system: d, symIndex: pSymIndex}
+}
+
+// NumberInfoFromTag returns a NumberInfo for t, using its "nu" type as the numbering system.
+func NumberInfoFromTag(t language.Tag) NumberInfo {
+	id, found := language.CompactIndex(t)
+	for !found { // no compact index for t: retry with its parent
+		t = t.Parent()
+		id, found = language.CompactIndex(t)
+	}
+	return NumberInfoFromLangID(id, t.TypeForKey("nu"))
+}
+
+// IsDecimal reports whether the numbering system maps decimal digits to native
+// symbols one-to-one, as opposed to being an algorithmic system.
+func (n NumberInfo) IsDecimal() bool {
+	return len(numSysData) > int(n.system.id)
+}
+
+// WriteDigit writes to dst the UTF-8 sequence of the digit that n uses for the
+// given ASCII digit and returns its size; dst needs room for up to utf8.UTFMax bytes.
+func (n NumberInfo) WriteDigit(dst []byte, asciiDigit rune) int {
+	size := int(n.system.digitSize)
+	copy(dst, n.system.zero[:size])
+	dst[size-1] += byte(asciiDigit - '0') // offset the last byte of the zero digit
+	return size
+}
+
+// Digit returns the rune that the numbering system uses for the given ASCII
+// digit. For example, ni.Digit('3') could return '三'. The argument is the rune
+// constant '3' (which equals 51), not the integer constant 3.
+func (n NumberInfo) Digit(asciiDigit rune) rune {
+	var enc [utf8.UTFMax]byte
+	n.WriteDigit(enc[:], asciiDigit)
+	digit, _ := utf8.DecodeRune(enc[:])
+	return digit
+}
+
+// Symbol returns the string that n's set of symbols defines for symbol type t.
+func (n NumberInfo) Symbol(t SymbolType) string {
+ return symData.Elem(int(symIndex[n.symIndex][t]))
+}
diff --git a/internal/format/number_test.go b/internal/format/number_test.go
new file mode 100644
index 0000000..62431b3
--- /dev/null
+++ b/internal/format/number_test.go
@@ -0,0 +1,57 @@
+package format
+
+import (
+ "testing"
+
+ "golang.org/x/text/language"
+)
+
+func TestNumberInfo(t *testing.T) {
+	tests := []struct {
+		lang     string
+		sym      SymbolType
+		wantSym  string
+		wantNine rune
+	}{
+		{"und", SymDecimal, ".", '9'},
+		{"de", SymGroup, ".", '9'},
+		{"de-BE", SymGroup, ".", '9'},          // inherits from de (no number data in CLDR)
+		{"de-BE-oxendict", SymGroup, ".", '9'}, // inherits from de (no compact index)
+
+		// U+096F DEVANAGARI DIGIT NINE ('९')
+		{"de-BE-u-nu-deva", SymGroup, ".", '\u096f'}, // miss -> latn -> de
+		{"de-Cyrl-BE", SymGroup, ",", '9'},           // inherits from root
+		{"de-CH", SymGroup, "'", '9'},                // overrides values in de
+		{"de-CH-oxendict", SymGroup, "'", '9'},       // inherits from de-CH (no compact index)
+		{"de-CH-u-nu-deva", SymGroup, "'", '\u096f'}, // miss -> latn -> de-CH
+
+		{"pa", SymExponential, "E", '9'},
+
+		// "×۱۰^" -> U+00d7 U+06f1 U+06f0 followed by '^'
+		// U+06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
+		// U+06F1 EXTENDED ARABIC-INDIC DIGIT ONE
+		// U+06F9 EXTENDED ARABIC-INDIC DIGIT NINE
+		{"pa-u-nu-arabext", SymExponential, "\u00d7\u06f1\u06f0^", '\u06f9'},
+
+		// "གྲངས་མེད" -> U+0f42 U+0fb2 U+0f44 U+0f66 U+0f0b U+0f58 U+0f7a U+0f51
+		// Digit used below:
+		// U+0F29 TIBETAN DIGIT NINE (༩)
+		{"dz", SymInfinity, "\u0f42\u0fb2\u0f44\u0f66\u0f0b\u0f58\u0f7a\u0f51", '\u0f29'}, // defaults to tibt
+		{"dz-u-nu-latn", SymInfinity, "∞", '9'},                                           // select alternative
+		{"dz-u-nu-tibt", SymInfinity, "\u0f42\u0fb2\u0f44\u0f66\u0f0b\u0f58\u0f7a\u0f51", '\u0f29'},
+		{"en-u-nu-tibt", SymInfinity, "∞", '\u0f29'},
+
+		// Algorithmic number systems fall back to ASCII digits if Digit is used.
+		{"en-u-nu-hanidec", SymPlusSign, "+", '9'},
+		{"en-u-nu-roman", SymPlusSign, "+", '9'},
+	}
+	for _, tt := range tests {
+		ni := NumberInfoFromTag(language.MustParse(tt.lang))
+		if sym := ni.Symbol(tt.sym); sym != tt.wantSym {
+			t.Errorf("%s:%v:sym: got %q; want %q", tt.lang, tt.sym, sym, tt.wantSym)
+		}
+		if nine := ni.Digit('9'); nine != tt.wantNine {
+			t.Errorf("%s:%v:nine: got %q; want %q", tt.lang, tt.sym, nine, tt.wantNine)
+		}
+	}
+}
diff --git a/internal/format/tables_test.go b/internal/format/tables_test.go
index 30d4465..888eaf1 100644
--- a/internal/format/tables_test.go
+++ b/internal/format/tables_test.go
@@ -9,9 +9,7 @@
"log"
"reflect"
"testing"
- "unicode/utf8"
- "golang.org/x/text/internal"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
@@ -41,14 +39,10 @@
if int(n) >= len(numSysData) {
continue
}
- d := numSysData[n]
- val := byte(0)
+ info := NumberInfoFromLangID(0, ns.Id)
+ val := '0'
for _, rWant := range ns.Digits {
- var x [utf8.UTFMax]byte
- copy(x[:], d.zero[:d.digitSize])
- x[d.digitSize-1] += val
- rGot, _ := utf8.DecodeRune(x[:])
- if rGot != rWant {
+ if rGot := info.Digit(val); rGot != rWant {
t.Errorf("%s:%d: got %U; want %U", ns.Id, val, rGot, rWant)
}
val++
@@ -94,22 +88,23 @@
}
testCases := []struct {
name string
- st symbolType
+ st SymbolType
x interface{}
}{
- {"Decimal", symDecimal, sym.Decimal},
- {"Group", symGroup, sym.Group},
- {"List", symList, sym.List},
- {"PercentSign", symPercentSign, sym.PercentSign},
- {"PlusSign", symPlusSign, sym.PlusSign},
- {"MinusSign", symMinusSign, sym.MinusSign},
- {"Exponential", symExponential, sym.Exponential},
- {"SuperscriptingExponent", symSuperscriptingExponent, sym.SuperscriptingExponent},
- {"PerMille", symPerMille, sym.PerMille},
- {"Infinity", symInfinity, sym.Infinity},
- {"NaN", symNan, sym.Nan},
- {"TimeSeparator", symTimeSeparator, sym.TimeSeparator},
+ {"Decimal", SymDecimal, sym.Decimal},
+ {"Group", SymGroup, sym.Group},
+ {"List", SymList, sym.List},
+ {"PercentSign", SymPercentSign, sym.PercentSign},
+ {"PlusSign", SymPlusSign, sym.PlusSign},
+ {"MinusSign", SymMinusSign, sym.MinusSign},
+ {"Exponential", SymExponential, sym.Exponential},
+ {"SuperscriptingExponent", SymSuperscriptingExponent, sym.SuperscriptingExponent},
+ {"PerMille", SymPerMille, sym.PerMille},
+ {"Infinity", SymInfinity, sym.Infinity},
+ {"NaN", SymNan, sym.Nan},
+ {"TimeSeparator", SymTimeSeparator, sym.TimeSeparator},
}
+ info := NumberInfoFromLangID(langIndex, sym.NumberSystem)
for _, tc := range testCases {
// Extract the wanted value.
v := reflect.ValueOf(tc.x)
@@ -120,36 +115,7 @@
t.Fatalf("Multiple values of %q within single symbol not supported.", tc.name)
}
want := v.Index(0).MethodByName("Data").Call(nil)[0].String()
-
- // Extract the value from the table.
- ns := numberSystemMap[sym.NumberSystem]
- strIndex := -1
- for strIndex == -1 {
- index := langToDefaults[langIndex]
- if index&0x80 == 0 && ns == 0 {
- // The index directly points to the symbol data.
- strIndex = int(symIndex[index][tc.st])
- continue
- }
- // The index points to a list of symbol data indexes.
- for _, e := range langToAlt[index&^0x80:] {
- if int(e.compactTag) != langIndex {
- if langIndex == 0 {
- // Fall back to Latin.
- ns = 0
- } else {
- // Fall back to parent.
- langIndex = int(internal.Parent[langIndex])
- }
- break
- }
- if e.numberSystem == ns {
- strIndex = int(symIndex[e.symIndex][tc.st])
- break
- }
- }
- }
- got := symData.Elem(strIndex)
+ got := info.Symbol(tc.st)
if got != want {
t.Errorf("%s:%s:%s: got %q; want %q", lang, sym.NumberSystem, tc.name, got, want)
}