internal/format: add API for symbol and digit info

Also exported SymbolType and related constants.

Change-Id: I0a934d315fe1a1e4ab803ddfa4fdcd1f8ea6c0be
Reviewed-on: https://go-review.googlesource.com/19198
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/internal/format/common.go b/internal/format/common.go
index f458f5f..23c7e65 100644
--- a/internal/format/common.go
+++ b/internal/format/common.go
@@ -13,23 +13,24 @@
 	zero      [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
 }
 
-type symbolType int
+// A SymbolType identifies a symbol of a specific kind.
+type SymbolType int
 
 const (
-	symDecimal symbolType = iota
-	symGroup
-	symList
-	symPercentSign
-	symPlusSign
-	symMinusSign
-	symExponential
-	symSuperscriptingExponent
-	symPerMille
-	symInfinity
-	symNan
-	symTimeSeparator
+	SymDecimal SymbolType = iota
+	SymGroup
+	SymList
+	SymPercentSign
+	SymPlusSign
+	SymMinusSign
+	SymExponential
+	SymSuperscriptingExponent
+	SymPerMille
+	SymInfinity
+	SymNan
+	SymTimeSeparator
 
-	numSymbolTypes
+	NumSymbolTypes
 )
 
 type altSymData struct {
diff --git a/internal/format/gen.go b/internal/format/gen.go
index 98ace3d..dbc8445 100644
--- a/internal/format/gen.go
+++ b/internal/format/gen.go
@@ -145,7 +145,7 @@
 
 	nNumberSystems := numberSystem(len(numberSystemMap))
 
-	type symbols [numSymbolTypes]string
+	type symbols [NumSymbolTypes]string
 
 	type key struct {
 		tag          int // from language.CompactIndex
@@ -177,25 +177,25 @@
 				continue
 			}
 			symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
-				symDecimal:                getFirst("decimal", sym.Decimal),
-				symGroup:                  getFirst("group", sym.Group),
-				symList:                   getFirst("list", sym.List),
-				symPercentSign:            getFirst("percentSign", sym.PercentSign),
-				symPlusSign:               getFirst("plusSign", sym.PlusSign),
-				symMinusSign:              getFirst("minusSign", sym.MinusSign),
-				symExponential:            getFirst("exponential", sym.Exponential),
-				symSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent),
-				symPerMille:               getFirst("perMille", sym.PerMille),
-				symInfinity:               getFirst("infinity", sym.Infinity),
-				symNan:                    getFirst("nan", sym.Nan),
-				symTimeSeparator:          getFirst("timeSeparator", sym.TimeSeparator),
+				SymDecimal:                getFirst("decimal", sym.Decimal),
+				SymGroup:                  getFirst("group", sym.Group),
+				SymList:                   getFirst("list", sym.List),
+				SymPercentSign:            getFirst("percentSign", sym.PercentSign),
+				SymPlusSign:               getFirst("plusSign", sym.PlusSign),
+				SymMinusSign:              getFirst("minusSign", sym.MinusSign),
+				SymExponential:            getFirst("exponential", sym.Exponential),
+				SymSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent),
+				SymPerMille:               getFirst("perMille", sym.PerMille),
+				SymInfinity:               getFirst("infinity", sym.Infinity),
+				SymNan:                    getFirst("nan", sym.Nan),
+				SymTimeSeparator:          getFirst("timeSeparator", sym.TimeSeparator),
 			}
 		}
 	}
 
 	// Expand all values.
 	for k, syms := range symbolMap {
-		for t := symDecimal; t < numSymbolTypes; t++ {
+		for t := SymDecimal; t < NumSymbolTypes; t++ {
 			p := k.tag
 			for syms[t] == "" {
 				p = int(internal.Parent[p])
@@ -215,7 +215,7 @@
 	m := map[symbols]int{}
 	sb := stringset.NewBuilder()
 
-	symIndex := [][numSymbolTypes]byte{}
+	symIndex := [][NumSymbolTypes]byte{}
 
 	for ns := numberSystem(0); ns < nNumberSystems; ns++ {
 		for _, l := range data.Locales() {
@@ -227,8 +227,8 @@
 			if _, ok := m[*s]; !ok {
 				m[*s] = len(symIndex)
 				sb.Add(s[:]...)
-				var x [numSymbolTypes]byte
-				for i := symDecimal; i < numSymbolTypes; i++ {
+				var x [NumSymbolTypes]byte
+				for i := SymDecimal; i < NumSymbolTypes; i++ {
 					x[i] = byte(sb.Index((*s)[i]))
 				}
 				symIndex = append(symIndex, x)
diff --git a/internal/format/gen_common.go b/internal/format/gen_common.go
index 50f39b9..f095c31 100644
--- a/internal/format/gen_common.go
+++ b/internal/format/gen_common.go
@@ -17,23 +17,24 @@
 	zero      [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
 }
 
-type symbolType int
+// A SymbolType identifies a symbol of a specific kind.
+type SymbolType int
 
 const (
-	symDecimal symbolType = iota
-	symGroup
-	symList
-	symPercentSign
-	symPlusSign
-	symMinusSign
-	symExponential
-	symSuperscriptingExponent
-	symPerMille
-	symInfinity
-	symNan
-	symTimeSeparator
+	SymDecimal SymbolType = iota
+	SymGroup
+	SymList
+	SymPercentSign
+	SymPlusSign
+	SymMinusSign
+	SymExponential
+	SymSuperscriptingExponent
+	SymPerMille
+	SymInfinity
+	SymNan
+	SymTimeSeparator
 
-	numSymbolTypes
+	NumSymbolTypes
 )
 
 type altSymData struct {
diff --git a/internal/format/number.go b/internal/format/number.go
new file mode 100644
index 0000000..6971cdd
--- /dev/null
+++ b/internal/format/number.go
@@ -0,0 +1,134 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package format
+
+import (
+	"unicode/utf8"
+
+	"golang.org/x/text/internal"
+	"golang.org/x/text/language"
+)
+
+// NumberInfo holds number formatting configuration data.
+type NumberInfo struct {
+	system   numberSystemData // numbering system information
+	symIndex byte             // index to symbols
+}
+
+// NumberInfoFromLangID returns a NumberInfo for the given compact language
+// identifier and numbering system identifier. If numberSystem is the empty
+// string, the default numbering system will be taken for that language.
+func NumberInfoFromLangID(compactIndex int, numberSystem string) NumberInfo {
+	p := langToDefaults[compactIndex]
+	// Lookup the entry for the language.
+	pSymIndex := byte(0) // Default: Latin, default symbols
+	system, ok := numberSystemMap[numberSystem]
+	if !ok {
+		// Take the value for the default numbering system. This is by far the
+		// most common case as an alternative numbering system is hardly used.
+		if p&0x80 == 0 {
+			pSymIndex = p
+		} else {
+			// Take the first entry from the alternatives list.
+			data := langToAlt[p&^0x80]
+			pSymIndex = data.symIndex
+			system = data.numberSystem
+		}
+	} else {
+		langIndex := compactIndex
+		ns := system
+	outerLoop:
+		for {
+			if p&0x80 == 0 {
+				if ns == 0 {
+					// The index directly points to the symbol data.
+					pSymIndex = p
+					break
+				}
+				// Move to the parent and retry.
+				langIndex = int(internal.Parent[langIndex])
+			}
+			// The index points to a list of symbol data indexes.
+			for _, e := range langToAlt[p&^0x80:] {
+				if int(e.compactTag) != langIndex {
+					if langIndex == 0 {
+						// The CLDR root defines full symbol information for all
+						// numbering systems (even though mostly by means of
+						// aliases). This means that we will never fall back to
+						// the default of the language. Also, the loop is
+						// guaranteed to terminate as a consequence.
+						ns = numLatn
+						// Fall back to Latin and start from the original
+						// language. See
+						// http://unicode.org/reports/tr35/#Locale_Inheritance.
+						langIndex = compactIndex
+					} else {
+						// Fall back to parent.
+						langIndex = int(internal.Parent[langIndex])
+					}
+					break
+				}
+				if e.numberSystem == ns {
+					pSymIndex = e.symIndex
+					break outerLoop
+				}
+			}
+		}
+	}
+	if int(system) >= len(numSysData) { // algorithmic
+		// Will generate ASCII digits in case the user inadvertently calls
+		// WriteDigit or Digit on it.
+		d := numSysData[0]
+		d.id = system
+		return NumberInfo{
+			system:   d,
+			symIndex: pSymIndex,
+		}
+	}
+	return NumberInfo{
+		system:   numSysData[system],
+		symIndex: pSymIndex,
+	}
+}
+
+// NumberInfoFromTag returns a NumberInfo for the given language tag.
+func NumberInfoFromTag(t language.Tag) NumberInfo {
+	for {
+		if index, ok := language.CompactIndex(t); ok {
+			return NumberInfoFromLangID(index, t.TypeForKey("nu"))
+		}
+		t = t.Parent()
+	}
+}
+
+// IsDecimal reports whether the numbering system can convert decimal digits
+// to native symbols one-to-one.
+func (n NumberInfo) IsDecimal() bool {
+	return int(n.system.id) < len(numSysData)
+}
+
+// WriteDigit writes to dst the UTF-8 sequence of the digit in n's numbering
+// system that corresponds to the given ASCII digit and reports the number of
+// bytes written. dst must be large enough to hold the rune (up to utf8.UTFMax bytes).
+func (n NumberInfo) WriteDigit(dst []byte, asciiDigit rune) int {
+	copy(dst, n.system.zero[:n.system.digitSize])
+	dst[n.system.digitSize-1] += byte(asciiDigit - '0')
+	return int(n.system.digitSize)
+}
+
+// Digit returns the digit for the numbering system for the corresponding ASCII
+// value. For example, ni.Digit('3') could return '三'. Note that the argument
+// is the rune constant '3', which equals 51, not the integer constant 3.
+func (n NumberInfo) Digit(asciiDigit rune) rune {
+	var x [utf8.UTFMax]byte
+	n.WriteDigit(x[:], asciiDigit)
+	r, _ := utf8.DecodeRune(x[:])
+	return r
+}
+
+// Symbol returns the string for the given symbol type.
+func (n NumberInfo) Symbol(t SymbolType) string {
+	return symData.Elem(int(symIndex[n.symIndex][t]))
+}
diff --git a/internal/format/number_test.go b/internal/format/number_test.go
new file mode 100644
index 0000000..62431b3
--- /dev/null
+++ b/internal/format/number_test.go
@@ -0,0 +1,57 @@
+package format
+
+import (
+	"testing"
+
+	"golang.org/x/text/language"
+)
+
+func TestNumberInfo(t *testing.T) {
+	testCases := []struct {
+		lang     string
+		sym      SymbolType
+		wantSym  string
+		wantNine rune
+	}{
+		{"und", SymDecimal, ".", '9'},
+		{"de", SymGroup, ".", '9'},
+		{"de-BE", SymGroup, ".", '9'},          // inherits from de (no number data in CLDR)
+		{"de-BE-oxendict", SymGroup, ".", '9'}, // inherits from de (no compact index)
+
+		// U+096F DEVANAGARI DIGIT NINE ('९')
+		{"de-BE-u-nu-deva", SymGroup, ".", '\u096f'}, // miss -> latn -> de
+		{"de-Cyrl-BE", SymGroup, ",", '9'},           // inherits from root
+		{"de-CH", SymGroup, "'", '9'},                // overrides values in de
+		{"de-CH-oxendict", SymGroup, "'", '9'},       // inherits from de-CH (no compact index)
+		{"de-CH-u-nu-deva", SymGroup, "'", '\u096f'}, // miss -> latn -> de-CH
+
+		{"pa", SymExponential, "E", '9'},
+
+		// "×۱۰^" -> U+00d7 U+06f1 U+06f0 '^'
+		// U+06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
+		// U+06F1 EXTENDED ARABIC-INDIC DIGIT ONE
+		// U+06F9 EXTENDED ARABIC-INDIC DIGIT NINE
+		{"pa-u-nu-arabext", SymExponential, "\u00d7\u06f1\u06f0^", '\u06f9'},
+
+		// "གྲངས་མེད" -> U+0f42 U+0fb2 U+0f44 U+0f66 U+0f0b U+0f58 U+0f7a U+0f51
+		// Examples:
+		// U+0F29 TIBETAN DIGIT NINE (༩)
+		{"dz", SymInfinity, "\u0f42\u0fb2\u0f44\u0f66\u0f0b\u0f58\u0f7a\u0f51", '\u0f29'}, // defaults to tibt
+		{"dz-u-nu-latn", SymInfinity, "∞", '9'},                                           // select alternative
+		{"dz-u-nu-tibt", SymInfinity, "\u0f42\u0fb2\u0f44\u0f66\u0f0b\u0f58\u0f7a\u0f51", '\u0f29'},
+		{"en-u-nu-tibt", SymInfinity, "∞", '\u0f29'},
+
+		// Algorithmic numbering systems fall back to ASCII digits if Digit is used.
+		{"en-u-nu-hanidec", SymPlusSign, "+", '9'},
+		{"en-u-nu-roman", SymPlusSign, "+", '9'},
+	}
+	for _, tc := range testCases {
+		info := NumberInfoFromTag(language.MustParse(tc.lang))
+		if got := info.Symbol(tc.sym); got != tc.wantSym {
+			t.Errorf("%s:%v:sym: got %q; want %q", tc.lang, tc.sym, got, tc.wantSym)
+		}
+		if got := info.Digit('9'); got != tc.wantNine {
+			t.Errorf("%s:%v:nine: got %q; want %q", tc.lang, tc.sym, got, tc.wantNine)
+		}
+	}
+}
diff --git a/internal/format/tables_test.go b/internal/format/tables_test.go
index 30d4465..888eaf1 100644
--- a/internal/format/tables_test.go
+++ b/internal/format/tables_test.go
@@ -9,9 +9,7 @@
 	"log"
 	"reflect"
 	"testing"
-	"unicode/utf8"
 
-	"golang.org/x/text/internal"
 	"golang.org/x/text/internal/gen"
 	"golang.org/x/text/internal/testtext"
 	"golang.org/x/text/language"
@@ -41,14 +39,10 @@
 		if int(n) >= len(numSysData) {
 			continue
 		}
-		d := numSysData[n]
-		val := byte(0)
+		info := NumberInfoFromLangID(0, ns.Id)
+		val := '0'
 		for _, rWant := range ns.Digits {
-			var x [utf8.UTFMax]byte
-			copy(x[:], d.zero[:d.digitSize])
-			x[d.digitSize-1] += val
-			rGot, _ := utf8.DecodeRune(x[:])
-			if rGot != rWant {
+			if rGot := info.Digit(val); rGot != rWant {
 				t.Errorf("%s:%d: got %U; want %U", ns.Id, val, rGot, rWant)
 			}
 			val++
@@ -94,22 +88,23 @@
 			}
 			testCases := []struct {
 				name string
-				st   symbolType
+				st   SymbolType
 				x    interface{}
 			}{
-				{"Decimal", symDecimal, sym.Decimal},
-				{"Group", symGroup, sym.Group},
-				{"List", symList, sym.List},
-				{"PercentSign", symPercentSign, sym.PercentSign},
-				{"PlusSign", symPlusSign, sym.PlusSign},
-				{"MinusSign", symMinusSign, sym.MinusSign},
-				{"Exponential", symExponential, sym.Exponential},
-				{"SuperscriptingExponent", symSuperscriptingExponent, sym.SuperscriptingExponent},
-				{"PerMille", symPerMille, sym.PerMille},
-				{"Infinity", symInfinity, sym.Infinity},
-				{"NaN", symNan, sym.Nan},
-				{"TimeSeparator", symTimeSeparator, sym.TimeSeparator},
+				{"Decimal", SymDecimal, sym.Decimal},
+				{"Group", SymGroup, sym.Group},
+				{"List", SymList, sym.List},
+				{"PercentSign", SymPercentSign, sym.PercentSign},
+				{"PlusSign", SymPlusSign, sym.PlusSign},
+				{"MinusSign", SymMinusSign, sym.MinusSign},
+				{"Exponential", SymExponential, sym.Exponential},
+				{"SuperscriptingExponent", SymSuperscriptingExponent, sym.SuperscriptingExponent},
+				{"PerMille", SymPerMille, sym.PerMille},
+				{"Infinity", SymInfinity, sym.Infinity},
+				{"NaN", SymNan, sym.Nan},
+				{"TimeSeparator", SymTimeSeparator, sym.TimeSeparator},
 			}
+			info := NumberInfoFromLangID(langIndex, sym.NumberSystem)
 			for _, tc := range testCases {
 				// Extract the wanted value.
 				v := reflect.ValueOf(tc.x)
@@ -120,36 +115,7 @@
 					t.Fatalf("Multiple values of %q within single symbol not supported.", tc.name)
 				}
 				want := v.Index(0).MethodByName("Data").Call(nil)[0].String()
-
-				// Extract the value from the table.
-				ns := numberSystemMap[sym.NumberSystem]
-				strIndex := -1
-				for strIndex == -1 {
-					index := langToDefaults[langIndex]
-					if index&0x80 == 0 && ns == 0 {
-						// The index directly points to the symbol data.
-						strIndex = int(symIndex[index][tc.st])
-						continue
-					}
-					// The index points to a list of symbol data indexes.
-					for _, e := range langToAlt[index&^0x80:] {
-						if int(e.compactTag) != langIndex {
-							if langIndex == 0 {
-								// Fall back to Latin.
-								ns = 0
-							} else {
-								// Fall back to parent.
-								langIndex = int(internal.Parent[langIndex])
-							}
-							break
-						}
-						if e.numberSystem == ns {
-							strIndex = int(symIndex[e.symIndex][tc.st])
-							break
-						}
-					}
-				}
-				got := symData.Elem(strIndex)
+				got := info.Symbol(tc.st)
 				if got != want {
 					t.Errorf("%s:%s:%s: got %q; want %q", lang, sym.NumberSystem, tc.name, got, want)
 				}