language/internal: more exposing of internals

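Also simplify variants and extensions: Tag.Variants now returns the raw
variant substring instead of a []Variant, and the Extension wrapper type
is replaced by plain strings.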

Change-Id: I3dfb347057302a7e5947ed35cbb29aa83d22ec0e
Reviewed-on: https://go-review.googlesource.com/95819
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/internal/language.go b/language/internal/language.go
index b0f0b6c..cc9bd4c 100644
--- a/language/internal/language.go
+++ b/language/internal/language.go
@@ -169,19 +169,34 @@
 	return err
 }
 
-// Variant returns the variants specified explicitly for this language tag.
-// or nil if no variant was specified.
-func (t Tag) Variants() []Variant {
-	v := []Variant{}
-	if int(t.pVariant) < int(t.pExt) {
-		for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; {
-			x, str = nextToken(str)
-			v = append(v, Variant{x})
-		}
+// Variants returns the part of the tag holding all variants or the empty string
+// if there are no variants defined.
+func (t Tag) Variants() string {
+	if t.pVariant == 0 {
+		return ""
 	}
-	return v
+	return t.str[t.pVariant:t.pExt]
 }
 
+// VariantOrPrivateTagStr returns variants or private use tags.
+func (t Tag) VariantOrPrivateTagStr() string {
+	if t.pExt > 0 {
+		return t.str[t.pVariant:t.pExt]
+	}
+	return t.str[t.pVariant:]
+}
+
+// HasString reports whether this tag defines more than just the raw
+// components.
+func (t Tag) HasString() bool {
+	return t.str != ""
+}
+
+// // IsPrivateUse reports whether this is a tag starting with x-.
+// func (t Tag) IsPrivateUse() bool {
+// 	return t.str != "" && strings.HasPrefix(t.str, "x-")
+// }
+
 // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
 // specific language are substituted with fields from the parent language.
 // The parent for a language may change for newer versions of CLDR.
@@ -239,77 +254,52 @@
 	return und
 }
 
-// returns token t and the rest of the string.
-func nextToken(s string) (t, tail string) {
-	p := strings.Index(s[1:], "-")
-	if p == -1 {
-		return s[1:], ""
-	}
-	p++
-	return s[1:p], s[p:]
-}
-
-// Extension is a single BCP 47 extension.
-type Extension struct {
-	s string
-}
-
-// String returns the string representation of the extension, including the
-// type tag.
-func (e Extension) String() string {
-	return e.s
-}
-
 // ParseExtension parses s as an extension and returns it on success.
-func ParseExtension(s string) (e Extension, err error) {
+func ParseExtension(s string) (ext string, err error) {
 	scan := makeScannerString(s)
 	var end int
 	if n := len(scan.token); n != 1 {
-		return Extension{}, errSyntax
+		return "", ErrSyntax
 	}
 	scan.toLower(0, len(scan.b))
 	end = parseExtension(&scan)
 	if end != len(s) {
-		return Extension{}, errSyntax
+		return "", ErrSyntax
 	}
-	return Extension{string(scan.b)}, nil
+	return string(scan.b), nil
 }
 
-// Type returns the one-byte extension type of e. It returns 0 for the zero
-// exception.
-func (e Extension) Type() byte {
-	if e.s == "" {
-		return 0
-	}
-	return e.s[0]
+// HasVariants reports whether t has variants.
+func (t Tag) HasVariants() bool {
+	return uint16(t.pVariant) < t.pExt
 }
 
-// Tokens returns the list of tokens of e.
-func (e Extension) Tokens() []string {
-	return strings.Split(e.s, "-")
+// HasExtensions reports whether t has extensions.
+func (t Tag) HasExtensions() bool {
+	return int(t.pExt) < len(t.str)
 }
 
 // Extension returns the extension of type x for tag t. It will return
 // false for ok if t does not have the requested extension. The returned
 // extension will be invalid in this case.
-func (t Tag) Extension(x byte) (ext Extension, ok bool) {
+func (t Tag) Extension(x byte) (ext string, ok bool) {
 	for i := int(t.pExt); i < len(t.str)-1; {
 		var ext string
 		i, ext = getExtension(t.str, i)
 		if ext[0] == x {
-			return Extension{ext}, true
+			return ext, true
 		}
 	}
-	return Extension{}, false
+	return "", false
 }
 
 // Extensions returns all extensions of t.
-func (t Tag) Extensions() []Extension {
-	e := []Extension{}
+func (t Tag) Extensions() []string {
+	e := []string{}
 	for i := int(t.pExt); i < len(t.str)-1; {
 		var ext string
 		i, ext = getExtension(t.str, i)
-		e = append(e, Extension{ext})
+		e = append(e, ext)
 	}
 	return e
 }
@@ -482,7 +472,7 @@
 // or another error if another error occurred.
 func ParseBase(s string) (Language, error) {
 	if n := len(s); n < 2 || 3 < n {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	var buf [3]byte
 	return getLangID(buf[:copy(buf[:], s)])
@@ -493,7 +483,7 @@
 // or another error if another error occurred.
 func ParseScript(s string) (Script, error) {
 	if len(s) != 4 {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	var buf [4]byte
 	return getScriptID(script, buf[:copy(buf[:], s)])
@@ -510,7 +500,7 @@
 // or another error if another error occurred.
 func ParseRegion(s string) (Region, error) {
 	if n := len(s); n < 2 || 3 < n {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	var buf [3]byte
 	return getRegionID(buf[:copy(buf[:], s)])
@@ -591,20 +581,21 @@
 
 // Variant represents a registered variant of a language as defined by BCP 47.
 type Variant struct {
-	variant string
+	ID  uint8
+	str string
 }
 
 // ParseVariant parses and returns a Variant. An error is returned if s is not
 // a valid variant.
 func ParseVariant(s string) (Variant, error) {
 	s = strings.ToLower(s)
-	if _, ok := variantIndex[s]; ok {
-		return Variant{s}, nil
+	if id, ok := variantIndex[s]; ok {
+		return Variant{id, s}, nil
 	}
 	return Variant{}, mkErrInvalid([]byte(s))
 }
 
 // String returns the string representation of the variant.
 func (v Variant) String() string {
-	return v.variant
+	return v.str
 }
diff --git a/language/internal/lookup.go b/language/internal/lookup.go
index 73fc34b..977861b 100644
--- a/language/internal/lookup.go
+++ b/language/internal/lookup.go
@@ -17,7 +17,7 @@
 // if it could not be found.
 func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
 	if !tag.FixCase(form, key) {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	i := idx.Index(key)
 	if i == -1 {
@@ -43,6 +43,10 @@
 	return getLangISO3(s)
 }
 
+func (id Language) Canonicalize() (Language, AliasType) {
+	return normLang(id)
+}
+
 // mapLang returns the mapped langID of id according to mapping m.
 func normLang(id Language) (Language, AliasType) {
 	k := sort.Search(len(langAliasMap), func(i int) bool {
@@ -58,7 +62,7 @@
 // or unknownLang if this does not exist.
 func getLangISO2(s []byte) (Language, error) {
 	if !tag.FixCase("zz", s) {
-		return 0, errSyntax
+		return 0, ErrSyntax
 	}
 	if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
 		return Language(i), nil
@@ -118,7 +122,7 @@
 		}
 		return 0, mkErrInvalid(s)
 	}
-	return 0, errSyntax
+	return 0, ErrSyntax
 }
 
 // stringToBuf writes the string to b and returns the number of bytes
@@ -228,7 +232,7 @@
 		}
 		return 0, mkErrInvalid(s)
 	}
-	return 0, errSyntax
+	return 0, ErrSyntax
 }
 
 func getRegionM49(n int) (Region, error) {
diff --git a/language/internal/lookup_test.go b/language/internal/lookup_test.go
index 7c0dad1..d60dd88 100644
--- a/language/internal/lookup_test.go
+++ b/language/internal/lookup_test.go
@@ -19,10 +19,10 @@
 		id, bcp47, iso3, norm string
 		err                   error
 	}{
-		{id: "", bcp47: "und", iso3: "und", err: errSyntax},
-		{id: "  ", bcp47: "und", iso3: "und", err: errSyntax},
-		{id: "   ", bcp47: "und", iso3: "und", err: errSyntax},
-		{id: "    ", bcp47: "und", iso3: "und", err: errSyntax},
+		{id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "  ", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "   ", bcp47: "und", iso3: "und", err: ErrSyntax},
+		{id: "    ", bcp47: "und", iso3: "und", err: ErrSyntax},
 		{id: "xxx", bcp47: "und", iso3: "und", err: mkErrInvalid([]byte("xxx"))},
 		{id: "und", bcp47: "und", iso3: "und"},
 		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
diff --git a/language/internal/match.go b/language/internal/match.go
index 37f4eb6..bcefcd9 100644
--- a/language/internal/match.go
+++ b/language/internal/match.go
@@ -224,18 +224,3 @@
 	}
 	return t, nil
 }
-
-func (t Tag) variants() string {
-	if t.pVariant == 0 {
-		return ""
-	}
-	return t.str[t.pVariant:t.pExt]
-}
-
-// variantOrPrivateTagStr returns variants or private use tags.
-func (t Tag) variantOrPrivateTagStr() string {
-	if t.pExt > 0 {
-		return t.str[t.pVariant:t.pExt]
-	}
-	return t.str[t.pVariant:]
-}
diff --git a/language/internal/parse.go b/language/internal/parse.go
index c482eed..94e7271 100644
--- a/language/internal/parse.go
+++ b/language/internal/parse.go
@@ -29,10 +29,10 @@
 	return true
 }
 
-// errSyntax is returned by any of the parsing functions when the
+// ErrSyntax is returned by any of the parsing functions when the
 // input is not well-formed, according to BCP 47.
 // TODO: return the position at which the syntax error occurred?
-var errSyntax = errors.New("language: tag is not well-formed")
+var ErrSyntax = errors.New("language: tag is not well-formed")
 
 // ValueError is returned by any of the parsing functions when the
 // input is well-formed but the respective subtag is not recognized
@@ -116,7 +116,7 @@
 }
 
 func (s *scanner) setError(e error) {
-	if s.err == nil || (e == errSyntax && s.err != errSyntax) {
+	if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
 		s.err = e
 	}
 }
@@ -163,7 +163,7 @@
 
 // deleteRange removes the given range from s.b before the current token.
 func (s *scanner) deleteRange(start, end int) {
-	s.setError(errSyntax)
+	s.setError(ErrSyntax)
 	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
 	diff := end - start
 	s.next -= diff
@@ -190,14 +190,14 @@
 		}
 		token := s.b[s.start:s.end]
 		if i < 1 || i > 8 || !isAlphaNum(token) {
-			s.gobble(errSyntax)
+			s.gobble(ErrSyntax)
 			continue
 		}
 		s.token = token
 		return end
 	}
 	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
-		s.setError(errSyntax)
+		s.setError(ErrSyntax)
 		s.b = s.b[:len(s.b)-1]
 	}
 	s.done = true
@@ -225,7 +225,7 @@
 func Parse(s string) (t Tag, err error) {
 	// TODO: consider supporting old-style locale key-value pairs.
 	if s == "" {
-		return und, errSyntax
+		return und, ErrSyntax
 	}
 	if len(s) <= maxAltTaglen {
 		b := [maxAltTaglen]byte{}
@@ -252,18 +252,18 @@
 	if n := len(scan.token); n <= 1 {
 		scan.toLower(0, len(scan.b))
 		if n == 0 || scan.token[0] != 'x' {
-			return t, errSyntax
+			return t, ErrSyntax
 		}
 		end = parseExtensions(scan)
 	} else if n >= 4 {
-		return und, errSyntax
+		return und, ErrSyntax
 	} else { // the usual case
 		t, end = parseTag(scan)
 		if n := len(scan.token); n == 1 {
 			t.pExt = uint16(end)
 			end = parseExtensions(scan)
 		} else if end < len(scan.b) {
-			scan.setError(errSyntax)
+			scan.setError(ErrSyntax)
 			scan.b = scan.b[:end]
 		}
 	}
@@ -438,7 +438,7 @@
 		end = parseExtension(scan)
 		extension := scan.b[extStart:end]
 		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
-			scan.setError(errSyntax)
+			scan.setError(ErrSyntax)
 			end = extStart
 			continue
 		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
@@ -507,7 +507,7 @@
 					if keyEnd != end {
 						keys = append(keys, scan.b[keyStart:end])
 					} else {
-						scan.setError(errSyntax)
+						scan.setError(ErrSyntax)
 						end = keyStart
 					}
 				}
diff --git a/language/internal/parse_test.go b/language/internal/parse_test.go
index 393c59d..a9d796f 100644
--- a/language/internal/parse_test.go
+++ b/language/internal/parse_test.go
@@ -347,10 +347,10 @@
 		{"aa-Uuuu", mkInvalid("Uuuu")},
 		{"aa-AB", mkInvalid("AB")},
 		// ill-formed wins over invalid.
-		{"ac-u", errSyntax},
-		{"ac-u-ca", errSyntax},
-		{"ac-u-ca-co-pinyin", errSyntax},
-		{"noob", errSyntax},
+		{"ac-u", ErrSyntax},
+		{"ac-u-ca", ErrSyntax},
+		{"ac-u-ca-co-pinyin", ErrSyntax},
+		{"noob", ErrSyntax},
 	}
 	for _, tt := range tests {
 		_, err := Parse(tt.in)