language/internal: expose more internals
Also simplify variants and extensions.
Change-Id: I3dfb347057302a7e5947ed35cbb29aa83d22ec0e
Reviewed-on: https://go-review.googlesource.com/95819
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/language/internal/language.go b/language/internal/language.go
index b0f0b6c..cc9bd4c 100644
--- a/language/internal/language.go
+++ b/language/internal/language.go
@@ -169,19 +169,34 @@
return err
}
-// Variant returns the variants specified explicitly for this language tag.
-// or nil if no variant was specified.
-func (t Tag) Variants() []Variant {
- v := []Variant{}
- if int(t.pVariant) < int(t.pExt) {
- for x, str := "", t.str[t.pVariant:t.pExt]; str != ""; {
- x, str = nextToken(str)
- v = append(v, Variant{x})
- }
+// Variants returns the part of the tag holding all variants or the empty string
+// if there are no variants defined.
+func (t Tag) Variants() string {
+ if t.pVariant == 0 {
+ return ""
}
- return v
+ return t.str[t.pVariant:t.pExt]
}
+// VariantOrPrivateTagStr returns variants or private use tags.
+func (t Tag) VariantOrPrivateTagStr() string {
+ if t.pExt > 0 {
+ return t.str[t.pVariant:t.pExt]
+ }
+ return t.str[t.pVariant:]
+}
+
+// HasString reports whether this tag defines more than just the raw
+// components.
+func (t Tag) HasString() bool {
+ return t.str != ""
+}
+
+// // IsPrivateUse reports whether this is a tag starting with x-.
+// func (t Tag) IsPrivateUse() bool {
+// return t.str != "" && strings.HasPrefix(t.str, "x-")
+// }
+
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
@@ -239,77 +254,52 @@
return und
}
-// returns token t and the rest of the string.
-func nextToken(s string) (t, tail string) {
- p := strings.Index(s[1:], "-")
- if p == -1 {
- return s[1:], ""
- }
- p++
- return s[1:p], s[p:]
-}
-
-// Extension is a single BCP 47 extension.
-type Extension struct {
- s string
-}
-
-// String returns the string representation of the extension, including the
-// type tag.
-func (e Extension) String() string {
- return e.s
-}
-
// ParseExtension parses s as an extension and returns it on success.
-func ParseExtension(s string) (e Extension, err error) {
+func ParseExtension(s string) (ext string, err error) {
scan := makeScannerString(s)
var end int
if n := len(scan.token); n != 1 {
- return Extension{}, errSyntax
+ return "", ErrSyntax
}
scan.toLower(0, len(scan.b))
end = parseExtension(&scan)
if end != len(s) {
- return Extension{}, errSyntax
+ return "", ErrSyntax
}
- return Extension{string(scan.b)}, nil
+ return string(scan.b), nil
}
-// Type returns the one-byte extension type of e. It returns 0 for the zero
-// exception.
-func (e Extension) Type() byte {
- if e.s == "" {
- return 0
- }
- return e.s[0]
+// HasVariants reports whether t has variants.
+func (t Tag) HasVariants() bool {
+ return uint16(t.pVariant) < t.pExt
}
-// Tokens returns the list of tokens of e.
-func (e Extension) Tokens() []string {
- return strings.Split(e.s, "-")
+// HasExtensions reports whether t has extensions.
+func (t Tag) HasExtensions() bool {
+ return int(t.pExt) < len(t.str)
}
// Extension returns the extension of type x for tag t. It will return
// false for ok if t does not have the requested extension. The returned
// extension will be invalid in this case.
-func (t Tag) Extension(x byte) (ext Extension, ok bool) {
+func (t Tag) Extension(x byte) (ext string, ok bool) {
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
if ext[0] == x {
- return Extension{ext}, true
+ return ext, true
}
}
- return Extension{}, false
+ return "", false
}
// Extensions returns all extensions of t.
-func (t Tag) Extensions() []Extension {
- e := []Extension{}
+func (t Tag) Extensions() []string {
+ e := []string{}
for i := int(t.pExt); i < len(t.str)-1; {
var ext string
i, ext = getExtension(t.str, i)
- e = append(e, Extension{ext})
+ e = append(e, ext)
}
return e
}
@@ -482,7 +472,7 @@
// or another error if another error occurred.
func ParseBase(s string) (Language, error) {
if n := len(s); n < 2 || 3 < n {
- return 0, errSyntax
+ return 0, ErrSyntax
}
var buf [3]byte
return getLangID(buf[:copy(buf[:], s)])
@@ -493,7 +483,7 @@
// or another error if another error occurred.
func ParseScript(s string) (Script, error) {
if len(s) != 4 {
- return 0, errSyntax
+ return 0, ErrSyntax
}
var buf [4]byte
return getScriptID(script, buf[:copy(buf[:], s)])
@@ -510,7 +500,7 @@
// or another error if another error occurred.
func ParseRegion(s string) (Region, error) {
if n := len(s); n < 2 || 3 < n {
- return 0, errSyntax
+ return 0, ErrSyntax
}
var buf [3]byte
return getRegionID(buf[:copy(buf[:], s)])
@@ -591,20 +581,21 @@
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
- variant string
+ ID uint8
+ str string
}
// ParseVariant parses and returns a Variant. An error is returned if s is not
// a valid variant.
func ParseVariant(s string) (Variant, error) {
s = strings.ToLower(s)
- if _, ok := variantIndex[s]; ok {
- return Variant{s}, nil
+ if id, ok := variantIndex[s]; ok {
+ return Variant{id, s}, nil
}
return Variant{}, mkErrInvalid([]byte(s))
}
// String returns the string representation of the variant.
func (v Variant) String() string {
- return v.variant
+ return v.str
}
diff --git a/language/internal/lookup.go b/language/internal/lookup.go
index 73fc34b..977861b 100644
--- a/language/internal/lookup.go
+++ b/language/internal/lookup.go
@@ -17,7 +17,7 @@
// if it could not be found.
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
if !tag.FixCase(form, key) {
- return 0, errSyntax
+ return 0, ErrSyntax
}
i := idx.Index(key)
if i == -1 {
@@ -43,6 +43,10 @@
return getLangISO3(s)
}
+func (id Language) Canonicalize() (Language, AliasType) {
+ return normLang(id)
+}
+
// mapLang returns the mapped langID of id according to mapping m.
func normLang(id Language) (Language, AliasType) {
k := sort.Search(len(langAliasMap), func(i int) bool {
@@ -58,7 +62,7 @@
// or unknownLang if this does not exist.
func getLangISO2(s []byte) (Language, error) {
if !tag.FixCase("zz", s) {
- return 0, errSyntax
+ return 0, ErrSyntax
}
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
return Language(i), nil
@@ -118,7 +122,7 @@
}
return 0, mkErrInvalid(s)
}
- return 0, errSyntax
+ return 0, ErrSyntax
}
// stringToBuf writes the string to b and returns the number of bytes
@@ -228,7 +232,7 @@
}
return 0, mkErrInvalid(s)
}
- return 0, errSyntax
+ return 0, ErrSyntax
}
func getRegionM49(n int) (Region, error) {
diff --git a/language/internal/lookup_test.go b/language/internal/lookup_test.go
index 7c0dad1..d60dd88 100644
--- a/language/internal/lookup_test.go
+++ b/language/internal/lookup_test.go
@@ -19,10 +19,10 @@
id, bcp47, iso3, norm string
err error
}{
- {id: "", bcp47: "und", iso3: "und", err: errSyntax},
- {id: " ", bcp47: "und", iso3: "und", err: errSyntax},
- {id: " ", bcp47: "und", iso3: "und", err: errSyntax},
- {id: " ", bcp47: "und", iso3: "und", err: errSyntax},
+ {id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
+ {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
+ {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
+ {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
{id: "xxx", bcp47: "und", iso3: "und", err: mkErrInvalid([]byte("xxx"))},
{id: "und", bcp47: "und", iso3: "und"},
{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
diff --git a/language/internal/match.go b/language/internal/match.go
index 37f4eb6..bcefcd9 100644
--- a/language/internal/match.go
+++ b/language/internal/match.go
@@ -224,18 +224,3 @@
}
return t, nil
}
-
-func (t Tag) variants() string {
- if t.pVariant == 0 {
- return ""
- }
- return t.str[t.pVariant:t.pExt]
-}
-
-// variantOrPrivateTagStr returns variants or private use tags.
-func (t Tag) variantOrPrivateTagStr() string {
- if t.pExt > 0 {
- return t.str[t.pVariant:t.pExt]
- }
- return t.str[t.pVariant:]
-}
diff --git a/language/internal/parse.go b/language/internal/parse.go
index c482eed..94e7271 100644
--- a/language/internal/parse.go
+++ b/language/internal/parse.go
@@ -29,10 +29,10 @@
return true
}
-// errSyntax is returned by any of the parsing functions when the
+// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// TODO: return the position at which the syntax error occurred?
-var errSyntax = errors.New("language: tag is not well-formed")
+var ErrSyntax = errors.New("language: tag is not well-formed")
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
@@ -116,7 +116,7 @@
}
func (s *scanner) setError(e error) {
- if s.err == nil || (e == errSyntax && s.err != errSyntax) {
+ if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
s.err = e
}
}
@@ -163,7 +163,7 @@
// deleteRange removes the given range from s.b before the current token.
func (s *scanner) deleteRange(start, end int) {
- s.setError(errSyntax)
+ s.setError(ErrSyntax)
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
diff := end - start
s.next -= diff
@@ -190,14 +190,14 @@
}
token := s.b[s.start:s.end]
if i < 1 || i > 8 || !isAlphaNum(token) {
- s.gobble(errSyntax)
+ s.gobble(ErrSyntax)
continue
}
s.token = token
return end
}
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
- s.setError(errSyntax)
+ s.setError(ErrSyntax)
s.b = s.b[:len(s.b)-1]
}
s.done = true
@@ -225,7 +225,7 @@
func Parse(s string) (t Tag, err error) {
// TODO: consider supporting old-style locale key-value pairs.
if s == "" {
- return und, errSyntax
+ return und, ErrSyntax
}
if len(s) <= maxAltTaglen {
b := [maxAltTaglen]byte{}
@@ -252,18 +252,18 @@
if n := len(scan.token); n <= 1 {
scan.toLower(0, len(scan.b))
if n == 0 || scan.token[0] != 'x' {
- return t, errSyntax
+ return t, ErrSyntax
}
end = parseExtensions(scan)
} else if n >= 4 {
- return und, errSyntax
+ return und, ErrSyntax
} else { // the usual case
t, end = parseTag(scan)
if n := len(scan.token); n == 1 {
t.pExt = uint16(end)
end = parseExtensions(scan)
} else if end < len(scan.b) {
- scan.setError(errSyntax)
+ scan.setError(ErrSyntax)
scan.b = scan.b[:end]
}
}
@@ -438,7 +438,7 @@
end = parseExtension(scan)
extension := scan.b[extStart:end]
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
- scan.setError(errSyntax)
+ scan.setError(ErrSyntax)
end = extStart
continue
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
@@ -507,7 +507,7 @@
if keyEnd != end {
keys = append(keys, scan.b[keyStart:end])
} else {
- scan.setError(errSyntax)
+ scan.setError(ErrSyntax)
end = keyStart
}
}
diff --git a/language/internal/parse_test.go b/language/internal/parse_test.go
index 393c59d..a9d796f 100644
--- a/language/internal/parse_test.go
+++ b/language/internal/parse_test.go
@@ -347,10 +347,10 @@
{"aa-Uuuu", mkInvalid("Uuuu")},
{"aa-AB", mkInvalid("AB")},
// ill-formed wins over invalid.
- {"ac-u", errSyntax},
- {"ac-u-ca", errSyntax},
- {"ac-u-ca-co-pinyin", errSyntax},
- {"noob", errSyntax},
+ {"ac-u", ErrSyntax},
+ {"ac-u-ca", ErrSyntax},
+ {"ac-u-ca-co-pinyin", ErrSyntax},
+ {"noob", ErrSyntax},
}
for _, tt := range tests {
_, err := Parse(tt.in)