internal/language/compact: removes, renames, and exports
The compact package exists to expose compact tag IDs
within x/text, without exporting it outside the repo.
Adjust naming, remove unneeded code, and export some
previously internal code.
Change-Id: I716375dc775c038315d17822c442338c86a2f82d
Reviewed-on: https://go-review.googlesource.com/96638
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ross Light <light@google.com>
diff --git a/internal/language/compact/compact.go b/internal/language/compact/compact.go
index 0106331..7994984 100644
--- a/internal/language/compact/compact.go
+++ b/internal/language/compact/compact.go
@@ -2,7 +2,16 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package language // import "golang.org/x/text/internal/language/compact"
+// Package compact defines a compact representation of language tags.
+//
+// Common language tags (at least all for which locale information is defined
+// in CLDR) are assigned a unique index. Each Tag is associated with such an
+// ID for selecting language-related resources (such as translations) as well
+// as one for selecting regional defaults (currency, number formatting, etc.).
+//
+// We may want to export this functionality at some point, but for now it is
+// only available for use within x/text.
+package compact // import "golang.org/x/text/internal/language/compact"
import (
"sort"
@@ -11,9 +20,10 @@
"golang.org/x/text/internal/language"
)
-type compactID uint16
+// ID is an integer identifying a single tag.
+type ID uint16
-func getCoreIndex(t language.Tag) (id compactID, ok bool) {
+func getCoreIndex(t language.Tag) (id ID, ok bool) {
cci, ok := language.GetCompactCore(t)
if !ok {
return 0, false
@@ -24,14 +34,15 @@
if i == len(coreTags) || coreTags[i] != cci {
return 0, false
}
- return compactID(i), true
+ return ID(i), true
}
-func (c compactID) tag() language.Tag {
- if int(c) >= len(coreTags) {
- return specialTags[int(c)-len(coreTags)]
+// Tag converts id to an internal language Tag.
+func (id ID) Tag() language.Tag {
+ if int(id) >= len(coreTags) {
+ return specialTags[int(id)-len(coreTags)]
}
- return coreTags[c].Tag()
+ return coreTags[id].Tag()
}
var specialTags []language.Tag
diff --git a/internal/language/compact/coverage.go b/internal/language/compact/coverage.go
deleted file mode 100644
index fdb6156..0000000
--- a/internal/language/compact/coverage.go
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language
-
-import (
- "fmt"
- "sort"
-
- "golang.org/x/text/internal/language"
-)
-
-// The Coverage interface is used to define the level of coverage of an
-// internationalization service. Note that not all types are supported by all
-// services. As lists may be generated on the fly, it is recommended that users
-// of a Coverage cache the results.
-type Coverage interface {
- // Tags returns the list of supported tags.
- Tags() []Tag
-
- // BaseLanguages returns the list of supported base languages.
- BaseLanguages() []Base
-
- // Scripts returns the list of supported scripts.
- Scripts() []Script
-
- // Regions returns the list of supported regions.
- Regions() []Region
-}
-
-var (
- // Supported defines a Coverage that lists all supported subtags. Tags
- // always returns nil.
- Supported Coverage = allSubtags{}
-)
-
-// TODO:
-// - Support Variants, numbering systems.
-// - CLDR coverage levels.
-// - Set of common tags defined in this package.
-
-type allSubtags struct{}
-
-// Regions returns the list of supported regions. As all regions are in a
-// consecutive range, it simply returns a slice of numbers in increasing order.
-// The "undefined" region is not returned.
-func (s allSubtags) Regions() []Region {
- reg := make([]Region, language.NumRegions)
- for i := range reg {
- reg[i] = Region{language.Region(i + 1)}
- }
- return reg
-}
-
-// Scripts returns the list of supported scripts. As all scripts are in a
-// consecutive range, it simply returns a slice of numbers in increasing order.
-// The "undefined" script is not returned.
-func (s allSubtags) Scripts() []Script {
- scr := make([]Script, language.NumScripts)
- for i := range scr {
- scr[i] = Script{language.Script(i + 1)}
- }
- return scr
-}
-
-// BaseLanguages returns the list of all supported base languages. It generates
-// the list by traversing the internal structures.
-func (s allSubtags) BaseLanguages() []Base {
- bs := language.BaseLanguages()
- base := make([]Base, len(bs))
- for i, b := range bs {
- base[i] = Base{b}
- }
- return base
-}
-
-// Tags always returns nil.
-func (s allSubtags) Tags() []Tag {
- return nil
-}
-
-// coverage is used used by NewCoverage which is used as a convenient way for
-// creating Coverage implementations for partially defined data. Very often a
-// package will only need to define a subset of slices. coverage provides a
-// convenient way to do this. Moreover, packages using NewCoverage, instead of
-// their own implementation, will not break if later new slice types are added.
-type coverage struct {
- tags func() []Tag
- bases func() []Base
- scripts func() []Script
- regions func() []Region
-}
-
-func (s *coverage) Tags() []Tag {
- if s.tags == nil {
- return nil
- }
- return s.tags()
-}
-
-// bases implements sort.Interface and is used to sort base languages.
-type bases []Base
-
-func (b bases) Len() int {
- return len(b)
-}
-
-func (b bases) Swap(i, j int) {
- b[i], b[j] = b[j], b[i]
-}
-
-func (b bases) Less(i, j int) bool {
- return b[i].langID < b[j].langID
-}
-
-// BaseLanguages returns the result from calling s.bases if it is specified or
-// otherwise derives the set of supported base languages from tags.
-func (s *coverage) BaseLanguages() []Base {
- if s.bases == nil {
- tags := s.Tags()
- if len(tags) == 0 {
- return nil
- }
- a := make([]Base, len(tags))
- for i, t := range tags {
- a[i] = Base{language.Language(t.lang())}
- }
- sort.Sort(bases(a))
- k := 0
- for i := 1; i < len(a); i++ {
- if a[k] != a[i] {
- k++
- a[k] = a[i]
- }
- }
- return a[:k+1]
- }
- return s.bases()
-}
-
-func (s *coverage) Scripts() []Script {
- if s.scripts == nil {
- return nil
- }
- return s.scripts()
-}
-
-func (s *coverage) Regions() []Region {
- if s.regions == nil {
- return nil
- }
- return s.regions()
-}
-
-// NewCoverage returns a Coverage for the given lists. It is typically used by
-// packages providing internationalization services to define their level of
-// coverage. A list may be of type []T or func() []T, where T is either Tag,
-// Base, Script or Region. The returned Coverage derives the value for Bases
-// from Tags if no func or slice for []Base is specified. For other unspecified
-// types the returned Coverage will return nil for the respective methods.
-func NewCoverage(list ...interface{}) Coverage {
- s := &coverage{}
- for _, x := range list {
- switch v := x.(type) {
- case func() []Base:
- s.bases = v
- case func() []Script:
- s.scripts = v
- case func() []Region:
- s.regions = v
- case func() []Tag:
- s.tags = v
- case []Base:
- s.bases = func() []Base { return v }
- case []Script:
- s.scripts = func() []Script { return v }
- case []Region:
- s.regions = func() []Region { return v }
- case []Tag:
- s.tags = func() []Tag { return v }
- default:
- panic(fmt.Sprintf("language: unsupported set type %T", v))
- }
- }
- return s
-}
diff --git a/internal/language/compact/coverage_test.go b/internal/language/compact/coverage_test.go
deleted file mode 100644
index bbc092c..0000000
--- a/internal/language/compact/coverage_test.go
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language
-
-import (
- "fmt"
- "reflect"
- "testing"
-
- "golang.org/x/text/internal/language"
-)
-
-func TestSupported(t *testing.T) {
- // To prove the results are correct for a type, we test that the number of
- // results is identical to the number of results on record, that all results
- // are distinct and that all results are valid.
- tests := map[string]int{
- "BaseLanguages": language.NumLanguages,
- "Scripts": language.NumScripts,
- "Regions": language.NumRegions,
- "Tags": 0,
- }
- sup := reflect.ValueOf(Supported)
- for name, num := range tests {
- v := sup.MethodByName(name).Call(nil)[0]
- if n := v.Len(); n != num {
- t.Errorf("len(%s()) was %d; want %d", name, n, num)
- }
- dup := make(map[string]bool)
- for i := 0; i < v.Len(); i++ {
- x := v.Index(i).Interface()
- // An invalid value will either cause a crash or result in a
- // duplicate when passed to Sprint.
- s := fmt.Sprint(x)
- if dup[s] {
- t.Errorf("%s: duplicate entry %q", name, s)
- }
- dup[s] = true
- }
- if len(dup) != v.Len() {
- t.Errorf("%s: # unique entries was %d; want %d", name, len(dup), v.Len())
- }
- }
-}
-
-func TestNewCoverage(t *testing.T) {
- bases := []Base{Base{0}, Base{3}, Base{7}}
- scripts := []Script{Script{11}, Script{17}, Script{23}}
- regions := []Region{Region{101}, Region{103}, Region{107}}
- tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}
- fbases := func() []Base { return bases }
- fscripts := func() []Script { return scripts }
- fregions := func() []Region { return regions }
- ftags := func() []Tag { return tags }
-
- tests := []struct {
- desc string
- list []interface{}
- bases []Base
- scripts []Script
- regions []Region
- tags []Tag
- }{
- {
- desc: "empty",
- },
- {
- desc: "bases",
- list: []interface{}{bases},
- bases: bases,
- },
- {
- desc: "scripts",
- list: []interface{}{scripts},
- scripts: scripts,
- },
- {
- desc: "regions",
- list: []interface{}{regions},
- regions: regions,
- },
- {
- desc: "bases derives from tags",
- list: []interface{}{tags},
- bases: []Base{Base{_en}, Base{_pt}},
- tags: tags,
- },
- {
- desc: "tags and bases",
- list: []interface{}{tags, bases},
- bases: bases,
- tags: tags,
- },
- {
- desc: "fully specified",
- list: []interface{}{tags, bases, scripts, regions},
- bases: bases,
- scripts: scripts,
- regions: regions,
- tags: tags,
- },
- {
- desc: "bases func",
- list: []interface{}{fbases},
- bases: bases,
- },
- {
- desc: "scripts func",
- list: []interface{}{fscripts},
- scripts: scripts,
- },
- {
- desc: "regions func",
- list: []interface{}{fregions},
- regions: regions,
- },
- {
- desc: "tags func",
- list: []interface{}{ftags},
- bases: []Base{Base{_en}, Base{_pt}},
- tags: tags,
- },
- {
- desc: "tags and bases",
- list: []interface{}{ftags, fbases},
- bases: bases,
- tags: tags,
- },
- {
- desc: "fully specified",
- list: []interface{}{ftags, fbases, fscripts, fregions},
- bases: bases,
- scripts: scripts,
- regions: regions,
- tags: tags,
- },
- }
-
- for i, tt := range tests {
- l := NewCoverage(tt.list...)
- if a := l.BaseLanguages(); !reflect.DeepEqual(a, tt.bases) {
- t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tt.desc, a, tt.bases)
- }
- if a := l.Scripts(); !reflect.DeepEqual(a, tt.scripts) {
- t.Errorf("%d:%s: Scripts was %v; want %v", i, tt.desc, a, tt.scripts)
- }
- if a := l.Regions(); !reflect.DeepEqual(a, tt.regions) {
- t.Errorf("%d:%s: Regions was %v; want %v", i, tt.desc, a, tt.regions)
- }
- if a := l.Tags(); !reflect.DeepEqual(a, tt.tags) {
- t.Errorf("%d:%s: Tags was %v; want %v", i, tt.desc, a, tt.tags)
- }
- }
-}
diff --git a/internal/language/compact/examples_test.go b/internal/language/compact/examples_test.go
deleted file mode 100644
index 68caa3f..0000000
--- a/internal/language/compact/examples_test.go
+++ /dev/null
@@ -1,413 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language_test
-
-import (
- "fmt"
- "net/http"
-
- "golang.org/x/text/language"
-)
-
-func ExampleCanonType() {
- p := func(id string) {
- fmt.Printf("Default(%s) -> %s\n", id, language.Make(id))
- fmt.Printf("BCP47(%s) -> %s\n", id, language.BCP47.Make(id))
- fmt.Printf("Macro(%s) -> %s\n", id, language.Macro.Make(id))
- fmt.Printf("All(%s) -> %s\n", id, language.All.Make(id))
- }
- p("en-Latn")
- p("sh")
- p("zh-cmn")
- p("bjd")
- p("iw-Latn-fonipa-u-cu-usd")
- // Output:
- // Default(en-Latn) -> en-Latn
- // BCP47(en-Latn) -> en
- // Macro(en-Latn) -> en-Latn
- // All(en-Latn) -> en
- // Default(sh) -> sr-Latn
- // BCP47(sh) -> sh
- // Macro(sh) -> sh
- // All(sh) -> sr-Latn
- // Default(zh-cmn) -> cmn
- // BCP47(zh-cmn) -> cmn
- // Macro(zh-cmn) -> zh
- // All(zh-cmn) -> zh
- // Default(bjd) -> drl
- // BCP47(bjd) -> drl
- // Macro(bjd) -> bjd
- // All(bjd) -> drl
- // Default(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
- // BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
- // Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
- // All(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
-}
-
-func ExampleTag_Base() {
- fmt.Println(language.Make("und").Base())
- fmt.Println(language.Make("und-US").Base())
- fmt.Println(language.Make("und-NL").Base())
- fmt.Println(language.Make("und-419").Base()) // Latin America
- fmt.Println(language.Make("und-ZZ").Base())
- // Output:
- // en Low
- // en High
- // nl High
- // es Low
- // en Low
-}
-
-func ExampleTag_Script() {
- en := language.Make("en")
- sr := language.Make("sr")
- sr_Latn := language.Make("sr_Latn")
- fmt.Println(en.Script())
- fmt.Println(sr.Script())
- // Was a script explicitly specified?
- _, c := sr.Script()
- fmt.Println(c == language.Exact)
- _, c = sr_Latn.Script()
- fmt.Println(c == language.Exact)
- // Output:
- // Latn High
- // Cyrl Low
- // false
- // true
-}
-
-func ExampleTag_Region() {
- ru := language.Make("ru")
- en := language.Make("en")
- fmt.Println(ru.Region())
- fmt.Println(en.Region())
- // Output:
- // RU Low
- // US Low
-}
-
-func ExampleRegion_TLD() {
- us := language.MustParseRegion("US")
- gb := language.MustParseRegion("GB")
- uk := language.MustParseRegion("UK")
- bu := language.MustParseRegion("BU")
-
- fmt.Println(us.TLD())
- fmt.Println(gb.TLD())
- fmt.Println(uk.TLD())
- fmt.Println(bu.TLD())
-
- fmt.Println(us.Canonicalize().TLD())
- fmt.Println(gb.Canonicalize().TLD())
- fmt.Println(uk.Canonicalize().TLD())
- fmt.Println(bu.Canonicalize().TLD())
- // Output:
- // US <nil>
- // UK <nil>
- // UK <nil>
- // ZZ language: region is not a valid ccTLD
- // US <nil>
- // UK <nil>
- // UK <nil>
- // MM <nil>
-}
-
-func ExampleCompose() {
- nl, _ := language.ParseBase("nl")
- us, _ := language.ParseRegion("US")
- de := language.Make("de-1901-u-co-phonebk")
- jp := language.Make("ja-JP")
- fi := language.Make("fi-x-ing")
-
- u, _ := language.ParseExtension("u-nu-arabic")
- x, _ := language.ParseExtension("x-piglatin")
-
- // Combine a base language and region.
- fmt.Println(language.Compose(nl, us))
- // Combine a base language and extension.
- fmt.Println(language.Compose(nl, x))
- // Replace the region.
- fmt.Println(language.Compose(jp, us))
- // Combine several tags.
- fmt.Println(language.Compose(us, nl, u))
-
- // Replace the base language of a tag.
- fmt.Println(language.Compose(de, nl))
- fmt.Println(language.Compose(de, nl, u))
- // Remove the base language.
- fmt.Println(language.Compose(de, language.Base{}))
- // Remove all variants.
- fmt.Println(language.Compose(de, []language.Variant{}))
- // Remove all extensions.
- fmt.Println(language.Compose(de, []language.Extension{}))
- fmt.Println(language.Compose(fi, []language.Extension{}))
- // Remove all variants and extensions.
- fmt.Println(language.Compose(de.Raw()))
-
- // An error is gobbled or returned if non-nil.
- fmt.Println(language.Compose(language.ParseRegion("ZA")))
- fmt.Println(language.Compose(language.ParseRegion("HH")))
-
- // Compose uses the same Default canonicalization as Make.
- fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))
-
- // Call compose on a different CanonType for different results.
- fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))
-
- // Output:
- // nl-US <nil>
- // nl-x-piglatin <nil>
- // ja-US <nil>
- // nl-US-u-nu-arabic <nil>
- // nl-1901-u-co-phonebk <nil>
- // nl-1901-u-co-phonebk-nu-arabic <nil>
- // und-1901-u-co-phonebk <nil>
- // de-u-co-phonebk <nil>
- // de-1901 <nil>
- // fi <nil>
- // de <nil>
- // und-ZA <nil>
- // und language: subtag "HH" is well-formed but unknown
- // en-Latn-GB <nil>
- // en-GB <nil>
-}
-
-func ExampleParse_errors() {
- for _, s := range []string{"Foo", "Bar", "Foobar"} {
- _, err := language.Parse(s)
- if err != nil {
- if inv, ok := err.(language.ValueError); ok {
- fmt.Println(inv.Subtag())
- } else {
- fmt.Println(s)
- }
- }
- }
- for _, s := range []string{"en", "aa-Uuuu", "AC", "ac-u"} {
- _, err := language.Parse(s)
- switch e := err.(type) {
- case language.ValueError:
- fmt.Printf("%s: culprit %q\n", s, e.Subtag())
- case nil:
- // No error.
- default:
- // A syntax error.
- fmt.Printf("%s: ill-formed\n", s)
- }
- }
- // Output:
- // foo
- // Foobar
- // aa-Uuuu: culprit "Uuuu"
- // AC: culprit "ac"
- // ac-u: ill-formed
-}
-
-func ExampleParent() {
- p := func(tag string) {
- fmt.Printf("parent(%v): %v\n", tag, language.Make(tag).Parent())
- }
- p("zh-CN")
-
- // Australian English inherits from World English.
- p("en-AU")
-
- // If the tag has a different maximized script from its parent, a tag with
- // this maximized script is inserted. This allows different language tags
- // which have the same base language and script in common to inherit from
- // a common set of settings.
- p("zh-HK")
-
- // If the maximized script of the parent is not identical, CLDR will skip
- // inheriting from it, as it means there will not be many entries in common
- // and inheriting from it is nonsensical.
- p("zh-Hant")
-
- // The parent of a tag with variants and extensions is the tag with all
- // variants and extensions removed.
- p("de-1994-u-co-phonebk")
-
- // Remove default script.
- p("de-Latn-LU")
-
- // Output:
- // parent(zh-CN): zh
- // parent(en-AU): en-001
- // parent(zh-HK): zh-Hant
- // parent(zh-Hant): und
- // parent(de-1994-u-co-phonebk): de
- // parent(de-Latn-LU): de
-}
-
-// ExampleMatcher_bestMatch gives some examples of getting the best match of
-// a set of tags to any of the tags of given set.
-func ExampleMatcher() {
- // This is the set of tags from which we want to pick the best match. These
- // can be, for example, the supported languages for some package.
- tags := []language.Tag{
- language.English,
- language.BritishEnglish,
- language.French,
- language.Afrikaans,
- language.BrazilianPortuguese,
- language.EuropeanPortuguese,
- language.Croatian,
- language.SimplifiedChinese,
- language.Raw.Make("iw-IL"),
- language.Raw.Make("iw"),
- language.Raw.Make("he"),
- }
- m := language.NewMatcher(tags)
-
- // A simple match.
- fmt.Println(m.Match(language.Make("fr")))
-
- // Australian English is closer to British than American English.
- fmt.Println(m.Match(language.Make("en-AU")))
-
- // Default to the first tag passed to the Matcher if there is no match.
- fmt.Println(m.Match(language.Make("ar")))
-
- // Get the default tag.
- fmt.Println(m.Match())
-
- fmt.Println("----")
-
- // Someone specifying sr-Latn is probably fine with getting Croatian.
- fmt.Println(m.Match(language.Make("sr-Latn")))
-
- // We match SimplifiedChinese, but with Low confidence.
- fmt.Println(m.Match(language.TraditionalChinese))
-
- // Serbian in Latin script is a closer match to Croatian than Traditional
- // Chinese to Simplified Chinese.
- fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn")))
-
- fmt.Println("----")
-
- // In case a multiple variants of a language are available, the most spoken
- // variant is typically returned.
- fmt.Println(m.Match(language.Portuguese))
-
- // Pick the first value passed to Match in case of a tie.
- fmt.Println(m.Match(language.Dutch, language.Make("fr-BE"), language.Make("af-NA")))
- fmt.Println(m.Match(language.Dutch, language.Make("af-NA"), language.Make("fr-BE")))
-
- fmt.Println("----")
-
- // If a Matcher is initialized with a language and it's deprecated version,
- // it will distinguish between them.
- fmt.Println(m.Match(language.Raw.Make("iw")))
-
- // However, for non-exact matches, it will treat deprecated versions as
- // equivalent and consider other factors first.
- fmt.Println(m.Match(language.Raw.Make("he-IL")))
-
- fmt.Println("----")
-
- // User settings passed to the Unicode extension are ignored for matching
- // and preserved in the returned tag.
- fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("fr-u-cu-frf")))
-
- // Even if the matching language is different.
- fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("br-u-cu-frf")))
-
- // If there is no matching language, the options of the first preferred tag are used.
- fmt.Println(m.Match(language.Make("de-u-co-phonebk")))
-
- // Output:
- // fr 2 Exact
- // en-GB 1 High
- // en 0 No
- // en 0 No
- // ----
- // hr 6 High
- // zh-Hans 7 Low
- // hr 6 High
- // ----
- // pt-BR 4 High
- // fr 2 High
- // af 3 High
- // ----
- // iw 9 Exact
- // he 10 Exact
- // ----
- // fr-u-cu-frf 2 Exact
- // fr-u-cu-frf 2 High
- // en-u-co-phonebk 0 No
-
- // TODO: "he" should be "he-u-rg-IL High"
-}
-
-func ExampleMatchStrings() {
- // languages supported by this service:
- matcher := language.NewMatcher([]language.Tag{
- language.English, language.Dutch, language.German,
- })
-
- http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
- lang, _ := r.Cookie("lang")
- tag, _ := language.MatchStrings(matcher, lang.String(), r.Header.Get("Accept-Language"))
-
- fmt.Println("User language:", tag)
- })
-}
-
-func ExampleComprehends() {
- // Various levels of comprehensibility.
- fmt.Println(language.Comprehends(language.English, language.English))
- fmt.Println(language.Comprehends(language.AmericanEnglish, language.BritishEnglish))
-
- // An explicit Und results in no match.
- fmt.Println(language.Comprehends(language.English, language.Und))
-
- fmt.Println("----")
-
- // There is usually no mutual comprehensibility between different scripts.
- fmt.Println(language.Comprehends(language.Make("en-Dsrt"), language.English))
-
- // One exception is for Traditional versus Simplified Chinese, albeit with
- // a low confidence.
- fmt.Println(language.Comprehends(language.TraditionalChinese, language.SimplifiedChinese))
-
- fmt.Println("----")
-
- // A Swiss German speaker will often understand High German.
- fmt.Println(language.Comprehends(language.Make("gsw"), language.Make("de")))
-
- // The converse is not generally the case.
- fmt.Println(language.Comprehends(language.Make("de"), language.Make("gsw")))
-
- // Output:
- // Exact
- // High
- // No
- // ----
- // No
- // Low
- // ----
- // High
- // No
-}
-
-func ExampleTag_values() {
- us := language.MustParseRegion("US")
- en := language.MustParseBase("en")
-
- lang, _, region := language.AmericanEnglish.Raw()
- fmt.Println(lang == en, region == us)
-
- lang, _, region = language.BritishEnglish.Raw()
- fmt.Println(lang == en, region == us)
-
- // Tags can be compared for exact equivalence using '=='.
- en_us, _ := language.Compose(en, us)
- fmt.Println(en_us == language.AmericanEnglish)
-
- // Output:
- // true true
- // true false
- // true
-}
diff --git a/internal/language/compact/gen.go b/internal/language/compact/gen.go
index 5190040..0c36a05 100644
--- a/internal/language/compact/gen.go
+++ b/internal/language/compact/gen.go
@@ -12,14 +12,9 @@
import (
"flag"
"fmt"
- "io"
"log"
- "sort"
- "strconv"
- "strings"
"golang.org/x/text/internal/gen"
- "golang.org/x/text/internal/language"
"golang.org/x/text/unicode/cldr"
)
@@ -36,37 +31,22 @@
gen.Init()
w := gen.NewCodeWriter()
- defer w.WriteGoFile("tables.go", "language")
+ defer w.WriteGoFile("tables.go", "compact")
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
b := newBuilder(w)
gen.WriteCLDRVersion(w)
- b.writeConstants()
b.writeCompactIndex()
- b.writeMatchData()
}
type builder struct {
w *gen.CodeWriter
- hw io.Writer // MultiWriter for w and w.Hash
data *cldr.CLDR
supp *cldr.SupplementalData
}
-func (b *builder) langIndex(s string) uint16 {
- return uint16(language.MustParseBase(s))
-}
-
-func (b *builder) regionIndex(s string) int {
- return int(language.MustParseRegion(s))
-}
-
-func (b *builder) scriptIndex(s string) int {
- return int(language.MustParseScript(s))
-}
-
func newBuilder(w *gen.CodeWriter) *builder {
r := gen.OpenCLDRCoreZip()
defer r.Close()
@@ -77,233 +57,8 @@
}
b := builder{
w: w,
- hw: io.MultiWriter(w, w.Hash),
data: data,
supp: data.Supplemental(),
}
return &b
}
-
-// writeConsts computes f(v) for all v in values and writes the results
-// as constants named _v to a single constant block.
-func (b *builder) writeConsts(f func(string) int, values ...string) {
- fmt.Fprintln(b.w, "const (")
- for _, v := range values {
- fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
- }
- fmt.Fprintln(b.w, ")")
-}
-
-// TODO: region inclusion data will probably not be use used in future matchers.
-
-var langConsts = []string{
- "de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
-}
-
-var scriptConsts = []string{
- "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
- "Zzzz",
-}
-
-var regionConsts = []string{
- "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
- "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
-}
-
-// writeLanguage generates all tables needed for language canonicalization.
-func (b *builder) writeConstants() {
- b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
- b.writeConsts(b.regionIndex, regionConsts...)
- b.writeConsts(b.scriptIndex, scriptConsts...)
-}
-
-type mutualIntelligibility struct {
- want, have uint16
- distance uint8
- oneway bool
-}
-
-type scriptIntelligibility struct {
- wantLang, haveLang uint16
- wantScript, haveScript uint8
- distance uint8
- // Always oneway
-}
-
-type regionIntelligibility struct {
- lang uint16 // compact language id
- script uint8 // 0 means any
- group uint8 // 0 means any; if bit 7 is set it means inverse
- distance uint8
- // Always twoway.
-}
-
-// writeMatchData writes tables with languages and scripts for which there is
-// mutual intelligibility. The data is based on CLDR's languageMatching data.
-// Note that we use a different algorithm than the one defined by CLDR and that
-// we slightly modify the data. For example, we convert scores to confidence levels.
-// We also drop all region-related data as we use a different algorithm to
-// determine region equivalence.
-func (b *builder) writeMatchData() {
- lm := b.supp.LanguageMatching.LanguageMatches
- cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
-
- regionHierarchy := map[string][]string{}
- for _, g := range b.supp.TerritoryContainment.Group {
- regions := strings.Split(g.Contains, " ")
- regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
- }
- regionToGroups := make([]uint8, language.NumRegions)
-
- idToIndex := map[string]uint8{}
- for i, mv := range lm[0].MatchVariable {
- if i > 6 {
- log.Fatalf("Too many groups: %d", i)
- }
- idToIndex[mv.Id] = uint8(i + 1)
- // TODO: also handle '-'
- for _, r := range strings.Split(mv.Value, "+") {
- todo := []string{r}
- for k := 0; k < len(todo); k++ {
- r := todo[k]
- regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
- todo = append(todo, regionHierarchy[r]...)
- }
- }
- }
- b.w.WriteVar("regionToGroups", regionToGroups)
-
- // maps language id to in- and out-of-group region.
- paradigmLocales := [][3]uint16{}
- locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
- for i := 0; i < len(locales); i += 2 {
- x := [3]uint16{}
- for j := 0; j < 2; j++ {
- pc := strings.SplitN(locales[i+j], "-", 2)
- x[0] = b.langIndex(pc[0])
- if len(pc) == 2 {
- x[1+j] = uint16(b.regionIndex(pc[1]))
- }
- }
- paradigmLocales = append(paradigmLocales, x)
- }
- b.w.WriteVar("paradigmLocales", paradigmLocales)
-
- b.w.WriteType(mutualIntelligibility{})
- b.w.WriteType(scriptIntelligibility{})
- b.w.WriteType(regionIntelligibility{})
-
- matchLang := []mutualIntelligibility{}
- matchScript := []scriptIntelligibility{}
- matchRegion := []regionIntelligibility{}
- // Convert the languageMatch entries in lists keyed by desired language.
- for _, m := range lm[0].LanguageMatch {
- // Different versions of CLDR use different separators.
- desired := strings.Replace(m.Desired, "-", "_", -1)
- supported := strings.Replace(m.Supported, "-", "_", -1)
- d := strings.Split(desired, "_")
- s := strings.Split(supported, "_")
- if len(d) != len(s) {
- log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
- continue
- }
- distance, _ := strconv.ParseInt(m.Distance, 10, 8)
- switch len(d) {
- case 2:
- if desired == supported && desired == "*_*" {
- continue
- }
- // language-script pair.
- matchScript = append(matchScript, scriptIntelligibility{
- wantLang: uint16(b.langIndex(d[0])),
- haveLang: uint16(b.langIndex(s[0])),
- wantScript: uint8(b.scriptIndex(d[1])),
- haveScript: uint8(b.scriptIndex(s[1])),
- distance: uint8(distance),
- })
- if m.Oneway != "true" {
- matchScript = append(matchScript, scriptIntelligibility{
- wantLang: uint16(b.langIndex(s[0])),
- haveLang: uint16(b.langIndex(d[0])),
- wantScript: uint8(b.scriptIndex(s[1])),
- haveScript: uint8(b.scriptIndex(d[1])),
- distance: uint8(distance),
- })
- }
- case 1:
- if desired == supported && desired == "*" {
- continue
- }
- if distance == 1 {
- // nb == no is already handled by macro mapping. Check there
- // really is only this case.
- if d[0] != "no" || s[0] != "nb" {
- log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
- }
- continue
- }
- // TODO: consider dropping oneway field and just doubling the entry.
- matchLang = append(matchLang, mutualIntelligibility{
- want: uint16(b.langIndex(d[0])),
- have: uint16(b.langIndex(s[0])),
- distance: uint8(distance),
- oneway: m.Oneway == "true",
- })
- case 3:
- if desired == supported && desired == "*_*_*" {
- continue
- }
- if desired != supported {
- // This is now supported by CLDR, but only one case, which
- // should already be covered by paradigm locales. For instance,
- // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
- // testdata/CLDRLocaleMatcherTest.txt tests this.
- if supported != "en_*_GB" {
- log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
- }
- continue
- }
- ri := regionIntelligibility{
- lang: b.langIndex(d[0]),
- distance: uint8(distance),
- }
- if d[1] != "*" {
- ri.script = uint8(b.scriptIndex(d[1]))
- }
- switch {
- case d[2] == "*":
- ri.group = 0x80 // not contained in anything
- case strings.HasPrefix(d[2], "$!"):
- ri.group = 0x80
- d[2] = "$" + d[2][len("$!"):]
- fallthrough
- case strings.HasPrefix(d[2], "$"):
- ri.group |= idToIndex[d[2]]
- }
- matchRegion = append(matchRegion, ri)
- default:
- log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
- }
- }
- sort.SliceStable(matchLang, func(i, j int) bool {
- return matchLang[i].distance < matchLang[j].distance
- })
- b.w.WriteComment(`
- matchLang holds pairs of langIDs of base languages that are typically
- mutually intelligible. Each pair is associated with a confidence and
- whether the intelligibility goes one or both ways.`)
- b.w.WriteVar("matchLang", matchLang)
-
- b.w.WriteComment(`
- matchScript holds pairs of scriptIDs where readers of one script
- can typically also read the other. Each is associated with a confidence.`)
- sort.SliceStable(matchScript, func(i, j int) bool {
- return matchScript[i].distance < matchScript[j].distance
- })
- b.w.WriteVar("matchScript", matchScript)
-
- sort.SliceStable(matchRegion, func(i, j int) bool {
- return matchRegion[i].distance < matchRegion[j].distance
- })
- b.w.WriteVar("matchRegion", matchRegion)
-}
diff --git a/internal/language/compact/gen_index.go b/internal/language/compact/gen_index.go
index 2a84a91..475ca39 100644
--- a/internal/language/compact/gen_index.go
+++ b/internal/language/compact/gen_index.go
@@ -91,10 +91,10 @@
fmt.Fprintln(w, "const (")
for i, t := range coreTags {
- fmt.Fprintf(w, "%s compactID = %d\n", ident(t.Tag().String()), i)
+ fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
}
for i, t := range special {
- fmt.Fprintf(w, "%s compactID = %d\n", ident(t), i+len(coreTags))
+ fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
}
fmt.Fprintln(w, ")")
diff --git a/internal/language/compact/go1_1.go b/internal/language/compact/go1_1.go
deleted file mode 100644
index 380f4c0..0000000
--- a/internal/language/compact/go1_1.go
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !go1.2
-
-package language
-
-import "sort"
-
-func sortStable(s sort.Interface) {
- ss := stableSort{
- s: s,
- pos: make([]int, s.Len()),
- }
- for i := range ss.pos {
- ss.pos[i] = i
- }
- sort.Sort(&ss)
-}
-
-type stableSort struct {
- s sort.Interface
- pos []int
-}
-
-func (s *stableSort) Len() int {
- return len(s.pos)
-}
-
-func (s *stableSort) Less(i, j int) bool {
- return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
-}
-
-func (s *stableSort) Swap(i, j int) {
- s.s.Swap(i, j)
- s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
-}
diff --git a/internal/language/compact/go1_2.go b/internal/language/compact/go1_2.go
deleted file mode 100644
index 38268c5..0000000
--- a/internal/language/compact/go1_2.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build go1.2
-
-package language
-
-import "sort"
-
-var sortStable = sort.Stable
diff --git a/internal/language/compact/httpexample_test.go b/internal/language/compact/httpexample_test.go
deleted file mode 100644
index 03c0ab9..0000000
--- a/internal/language/compact/httpexample_test.go
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language_test
-
-import (
- "fmt"
- "net/http"
- "strings"
-
- "golang.org/x/text/language"
-)
-
-// matcher is a language.Matcher configured for all supported languages.
-var matcher = language.NewMatcher([]language.Tag{
- language.BritishEnglish,
- language.Norwegian,
- language.German,
-})
-
-// handler is a http.HandlerFunc.
-func handler(w http.ResponseWriter, r *http.Request) {
- t, q, err := language.ParseAcceptLanguage(r.Header.Get("Accept-Language"))
- // We ignore the error: the default language will be selected for t == nil.
- tag, _, _ := matcher.Match(t...)
- fmt.Printf("%17v (t: %6v; q: %3v; err: %v)\n", tag, t, q, err)
-}
-
-func ExampleParseAcceptLanguage() {
- for _, al := range []string{
- "nn;q=0.3, en-us;q=0.8, en,",
- "gsw, en;q=0.7, en-US;q=0.8",
- "gsw, nl, da",
- "invalid",
- } {
- // Create dummy request with Accept-Language set and pass it to handler.
- r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
- r.Header.Set("Accept-Language", al)
- handler(nil, r)
- }
-
- // Output:
- // en-GB (t: [ en en-US nn]; q: [ 1 0.8 0.3]; err: <nil>)
- // en-GB-u-rg-uszzzz (t: [ gsw en-US en]; q: [ 1 0.8 0.7]; err: <nil>)
- // de (t: [ gsw nl da]; q: [ 1 1 1]; err: <nil>)
- // en-GB (t: []; q: []; err: language: tag is not well-formed)
-}
diff --git a/internal/language/compact/language.go b/internal/language/compact/language.go
index c4855b5..415b55e 100644
--- a/internal/language/compact/language.go
+++ b/internal/language/compact/language.go
@@ -4,7 +4,7 @@
//go:generate go run gen.go gen_index.go -output tables.go
-package language
+package compact
// TODO: Remove above NOTE after:
// - verifying that tables are dropped correctly (most notably matcher tables).
@@ -19,33 +19,36 @@
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
- language compactID
- locale compactID
+ language ID
+ locale ID
full fullTag // always a language.Tag for now.
}
+const _und = 0
+
type fullTag interface {
IsRoot() bool
Parent() language.Tag
}
-func makeTag(t language.Tag) (tag Tag) {
+// Make returns a compact Tag for a fully specified internal language Tag.
+func Make(t language.Tag) (tag Tag) {
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
if r, err := language.ParseRegion(region[:2]); err == nil {
tFull := t
t, _ = t.SetTypeForKey("rg", "")
// TODO: should we not consider "va" for the language tag?
var exact1, exact2 bool
- tag.language, exact1 = compactIndex(t)
+ tag.language, exact1 = FromTag(t)
t.RegionID = r
- tag.locale, exact2 = compactIndex(t)
+ tag.locale, exact2 = FromTag(t)
if !exact1 || !exact2 {
tag.full = tFull
}
return tag
}
}
- lang, ok := compactIndex(t)
+ lang, ok := FromTag(t)
tag.language = lang
tag.locale = lang
if !ok {
@@ -54,53 +57,38 @@
return tag
}
-func (t *Tag) tag() language.Tag {
+// Tag returns an internal language Tag version of this tag.
+func (t Tag) Tag() language.Tag {
if t.full != nil {
return t.full.(language.Tag)
}
- tag := t.language.tag()
+ tag := t.language.Tag()
if t.language != t.locale {
- loc := t.locale.tag()
+ loc := t.locale.Tag()
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
}
return tag
}
-func (t *Tag) mayHaveVariants() bool {
+// IsCompact reports whether this tag is fully defined in terms of ID (no full tag is stored).
+func (t Tag) IsCompact() bool {
+ return t.full == nil
+}
+
+// MayHaveVariants reports whether a tag may have variants. If it returns false
+// it is guaranteed the tag does not have variants.
+func (t Tag) MayHaveVariants() bool {
return t.full != nil || int(t.language) >= len(coreTags)
}
-func (t *Tag) mayHaveExtensions() bool {
+// MayHaveExtensions reports whether a tag may have extensions. If it returns
+// false it is guaranteed the tag does not have them.
+func (t Tag) MayHaveExtensions() bool {
return t.full != nil ||
int(t.language) >= len(coreTags) ||
t.language != t.locale
}
-// TODO: improve performance.
-func (t *Tag) lang() language.Language { return t.tag().LangID }
-func (t *Tag) region() language.Region { return t.tag().RegionID }
-func (t *Tag) script() language.Script { return t.tag().ScriptID }
-
-// Make is a convenience wrapper for Parse that omits the error.
-// In case of an error, a sensible default is returned.
-func Make(s string) Tag {
- return Default.Make(s)
-}
-
-// Make is a convenience wrapper for c.Parse that omits the error.
-// In case of an error, a sensible default is returned.
-func (c CanonType) Make(s string) Tag {
- t, _ := c.Parse(s)
- return t
-}
-
-// Raw returns the raw base language, script and region, without making an
-// attempt to infer their values.
-func (t Tag) Raw() (b Base, s Script, r Region) {
- tt := t.tag()
- return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
-}
-
// IsRoot returns true if t is equal to language "und".
func (t Tag) IsRoot() bool {
if t.full != nil {
@@ -109,281 +97,12 @@
return t.language == _und
}
-// CanonType can be used to enable or disable various types of canonicalization.
-type CanonType int
-
-const (
- // Replace deprecated base languages with their preferred replacements.
- DeprecatedBase CanonType = 1 << iota
- // Replace deprecated scripts with their preferred replacements.
- DeprecatedScript
- // Replace deprecated regions with their preferred replacements.
- DeprecatedRegion
- // Remove redundant scripts.
- SuppressScript
- // Normalize legacy encodings. This includes legacy languages defined in
- // CLDR as well as bibliographic codes defined in ISO-639.
- Legacy
- // Map the dominant language of a macro language group to the macro language
- // subtag. For example cmn -> zh.
- Macro
- // The CLDR flag should be used if full compatibility with CLDR is required.
- // There are a few cases where language.Tag may differ from CLDR. To follow all
- // of CLDR's suggestions, use All|CLDR.
- CLDR
-
- // Raw can be used to Compose or Parse without Canonicalization.
- Raw CanonType = 0
-
- // Replace all deprecated tags with their preferred replacements.
- Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
-
- // All canonicalizations recommended by BCP 47.
- BCP47 = Deprecated | SuppressScript
-
- // All canonicalizations.
- All = BCP47 | Legacy | Macro
-
- // Default is the canonicalization used by Parse, Make and Compose. To
- // preserve as much information as possible, canonicalizations that remove
- // potentially valuable information are not included. The Matcher is
- // designed to recognize similar tags that would be the same if
- // they were canonicalized using All.
- Default = Deprecated | Legacy
-
- canonLang = DeprecatedBase | Legacy | Macro
-
- // TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
-)
-
-// canonicalize returns the canonicalized equivalent of the tag and
-// whether there was any change.
-func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
- if c == Raw {
- return t, false
- }
- changed := false
- if c&SuppressScript != 0 {
- if t.LangID.SuppressScript() == t.ScriptID {
- t.ScriptID = 0
- changed = true
- }
- }
- if c&canonLang != 0 {
- for {
- if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
- switch aliasType {
- case language.Legacy:
- if c&Legacy != 0 {
- if t.LangID == _sh && t.ScriptID == 0 {
- t.ScriptID = _Latn
- }
- t.LangID = l
- changed = true
- }
- case language.Macro:
- if c&Macro != 0 {
- // We deviate here from CLDR. The mapping "nb" -> "no"
- // qualifies as a typical Macro language mapping. However,
- // for legacy reasons, CLDR maps "no", the macro language
- // code for Norwegian, to the dominant variant "nb". This
- // change is currently under consideration for CLDR as well.
- // See http://unicode.org/cldr/trac/ticket/2698 and also
- // http://unicode.org/cldr/trac/ticket/1790 for some of the
- // practical implications. TODO: this check could be removed
- // if CLDR adopts this change.
- if c&CLDR == 0 || t.LangID != _nb {
- changed = true
- t.LangID = l
- }
- }
- case language.Deprecated:
- if c&DeprecatedBase != 0 {
- if t.LangID == _mo && t.RegionID == 0 {
- t.RegionID = _MD
- }
- t.LangID = l
- changed = true
- // Other canonicalization types may still apply.
- continue
- }
- }
- } else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
- t.LangID = _nb
- changed = true
- }
- break
- }
- }
- if c&DeprecatedScript != 0 {
- if t.ScriptID == _Qaai {
- changed = true
- t.ScriptID = _Zinh
- }
- }
- if c&DeprecatedRegion != 0 {
- if r := t.RegionID.Canonicalize(); r != t.RegionID {
- changed = true
- t.RegionID = r
- }
- }
- return t, changed
-}
-
-// Canonicalize returns the canonicalized equivalent of the tag.
-func (c CanonType) Canonicalize(t Tag) (Tag, error) {
- // First try fast path.
- if t.full == nil {
- if _, changed := canonicalize(c, t.language.tag()); !changed {
- return t, nil
- }
- }
- // It is unlikely that one will canonicalize a tag after matching. So do
- // a slow but simple approach here.
- if tag, changed := canonicalize(c, t.tag()); changed {
- tag.RemakeString()
- return makeTag(tag), nil
- }
- return t, nil
-
-}
-
-// Confidence indicates the level of certainty for a given return value.
-// For example, Serbian may be written in Cyrillic or Latin script.
-// The confidence level indicates whether a value was explicitly specified,
-// whether it is typically the only possible value, or whether there is
-// an ambiguity.
-type Confidence int
-
-const (
- No Confidence = iota // full confidence that there was no match
- Low // most likely value picked out of a set of alternatives
- High // value is generally assumed to be the correct match
- Exact // exact match or explicitly specified value
-)
-
-var confName = []string{"No", "Low", "High", "Exact"}
-
-func (c Confidence) String() string {
- return confName[c]
-}
-
-// String returns the canonical string representation of the language tag.
-func (t Tag) String() string {
- return t.tag().String()
-}
-
-// MarshalText implements encoding.TextMarshaler.
-func (t Tag) MarshalText() (text []byte, err error) {
- return t.tag().MarshalText()
-}
-
-// UnmarshalText implements encoding.TextUnmarshaler.
-func (t *Tag) UnmarshalText(text []byte) error {
- var tag language.Tag
- err := tag.UnmarshalText(text)
- *t = makeTag(tag)
- return err
-}
-
-// Base returns the base language of the language tag. If the base language is
-// unspecified, an attempt will be made to infer it from the context.
-// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
-func (t Tag) Base() (Base, Confidence) {
- if b := t.lang(); b != 0 {
- return Base{b}, Exact
- }
- tt := t.tag()
- c := High
- if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
- c = Low
- }
- if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
- return Base{tag.LangID}, c
- }
- return Base{0}, No
-}
-
-// Script infers the script for the language tag. If it was not explicitly given, it will infer
-// a most likely candidate.
-// If more than one script is commonly used for a language, the most likely one
-// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
-// for Serbian.
-// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
-// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
-// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
-// See http://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
-// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
-// Note that an inferred script is never guaranteed to be the correct one. Latin is
-// almost exclusively used for Afrikaans, but Arabic has been used for some texts
-// in the past. Also, the script that is commonly used may change over time.
-// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
-func (t Tag) Script() (Script, Confidence) {
- if scr := t.script(); scr != 0 {
- return Script{scr}, Exact
- }
- tt := t.tag()
- sc, c := language.Script(_Zzzz), No
- if scr := tt.LangID.SuppressScript(); scr != 0 {
- // Note: it is not always the case that a language with a suppress
- // script value is only written in one script (e.g. kk, ms, pa).
- if tt.RegionID == 0 {
- return Script{scr}, High
- }
- sc, c = scr, High
- }
- if tag, err := tt.Maximize(); err == nil {
- if tag.ScriptID != sc {
- sc, c = tag.ScriptID, Low
- }
- } else {
- tt, _ = canonicalize(Deprecated|Macro, tt)
- if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
- sc, c = tag.ScriptID, Low
- }
- }
- return Script{sc}, c
-}
-
-// Region returns the region for the language tag. If it was not explicitly given, it will
-// infer a most likely candidate from the context.
-// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
-func (t Tag) Region() (Region, Confidence) {
- if r := t.region(); r != 0 {
- return Region{r}, Exact
- }
- tt := t.tag()
- if tt, err := tt.Maximize(); err == nil {
- return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
- }
- tt, _ = canonicalize(Deprecated|Macro, tt)
- if tag, err := tt.Maximize(); err == nil {
- return Region{tag.RegionID}, Low
- }
- return Region{_ZZ}, No // TODO: return world instead of undetermined?
-}
-
-// Variants returns the variants specified explicitly for this language tag.
-// or nil if no variant was specified.
-func (t Tag) Variants() []Variant {
- if !t.mayHaveVariants() {
- return nil
- }
- v := []Variant{}
- x, str := "", t.tag().Variants()
- for str != "" {
- x, str = nextToken(str)
- v = append(v, Variant{x})
- }
- return v
-}
-
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
// specific language are substituted with fields from the parent language.
// The parent for a language may change for newer versions of CLDR.
func (t Tag) Parent() Tag {
if t.full != nil {
- return makeTag(t.full.Parent())
+ return Make(t.full.Parent())
}
if t.language != t.locale {
// Simulate stripping -u-rg-xxxxxx
@@ -393,7 +112,7 @@
// removed. Probably by internalizing the table and declaring this fast
// enough.
// lang := compactID(internal.Parent(uint16(t.language)))
- lang, _ := compactIndex(t.language.tag().Parent())
+ lang, _ := FromTag(t.language.Tag().Parent())
return Tag{language: lang, locale: lang}
}
@@ -407,144 +126,78 @@
return s[1:p], s[p:]
}
-// Extension is a single BCP 47 extension.
-type Extension struct {
- s string
-}
-
-// String returns the string representation of the extension, including the
-// type tag.
-func (e Extension) String() string {
- return e.s
-}
-
-// ParseExtension parses s as an extension and returns it on success.
-func ParseExtension(s string) (e Extension, err error) {
- ext, err := language.ParseExtension(s)
- return Extension{ext}, err
-}
-
-// Type returns the one-byte extension type of e. It returns 0 for the zero
-// exception.
-func (e Extension) Type() byte {
- if e.s == "" {
- return 0
- }
- return e.s[0]
-}
-
-// Tokens returns the list of tokens of e.
-func (e Extension) Tokens() []string {
- return strings.Split(e.s, "-")
-}
-
-// Extension returns the extension of type x for tag t. It will return
-// false for ok if t does not have the requested extension. The returned
-// extension will be invalid in this case.
-func (t Tag) Extension(x byte) (ext Extension, ok bool) {
- if !t.mayHaveExtensions() {
- return Extension{}, false
- }
- e, ok := t.tag().Extension(x)
- return Extension{e}, ok
-}
-
-// Extensions returns all extensions of t.
-func (t Tag) Extensions() []Extension {
- if !t.mayHaveExtensions() {
- return nil
- }
- e := []Extension{}
- for _, ext := range t.tag().Extensions() {
- e = append(e, Extension{ext})
- }
- return e
-}
-
-// TypeForKey returns the type associated with the given key, where key and type
-// are of the allowed values defined for the Unicode locale extension ('u') in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// TypeForKey will traverse the inheritance chain to get the correct value.
-func (t Tag) TypeForKey(key string) string {
- if !t.mayHaveExtensions() {
- if key != "rg" && key != "va" {
- return ""
- }
- }
- return t.tag().TypeForKey(key)
-}
-
-// SetTypeForKey returns a new Tag with the key set to type, where key and type
-// are of the allowed values defined for the Unicode locale extension ('u') in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// An empty value removes an existing pair with the same key.
-func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
- tt, err := t.tag().SetTypeForKey(key, value)
- return makeTag(tt), err
-}
-
-// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
+// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
-// for which data exists in the text repository.The index will change over time
+// for which data exists in the text repository. The index will change over time
// and should not be stored in persistent storage. If t does not match a compact
// index, exact will be false and the compact index will be returned for the
// first match after repeatedly taking the Parent of t.
-func CompactIndex(t Tag) (index int, exact bool) {
- return int(t.language), t.full == nil
+func LanguageID(t Tag) (id ID, exact bool) {
+ return t.language, t.full == nil
}
// TODO: make these functions and methods public once we settle on the API and
//
-// regionalCompactIndex returns the CompactIndex for the regional variant of this
-// tag. This index is used to indicate region-specific overrides, such as
-// default currency, default calendar and week data, default time cycle, and
-// default measurement system and unit preferences.
+// RegionalID returns the ID for the regional variant of this tag. This index is
+// used to indicate region-specific overrides, such as default currency, default
+// calendar and week data, default time cycle, and default measurement system
+// and unit preferences.
//
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
-// settings for currency, number formatting, etc. The CompactIndex for this tag
+// settings for currency, number formatting, etc. The LanguageID for this tag
-// will be that for en-GB, while the regionalCompactIndex will be the one
-// corresponding to en-US.
-func regionalCompactIndex(t Tag) (index int, exact bool) {
- return int(t.locale), t.full == nil
+// will be that for en-GB, while the RegionalID will be the one corresponding to
+// en-US.
+func RegionalID(t Tag) (id ID, exact bool) {
+ return t.locale, t.full == nil
}
-// languageTag returns t stripped of regional variant indicators.
+// LanguageTag returns t stripped of regional variant indicators.
//
// At the moment this means it is stripped of a regional and variant subtag "rg"
// and "va" in the "u" extension.
-func (t Tag) languageTag() Tag {
+func (t Tag) LanguageTag() Tag {
if t.full == nil {
return Tag{language: t.language, locale: t.language}
}
- tt := t.tag()
- tt.SetTypeForKey("rg", "")
- tt.SetTypeForKey("va", "")
- return makeTag(tt)
+ tt := t.Tag()
+ tt, _ = tt.SetTypeForKey("rg", "") // SetTypeForKey returns the new tag; keep it so "rg" is really removed
+ tt, _ = tt.SetTypeForKey("va", "") // likewise for "va"; errors are ignored as at the other call sites in this file
+ return Make(tt)
}
-// regionalTag returns the regional variant of the tag.
+// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
-func (t Tag) regionalTag() Tag {
+func (t Tag) RegionalTag() Tag {
rt := Tag{language: t.locale, locale: t.locale}
if t.full == nil {
return rt
}
- t, _ = Raw.Compose(rt, t.Variants(), t.Extensions())
- t, _ = t.SetTypeForKey("rg", "")
- return t
+ b := language.Builder{}
+ tag := t.Tag()
+ tag, _ = tag.SetTypeForKey("rg", "") // drop "rg", as the removed code did; the region now comes from t.locale
+ b.SetTag(t.locale.Tag())
+ if v := tag.Variants(); v != "" {
+ for _, v := range strings.Split(v, "-") {
+ b.AddVariant(v)
+ }
+ }
+ for _, e := range tag.Extensions() {
+ b.AddExt(e)
+ }
+ return Make(b.Make()) // return the rebuilt tag; previously the builder result was discarded and t returned as-is
}
-func compactIndex(t language.Tag) (index compactID, exact bool) {
+// FromTag reports the closest matching ID for an internal language Tag.
+func FromTag(t language.Tag) (id ID, exact bool) {
// TODO: perhaps give more frequent tags a lower index.
// TODO: we could make the indexes stable. This will excluded some
// possibilities for optimization, so don't do this quite yet.
exact = true
b, s, r := t.Raw()
- switch {
- case t.HasString():
+ if t.HasString() {
if t.IsPrivateUse() {
// We have no entries for user-defined tags.
return 0, false
@@ -577,7 +230,7 @@
// We have some variants.
for i, s := range specialTags {
if s == t {
- return compactID(i + len(coreTags)), exact
+ return ID(i + len(coreTags)), exact
}
}
exact = false
@@ -606,157 +259,3 @@
}
var root = language.Tag{}
-
-// Base is an ISO 639 language code, used for encoding the base language
-// of a language tag.
-type Base struct {
- langID language.Language
-}
-
-// ParseBase parses a 2- or 3-letter ISO 639 code.
-// It returns a ValueError if s is a well-formed but unknown language identifier
-// or another error if another error occurred.
-func ParseBase(s string) (Base, error) {
- l, err := language.ParseBase(s)
- return Base{l}, err
-}
-
-// String returns the BCP 47 representation of the base language.
-func (b Base) String() string {
- return b.langID.String()
-}
-
-// ISO3 returns the ISO 639-3 language code.
-func (b Base) ISO3() string {
- return b.langID.ISO3()
-}
-
-// IsPrivateUse reports whether this language code is reserved for private use.
-func (b Base) IsPrivateUse() bool {
- return b.langID.IsPrivateUse()
-}
-
-// Script is a 4-letter ISO 15924 code for representing scripts.
-// It is idiomatically represented in title case.
-type Script struct {
- scriptID language.Script
-}
-
-// ParseScript parses a 4-letter ISO 15924 code.
-// It returns a ValueError if s is a well-formed but unknown script identifier
-// or another error if another error occurred.
-func ParseScript(s string) (Script, error) {
- sc, err := language.ParseScript(s)
- return Script{sc}, err
-}
-
-// String returns the script code in title case.
-// It returns "Zzzz" for an unspecified script.
-func (s Script) String() string {
- return s.scriptID.String()
-}
-
-// IsPrivateUse reports whether this script code is reserved for private use.
-func (s Script) IsPrivateUse() bool {
- return s.scriptID.IsPrivateUse()
-}
-
-// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
-type Region struct {
- regionID language.Region
-}
-
-// EncodeM49 returns the Region for the given UN M.49 code.
-// It returns an error if r is not a valid code.
-func EncodeM49(r int) (Region, error) {
- rid, err := language.EncodeM49(r)
- return Region{rid}, err
-}
-
-// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
-// It returns a ValueError if s is a well-formed but unknown region identifier
-// or another error if another error occurred.
-func ParseRegion(s string) (Region, error) {
- r, err := language.ParseRegion(s)
- return Region{r}, err
-}
-
-// String returns the BCP 47 representation for the region.
-// It returns "ZZ" for an unspecified region.
-func (r Region) String() string {
- return r.regionID.String()
-}
-
-// ISO3 returns the 3-letter ISO code of r.
-// Note that not all regions have a 3-letter ISO code.
-// In such cases this method returns "ZZZ".
-func (r Region) ISO3() string {
- return r.regionID.String()
-}
-
-// M49 returns the UN M.49 encoding of r, or 0 if this encoding
-// is not defined for r.
-func (r Region) M49() int {
- return r.regionID.M49()
-}
-
-// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
-// may include private-use tags that are assigned by CLDR and used in this
-// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
-func (r Region) IsPrivateUse() bool {
- return r.regionID.IsPrivateUse()
-}
-
-// IsCountry returns whether this region is a country or autonomous area. This
-// includes non-standard definitions from CLDR.
-func (r Region) IsCountry() bool {
- return r.regionID.IsCountry()
-}
-
-// IsGroup returns whether this region defines a collection of regions. This
-// includes non-standard definitions from CLDR.
-func (r Region) IsGroup() bool {
- return r.regionID.IsGroup()
-}
-
-// Contains returns whether Region c is contained by Region r. It returns true
-// if c == r.
-func (r Region) Contains(c Region) bool {
- return r.regionID.Contains(c.regionID)
-}
-
-// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
-// In all other cases it returns either the region itself or an error.
-//
-// This method may return an error for a region for which there exists a
-// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
-// region will already be canonicalized it was obtained from a Tag that was
-// obtained using any of the default methods.
-func (r Region) TLD() (Region, error) {
- tld, err := r.regionID.TLD()
- return Region{tld}, err
-}
-
-// Canonicalize returns the region or a possible replacement if the region is
-// deprecated. It will not return a replacement for deprecated regions that
-// are split into multiple regions.
-func (r Region) Canonicalize() Region {
- return Region{r.regionID.Canonicalize()}
-}
-
-// Variant represents a registered variant of a language as defined by BCP 47.
-type Variant struct {
- variant string
-}
-
-// ParseVariant parses and returns a Variant. An error is returned if s is not
-// a valid variant.
-func ParseVariant(s string) (Variant, error) {
- v, err := language.ParseVariant(s)
- return Variant{v.String()}, err
-}
-
-// String returns the string representation of the variant.
-func (v Variant) String() string {
- return v.variant
-}
diff --git a/internal/language/compact/language_test.go b/internal/language/compact/language_test.go
index 20bc48c..4b1eea1 100644
--- a/internal/language/compact/language_test.go
+++ b/internal/language/compact/language_test.go
@@ -2,13 +2,23 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package language
+package compact
import (
"reflect"
"testing"
+
+ "golang.org/x/text/internal/language"
)
+func mustParse(s string) Tag {
+ t, err := language.Parse(s)
+ if err != nil {
+ panic(err)
+ }
+ return Make(t)
+}
+
func TestTagSize(t *testing.T) {
id := Tag{}
typ := reflect.TypeOf(id)
@@ -17,50 +27,20 @@
}
}
-func TestIsRoot(t *testing.T) {
- loc := Tag{}
- if !loc.IsRoot() {
- t.Errorf("unspecified should be root.")
- }
- for i, tt := range parseTests() {
- loc, _ := Parse(tt.in)
- undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
- if loc.IsRoot() != undef {
- t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
- }
- }
-}
-
func TestEquality(t *testing.T) {
for i, tt := range parseTests() {
s := tt.in
- tag := Make(s)
- t1 := Make(tag.String())
+ tag := mk(s)
+ t1 := mustParse(tag.Tag().String())
if tag != t1 {
t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
}
- t2, _ := Compose(tag)
- if tag != t2 {
- t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
- }
- }
-}
-
-func TestString(t *testing.T) {
- tests := []string{
- "no-u-rg-dkzzzz",
- }
- for i, s := range tests {
- tag := Make(s)
- if tag.String() != s {
- t.Errorf("%d:%s: got %s: want %s (%#v)", i, s, tag.String(), s, tag)
- }
}
}
type compactTest struct {
tag string
- index compactID
+ index ID
ok bool
}
@@ -89,7 +69,7 @@
{"sh", shIndex, true}, // From plural rules.
}
-func TestCompactIndex(t *testing.T) {
+func TestLanguageID(t *testing.T) {
tests := append(compactTests, []compactTest{
{"en-GB", enGBIndex, true},
{"en-GB-u-rg-uszzzz", enGBIndex, true},
@@ -101,14 +81,14 @@
{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
}...)
for _, tt := range tests {
- x, ok := CompactIndex(Raw.MustParse(tt.tag))
- if compactID(x) != tt.index || ok != tt.ok {
+ x, ok := LanguageID(mustParse(tt.tag))
+ if ID(x) != tt.index || ok != tt.ok {
t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
}
}
}
-func TestRegionalCompactIndex(t *testing.T) {
+func TestRegionalID(t *testing.T) {
tests := append(compactTests, []compactTest{
{"en-GB", enGBIndex, true},
{"en-GB-u-rg-uszzzz", enUSIndex, true},
@@ -122,516 +102,13 @@
{"en-GB-u-co-phonebk-rg-usz", enGBIndex, false},
}...)
for _, tt := range tests {
- x, ok := regionalCompactIndex(Raw.MustParse(tt.tag))
- if compactID(x) != tt.index || ok != tt.ok {
+ x, ok := RegionalID(mustParse(tt.tag))
+ if ID(x) != tt.index || ok != tt.ok {
t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
}
}
}
-func TestMarshal(t *testing.T) {
- testCases := []string{
- // TODO: these values will change with each CLDR update. This issue
- // will be solved if we decide to fix the indexes.
- "und",
- "ca-ES-valencia",
- "ca-ES-valencia-u-va-posix",
- "ca-ES-valencia-u-co-phonebk",
- "ca-ES-valencia-u-co-phonebk-va-posix",
- "x-klingon",
- "en-US",
- "en-US-u-va-posix",
- "en",
- "en-u-co-phonebk",
- "en-001",
- "sh",
-
- "en-GB-u-rg-uszzzz",
- "en-GB-u-rg-uszzzz-va-posix",
- "en-GB-u-co-phonebk-rg-uszzzz",
- // Invalid tags should also roundtrip.
- "en-GB-u-co-phonebk-rg-uszz",
- }
- for _, tc := range testCases {
- var tag Tag
- err := tag.UnmarshalText([]byte(tc))
- if err != nil {
- t.Errorf("UnmarshalText(%q): unexpected error: %v", tc, err)
- }
- b, err := tag.MarshalText()
- if err != nil {
- t.Errorf("MarshalText(%q): unexpected error: %v", tc, err)
- }
- if got := string(b); got != tc {
- t.Errorf("%s: got %q; want %q", tc, got, tc)
- }
- }
-}
-
-func TestBase(t *testing.T) {
- tests := []struct {
- loc, lang string
- conf Confidence
- }{
- {"und", "en", Low},
- {"x-abc", "und", No},
- {"en", "en", Exact},
- {"und-Cyrl", "ru", High},
- // If a region is not included, the official language should be English.
- {"und-US", "en", High},
- // TODO: not-explicitly listed scripts should probably be und, No
- // Modify addTags to return info on how the match was derived.
- // {"und-Aghb", "und", No},
- }
- for i, tt := range tests {
- loc, _ := Parse(tt.loc)
- lang, conf := loc.Base()
- if lang.String() != tt.lang {
- t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
- }
- if conf != tt.conf {
- t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
- }
- }
-}
-
-func TestParseBase(t *testing.T) {
- tests := []struct {
- in string
- out string
- ok bool
- }{
- {"en", "en", true},
- {"EN", "en", true},
- {"nld", "nl", true},
- {"dut", "dut", true}, // bibliographic
- {"aaj", "und", false}, // unknown
- {"qaa", "qaa", true},
- {"a", "und", false},
- {"", "und", false},
- {"aaaa", "und", false},
- }
- for i, tt := range tests {
- x, err := ParseBase(tt.in)
- if x.String() != tt.out || err == nil != tt.ok {
- t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
- }
- if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
- t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
- }
- }
-}
-
-func TestScript(t *testing.T) {
- tests := []struct {
- loc, scr string
- conf Confidence
- }{
- {"und", "Latn", Low},
- {"en-Latn", "Latn", Exact},
- {"en", "Latn", High},
- {"sr", "Cyrl", Low},
- {"kk", "Cyrl", High},
- {"kk-CN", "Arab", Low},
- {"cmn", "Hans", Low},
- {"ru", "Cyrl", High},
- {"ru-RU", "Cyrl", High},
- {"yue", "Hant", Low},
- {"x-abc", "Zzzz", Low},
- {"und-zyyy", "Zyyy", Exact},
- }
- for i, tt := range tests {
- loc, _ := Parse(tt.loc)
- sc, conf := loc.Script()
- if sc.String() != tt.scr {
- t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
- }
- if conf != tt.conf {
- t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
- }
- }
-}
-
-func TestParseScript(t *testing.T) {
- tests := []struct {
- in string
- out string
- ok bool
- }{
- {"Latn", "Latn", true},
- {"zzzz", "Zzzz", true},
- {"zyyy", "Zyyy", true},
- {"Latm", "Zzzz", false},
- {"Zzz", "Zzzz", false},
- {"", "Zzzz", false},
- {"Zzzxx", "Zzzz", false},
- }
- for i, tt := range tests {
- x, err := ParseScript(tt.in)
- if x.String() != tt.out || err == nil != tt.ok {
- t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
- }
- if err == nil {
- if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
- t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
- }
- }
- }
-}
-
-func TestRegion(t *testing.T) {
- tests := []struct {
- loc, reg string
- conf Confidence
- }{
- {"und", "US", Low},
- {"en", "US", Low},
- {"zh-Hant", "TW", Low},
- {"en-US", "US", Exact},
- {"cmn", "CN", Low},
- {"ru", "RU", Low},
- {"yue", "HK", Low},
- {"x-abc", "ZZ", Low},
- }
- for i, tt := range tests {
- loc, _ := Raw.Parse(tt.loc)
- reg, conf := loc.Region()
- if reg.String() != tt.reg {
- t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
- }
- if conf != tt.conf {
- t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
- }
- }
-}
-
-func TestEncodeM49(t *testing.T) {
- tests := []struct {
- m49 int
- code string
- ok bool
- }{
- {1, "001", true},
- {840, "US", true},
- {899, "ZZ", false},
- }
- for i, tt := range tests {
- if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
- t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
- }
- }
- for i := 1; i <= 1000; i++ {
- if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
- t.Errorf("%d has no error, but maps to undefined region", i)
- }
- }
-}
-
-func TestParseRegion(t *testing.T) {
- tests := []struct {
- in string
- out string
- ok bool
- }{
- {"001", "001", true},
- {"840", "US", true},
- {"899", "ZZ", false},
- {"USA", "US", true},
- {"US", "US", true},
- {"BC", "ZZ", false},
- {"C", "ZZ", false},
- {"CCCC", "ZZ", false},
- {"01", "ZZ", false},
- }
- for i, tt := range tests {
- r, err := ParseRegion(tt.in)
- if r.String() != tt.out || err == nil != tt.ok {
- t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
- }
- if err == nil {
- if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
- t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
- }
- }
- }
-}
-
-func TestIsCountry(t *testing.T) {
- tests := []struct {
- reg string
- country bool
- }{
- {"US", true},
- {"001", false},
- {"958", false},
- {"419", false},
- {"203", true},
- {"020", true},
- {"900", false},
- {"999", false},
- {"QO", false},
- {"EU", false},
- {"AA", false},
- {"XK", true},
- }
- for i, tt := range tests {
- r, _ := ParseRegion(tt.reg)
- if r.IsCountry() != tt.country {
- t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
- }
- }
-}
-
-func TestIsGroup(t *testing.T) {
- tests := []struct {
- reg string
- group bool
- }{
- {"US", false},
- {"001", true},
- {"958", false},
- {"419", true},
- {"203", false},
- {"020", false},
- {"900", false},
- {"999", false},
- {"QO", true},
- {"EU", true},
- {"AA", false},
- {"XK", false},
- }
- for i, tt := range tests {
- r, _ := ParseRegion(tt.reg)
- if r.IsGroup() != tt.group {
- t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
- }
- }
-}
-
-func TestContains(t *testing.T) {
- tests := []struct {
- enclosing, contained string
- contains bool
- }{
- // A region contains itself.
- {"US", "US", true},
- {"001", "001", true},
-
- // Direct containment.
- {"001", "002", true},
- {"039", "XK", true},
- {"150", "XK", true},
- {"EU", "AT", true},
- {"QO", "AQ", true},
-
- // Indirect containemnt.
- {"001", "US", true},
- {"001", "419", true},
- {"001", "013", true},
-
- // No containment.
- {"US", "001", false},
- {"155", "EU", false},
- }
- for i, tt := range tests {
- r := MustParseRegion(tt.enclosing)
- con := MustParseRegion(tt.contained)
- if got := r.Contains(con); got != tt.contains {
- t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
- }
- }
-}
-
-func TestRegionCanonicalize(t *testing.T) {
- for i, tt := range []struct{ in, out string }{
- {"UK", "GB"},
- {"TP", "TL"},
- {"QU", "EU"},
- {"SU", "SU"},
- {"VD", "VN"},
- {"DD", "DE"},
- } {
- r := MustParseRegion(tt.in)
- want := MustParseRegion(tt.out)
- if got := r.Canonicalize(); got != want {
- t.Errorf("%d: got %v; want %v", i, got, want)
- }
- }
-}
-
-func TestRegionTLD(t *testing.T) {
- for _, tt := range []struct {
- in, out string
- ok bool
- }{
- {"EH", "EH", true},
- {"FR", "FR", true},
- {"TL", "TL", true},
-
- // In ccTLD before in ISO.
- {"GG", "GG", true},
-
- // Non-standard assignment of ccTLD to ISO code.
- {"GB", "UK", true},
-
- // Exceptionally reserved in ISO and valid ccTLD.
- {"UK", "UK", true},
- {"AC", "AC", true},
- {"EU", "EU", true},
- {"SU", "SU", true},
-
- // Exceptionally reserved in ISO and invalid ccTLD.
- {"CP", "ZZ", false},
- {"DG", "ZZ", false},
- {"EA", "ZZ", false},
- {"FX", "ZZ", false},
- {"IC", "ZZ", false},
- {"TA", "ZZ", false},
-
- // Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
- // it is still being phased out.
- {"AN", "AN", true},
- {"TP", "TP", true},
-
- // Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
- // Defined in package language as it has a mapping in CLDR.
- {"BU", "ZZ", false},
- {"CS", "ZZ", false},
- {"NT", "ZZ", false},
- {"YU", "ZZ", false},
- {"ZR", "ZZ", false},
- // Not defined in package: SF.
-
- // Indeterminately reserved in ISO.
- // Defined in package language as it has a legacy mapping in CLDR.
- {"DY", "ZZ", false},
- {"RH", "ZZ", false},
- {"VD", "ZZ", false},
- // Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
- // RN, RP, WG, WL, WV, and YV.
-
- // Not assigned in ISO, but legacy definitions in CLDR.
- {"DD", "ZZ", false},
- {"YD", "ZZ", false},
-
- // Normal mappings but somewhat special status in ccTLD.
- {"BL", "BL", true},
- {"MF", "MF", true},
- {"BV", "BV", true},
- {"SJ", "SJ", true},
-
- // Have values when normalized, but not as is.
- {"QU", "ZZ", false},
-
- // ISO Private Use.
- {"AA", "ZZ", false},
- {"QM", "ZZ", false},
- {"QO", "ZZ", false},
- {"XA", "ZZ", false},
- {"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
- } {
- if tt.in == "" {
- continue
- }
-
- r := MustParseRegion(tt.in)
- var want Region
- if tt.out != "ZZ" {
- want = MustParseRegion(tt.out)
- }
- tld, err := r.TLD()
- if got := err == nil; got != tt.ok {
- t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
- }
- if tld != want {
- t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
- }
- }
-}
-
-func TestCanonicalize(t *testing.T) {
- // TODO: do a full test using CLDR data in a separate regression test.
- tests := []struct {
- in, out string
- option CanonType
- }{
- {"en-Latn", "en", SuppressScript},
- {"sr-Cyrl", "sr-Cyrl", SuppressScript},
- {"sh", "sr-Latn", Legacy},
- {"sh-HR", "sr-Latn-HR", Legacy},
- {"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
- {"tl", "fil", Legacy},
- {"no", "no", Legacy},
- {"no", "nb", Legacy | CLDR},
- {"cmn", "cmn", Legacy},
- {"cmn", "zh", Macro},
- {"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
- {"yue", "yue", Macro},
- {"nb", "no", Macro},
- {"nb", "nb", Macro | CLDR},
- {"no", "no", Macro},
- {"no", "no", Macro | CLDR},
- {"iw", "he", DeprecatedBase},
- {"iw", "he", Deprecated | CLDR},
- {"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
- {"alb", "sq", Legacy}, // bibliographic
- {"dut", "nl", Legacy}, // bibliographic
- // As of CLDR 25, mo is no longer considered a legacy mapping.
- {"mo", "mo", Legacy | CLDR},
- {"und-AN", "und-AN", Deprecated},
- {"und-YD", "und-YE", DeprecatedRegion},
- {"und-YD", "und-YD", DeprecatedBase},
- {"und-Qaai", "und-Zinh", DeprecatedScript},
- {"und-Qaai", "und-Qaai", DeprecatedBase},
- {"drh", "mn", All}, // drh -> khk -> mn
-
- {"en-GB-u-rg-uszzzz", "en-GB-u-rg-uszzzz", Raw},
- {"en-GB-u-rg-USZZZZ", "en-GB-u-rg-uszzzz", Raw},
- // TODO: use different exact values for language and regional tag?
- {"en-GB-u-rg-uszzzz-va-posix", "en-GB-u-rg-uszzzz-va-posix", Raw},
- {"en-GB-u-rg-uszzzz-co-phonebk", "en-GB-u-co-phonebk-rg-uszzzz", Raw},
- // Invalid region specifications are left as is.
- {"en-GB-u-rg-usz", "en-GB-u-rg-usz", Raw},
- {"en-GB-u-rg-usz-va-posix", "en-GB-u-rg-usz-va-posix", Raw},
- {"en-GB-u-rg-usz-co-phonebk", "en-GB-u-co-phonebk-rg-usz", Raw},
- }
- for i, tt := range tests {
- in, _ := Raw.Parse(tt.in)
- in, _ = tt.option.Canonicalize(in)
- if in.String() != tt.out {
- t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
- }
- }
- // Test idempotence.
- for _, base := range Supported.BaseLanguages() {
- tag, _ := Raw.Compose(base)
- got, _ := All.Canonicalize(tag)
- want, _ := All.Canonicalize(got)
- if got != want {
- t.Errorf("idem(%s): got %s; want %s", tag, got, want)
- }
- }
-}
-
-func TestTypeForKey(t *testing.T) {
- tests := []struct{ key, in, out string }{
- {"co", "en", ""},
- {"co", "en-u-abc", ""},
- {"co", "en-u-co-phonebk", "phonebk"},
- {"co", "en-u-co-phonebk-cu-aud", "phonebk"},
- {"co", "x-foo-u-co-phonebk", ""},
- {"va", "en-US-u-va-posix", "posix"},
- {"rg", "en-u-rg-gbzzzz", "gbzzzz"},
- {"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
- {"kc", "cmn-u-co-stroke", ""},
- }
- for _, tt := range tests {
- if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
- t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
- }
- }
-}
-
func TestParent(t *testing.T) {
tests := []struct{ in, out string }{
// Strip variants and extensions first
@@ -735,110 +212,9 @@
{"nl-US-u-rg-gbzzzz", "nl-US"}, // t.full != nil
}
for _, tt := range tests {
- tag := Raw.MustParse(tt.in)
- if p := Raw.MustParse(tt.out); p != tag.Parent() {
+ tag := mustParse(tt.in)
+ if p := mustParse(tt.out); p != tag.Parent() {
t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
}
}
}
-
-var (
- // Tags without error that don't need to be changed.
- benchBasic = []string{
- "en",
- "en-Latn",
- "en-GB",
- "za",
- "zh-Hant",
- "zh",
- "zh-HK",
- "ar-MK",
- "en-CA",
- "fr-CA",
- "fr-CH",
- "fr",
- "lv",
- "he-IT",
- "tlh",
- "ja",
- "ja-Jpan",
- "ja-Jpan-JP",
- "de-1996",
- "de-CH",
- "sr",
- "sr-Latn",
- }
- // Tags with extensions, not changes required.
- benchExt = []string{
- "x-a-b-c-d",
- "x-aa-bbbb-cccccccc-d",
- "en-x_cc-b-bbb-a-aaa",
- "en-c_cc-b-bbb-a-aaa-x-x",
- "en-u-co-phonebk",
- "en-Cyrl-u-co-phonebk",
- "en-US-u-co-phonebk-cu-xau",
- "en-nedix-u-co-phonebk",
- "en-t-t0-abcd",
- "en-t-nl-latn",
- "en-t-t0-abcd-x-a",
- }
- // Change, but not memory allocation required.
- benchSimpleChange = []string{
- "EN",
- "i-klingon",
- "en-latn",
- "zh-cmn-Hans-CN",
- "iw-NL",
- }
- // Change and memory allocation required.
- benchChangeAlloc = []string{
- "en-c_cc-b-bbb-a-aaa",
- "en-u-cu-xua-co-phonebk",
- "en-u-cu-xua-co-phonebk-a-cd",
- "en-u-def-abc-cu-xua-co-phonebk",
- "en-t-en-Cyrl-NL-1994",
- "en-t-en-Cyrl-NL-1994-t0-abc-def",
- }
- // Tags that result in errors.
- benchErr = []string{
- // IllFormed
- "x_A.-B-C_D",
- "en-u-cu-co-phonebk",
- "en-u-cu-xau-co",
- "en-t-nl-abcd",
- // Invalid
- "xx",
- "nl-Uuuu",
- "nl-QB",
- }
- benchChange = append(benchSimpleChange, benchChangeAlloc...)
- benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
-)
-
-func doParse(b *testing.B, tag []string) {
- for i := 0; i < b.N; i++ {
- // Use the modulo instead of looping over all tags so that we get a somewhat
- // meaningful ns/op.
- Parse(tag[i%len(tag)])
- }
-}
-
-func BenchmarkParse(b *testing.B) {
- doParse(b, benchAll)
-}
-
-func BenchmarkParseBasic(b *testing.B) {
- doParse(b, benchBasic)
-}
-
-func BenchmarkParseError(b *testing.B) {
- doParse(b, benchErr)
-}
-
-func BenchmarkParseSimpleChange(b *testing.B) {
- doParse(b, benchSimpleChange)
-}
-
-func BenchmarkParseChangeAlloc(b *testing.B) {
- doParse(b, benchChangeAlloc)
-}
diff --git a/internal/language/compact/parse.go b/internal/language/compact/parse.go
deleted file mode 100644
index d50c8aa..0000000
--- a/internal/language/compact/parse.go
+++ /dev/null
@@ -1,228 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package language
-
-import (
- "errors"
- "strconv"
- "strings"
-
- "golang.org/x/text/internal/language"
-)
-
-// ValueError is returned by any of the parsing functions when the
-// input is well-formed but the respective subtag is not recognized
-// as a valid value.
-type ValueError interface {
- error
-
- // Subtag returns the subtag for which the error occurred.
- Subtag() string
-}
-
-// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
-// failed it returns an error and any part of the tag that could be parsed.
-// If parsing succeeded but an unknown value was found, it returns
-// ValueError. The Tag returned in this case is just stripped of the unknown
-// value. All other values are preserved. It accepts tags in the BCP 47 format
-// and extensions to this standard defined in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// The resulting tag is canonicalized using the default canonicalization type.
-func Parse(s string) (t Tag, err error) {
- return Default.Parse(s)
-}
-
-// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
-// failed it returns an error and any part of the tag that could be parsed.
-// If parsing succeeded but an unknown value was found, it returns
-// ValueError. The Tag returned in this case is just stripped of the unknown
-// value. All other values are preserved. It accepts tags in the BCP 47 format
-// and extensions to this standard defined in
-// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
-// The resulting tag is canonicalized using the the canonicalization type c.
-func (c CanonType) Parse(s string) (t Tag, err error) {
- tt, err := language.Parse(s)
- if err != nil {
- return makeTag(tt), err
- }
- tt, changed := canonicalize(c, tt)
- if changed {
- tt.RemakeString()
- }
- return makeTag(tt), err
-}
-
-// Compose creates a Tag from individual parts, which may be of type Tag, Base,
-// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
-// Base, Script or Region or slice of type Variant or Extension is passed more
-// than once, the latter will overwrite the former. Variants and Extensions are
-// accumulated, but if two extensions of the same type are passed, the latter
-// will replace the former. For -u extensions, though, the key-type pairs are
-// added, where later values overwrite older ones. A Tag overwrites all former
-// values and typically only makes sense as the first argument. The resulting
-// tag is returned after canonicalizing using the Default CanonType. If one or
-// more errors are encountered, one of the errors is returned.
-func Compose(part ...interface{}) (t Tag, err error) {
- return Default.Compose(part...)
-}
-
-// Compose creates a Tag from individual parts, which may be of type Tag, Base,
-// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
-// Base, Script or Region or slice of type Variant or Extension is passed more
-// than once, the latter will overwrite the former. Variants and Extensions are
-// accumulated, but if two extensions of the same type are passed, the latter
-// will replace the former. For -u extensions, though, the key-type pairs are
-// added, where later values overwrite older ones. A Tag overwrites all former
-// values and typically only makes sense as the first argument. The resulting
-// tag is returned after canonicalizing using CanonType c. If one or more errors
-// are encountered, one of the errors is returned.
-func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
- var b language.Builder
- if err = update(&b, part...); err != nil {
- return und, err
- }
- b.Tag, _ = canonicalize(c, b.Tag)
- return makeTag(b.Make()), err
-}
-
-var errInvalidArgument = errors.New("invalid Extension or Variant")
-
-func update(b *language.Builder, part ...interface{}) (err error) {
- for _, x := range part {
- switch v := x.(type) {
- case Tag:
- b.SetTag(v.tag())
- case Base:
- b.Tag.LangID = v.langID
- case Script:
- b.Tag.ScriptID = v.scriptID
- case Region:
- b.Tag.RegionID = v.regionID
- case Variant:
- if v.variant == "" {
- err = errInvalidArgument
- break
- }
- b.AddVariant(v.variant)
- case Extension:
- if v.s == "" {
- err = errInvalidArgument
- break
- }
- b.SetExt(v.s)
- case []Variant:
- b.ClearVariants()
- for _, v := range v {
- b.AddVariant(v.variant)
- }
- case []Extension:
- b.ClearExtensions()
- for _, e := range v {
- b.SetExt(e.s)
- }
- // TODO: support parsing of raw strings based on morphology or just extensions?
- case error:
- if v != nil {
- err = v
- }
- }
- }
- return
-}
-
-var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
-
-// ParseAcceptLanguage parses the contents of an Accept-Language header as
-// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
-// a list of corresponding quality weights. It is more permissive than RFC 2616
-// and may return non-nil slices even if the input is not valid.
-// The Tags will be sorted by highest weight first and then by first occurrence.
-// Tags with a weight of zero will be dropped. An error will be returned if the
-// input could not be parsed.
-func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
- var entry string
- for s != "" {
- if entry, s = split(s, ','); entry == "" {
- continue
- }
-
- entry, weight := split(entry, ';')
-
- // Scan the language.
- t, err := Parse(entry)
- if err != nil {
- id, ok := acceptFallback[entry]
- if !ok {
- return nil, nil, err
- }
- t = makeTag(language.Tag{LangID: id})
- }
-
- // Scan the optional weight.
- w := 1.0
- if weight != "" {
- weight = consume(weight, 'q')
- weight = consume(weight, '=')
- // consume returns the empty string when a token could not be
- // consumed, resulting in an error for ParseFloat.
- if w, err = strconv.ParseFloat(weight, 32); err != nil {
- return nil, nil, errInvalidWeight
- }
- // Drop tags with a quality weight of 0.
- if w <= 0 {
- continue
- }
- }
-
- tag = append(tag, t)
- q = append(q, float32(w))
- }
- sortStable(&tagSort{tag, q})
- return tag, q, nil
-}
-
-// consume removes a leading token c from s and returns the result or the empty
-// string if there is no such token.
-func consume(s string, c byte) string {
- if s == "" || s[0] != c {
- return ""
- }
- return strings.TrimSpace(s[1:])
-}
-
-func split(s string, c byte) (head, tail string) {
- if i := strings.IndexByte(s, c); i >= 0 {
- return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
- }
- return strings.TrimSpace(s), ""
-}
-
-// Add hack mapping to deal with a small number of cases that that occur
-// in Accept-Language (with reasonable frequency).
-var acceptFallback = map[string]language.Language{
- "english": _en,
- "deutsch": _de,
- "italian": _it,
- "french": _fr,
- "*": _mul, // defined in the spec to match all languages.
-}
-
-type tagSort struct {
- tag []Tag
- q []float32
-}
-
-func (s *tagSort) Len() int {
- return len(s.q)
-}
-
-func (s *tagSort) Less(i, j int) bool {
- return s.q[i] > s.q[j]
-}
-
-func (s *tagSort) Swap(i, j int) {
- s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
- s.q[i], s.q[j] = s.q[j], s.q[i]
-}
diff --git a/internal/language/compact/parse_test.go b/internal/language/compact/parse_test.go
index 2ff28bf..abe3a58 100644
--- a/internal/language/compact/parse_test.go
+++ b/internal/language/compact/parse_test.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package language
+package compact
import (
"strings"
@@ -11,13 +11,6 @@
"golang.org/x/text/internal/language"
)
-// equalTags compares language, script and region subtags only.
-func (t Tag) equalTags(a Tag) bool {
- return t.lang() == a.lang() &&
- t.script() == a.script() &&
- t.region() == a.region()
-}
-
var errSyntax = language.ErrSyntax
type parseTest struct {
@@ -175,216 +168,29 @@
if skip {
continue
}
- if l, _ := language.ParseBase(tt.lang); l != tag.lang() {
- t.Errorf("%d: lang was %q; want %q", i, tag.lang(), l)
+ if l, _ := language.ParseBase(tt.lang); l != tag.Tag().LangID {
+ t.Errorf("%d: lang was %q; want %q", i, tag.Tag().LangID, l)
}
- if sc, _ := language.ParseScript(tt.script); sc != tag.script() {
- t.Errorf("%d: script was %q; want %q", i, tag.script(), sc)
+ if sc, _ := language.ParseScript(tt.script); sc != tag.Tag().ScriptID {
+ t.Errorf("%d: script was %q; want %q", i, tag.Tag().ScriptID, sc)
}
- if r, _ := language.ParseRegion(tt.region); r != tag.region() {
- t.Errorf("%d: region was %q; want %q", i, tag.region(), r)
+ if r, _ := language.ParseRegion(tt.region); r != tag.Tag().RegionID {
+ t.Errorf("%d: region was %q; want %q", i, tag.Tag().RegionID, r)
}
- v := tag.tag().Variants()
+ v := tag.Tag().Variants()
if v != "" {
v = v[1:]
}
if v != tt.variants {
t.Errorf("%d: variants was %q; want %q", i, v, tt.variants)
}
- if e := strings.Join(tag.tag().Extensions(), "-"); e != tt.ext {
+ if e := strings.Join(tag.Tag().Extensions(), "-"); e != tt.ext {
t.Errorf("%d: extensions were %q; want %q", i, e, tt.ext)
}
}
}
-func TestParse(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- id, _ = Raw.Parse(tt.in)
- return id, false
- })
-}
-
-func TestErrors(t *testing.T) {
- mkInvalid := func(s string) error {
- return language.NewValueError([]byte(s))
- }
- tests := []struct {
- in string
- out error
- }{
- // invalid subtags.
- {"ac", mkInvalid("ac")},
- {"AC", mkInvalid("ac")},
- {"aa-Uuuu", mkInvalid("Uuuu")},
- {"aa-AB", mkInvalid("AB")},
- // ill-formed wins over invalid.
- {"ac-u", errSyntax},
- {"ac-u-ca", errSyntax},
- {"ac-u-ca-co-pinyin", errSyntax},
- {"noob", errSyntax},
- }
- for _, tt := range tests {
- _, err := Parse(tt.in)
- if err != tt.out {
- t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
- }
- }
-}
-
-func TestCompose1(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- l, _ := ParseBase(tt.lang)
- s, _ := ParseScript(tt.script)
- r, _ := ParseRegion(tt.region)
- v := []Variant{}
- for _, x := range strings.Split(tt.variants, "-") {
- p, _ := ParseVariant(x)
- v = append(v, p)
- }
- e := []Extension{}
- for _, x := range tt.extList {
- p, _ := ParseExtension(x)
- e = append(e, p)
- }
- id, _ = Raw.Compose(l, s, r, v, e)
- return id, false
- })
-}
-
-func TestCompose2(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- l, _ := ParseBase(tt.lang)
- s, _ := ParseScript(tt.script)
- r, _ := ParseRegion(tt.region)
- p := []interface{}{l, s, r, s, r, l}
- for _, x := range strings.Split(tt.variants, "-") {
- if x != "" {
- v, _ := ParseVariant(x)
- p = append(p, v)
- }
- }
- for _, x := range tt.extList {
- e, _ := ParseExtension(x)
- p = append(p, e)
- }
- id, _ = Raw.Compose(p...)
- return id, false
- })
-}
-
-func TestCompose3(t *testing.T) {
- partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
- id, _ = Raw.Parse(tt.in)
- id, _ = Raw.Compose(id)
- return id, false
- })
-}
-
func mk(s string) Tag {
- return Raw.Make(s)
-}
-
-func TestParseAcceptLanguage(t *testing.T) {
- type res struct {
- t Tag
- q float32
- }
- en := []res{{mk("en"), 1.0}}
- tests := []struct {
- out []res
- in string
- ok bool
- }{
- {en, "en", true},
- {en, " en", true},
- {en, "en ", true},
- {en, " en ", true},
- {en, "en,", true},
- {en, ",en", true},
- {en, ",,,en,,,", true},
- {en, ",en;q=1", true},
-
- // We allow an empty input, contrary to spec.
- {nil, "", true},
- {[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
-
- // errors
- {nil, ";", false},
- {nil, "$", false},
- {nil, "e;", false},
- {nil, "x;", false},
- {nil, "x", false},
- {nil, "ac", false}, // non-existing language
- {nil, "aa;q", false},
- {nil, "aa;q=", false},
- {nil, "aa;q=.", false},
-
- // odd fallbacks
- {
- []res{{mk("en"), 0.1}},
- " english ;q=.1",
- true,
- },
- {
- []res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
- " italian, deutsch, french",
- true,
- },
-
- // lists
- {
- []res{{mk("en"), 0.1}},
- "en;q=.1",
- true,
- },
- {
- []res{{mk("mul"), 1.0}},
- "*",
- true,
- },
- {
- []res{{mk("en"), 1.0}, {mk("de"), 1.0}},
- "en,de",
- true,
- },
- {
- []res{{mk("en"), 1.0}, {mk("de"), .5}},
- "en,de;q=0.5",
- true,
- },
- {
- []res{{mk("de"), 0.8}, {mk("en"), 0.5}},
- " en ; q = 0.5 , , de;q=0.8",
- true,
- },
- {
- []res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
- "en,de,fr,i-klingon",
- true,
- },
- // sorting
- {
- []res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
- "en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
- true,
- },
- // dropping
- {
- []res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
- "en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
- true,
- },
- }
- for i, tt := range tests {
- tags, qs, e := ParseAcceptLanguage(tt.in)
- if e == nil != tt.ok {
- t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
- }
- for j, tag := range tags {
- if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
- t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
- break
- }
- }
- }
+ tag, _ := language.Parse(s)
+ return Make(tag)
}
diff --git a/internal/language/compact/tables.go b/internal/language/compact/tables.go
index 1825af6..554ca35 100644
--- a/internal/language/compact/tables.go
+++ b/internal/language/compact/tables.go
@@ -1,833 +1,791 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
-package language
+package compact
import "golang.org/x/text/internal/language"
// CLDRVersion is the CLDR version from which the tables in this package are derived.
const CLDRVersion = "32"
-const (
- _de = 269
- _en = 313
- _fr = 350
- _it = 505
- _mo = 784
- _no = 879
- _nb = 839
- _pt = 960
- _sh = 1031
- _mul = 806
- _und = 0
-)
-const (
- _001 = 1
- _419 = 31
- _BR = 65
- _CA = 73
- _ES = 110
- _GB = 123
- _MD = 188
- _PT = 238
- _UK = 306
- _US = 309
- _ZZ = 357
- _XA = 323
- _XC = 325
- _XK = 333
-)
-const (
- _Latn = 87
- _Hani = 54
- _Hans = 56
- _Hant = 57
- _Qaaa = 139
- _Qaai = 147
- _Qabx = 188
- _Zinh = 236
- _Zyyy = 241
- _Zzzz = 242
-)
-
// NumCompactTags is the number of common tags. The maximum tag is
// NumCompactTags-1.
const NumCompactTags = 775
const (
- undIndex compactID = 0
- afIndex compactID = 1
- afNAIndex compactID = 2
- afZAIndex compactID = 3
- agqIndex compactID = 4
- agqCMIndex compactID = 5
- akIndex compactID = 6
- akGHIndex compactID = 7
- amIndex compactID = 8
- amETIndex compactID = 9
- arIndex compactID = 10
- ar001Index compactID = 11
- arAEIndex compactID = 12
- arBHIndex compactID = 13
- arDJIndex compactID = 14
- arDZIndex compactID = 15
- arEGIndex compactID = 16
- arEHIndex compactID = 17
- arERIndex compactID = 18
- arILIndex compactID = 19
- arIQIndex compactID = 20
- arJOIndex compactID = 21
- arKMIndex compactID = 22
- arKWIndex compactID = 23
- arLBIndex compactID = 24
- arLYIndex compactID = 25
- arMAIndex compactID = 26
- arMRIndex compactID = 27
- arOMIndex compactID = 28
- arPSIndex compactID = 29
- arQAIndex compactID = 30
- arSAIndex compactID = 31
- arSDIndex compactID = 32
- arSOIndex compactID = 33
- arSSIndex compactID = 34
- arSYIndex compactID = 35
- arTDIndex compactID = 36
- arTNIndex compactID = 37
- arYEIndex compactID = 38
- arsIndex compactID = 39
- asIndex compactID = 40
- asINIndex compactID = 41
- asaIndex compactID = 42
- asaTZIndex compactID = 43
- astIndex compactID = 44
- astESIndex compactID = 45
- azIndex compactID = 46
- azCyrlIndex compactID = 47
- azCyrlAZIndex compactID = 48
- azLatnIndex compactID = 49
- azLatnAZIndex compactID = 50
- basIndex compactID = 51
- basCMIndex compactID = 52
- beIndex compactID = 53
- beBYIndex compactID = 54
- bemIndex compactID = 55
- bemZMIndex compactID = 56
- bezIndex compactID = 57
- bezTZIndex compactID = 58
- bgIndex compactID = 59
- bgBGIndex compactID = 60
- bhIndex compactID = 61
- bmIndex compactID = 62
- bmMLIndex compactID = 63
- bnIndex compactID = 64
- bnBDIndex compactID = 65
- bnINIndex compactID = 66
- boIndex compactID = 67
- boCNIndex compactID = 68
- boINIndex compactID = 69
- brIndex compactID = 70
- brFRIndex compactID = 71
- brxIndex compactID = 72
- brxINIndex compactID = 73
- bsIndex compactID = 74
- bsCyrlIndex compactID = 75
- bsCyrlBAIndex compactID = 76
- bsLatnIndex compactID = 77
- bsLatnBAIndex compactID = 78
- caIndex compactID = 79
- caADIndex compactID = 80
- caESIndex compactID = 81
- caFRIndex compactID = 82
- caITIndex compactID = 83
- ccpIndex compactID = 84
- ccpBDIndex compactID = 85
- ccpINIndex compactID = 86
- ceIndex compactID = 87
- ceRUIndex compactID = 88
- cggIndex compactID = 89
- cggUGIndex compactID = 90
- chrIndex compactID = 91
- chrUSIndex compactID = 92
- ckbIndex compactID = 93
- ckbIQIndex compactID = 94
- ckbIRIndex compactID = 95
- csIndex compactID = 96
- csCZIndex compactID = 97
- cuIndex compactID = 98
- cuRUIndex compactID = 99
- cyIndex compactID = 100
- cyGBIndex compactID = 101
- daIndex compactID = 102
- daDKIndex compactID = 103
- daGLIndex compactID = 104
- davIndex compactID = 105
- davKEIndex compactID = 106
- deIndex compactID = 107
- deATIndex compactID = 108
- deBEIndex compactID = 109
- deCHIndex compactID = 110
- deDEIndex compactID = 111
- deITIndex compactID = 112
- deLIIndex compactID = 113
- deLUIndex compactID = 114
- djeIndex compactID = 115
- djeNEIndex compactID = 116
- dsbIndex compactID = 117
- dsbDEIndex compactID = 118
- duaIndex compactID = 119
- duaCMIndex compactID = 120
- dvIndex compactID = 121
- dyoIndex compactID = 122
- dyoSNIndex compactID = 123
- dzIndex compactID = 124
- dzBTIndex compactID = 125
- ebuIndex compactID = 126
- ebuKEIndex compactID = 127
- eeIndex compactID = 128
- eeGHIndex compactID = 129
- eeTGIndex compactID = 130
- elIndex compactID = 131
- elCYIndex compactID = 132
- elGRIndex compactID = 133
- enIndex compactID = 134
- en001Index compactID = 135
- en150Index compactID = 136
- enAGIndex compactID = 137
- enAIIndex compactID = 138
- enASIndex compactID = 139
- enATIndex compactID = 140
- enAUIndex compactID = 141
- enBBIndex compactID = 142
- enBEIndex compactID = 143
- enBIIndex compactID = 144
- enBMIndex compactID = 145
- enBSIndex compactID = 146
- enBWIndex compactID = 147
- enBZIndex compactID = 148
- enCAIndex compactID = 149
- enCCIndex compactID = 150
- enCHIndex compactID = 151
- enCKIndex compactID = 152
- enCMIndex compactID = 153
- enCXIndex compactID = 154
- enCYIndex compactID = 155
- enDEIndex compactID = 156
- enDGIndex compactID = 157
- enDKIndex compactID = 158
- enDMIndex compactID = 159
- enERIndex compactID = 160
- enFIIndex compactID = 161
- enFJIndex compactID = 162
- enFKIndex compactID = 163
- enFMIndex compactID = 164
- enGBIndex compactID = 165
- enGDIndex compactID = 166
- enGGIndex compactID = 167
- enGHIndex compactID = 168
- enGIIndex compactID = 169
- enGMIndex compactID = 170
- enGUIndex compactID = 171
- enGYIndex compactID = 172
- enHKIndex compactID = 173
- enIEIndex compactID = 174
- enILIndex compactID = 175
- enIMIndex compactID = 176
- enINIndex compactID = 177
- enIOIndex compactID = 178
- enJEIndex compactID = 179
- enJMIndex compactID = 180
- enKEIndex compactID = 181
- enKIIndex compactID = 182
- enKNIndex compactID = 183
- enKYIndex compactID = 184
- enLCIndex compactID = 185
- enLRIndex compactID = 186
- enLSIndex compactID = 187
- enMGIndex compactID = 188
- enMHIndex compactID = 189
- enMOIndex compactID = 190
- enMPIndex compactID = 191
- enMSIndex compactID = 192
- enMTIndex compactID = 193
- enMUIndex compactID = 194
- enMWIndex compactID = 195
- enMYIndex compactID = 196
- enNAIndex compactID = 197
- enNFIndex compactID = 198
- enNGIndex compactID = 199
- enNLIndex compactID = 200
- enNRIndex compactID = 201
- enNUIndex compactID = 202
- enNZIndex compactID = 203
- enPGIndex compactID = 204
- enPHIndex compactID = 205
- enPKIndex compactID = 206
- enPNIndex compactID = 207
- enPRIndex compactID = 208
- enPWIndex compactID = 209
- enRWIndex compactID = 210
- enSBIndex compactID = 211
- enSCIndex compactID = 212
- enSDIndex compactID = 213
- enSEIndex compactID = 214
- enSGIndex compactID = 215
- enSHIndex compactID = 216
- enSIIndex compactID = 217
- enSLIndex compactID = 218
- enSSIndex compactID = 219
- enSXIndex compactID = 220
- enSZIndex compactID = 221
- enTCIndex compactID = 222
- enTKIndex compactID = 223
- enTOIndex compactID = 224
- enTTIndex compactID = 225
- enTVIndex compactID = 226
- enTZIndex compactID = 227
- enUGIndex compactID = 228
- enUMIndex compactID = 229
- enUSIndex compactID = 230
- enVCIndex compactID = 231
- enVGIndex compactID = 232
- enVIIndex compactID = 233
- enVUIndex compactID = 234
- enWSIndex compactID = 235
- enZAIndex compactID = 236
- enZMIndex compactID = 237
- enZWIndex compactID = 238
- eoIndex compactID = 239
- eo001Index compactID = 240
- esIndex compactID = 241
- es419Index compactID = 242
- esARIndex compactID = 243
- esBOIndex compactID = 244
- esBRIndex compactID = 245
- esBZIndex compactID = 246
- esCLIndex compactID = 247
- esCOIndex compactID = 248
- esCRIndex compactID = 249
- esCUIndex compactID = 250
- esDOIndex compactID = 251
- esEAIndex compactID = 252
- esECIndex compactID = 253
- esESIndex compactID = 254
- esGQIndex compactID = 255
- esGTIndex compactID = 256
- esHNIndex compactID = 257
- esICIndex compactID = 258
- esMXIndex compactID = 259
- esNIIndex compactID = 260
- esPAIndex compactID = 261
- esPEIndex compactID = 262
- esPHIndex compactID = 263
- esPRIndex compactID = 264
- esPYIndex compactID = 265
- esSVIndex compactID = 266
- esUSIndex compactID = 267
- esUYIndex compactID = 268
- esVEIndex compactID = 269
- etIndex compactID = 270
- etEEIndex compactID = 271
- euIndex compactID = 272
- euESIndex compactID = 273
- ewoIndex compactID = 274
- ewoCMIndex compactID = 275
- faIndex compactID = 276
- faAFIndex compactID = 277
- faIRIndex compactID = 278
- ffIndex compactID = 279
- ffCMIndex compactID = 280
- ffGNIndex compactID = 281
- ffMRIndex compactID = 282
- ffSNIndex compactID = 283
- fiIndex compactID = 284
- fiFIIndex compactID = 285
- filIndex compactID = 286
- filPHIndex compactID = 287
- foIndex compactID = 288
- foDKIndex compactID = 289
- foFOIndex compactID = 290
- frIndex compactID = 291
- frBEIndex compactID = 292
- frBFIndex compactID = 293
- frBIIndex compactID = 294
- frBJIndex compactID = 295
- frBLIndex compactID = 296
- frCAIndex compactID = 297
- frCDIndex compactID = 298
- frCFIndex compactID = 299
- frCGIndex compactID = 300
- frCHIndex compactID = 301
- frCIIndex compactID = 302
- frCMIndex compactID = 303
- frDJIndex compactID = 304
- frDZIndex compactID = 305
- frFRIndex compactID = 306
- frGAIndex compactID = 307
- frGFIndex compactID = 308
- frGNIndex compactID = 309
- frGPIndex compactID = 310
- frGQIndex compactID = 311
- frHTIndex compactID = 312
- frKMIndex compactID = 313
- frLUIndex compactID = 314
- frMAIndex compactID = 315
- frMCIndex compactID = 316
- frMFIndex compactID = 317
- frMGIndex compactID = 318
- frMLIndex compactID = 319
- frMQIndex compactID = 320
- frMRIndex compactID = 321
- frMUIndex compactID = 322
- frNCIndex compactID = 323
- frNEIndex compactID = 324
- frPFIndex compactID = 325
- frPMIndex compactID = 326
- frREIndex compactID = 327
- frRWIndex compactID = 328
- frSCIndex compactID = 329
- frSNIndex compactID = 330
- frSYIndex compactID = 331
- frTDIndex compactID = 332
- frTGIndex compactID = 333
- frTNIndex compactID = 334
- frVUIndex compactID = 335
- frWFIndex compactID = 336
- frYTIndex compactID = 337
- furIndex compactID = 338
- furITIndex compactID = 339
- fyIndex compactID = 340
- fyNLIndex compactID = 341
- gaIndex compactID = 342
- gaIEIndex compactID = 343
- gdIndex compactID = 344
- gdGBIndex compactID = 345
- glIndex compactID = 346
- glESIndex compactID = 347
- gswIndex compactID = 348
- gswCHIndex compactID = 349
- gswFRIndex compactID = 350
- gswLIIndex compactID = 351
- guIndex compactID = 352
- guINIndex compactID = 353
- guwIndex compactID = 354
- guzIndex compactID = 355
- guzKEIndex compactID = 356
- gvIndex compactID = 357
- gvIMIndex compactID = 358
- haIndex compactID = 359
- haGHIndex compactID = 360
- haNEIndex compactID = 361
- haNGIndex compactID = 362
- hawIndex compactID = 363
- hawUSIndex compactID = 364
- heIndex compactID = 365
- heILIndex compactID = 366
- hiIndex compactID = 367
- hiINIndex compactID = 368
- hrIndex compactID = 369
- hrBAIndex compactID = 370
- hrHRIndex compactID = 371
- hsbIndex compactID = 372
- hsbDEIndex compactID = 373
- huIndex compactID = 374
- huHUIndex compactID = 375
- hyIndex compactID = 376
- hyAMIndex compactID = 377
- idIndex compactID = 378
- idIDIndex compactID = 379
- igIndex compactID = 380
- igNGIndex compactID = 381
- iiIndex compactID = 382
- iiCNIndex compactID = 383
- inIndex compactID = 384
- ioIndex compactID = 385
- isIndex compactID = 386
- isISIndex compactID = 387
- itIndex compactID = 388
- itCHIndex compactID = 389
- itITIndex compactID = 390
- itSMIndex compactID = 391
- itVAIndex compactID = 392
- iuIndex compactID = 393
- iwIndex compactID = 394
- jaIndex compactID = 395
- jaJPIndex compactID = 396
- jboIndex compactID = 397
- jgoIndex compactID = 398
- jgoCMIndex compactID = 399
- jiIndex compactID = 400
- jmcIndex compactID = 401
- jmcTZIndex compactID = 402
- jvIndex compactID = 403
- jwIndex compactID = 404
- kaIndex compactID = 405
- kaGEIndex compactID = 406
- kabIndex compactID = 407
- kabDZIndex compactID = 408
- kajIndex compactID = 409
- kamIndex compactID = 410
- kamKEIndex compactID = 411
- kcgIndex compactID = 412
- kdeIndex compactID = 413
- kdeTZIndex compactID = 414
- keaIndex compactID = 415
- keaCVIndex compactID = 416
- khqIndex compactID = 417
- khqMLIndex compactID = 418
- kiIndex compactID = 419
- kiKEIndex compactID = 420
- kkIndex compactID = 421
- kkKZIndex compactID = 422
- kkjIndex compactID = 423
- kkjCMIndex compactID = 424
- klIndex compactID = 425
- klGLIndex compactID = 426
- klnIndex compactID = 427
- klnKEIndex compactID = 428
- kmIndex compactID = 429
- kmKHIndex compactID = 430
- knIndex compactID = 431
- knINIndex compactID = 432
- koIndex compactID = 433
- koKPIndex compactID = 434
- koKRIndex compactID = 435
- kokIndex compactID = 436
- kokINIndex compactID = 437
- ksIndex compactID = 438
- ksINIndex compactID = 439
- ksbIndex compactID = 440
- ksbTZIndex compactID = 441
- ksfIndex compactID = 442
- ksfCMIndex compactID = 443
- kshIndex compactID = 444
- kshDEIndex compactID = 445
- kuIndex compactID = 446
- kwIndex compactID = 447
- kwGBIndex compactID = 448
- kyIndex compactID = 449
- kyKGIndex compactID = 450
- lagIndex compactID = 451
- lagTZIndex compactID = 452
- lbIndex compactID = 453
- lbLUIndex compactID = 454
- lgIndex compactID = 455
- lgUGIndex compactID = 456
- lktIndex compactID = 457
- lktUSIndex compactID = 458
- lnIndex compactID = 459
- lnAOIndex compactID = 460
- lnCDIndex compactID = 461
- lnCFIndex compactID = 462
- lnCGIndex compactID = 463
- loIndex compactID = 464
- loLAIndex compactID = 465
- lrcIndex compactID = 466
- lrcIQIndex compactID = 467
- lrcIRIndex compactID = 468
- ltIndex compactID = 469
- ltLTIndex compactID = 470
- luIndex compactID = 471
- luCDIndex compactID = 472
- luoIndex compactID = 473
- luoKEIndex compactID = 474
- luyIndex compactID = 475
- luyKEIndex compactID = 476
- lvIndex compactID = 477
- lvLVIndex compactID = 478
- masIndex compactID = 479
- masKEIndex compactID = 480
- masTZIndex compactID = 481
- merIndex compactID = 482
- merKEIndex compactID = 483
- mfeIndex compactID = 484
- mfeMUIndex compactID = 485
- mgIndex compactID = 486
- mgMGIndex compactID = 487
- mghIndex compactID = 488
- mghMZIndex compactID = 489
- mgoIndex compactID = 490
- mgoCMIndex compactID = 491
- mkIndex compactID = 492
- mkMKIndex compactID = 493
- mlIndex compactID = 494
- mlINIndex compactID = 495
- mnIndex compactID = 496
- mnMNIndex compactID = 497
- moIndex compactID = 498
- mrIndex compactID = 499
- mrINIndex compactID = 500
- msIndex compactID = 501
- msBNIndex compactID = 502
- msMYIndex compactID = 503
- msSGIndex compactID = 504
- mtIndex compactID = 505
- mtMTIndex compactID = 506
- muaIndex compactID = 507
- muaCMIndex compactID = 508
- myIndex compactID = 509
- myMMIndex compactID = 510
- mznIndex compactID = 511
- mznIRIndex compactID = 512
- nahIndex compactID = 513
- naqIndex compactID = 514
- naqNAIndex compactID = 515
- nbIndex compactID = 516
- nbNOIndex compactID = 517
- nbSJIndex compactID = 518
- ndIndex compactID = 519
- ndZWIndex compactID = 520
- ndsIndex compactID = 521
- ndsDEIndex compactID = 522
- ndsNLIndex compactID = 523
- neIndex compactID = 524
- neINIndex compactID = 525
- neNPIndex compactID = 526
- nlIndex compactID = 527
- nlAWIndex compactID = 528
- nlBEIndex compactID = 529
- nlBQIndex compactID = 530
- nlCWIndex compactID = 531
- nlNLIndex compactID = 532
- nlSRIndex compactID = 533
- nlSXIndex compactID = 534
- nmgIndex compactID = 535
- nmgCMIndex compactID = 536
- nnIndex compactID = 537
- nnNOIndex compactID = 538
- nnhIndex compactID = 539
- nnhCMIndex compactID = 540
- noIndex compactID = 541
- nqoIndex compactID = 542
- nrIndex compactID = 543
- nsoIndex compactID = 544
- nusIndex compactID = 545
- nusSSIndex compactID = 546
- nyIndex compactID = 547
- nynIndex compactID = 548
- nynUGIndex compactID = 549
- omIndex compactID = 550
- omETIndex compactID = 551
- omKEIndex compactID = 552
- orIndex compactID = 553
- orINIndex compactID = 554
- osIndex compactID = 555
- osGEIndex compactID = 556
- osRUIndex compactID = 557
- paIndex compactID = 558
- paArabIndex compactID = 559
- paArabPKIndex compactID = 560
- paGuruIndex compactID = 561
- paGuruINIndex compactID = 562
- papIndex compactID = 563
- plIndex compactID = 564
- plPLIndex compactID = 565
- prgIndex compactID = 566
- prg001Index compactID = 567
- psIndex compactID = 568
- psAFIndex compactID = 569
- ptIndex compactID = 570
- ptAOIndex compactID = 571
- ptBRIndex compactID = 572
- ptCHIndex compactID = 573
- ptCVIndex compactID = 574
- ptGQIndex compactID = 575
- ptGWIndex compactID = 576
- ptLUIndex compactID = 577
- ptMOIndex compactID = 578
- ptMZIndex compactID = 579
- ptPTIndex compactID = 580
- ptSTIndex compactID = 581
- ptTLIndex compactID = 582
- quIndex compactID = 583
- quBOIndex compactID = 584
- quECIndex compactID = 585
- quPEIndex compactID = 586
- rmIndex compactID = 587
- rmCHIndex compactID = 588
- rnIndex compactID = 589
- rnBIIndex compactID = 590
- roIndex compactID = 591
- roMDIndex compactID = 592
- roROIndex compactID = 593
- rofIndex compactID = 594
- rofTZIndex compactID = 595
- ruIndex compactID = 596
- ruBYIndex compactID = 597
- ruKGIndex compactID = 598
- ruKZIndex compactID = 599
- ruMDIndex compactID = 600
- ruRUIndex compactID = 601
- ruUAIndex compactID = 602
- rwIndex compactID = 603
- rwRWIndex compactID = 604
- rwkIndex compactID = 605
- rwkTZIndex compactID = 606
- sahIndex compactID = 607
- sahRUIndex compactID = 608
- saqIndex compactID = 609
- saqKEIndex compactID = 610
- sbpIndex compactID = 611
- sbpTZIndex compactID = 612
- sdIndex compactID = 613
- sdPKIndex compactID = 614
- sdhIndex compactID = 615
- seIndex compactID = 616
- seFIIndex compactID = 617
- seNOIndex compactID = 618
- seSEIndex compactID = 619
- sehIndex compactID = 620
- sehMZIndex compactID = 621
- sesIndex compactID = 622
- sesMLIndex compactID = 623
- sgIndex compactID = 624
- sgCFIndex compactID = 625
- shIndex compactID = 626
- shiIndex compactID = 627
- shiLatnIndex compactID = 628
- shiLatnMAIndex compactID = 629
- shiTfngIndex compactID = 630
- shiTfngMAIndex compactID = 631
- siIndex compactID = 632
- siLKIndex compactID = 633
- skIndex compactID = 634
- skSKIndex compactID = 635
- slIndex compactID = 636
- slSIIndex compactID = 637
- smaIndex compactID = 638
- smiIndex compactID = 639
- smjIndex compactID = 640
- smnIndex compactID = 641
- smnFIIndex compactID = 642
- smsIndex compactID = 643
- snIndex compactID = 644
- snZWIndex compactID = 645
- soIndex compactID = 646
- soDJIndex compactID = 647
- soETIndex compactID = 648
- soKEIndex compactID = 649
- soSOIndex compactID = 650
- sqIndex compactID = 651
- sqALIndex compactID = 652
- sqMKIndex compactID = 653
- sqXKIndex compactID = 654
- srIndex compactID = 655
- srCyrlIndex compactID = 656
- srCyrlBAIndex compactID = 657
- srCyrlMEIndex compactID = 658
- srCyrlRSIndex compactID = 659
- srCyrlXKIndex compactID = 660
- srLatnIndex compactID = 661
- srLatnBAIndex compactID = 662
- srLatnMEIndex compactID = 663
- srLatnRSIndex compactID = 664
- srLatnXKIndex compactID = 665
- ssIndex compactID = 666
- ssyIndex compactID = 667
- stIndex compactID = 668
- svIndex compactID = 669
- svAXIndex compactID = 670
- svFIIndex compactID = 671
- svSEIndex compactID = 672
- swIndex compactID = 673
- swCDIndex compactID = 674
- swKEIndex compactID = 675
- swTZIndex compactID = 676
- swUGIndex compactID = 677
- syrIndex compactID = 678
- taIndex compactID = 679
- taINIndex compactID = 680
- taLKIndex compactID = 681
- taMYIndex compactID = 682
- taSGIndex compactID = 683
- teIndex compactID = 684
- teINIndex compactID = 685
- teoIndex compactID = 686
- teoKEIndex compactID = 687
- teoUGIndex compactID = 688
- tgIndex compactID = 689
- tgTJIndex compactID = 690
- thIndex compactID = 691
- thTHIndex compactID = 692
- tiIndex compactID = 693
- tiERIndex compactID = 694
- tiETIndex compactID = 695
- tigIndex compactID = 696
- tkIndex compactID = 697
- tkTMIndex compactID = 698
- tlIndex compactID = 699
- tnIndex compactID = 700
- toIndex compactID = 701
- toTOIndex compactID = 702
- trIndex compactID = 703
- trCYIndex compactID = 704
- trTRIndex compactID = 705
- tsIndex compactID = 706
- ttIndex compactID = 707
- ttRUIndex compactID = 708
- twqIndex compactID = 709
- twqNEIndex compactID = 710
- tzmIndex compactID = 711
- tzmMAIndex compactID = 712
- ugIndex compactID = 713
- ugCNIndex compactID = 714
- ukIndex compactID = 715
- ukUAIndex compactID = 716
- urIndex compactID = 717
- urINIndex compactID = 718
- urPKIndex compactID = 719
- uzIndex compactID = 720
- uzArabIndex compactID = 721
- uzArabAFIndex compactID = 722
- uzCyrlIndex compactID = 723
- uzCyrlUZIndex compactID = 724
- uzLatnIndex compactID = 725
- uzLatnUZIndex compactID = 726
- vaiIndex compactID = 727
- vaiLatnIndex compactID = 728
- vaiLatnLRIndex compactID = 729
- vaiVaiiIndex compactID = 730
- vaiVaiiLRIndex compactID = 731
- veIndex compactID = 732
- viIndex compactID = 733
- viVNIndex compactID = 734
- voIndex compactID = 735
- vo001Index compactID = 736
- vunIndex compactID = 737
- vunTZIndex compactID = 738
- waIndex compactID = 739
- waeIndex compactID = 740
- waeCHIndex compactID = 741
- woIndex compactID = 742
- woSNIndex compactID = 743
- xhIndex compactID = 744
- xogIndex compactID = 745
- xogUGIndex compactID = 746
- yavIndex compactID = 747
- yavCMIndex compactID = 748
- yiIndex compactID = 749
- yi001Index compactID = 750
- yoIndex compactID = 751
- yoBJIndex compactID = 752
- yoNGIndex compactID = 753
- yueIndex compactID = 754
- yueHansIndex compactID = 755
- yueHansCNIndex compactID = 756
- yueHantIndex compactID = 757
- yueHantHKIndex compactID = 758
- zghIndex compactID = 759
- zghMAIndex compactID = 760
- zhIndex compactID = 761
- zhHansIndex compactID = 762
- zhHansCNIndex compactID = 763
- zhHansHKIndex compactID = 764
- zhHansMOIndex compactID = 765
- zhHansSGIndex compactID = 766
- zhHantIndex compactID = 767
- zhHantHKIndex compactID = 768
- zhHantMOIndex compactID = 769
- zhHantTWIndex compactID = 770
- zuIndex compactID = 771
- zuZAIndex compactID = 772
- caESvalenciaIndex compactID = 773
- enUSuvaposixIndex compactID = 774
+ undIndex ID = 0
+ afIndex ID = 1
+ afNAIndex ID = 2
+ afZAIndex ID = 3
+ agqIndex ID = 4
+ agqCMIndex ID = 5
+ akIndex ID = 6
+ akGHIndex ID = 7
+ amIndex ID = 8
+ amETIndex ID = 9
+ arIndex ID = 10
+ ar001Index ID = 11
+ arAEIndex ID = 12
+ arBHIndex ID = 13
+ arDJIndex ID = 14
+ arDZIndex ID = 15
+ arEGIndex ID = 16
+ arEHIndex ID = 17
+ arERIndex ID = 18
+ arILIndex ID = 19
+ arIQIndex ID = 20
+ arJOIndex ID = 21
+ arKMIndex ID = 22
+ arKWIndex ID = 23
+ arLBIndex ID = 24
+ arLYIndex ID = 25
+ arMAIndex ID = 26
+ arMRIndex ID = 27
+ arOMIndex ID = 28
+ arPSIndex ID = 29
+ arQAIndex ID = 30
+ arSAIndex ID = 31
+ arSDIndex ID = 32
+ arSOIndex ID = 33
+ arSSIndex ID = 34
+ arSYIndex ID = 35
+ arTDIndex ID = 36
+ arTNIndex ID = 37
+ arYEIndex ID = 38
+ arsIndex ID = 39
+ asIndex ID = 40
+ asINIndex ID = 41
+ asaIndex ID = 42
+ asaTZIndex ID = 43
+ astIndex ID = 44
+ astESIndex ID = 45
+ azIndex ID = 46
+ azCyrlIndex ID = 47
+ azCyrlAZIndex ID = 48
+ azLatnIndex ID = 49
+ azLatnAZIndex ID = 50
+ basIndex ID = 51
+ basCMIndex ID = 52
+ beIndex ID = 53
+ beBYIndex ID = 54
+ bemIndex ID = 55
+ bemZMIndex ID = 56
+ bezIndex ID = 57
+ bezTZIndex ID = 58
+ bgIndex ID = 59
+ bgBGIndex ID = 60
+ bhIndex ID = 61
+ bmIndex ID = 62
+ bmMLIndex ID = 63
+ bnIndex ID = 64
+ bnBDIndex ID = 65
+ bnINIndex ID = 66
+ boIndex ID = 67
+ boCNIndex ID = 68
+ boINIndex ID = 69
+ brIndex ID = 70
+ brFRIndex ID = 71
+ brxIndex ID = 72
+ brxINIndex ID = 73
+ bsIndex ID = 74
+ bsCyrlIndex ID = 75
+ bsCyrlBAIndex ID = 76
+ bsLatnIndex ID = 77
+ bsLatnBAIndex ID = 78
+ caIndex ID = 79
+ caADIndex ID = 80
+ caESIndex ID = 81
+ caFRIndex ID = 82
+ caITIndex ID = 83
+ ccpIndex ID = 84
+ ccpBDIndex ID = 85
+ ccpINIndex ID = 86
+ ceIndex ID = 87
+ ceRUIndex ID = 88
+ cggIndex ID = 89
+ cggUGIndex ID = 90
+ chrIndex ID = 91
+ chrUSIndex ID = 92
+ ckbIndex ID = 93
+ ckbIQIndex ID = 94
+ ckbIRIndex ID = 95
+ csIndex ID = 96
+ csCZIndex ID = 97
+ cuIndex ID = 98
+ cuRUIndex ID = 99
+ cyIndex ID = 100
+ cyGBIndex ID = 101
+ daIndex ID = 102
+ daDKIndex ID = 103
+ daGLIndex ID = 104
+ davIndex ID = 105
+ davKEIndex ID = 106
+ deIndex ID = 107
+ deATIndex ID = 108
+ deBEIndex ID = 109
+ deCHIndex ID = 110
+ deDEIndex ID = 111
+ deITIndex ID = 112
+ deLIIndex ID = 113
+ deLUIndex ID = 114
+ djeIndex ID = 115
+ djeNEIndex ID = 116
+ dsbIndex ID = 117
+ dsbDEIndex ID = 118
+ duaIndex ID = 119
+ duaCMIndex ID = 120
+ dvIndex ID = 121
+ dyoIndex ID = 122
+ dyoSNIndex ID = 123
+ dzIndex ID = 124
+ dzBTIndex ID = 125
+ ebuIndex ID = 126
+ ebuKEIndex ID = 127
+ eeIndex ID = 128
+ eeGHIndex ID = 129
+ eeTGIndex ID = 130
+ elIndex ID = 131
+ elCYIndex ID = 132
+ elGRIndex ID = 133
+ enIndex ID = 134
+ en001Index ID = 135
+ en150Index ID = 136
+ enAGIndex ID = 137
+ enAIIndex ID = 138
+ enASIndex ID = 139
+ enATIndex ID = 140
+ enAUIndex ID = 141
+ enBBIndex ID = 142
+ enBEIndex ID = 143
+ enBIIndex ID = 144
+ enBMIndex ID = 145
+ enBSIndex ID = 146
+ enBWIndex ID = 147
+ enBZIndex ID = 148
+ enCAIndex ID = 149
+ enCCIndex ID = 150
+ enCHIndex ID = 151
+ enCKIndex ID = 152
+ enCMIndex ID = 153
+ enCXIndex ID = 154
+ enCYIndex ID = 155
+ enDEIndex ID = 156
+ enDGIndex ID = 157
+ enDKIndex ID = 158
+ enDMIndex ID = 159
+ enERIndex ID = 160
+ enFIIndex ID = 161
+ enFJIndex ID = 162
+ enFKIndex ID = 163
+ enFMIndex ID = 164
+ enGBIndex ID = 165
+ enGDIndex ID = 166
+ enGGIndex ID = 167
+ enGHIndex ID = 168
+ enGIIndex ID = 169
+ enGMIndex ID = 170
+ enGUIndex ID = 171
+ enGYIndex ID = 172
+ enHKIndex ID = 173
+ enIEIndex ID = 174
+ enILIndex ID = 175
+ enIMIndex ID = 176
+ enINIndex ID = 177
+ enIOIndex ID = 178
+ enJEIndex ID = 179
+ enJMIndex ID = 180
+ enKEIndex ID = 181
+ enKIIndex ID = 182
+ enKNIndex ID = 183
+ enKYIndex ID = 184
+ enLCIndex ID = 185
+ enLRIndex ID = 186
+ enLSIndex ID = 187
+ enMGIndex ID = 188
+ enMHIndex ID = 189
+ enMOIndex ID = 190
+ enMPIndex ID = 191
+ enMSIndex ID = 192
+ enMTIndex ID = 193
+ enMUIndex ID = 194
+ enMWIndex ID = 195
+ enMYIndex ID = 196
+ enNAIndex ID = 197
+ enNFIndex ID = 198
+ enNGIndex ID = 199
+ enNLIndex ID = 200
+ enNRIndex ID = 201
+ enNUIndex ID = 202
+ enNZIndex ID = 203
+ enPGIndex ID = 204
+ enPHIndex ID = 205
+ enPKIndex ID = 206
+ enPNIndex ID = 207
+ enPRIndex ID = 208
+ enPWIndex ID = 209
+ enRWIndex ID = 210
+ enSBIndex ID = 211
+ enSCIndex ID = 212
+ enSDIndex ID = 213
+ enSEIndex ID = 214
+ enSGIndex ID = 215
+ enSHIndex ID = 216
+ enSIIndex ID = 217
+ enSLIndex ID = 218
+ enSSIndex ID = 219
+ enSXIndex ID = 220
+ enSZIndex ID = 221
+ enTCIndex ID = 222
+ enTKIndex ID = 223
+ enTOIndex ID = 224
+ enTTIndex ID = 225
+ enTVIndex ID = 226
+ enTZIndex ID = 227
+ enUGIndex ID = 228
+ enUMIndex ID = 229
+ enUSIndex ID = 230
+ enVCIndex ID = 231
+ enVGIndex ID = 232
+ enVIIndex ID = 233
+ enVUIndex ID = 234
+ enWSIndex ID = 235
+ enZAIndex ID = 236
+ enZMIndex ID = 237
+ enZWIndex ID = 238
+ eoIndex ID = 239
+ eo001Index ID = 240
+ esIndex ID = 241
+ es419Index ID = 242
+ esARIndex ID = 243
+ esBOIndex ID = 244
+ esBRIndex ID = 245
+ esBZIndex ID = 246
+ esCLIndex ID = 247
+ esCOIndex ID = 248
+ esCRIndex ID = 249
+ esCUIndex ID = 250
+ esDOIndex ID = 251
+ esEAIndex ID = 252
+ esECIndex ID = 253
+ esESIndex ID = 254
+ esGQIndex ID = 255
+ esGTIndex ID = 256
+ esHNIndex ID = 257
+ esICIndex ID = 258
+ esMXIndex ID = 259
+ esNIIndex ID = 260
+ esPAIndex ID = 261
+ esPEIndex ID = 262
+ esPHIndex ID = 263
+ esPRIndex ID = 264
+ esPYIndex ID = 265
+ esSVIndex ID = 266
+ esUSIndex ID = 267
+ esUYIndex ID = 268
+ esVEIndex ID = 269
+ etIndex ID = 270
+ etEEIndex ID = 271
+ euIndex ID = 272
+ euESIndex ID = 273
+ ewoIndex ID = 274
+ ewoCMIndex ID = 275
+ faIndex ID = 276
+ faAFIndex ID = 277
+ faIRIndex ID = 278
+ ffIndex ID = 279
+ ffCMIndex ID = 280
+ ffGNIndex ID = 281
+ ffMRIndex ID = 282
+ ffSNIndex ID = 283
+ fiIndex ID = 284
+ fiFIIndex ID = 285
+ filIndex ID = 286
+ filPHIndex ID = 287
+ foIndex ID = 288
+ foDKIndex ID = 289
+ foFOIndex ID = 290
+ frIndex ID = 291
+ frBEIndex ID = 292
+ frBFIndex ID = 293
+ frBIIndex ID = 294
+ frBJIndex ID = 295
+ frBLIndex ID = 296
+ frCAIndex ID = 297
+ frCDIndex ID = 298
+ frCFIndex ID = 299
+ frCGIndex ID = 300
+ frCHIndex ID = 301
+ frCIIndex ID = 302
+ frCMIndex ID = 303
+ frDJIndex ID = 304
+ frDZIndex ID = 305
+ frFRIndex ID = 306
+ frGAIndex ID = 307
+ frGFIndex ID = 308
+ frGNIndex ID = 309
+ frGPIndex ID = 310
+ frGQIndex ID = 311
+ frHTIndex ID = 312
+ frKMIndex ID = 313
+ frLUIndex ID = 314
+ frMAIndex ID = 315
+ frMCIndex ID = 316
+ frMFIndex ID = 317
+ frMGIndex ID = 318
+ frMLIndex ID = 319
+ frMQIndex ID = 320
+ frMRIndex ID = 321
+ frMUIndex ID = 322
+ frNCIndex ID = 323
+ frNEIndex ID = 324
+ frPFIndex ID = 325
+ frPMIndex ID = 326
+ frREIndex ID = 327
+ frRWIndex ID = 328
+ frSCIndex ID = 329
+ frSNIndex ID = 330
+ frSYIndex ID = 331
+ frTDIndex ID = 332
+ frTGIndex ID = 333
+ frTNIndex ID = 334
+ frVUIndex ID = 335
+ frWFIndex ID = 336
+ frYTIndex ID = 337
+ furIndex ID = 338
+ furITIndex ID = 339
+ fyIndex ID = 340
+ fyNLIndex ID = 341
+ gaIndex ID = 342
+ gaIEIndex ID = 343
+ gdIndex ID = 344
+ gdGBIndex ID = 345
+ glIndex ID = 346
+ glESIndex ID = 347
+ gswIndex ID = 348
+ gswCHIndex ID = 349
+ gswFRIndex ID = 350
+ gswLIIndex ID = 351
+ guIndex ID = 352
+ guINIndex ID = 353
+ guwIndex ID = 354
+ guzIndex ID = 355
+ guzKEIndex ID = 356
+ gvIndex ID = 357
+ gvIMIndex ID = 358
+ haIndex ID = 359
+ haGHIndex ID = 360
+ haNEIndex ID = 361
+ haNGIndex ID = 362
+ hawIndex ID = 363
+ hawUSIndex ID = 364
+ heIndex ID = 365
+ heILIndex ID = 366
+ hiIndex ID = 367
+ hiINIndex ID = 368
+ hrIndex ID = 369
+ hrBAIndex ID = 370
+ hrHRIndex ID = 371
+ hsbIndex ID = 372
+ hsbDEIndex ID = 373
+ huIndex ID = 374
+ huHUIndex ID = 375
+ hyIndex ID = 376
+ hyAMIndex ID = 377
+ idIndex ID = 378
+ idIDIndex ID = 379
+ igIndex ID = 380
+ igNGIndex ID = 381
+ iiIndex ID = 382
+ iiCNIndex ID = 383
+ inIndex ID = 384
+ ioIndex ID = 385
+ isIndex ID = 386
+ isISIndex ID = 387
+ itIndex ID = 388
+ itCHIndex ID = 389
+ itITIndex ID = 390
+ itSMIndex ID = 391
+ itVAIndex ID = 392
+ iuIndex ID = 393
+ iwIndex ID = 394
+ jaIndex ID = 395
+ jaJPIndex ID = 396
+ jboIndex ID = 397
+ jgoIndex ID = 398
+ jgoCMIndex ID = 399
+ jiIndex ID = 400
+ jmcIndex ID = 401
+ jmcTZIndex ID = 402
+ jvIndex ID = 403
+ jwIndex ID = 404
+ kaIndex ID = 405
+ kaGEIndex ID = 406
+ kabIndex ID = 407
+ kabDZIndex ID = 408
+ kajIndex ID = 409
+ kamIndex ID = 410
+ kamKEIndex ID = 411
+ kcgIndex ID = 412
+ kdeIndex ID = 413
+ kdeTZIndex ID = 414
+ keaIndex ID = 415
+ keaCVIndex ID = 416
+ khqIndex ID = 417
+ khqMLIndex ID = 418
+ kiIndex ID = 419
+ kiKEIndex ID = 420
+ kkIndex ID = 421
+ kkKZIndex ID = 422
+ kkjIndex ID = 423
+ kkjCMIndex ID = 424
+ klIndex ID = 425
+ klGLIndex ID = 426
+ klnIndex ID = 427
+ klnKEIndex ID = 428
+ kmIndex ID = 429
+ kmKHIndex ID = 430
+ knIndex ID = 431
+ knINIndex ID = 432
+ koIndex ID = 433
+ koKPIndex ID = 434
+ koKRIndex ID = 435
+ kokIndex ID = 436
+ kokINIndex ID = 437
+ ksIndex ID = 438
+ ksINIndex ID = 439
+ ksbIndex ID = 440
+ ksbTZIndex ID = 441
+ ksfIndex ID = 442
+ ksfCMIndex ID = 443
+ kshIndex ID = 444
+ kshDEIndex ID = 445
+ kuIndex ID = 446
+ kwIndex ID = 447
+ kwGBIndex ID = 448
+ kyIndex ID = 449
+ kyKGIndex ID = 450
+ lagIndex ID = 451
+ lagTZIndex ID = 452
+ lbIndex ID = 453
+ lbLUIndex ID = 454
+ lgIndex ID = 455
+ lgUGIndex ID = 456
+ lktIndex ID = 457
+ lktUSIndex ID = 458
+ lnIndex ID = 459
+ lnAOIndex ID = 460
+ lnCDIndex ID = 461
+ lnCFIndex ID = 462
+ lnCGIndex ID = 463
+ loIndex ID = 464
+ loLAIndex ID = 465
+ lrcIndex ID = 466
+ lrcIQIndex ID = 467
+ lrcIRIndex ID = 468
+ ltIndex ID = 469
+ ltLTIndex ID = 470
+ luIndex ID = 471
+ luCDIndex ID = 472
+ luoIndex ID = 473
+ luoKEIndex ID = 474
+ luyIndex ID = 475
+ luyKEIndex ID = 476
+ lvIndex ID = 477
+ lvLVIndex ID = 478
+ masIndex ID = 479
+ masKEIndex ID = 480
+ masTZIndex ID = 481
+ merIndex ID = 482
+ merKEIndex ID = 483
+ mfeIndex ID = 484
+ mfeMUIndex ID = 485
+ mgIndex ID = 486
+ mgMGIndex ID = 487
+ mghIndex ID = 488
+ mghMZIndex ID = 489
+ mgoIndex ID = 490
+ mgoCMIndex ID = 491
+ mkIndex ID = 492
+ mkMKIndex ID = 493
+ mlIndex ID = 494
+ mlINIndex ID = 495
+ mnIndex ID = 496
+ mnMNIndex ID = 497
+ moIndex ID = 498
+ mrIndex ID = 499
+ mrINIndex ID = 500
+ msIndex ID = 501
+ msBNIndex ID = 502
+ msMYIndex ID = 503
+ msSGIndex ID = 504
+ mtIndex ID = 505
+ mtMTIndex ID = 506
+ muaIndex ID = 507
+ muaCMIndex ID = 508
+ myIndex ID = 509
+ myMMIndex ID = 510
+ mznIndex ID = 511
+ mznIRIndex ID = 512
+ nahIndex ID = 513
+ naqIndex ID = 514
+ naqNAIndex ID = 515
+ nbIndex ID = 516
+ nbNOIndex ID = 517
+ nbSJIndex ID = 518
+ ndIndex ID = 519
+ ndZWIndex ID = 520
+ ndsIndex ID = 521
+ ndsDEIndex ID = 522
+ ndsNLIndex ID = 523
+ neIndex ID = 524
+ neINIndex ID = 525
+ neNPIndex ID = 526
+ nlIndex ID = 527
+ nlAWIndex ID = 528
+ nlBEIndex ID = 529
+ nlBQIndex ID = 530
+ nlCWIndex ID = 531
+ nlNLIndex ID = 532
+ nlSRIndex ID = 533
+ nlSXIndex ID = 534
+ nmgIndex ID = 535
+ nmgCMIndex ID = 536
+ nnIndex ID = 537
+ nnNOIndex ID = 538
+ nnhIndex ID = 539
+ nnhCMIndex ID = 540
+ noIndex ID = 541
+ nqoIndex ID = 542
+ nrIndex ID = 543
+ nsoIndex ID = 544
+ nusIndex ID = 545
+ nusSSIndex ID = 546
+ nyIndex ID = 547
+ nynIndex ID = 548
+ nynUGIndex ID = 549
+ omIndex ID = 550
+ omETIndex ID = 551
+ omKEIndex ID = 552
+ orIndex ID = 553
+ orINIndex ID = 554
+ osIndex ID = 555
+ osGEIndex ID = 556
+ osRUIndex ID = 557
+ paIndex ID = 558
+ paArabIndex ID = 559
+ paArabPKIndex ID = 560
+ paGuruIndex ID = 561
+ paGuruINIndex ID = 562
+ papIndex ID = 563
+ plIndex ID = 564
+ plPLIndex ID = 565
+ prgIndex ID = 566
+ prg001Index ID = 567
+ psIndex ID = 568
+ psAFIndex ID = 569
+ ptIndex ID = 570
+ ptAOIndex ID = 571
+ ptBRIndex ID = 572
+ ptCHIndex ID = 573
+ ptCVIndex ID = 574
+ ptGQIndex ID = 575
+ ptGWIndex ID = 576
+ ptLUIndex ID = 577
+ ptMOIndex ID = 578
+ ptMZIndex ID = 579
+ ptPTIndex ID = 580
+ ptSTIndex ID = 581
+ ptTLIndex ID = 582
+ quIndex ID = 583
+ quBOIndex ID = 584
+ quECIndex ID = 585
+ quPEIndex ID = 586
+ rmIndex ID = 587
+ rmCHIndex ID = 588
+ rnIndex ID = 589
+ rnBIIndex ID = 590
+ roIndex ID = 591
+ roMDIndex ID = 592
+ roROIndex ID = 593
+ rofIndex ID = 594
+ rofTZIndex ID = 595
+ ruIndex ID = 596
+ ruBYIndex ID = 597
+ ruKGIndex ID = 598
+ ruKZIndex ID = 599
+ ruMDIndex ID = 600
+ ruRUIndex ID = 601
+ ruUAIndex ID = 602
+ rwIndex ID = 603
+ rwRWIndex ID = 604
+ rwkIndex ID = 605
+ rwkTZIndex ID = 606
+ sahIndex ID = 607
+ sahRUIndex ID = 608
+ saqIndex ID = 609
+ saqKEIndex ID = 610
+ sbpIndex ID = 611
+ sbpTZIndex ID = 612
+ sdIndex ID = 613
+ sdPKIndex ID = 614
+ sdhIndex ID = 615
+ seIndex ID = 616
+ seFIIndex ID = 617
+ seNOIndex ID = 618
+ seSEIndex ID = 619
+ sehIndex ID = 620
+ sehMZIndex ID = 621
+ sesIndex ID = 622
+ sesMLIndex ID = 623
+ sgIndex ID = 624
+ sgCFIndex ID = 625
+ shIndex ID = 626
+ shiIndex ID = 627
+ shiLatnIndex ID = 628
+ shiLatnMAIndex ID = 629
+ shiTfngIndex ID = 630
+ shiTfngMAIndex ID = 631
+ siIndex ID = 632
+ siLKIndex ID = 633
+ skIndex ID = 634
+ skSKIndex ID = 635
+ slIndex ID = 636
+ slSIIndex ID = 637
+ smaIndex ID = 638
+ smiIndex ID = 639
+ smjIndex ID = 640
+ smnIndex ID = 641
+ smnFIIndex ID = 642
+ smsIndex ID = 643
+ snIndex ID = 644
+ snZWIndex ID = 645
+ soIndex ID = 646
+ soDJIndex ID = 647
+ soETIndex ID = 648
+ soKEIndex ID = 649
+ soSOIndex ID = 650
+ sqIndex ID = 651
+ sqALIndex ID = 652
+ sqMKIndex ID = 653
+ sqXKIndex ID = 654
+ srIndex ID = 655
+ srCyrlIndex ID = 656
+ srCyrlBAIndex ID = 657
+ srCyrlMEIndex ID = 658
+ srCyrlRSIndex ID = 659
+ srCyrlXKIndex ID = 660
+ srLatnIndex ID = 661
+ srLatnBAIndex ID = 662
+ srLatnMEIndex ID = 663
+ srLatnRSIndex ID = 664
+ srLatnXKIndex ID = 665
+ ssIndex ID = 666
+ ssyIndex ID = 667
+ stIndex ID = 668
+ svIndex ID = 669
+ svAXIndex ID = 670
+ svFIIndex ID = 671
+ svSEIndex ID = 672
+ swIndex ID = 673
+ swCDIndex ID = 674
+ swKEIndex ID = 675
+ swTZIndex ID = 676
+ swUGIndex ID = 677
+ syrIndex ID = 678
+ taIndex ID = 679
+ taINIndex ID = 680
+ taLKIndex ID = 681
+ taMYIndex ID = 682
+ taSGIndex ID = 683
+ teIndex ID = 684
+ teINIndex ID = 685
+ teoIndex ID = 686
+ teoKEIndex ID = 687
+ teoUGIndex ID = 688
+ tgIndex ID = 689
+ tgTJIndex ID = 690
+ thIndex ID = 691
+ thTHIndex ID = 692
+ tiIndex ID = 693
+ tiERIndex ID = 694
+ tiETIndex ID = 695
+ tigIndex ID = 696
+ tkIndex ID = 697
+ tkTMIndex ID = 698
+ tlIndex ID = 699
+ tnIndex ID = 700
+ toIndex ID = 701
+ toTOIndex ID = 702
+ trIndex ID = 703
+ trCYIndex ID = 704
+ trTRIndex ID = 705
+ tsIndex ID = 706
+ ttIndex ID = 707
+ ttRUIndex ID = 708
+ twqIndex ID = 709
+ twqNEIndex ID = 710
+ tzmIndex ID = 711
+ tzmMAIndex ID = 712
+ ugIndex ID = 713
+ ugCNIndex ID = 714
+ ukIndex ID = 715
+ ukUAIndex ID = 716
+ urIndex ID = 717
+ urINIndex ID = 718
+ urPKIndex ID = 719
+ uzIndex ID = 720
+ uzArabIndex ID = 721
+ uzArabAFIndex ID = 722
+ uzCyrlIndex ID = 723
+ uzCyrlUZIndex ID = 724
+ uzLatnIndex ID = 725
+ uzLatnUZIndex ID = 726
+ vaiIndex ID = 727
+ vaiLatnIndex ID = 728
+ vaiLatnLRIndex ID = 729
+ vaiVaiiIndex ID = 730
+ vaiVaiiLRIndex ID = 731
+ veIndex ID = 732
+ viIndex ID = 733
+ viVNIndex ID = 734
+ voIndex ID = 735
+ vo001Index ID = 736
+ vunIndex ID = 737
+ vunTZIndex ID = 738
+ waIndex ID = 739
+ waeIndex ID = 740
+ waeCHIndex ID = 741
+ woIndex ID = 742
+ woSNIndex ID = 743
+ xhIndex ID = 744
+ xogIndex ID = 745
+ xogUGIndex ID = 746
+ yavIndex ID = 747
+ yavCMIndex ID = 748
+ yiIndex ID = 749
+ yi001Index ID = 750
+ yoIndex ID = 751
+ yoBJIndex ID = 752
+ yoNGIndex ID = 753
+ yueIndex ID = 754
+ yueHansIndex ID = 755
+ yueHansCNIndex ID = 756
+ yueHantIndex ID = 757
+ yueHantHKIndex ID = 758
+ zghIndex ID = 759
+ zghMAIndex ID = 760
+ zhIndex ID = 761
+ zhHansIndex ID = 762
+ zhHansCNIndex ID = 763
+ zhHansHKIndex ID = 764
+ zhHansMOIndex ID = 765
+ zhHansSGIndex ID = 766
+ zhHantIndex ID = 767
+ zhHantHKIndex ID = 768
+ zhHantMOIndex ID = 769
+ zhHantTWIndex ID = 770
+ zuIndex ID = 771
+ zuZAIndex ID = 772
+ caESvalenciaIndex ID = 773
+ enUSuvaposixIndex ID = 774
)
var coreTags = []language.CompactCoreInfo{ // 773 elements
@@ -1054,252 +1012,4 @@
const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
-var regionToGroups = []uint8{ // 357 elements
- // Entry 0 - 3F
- 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
- // Entry 40 - 7F
- 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
- 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x08,
- 0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
- // Entry 80 - BF
- 0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00,
- 0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00, 0x04,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
- // Entry C0 - FF
- 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01,
- 0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00,
- 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- // Entry 100 - 13F
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
- 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04, 0x00,
- 0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00, 0x00,
- // Entry 140 - 17F
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00,
-} // Size: 381 bytes
-
-var paradigmLocales = [][3]uint16{ // 3 elements
- 0: [3]uint16{0x139, 0x0, 0x7b},
- 1: [3]uint16{0x13e, 0x0, 0x1f},
- 2: [3]uint16{0x3c0, 0x41, 0xee},
-} // Size: 42 bytes
-
-type mutualIntelligibility struct {
- want uint16
- have uint16
- distance uint8
- oneway bool
-}
-type scriptIntelligibility struct {
- wantLang uint16
- haveLang uint16
- wantScript uint8
- haveScript uint8
- distance uint8
-}
-type regionIntelligibility struct {
- lang uint16
- script uint8
- group uint8
- distance uint8
-}
-
-// matchLang holds pairs of langIDs of base languages that are typically
-// mutually intelligible. Each pair is associated with a confidence and
-// whether the intelligibility goes one or both ways.
-var matchLang = []mutualIntelligibility{ // 113 elements
- 0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
- 1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
- 2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
- 3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
- 4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
- 5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
- 6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
- 7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
- 8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
- 9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
- 10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
- 11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
- 12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
- 13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
- 14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
- 15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
- 16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
- 17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
- 18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
- 19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
- 20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
- 21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
- 22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
- 23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
- 24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
- 25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
- 26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
- 27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
- 28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
- 29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
- 30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
- 31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
- 32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
- 33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
- 34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
- 35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
- 36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
- 37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
- 38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
- 39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
- 40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
- 41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
- 42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
- 43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
- 44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
- 45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
- 46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
- 47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
- 48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
- 49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
- 50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
- 51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
- 52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
- 53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
- 54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
- 55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
- 56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
- 57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
- 58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
- 59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
- 60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
- 61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
- 62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
- 63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
- 64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
- 65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
- 66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
- 67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
- 68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
- 69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
- 70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
- 71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
- 72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
- 73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
- 74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
- 75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
- 76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
- 77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
- 78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
- 79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
- 80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
- 81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
- 82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
- 83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
- 84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
- 85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
- 86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
- 87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
- 88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
- 89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
- 90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
- 91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
- 92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
- 93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
- 94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
- 95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
- 96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
- 97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
- 98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
- 99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
- 100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
- 101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
- 102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
- 103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
- 104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
- 105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
- 106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
- 107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
- 108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
- 109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
- 110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
- 111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
- 112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
-} // Size: 702 bytes
-
-// matchScript holds pairs of scriptIDs where readers of one script
-// can typically also read the other. Each is associated with a confidence.
-var matchScript = []scriptIntelligibility{ // 26 elements
- 0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
- 1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
- 2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
- 3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
- 4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
- 5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
- 6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
- 7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
- 8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
- 9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
- 10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
- 11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
- 12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
- 13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
- 14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
- 15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
- 16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
- 17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
- 18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
- 19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
- 20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
- 21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
- 22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
- 23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
- 24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
- 25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
-} // Size: 232 bytes
-
-var matchRegion = []regionIntelligibility{ // 15 elements
- 0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
- 1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
- 2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
- 3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
- 4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
- 5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
- 6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
- 7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
- 8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
- 9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
- 10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
- 11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
- 12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
- 13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
- 14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
-} // Size: 114 bytes
-
-// Total table size 4618 bytes (4KiB); checksum: D161A896
+// Total table size 3147 bytes (3KiB); checksum: F4E57D15
diff --git a/internal/language/compact/tags.go b/internal/language/compact/tags.go
index acc482a..ca135d2 100644
--- a/internal/language/compact/tags.go
+++ b/internal/language/compact/tags.go
@@ -2,59 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package language
-
-// TODO: Various sets of commonly use tags and regions.
-
-// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
-// It simplifies safe initialization of Tag values.
-func MustParse(s string) Tag {
- t, err := Parse(s)
- if err != nil {
- panic(err)
- }
- return t
-}
-
-// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
-// It simplifies safe initialization of Tag values.
-func (c CanonType) MustParse(s string) Tag {
- t, err := c.Parse(s)
- if err != nil {
- panic(err)
- }
- return t
-}
-
-// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
-// It simplifies safe initialization of Base values.
-func MustParseBase(s string) Base {
- b, err := ParseBase(s)
- if err != nil {
- panic(err)
- }
- return b
-}
-
-// MustParseScript is like ParseScript, but panics if the given script cannot be
-// parsed. It simplifies safe initialization of Script values.
-func MustParseScript(s string) Script {
- scr, err := ParseScript(s)
- if err != nil {
- panic(err)
- }
- return scr
-}
-
-// MustParseRegion is like ParseRegion, but panics if the given region cannot be
-// parsed. It simplifies safe initialization of Region values.
-func MustParseRegion(s string) Region {
- r, err := ParseRegion(s)
- if err != nil {
- panic(err)
- }
- return r
-}
+package compact
var (
und = Tag{}