blob: f26bace1466c9ebabf8c432ca076b9cc799bc1aa [file] [log] [blame]
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
import (
"fmt"
"reflect"
"strings"
"testing"
"unicode"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/message"
)
// TODO: test that tables are properly dropped by the linker for various use
// cases.
// Languages and tags referenced by the tables of TestTag, TestLanguage and
// TestSelf.
// NOTE(review): the names suggest these are the first and last entries of the
// 2-letter language, 3-letter language and tag name tables — confirm against
// the generated tables.
var (
firstLang2aa = language.MustParseBase("aa")
lastLang2zu = language.MustParseBase("zu")
firstLang3ace = language.MustParseBase("ace")
lastLang3zza = language.MustParseBase("zza")
firstTagAr001 = language.MustParse("ar-001")
lastTagZhHant = language.MustParse("zh-Hant")
)
// TestValues verifies that every tag, region, and script listed in Values has
// a name in English. English is assumed to be the most comprehensive data
// set, so this guarantees that at least one language defines a name for each
// value. It also verifies that the English Namers return "" for supported
// languages, regions, and scripts that are not listed in Values.
func TestValues(t *testing.T) {
	type testcase struct {
		kind string
		n    Namer
	}

	// checkDefined asserts, in one subtest per Namer, that x has a name.
	checkDefined := func(x interface{}, namers []testcase) {
		for _, tc := range namers {
			testtext.Run(t, fmt.Sprintf("%s.Name(%s)", tc.kind, x), func(t *testing.T) {
				if tc.n.Name(x) != "" {
					return
				}
				// As of version 28 there is no data for az-Arab in English,
				// although there is useful data in other languages.
				if x.(fmt.Stringer).String() == "az-Arab" {
					return
				}
				t.Errorf("supported but no result")
			})
		}
	}

	// checkUnsupported asserts that none of the Namers has a name for x. The
	// test is aborted at the first violation.
	checkUnsupported := func(x interface{}, namers []testcase) {
		for _, tc := range namers {
			got := tc.n.Name(x)
			if got == "" {
				continue
			}
			t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", tc.kind, x, got)
		}
	}

	// Tags.
	seenTags := make(map[language.Tag]bool)
	tagNamers := []testcase{
		{"Languages(en)", Languages(language.English)},
		{"Tags(en)", Tags(language.English)},
		{"English.Languages()", English.Languages()},
		{"English.Tags()", English.Tags()},
	}
	for _, tag := range Values.Tags() {
		checkDefined(tag, tagNamers)
		seenTags[tag] = true
	}
	for _, base := range language.Supported.BaseLanguages() {
		if tag, _ := language.All.Compose(base); !seenTags[tag] {
			checkUnsupported(tag, tagNamers)
		}
	}

	// Regions.
	seenRegions := make(map[language.Region]bool)
	regionNamers := []testcase{
		{"Regions(en)", Regions(language.English)},
		{"English.Regions()", English.Regions()},
	}
	for _, region := range Values.Regions() {
		checkDefined(region, regionNamers)
		seenRegions[region] = true
	}
	for _, region := range language.Supported.Regions() {
		canonical := region.Canonicalize()
		if !seenRegions[canonical] {
			checkUnsupported(canonical, regionNamers)
		}
	}

	// Scripts.
	seenScripts := make(map[language.Script]bool)
	scriptNamers := []testcase{
		{"Scripts(en)", Scripts(language.English)},
		{"English.Scripts()", English.Scripts()},
	}
	for _, script := range Values.Scripts() {
		checkDefined(script, scriptNamers)
		seenScripts[script] = true
	}
	for _, script := range language.Supported.Scripts() {
		// Canonicalize the script by round-tripping it through a tag.
		tag, _ := language.DeprecatedScript.Compose(script)
		_, canonical, _ := tag.Raw()
		if !seenScripts[canonical] {
			checkUnsupported(canonical, scriptNamers)
		}
	}
}
// TestSupported verifies that at least one kind of Namer carries data for
// every tag in Supported.Tags, that no tag is listed there twice, and that the
// number of tags equals numSupported. To test the claims in the documentation
// it also verifies that no Namer is returned for languages that do not match
// any supported tag.
func TestSupported(t *testing.T) {
	supported := Supported.Tags()
	if got := len(supported); got != numSupported {
		t.Errorf("number of supported was %d; want %d", got, numSupported)
	}

	type namerFunc struct {
		kind string
		fn   func(language.Tag) Namer
	}
	namerFuncs := []namerFunc{
		{"Tags", Tags},
		{"Languages", Languages},
		{"Regions", Regions},
		{"Scripts", Scripts},
	}

	// Every tag we claim to support must have at least one Namer with data.
	seen := make(map[language.Tag]bool)
	for _, tag := range supported {
		hasData := false
		for _, nf := range namerFuncs {
			// defined is evaluated for every kind, without short-circuit,
			// because it also reports Namers that are non-nil but empty.
			hasData = defined(t, nf.kind, nf.fn(tag), tag) || hasData
		}
		if !hasData {
			t.Errorf("%s: supported, but no data available", tag)
		}
		if seen[tag] {
			t.Errorf("%s: included in Supported.Tags more than once", tag)
		}
		seen[tag] = true
	}

	// Tags we do not claim to support must not have any Namer.
	for _, base := range language.Supported.BaseLanguages() {
		tag, _ := language.All.Compose(base)
		// Only consider tags that are not supported even after matching.
		_, _, conf := matcher.Match(tag)
		if conf != language.No {
			continue
		}
		for _, nf := range namerFuncs {
			if !defined(t, nf.kind, nf.fn(tag), tag) {
				continue
			}
			t.Errorf("%[1]s(%[2]s) returns a Namer, but %[2]s is not in the set of supported Tags.", nf.kind, tag)
		}
	}
}
// defined reports whether n is a proper Namer. A nil Namer is not proper and
// yields false. A non-nil Namer is proper if it returns a non-empty name for
// at least one of the values that Values lists for the given kind ("Tags",
// "Languages", "Regions" or "Scripts"). A non-nil Namer without any such name,
// or an unrecognized kind, is reported as an error on t using kind and tag,
// and yields false.
func defined(t *testing.T, kind string, n Namer, tag language.Tag) bool {
	if n == nil {
		return false
	}
	switch kind {
	case "Tags":
		for _, v := range Values.Tags() {
			if n.Name(v) == "" {
				continue
			}
			return true
		}
	case "Languages":
		for _, base := range Values.BaseLanguages() {
			if n.Name(base) == "" {
				continue
			}
			return true
		}
	case "Regions":
		for _, region := range Values.Regions() {
			if n.Name(region) == "" {
				continue
			}
			return true
		}
	case "Scripts":
		for _, script := range Values.Scripts() {
			if n.Name(script) == "" {
				continue
			}
			return true
		}
	}
	t.Errorf("%s(%s) returns non-nil Namer without content", kind, tag)
	return false
}
// TestCoverage checks that the English Namers return a non-empty name for
// every tag, script, and region in Values, and that no two values of the same
// kind share a name.
func TestCoverage(t *testing.T) {
	english := language.English
	cases := []struct {
		namer  Namer
		values interface{} // slice of the values to be named
	}{
		{Languages(english), Values.Tags()},
		{Scripts(english), Values.Scripts()},
		{Regions(english), Values.Regions()},
	}
	for i, tc := range cases {
		// seen maps each name returned so far to the value that produced it.
		seen := map[string]interface{}{}
		list := reflect.ValueOf(tc.values)
		for j, n := 0, list.Len(); j < n; j++ {
			x := list.Index(j).Interface()
			// As of version 28 there is no data for az-Arab in English,
			// although there is useful data in other languages.
			if x.(fmt.Stringer).String() == "az-Arab" {
				continue
			}
			name := tc.namer.Name(x)
			switch prev := seen[name]; {
			case name == "":
				t.Errorf("%d:%d:%s: missing content", i, j, x)
			case prev != nil:
				t.Errorf("%d:%d:%s: identical return value %q for %v and %v", i, j, x, name, x, prev)
			}
			seen[name] = x
		}
	}
}
// TestUpdate detects whether the predefined dictionaries need to be updated
// after a table update. For some languages, some of the headers may be empty
// or identical to the parent. Each dictionary's lang, script, and region
// headers are compared with the generated headers at the index that matcher
// returns for the dictionary's tag.
func TestUpdate(t *testing.T) {
	cases := []struct {
		dict *Dictionary
		tag  string
	}{
		{ModernStandardArabic, "ar-001"},
		{AmericanEnglish, "en-US"},
		{EuropeanSpanish, "es-ES"},
		{BrazilianPortuguese, "pt-BR"},
		{SimplifiedChinese, "zh-Hans"},
	}
	for _, tc := range cases {
		// The second result of Match is the index into the header tables.
		_, idx, _ := matcher.Match(language.MustParse(tc.tag))
		if got, want := tc.dict.lang, langHeaders[idx]; !reflect.DeepEqual(got, want) {
			t.Errorf("%s: lang table update needed", tc.tag)
		}
		if got, want := tc.dict.script, scriptHeaders[idx]; !reflect.DeepEqual(got, want) {
			t.Errorf("%s: script table update needed", tc.tag)
		}
		if got, want := tc.dict.region, regionHeaders[idx]; !reflect.DeepEqual(got, want) {
			t.Errorf("%s: region table update needed", tc.tag)
		}
	}
}
// TestIndex checks the index and len methods of tagIndex. Each tagIndex in the
// table holds three strings of concatenated 2-, 3- and 4-letter entries. Every
// entry must be found at its position in that sequence, len must equal the
// total number of entries, and strings that are not entries must yield -1.
func TestIndex(t *testing.T) {
	cases := []tagIndex{
		{
			"",
			"",
			"",
		},
		{
			"bb",
			"",
			"",
		},
		{
			"",
			"bbb",
			"",
		},
		{
			"",
			"",
			"Bbbb",
		},
		{
			"bb",
			"bbb",
			"Bbbb",
		},
		{
			"bbccddyy",
			"bbbcccdddyyy",
			"BbbbCcccDdddYyyy",
		},
	}
	absent := []string{"aa", "xx", "zz", "aaa", "xxx", "zzz", "Aaaa", "Xxxx", "Zzzz"}

	for i, ti := range cases {
		// Walk the entries of the tagIndex in order; want is the index that
		// the current entry is expected to have.
		want := 0
		for width := 2; width <= 4; width++ {
			for rest := ti[width-2]; len(rest) > 0; rest = rest[width:] {
				entry := rest[:width]
				if got := ti.index(entry); got != want {
					t.Errorf("%d:%s: index was %d; want %d", i, entry, got, want)
				}
				want++
			}
		}
		if got := ti.len(); got != want {
			t.Errorf("%d: len was %d; want %d", i, got, want)
		}
		for _, s := range absent {
			if got := ti.index(s); got != -1 {
				t.Errorf("%d:%s: index was %d; want -1", i, s, got)
			}
		}
	}
}
// TestTag checks, per dictionary language, the name that the Tags Namer
// returns for a tag and the string that a message Printer produces for the
// Tag formatter. An expectation of the form "name|formatted" gives different
// values for the two; see splitName.
func TestTag(t *testing.T) {
	type tagTest struct {
		locale string // language of the dictionary
		tag    string // tag to be named
		want   string // expected name, optionally followed by "|" and the formatted name
	}
	cases := []tagTest{
		// sr is in Value.Languages(), but is not supported by agq.
		{"agq", "sr", "|[language: sr]"},
		{"nl", "nl", "Nederlands"},
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: check if this is a CLDR bug.
		// {"nl", "nl-BE", "Vlaams"},
		{"nl", "nl-BE", "Nederlands (België)"},
		{"nl", "vls", "West-Vlaams"},
		{"en", "nl-BE", "Flemish"},
		{"en", "en", "English"},
		{"en", "en-GB", "British English"},
		{"en", "en-US", "American English"}, // American English in CLDR 24+
		{"ru", "ru", "русский"},
		{"ru", "ru-RU", "русский (Россия)"},
		{"ru", "ru-Cyrl", "русский (кириллица)"},
		{"en", lastLang2zu.String(), "Zulu"},
		{"en", firstLang2aa.String(), "Afar"},
		{"en", lastLang3zza.String(), "Zaza"},
		{"en", firstLang3ace.String(), "Achinese"},
		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
		{"en", lastTagZhHant.String(), "Traditional Chinese"},
		{"en", "aaa", "|Unknown language (aaa)"},
		{"en", "zzj", "|Unknown language (zzj)"},
		// If full tag doesn't match, try without script or region.
		{"en", "aa-Hans", "Afar (Simplified Han)"},
		{"en", "af-Arab", "Afrikaans (Arabic)"},
		{"en", "zu-Cyrl", "Zulu (Cyrillic)"},
		{"en", "aa-GB", "Afar (United Kingdom)"},
		{"en", "af-NA", "Afrikaans (Namibia)"},
		{"en", "zu-BR", "Zulu (Brazil)"},
		// Correct inheritance and language selection.
		{"zh", "zh-TW", "中文 (台湾)"},
		{"zh", "zh-Hant-TW", "繁体中文 (台湾)"},
		{"zh-Hant", "zh-TW", "中文 (台灣)"},
		{"zh-Hant", "zh-Hant-TW", "繁體中文 (台灣)"},
		// Some rather arbitrary interpretations for Serbian. This is arguably
		// correct and consistent with the way zh-[Hant-]TW is handled. It will
		// also give results more in line with the expectations if users
		// explicitly use "sh".
		{"sr-Latn", "sr-ME", "srpski (Crna Gora)"},
		{"sr-Latn", "sr-Latn-ME", "srpskohrvatski (Crna Gora)"},
		// Double script and region
		{"nl", "en-Cyrl-BE", "Engels (Cyrillisch, België)"},
	}
	for _, tc := range cases {
		t.Run(tc.locale+"/"+tc.tag, func(t *testing.T) {
			wantName, wantFormatted := splitName(tc.want)
			locale := language.MustParse(tc.locale)
			tag := language.Raw.MustParse(tc.tag)

			if got := Tags(locale).Name(tag); got != wantName {
				// There are inconsistencies w.r.t. capitalization in the tests
				// due to CLDR's update procedure which treats modern and other
				// languages differently.
				// See https://unicode.org/cldr/trac/ticket/8051.
				// TODO: use language capitalization to sanitize the strings.
				t.Errorf("Name(%s) = %q; want %q", tag, got, wantName)
			}
			if got := message.NewPrinter(locale).Sprint(Tag(tag)); got != wantFormatted {
				t.Errorf("Tag(%s) = %q; want %q", tag, got, wantFormatted)
			}
		})
	}
}
// splitName splits a test expectation of the form "name" or
// "name|formatName". The text before the first "|" is returned as name. The
// text between the first and the second "|" (or the end of the string) is
// returned as formatName. Without any "|", formatName equals name.
func splitName(names string) (name, formatName string) {
	// Three parts suffice: only the first two are ever used, and limiting the
	// split keeps a second "|" out of the second part.
	parts := strings.SplitN(names, "|", 3)
	name = parts[0]
	if len(parts) == 1 {
		return name, name
	}
	return name, parts[1]
}
// TestLanguage checks, per dictionary language, the name that the Languages
// Namer returns for a tag and the string that a message Printer produces for
// the Language formatter. Test tags of at most three bytes are additionally
// checked as a language.Base. An expectation of the form "name|formatted"
// gives different values for the Namer and the formatter; see splitName.
func TestLanguage(t *testing.T) {
	type languageTest struct {
		locale string // language of the dictionary
		tag    string // tag to be named
		want   string // expected name, optionally followed by "|" and the formatted name
	}
	cases := []languageTest{
		// sr is in Value.Languages(), but is not supported by agq.
		{"agq", "sr", "|[language: sr]"},
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: this is probably incorrect.
		// West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its
		// own language, whereas Vlaams is generally Dutch. So expect to have
		// to change these tests back.
		{"nl", "nl", "Nederlands"},
		{"nl", "vls", "West-Vlaams"},
		{"nl", "nl-BE", "Nederlands"},
		{"en", "pt", "Portuguese"},
		{"en", "pt-PT", "European Portuguese"},
		{"en", "pt-BR", "Brazilian Portuguese"},
		{"en", "en", "English"},
		{"en", "en-GB", "British English"},
		{"en", "en-US", "American English"}, // American English in CLDR 24+
		{"en", lastLang2zu.String(), "Zulu"},
		{"en", firstLang2aa.String(), "Afar"},
		{"en", lastLang3zza.String(), "Zaza"},
		{"en", firstLang3ace.String(), "Achinese"},
		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
		{"en", lastTagZhHant.String(), "Traditional Chinese"},
		{"en", "aaa", "|Unknown language (aaa)"},
		{"en", "zzj", "|Unknown language (zzj)"},
		// If full tag doesn't match, try without script or region.
		{"en", "aa-Hans", "Afar"},
		{"en", "af-Arab", "Afrikaans"},
		{"en", "zu-Cyrl", "Zulu"},
		{"en", "aa-GB", "Afar"},
		{"en", "af-NA", "Afrikaans"},
		{"en", "zu-BR", "Zulu"},
		{"agq", "zh-Hant", "|[language: zh-Hant]"},
		{"en", "sh", "Serbo-Croatian"},
		{"en", "sr-Latn", "Serbo-Croatian"},
		{"en", "sr", "Serbian"},
		{"en", "sr-ME", "Serbian"},
		{"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
	}
	for _, tc := range cases {
		testtext.Run(t, tc.locale+"/"+tc.tag, func(t *testing.T) {
			wantName, wantFormatted := splitName(tc.want)
			locale := language.MustParse(tc.locale)
			tag := language.Raw.MustParse(tc.tag)
			printer := message.NewPrinter(locale)
			namer := Languages(locale)

			if got := namer.Name(tag); got != wantName {
				t.Errorf("Name(%v) = %q; want %q", tag, got, wantName)
			}
			if got := printer.Sprint(Language(tag)); got != wantFormatted {
				t.Errorf("Language(%v) = %q; want %q", tag, got, wantFormatted)
			}

			// Tags longer than three bytes cannot be a bare base language.
			if len(tc.tag) > 3 {
				return
			}
			base := language.MustParseBase(tc.tag)
			if got := namer.Name(base); got != wantName {
				t.Errorf("Name(%v) = %q; want %q", base, got, wantName)
			}
			if got := printer.Sprint(Language(base)); got != wantFormatted {
				t.Errorf("Language(%v) = %q; want %q", base, got, wantFormatted)
			}
		})
	}
}
// TestScript checks, per dictionary language, the name that the Scripts Namer
// returns and the string that a message Printer produces for the Script
// formatter. A test value starting with an upper-case letter is parsed as a
// script and checked both as a language.Script and as a tag composed from
// that script. Any other value is parsed as a tag and checked as a tag only.
func TestScript(t *testing.T) {
	tests := []struct {
		dict string // language of the dictionary
		scr  string // script, or tag if it does not start with an upper-case letter
		name string // expected name, see splitName
	}{
		{"nl", "Arab", "Arabisch"},
		{"en", "Arab", "Arabic"},
		{"en", "Zzzz", "Unknown Script"},
		{"zh-Hant", "Hang", "韓文字"},
		{"zh-Hant-HK", "Hang", "韓文字"},
		{"zh", "Arab", "阿拉伯文"},
		{"zh-Hans-HK", "Arab", "阿拉伯文"}, // same as zh
		{"zh-Hant", "Arab", "阿拉伯文"},
		{"zh-Hant-HK", "Arab", "阿拉伯文"}, // same as zh
		// Canonicalized form
		{"en", "Qaai", "Inherited"}, // deprecated script, now is Zinh
		// No script
		{"en", "en", "Unknown Script"},
		// Don't introduce scripts with canonicalization.
		// This entry used to be listed twice, which only produced a redundant
		// "en/sh#01" subtest.
		{"en", "sh", "Unknown Script"}, // sh canonicalizes to sr-Latn
	}
	for _, tt := range tests {
		t.Run(tt.dict+"/"+tt.scr, func(t *testing.T) {
			name, fmtName := splitName(tt.name)
			dict := language.MustParse(tt.dict)
			p := message.NewPrinter(dict)
			d := Scripts(dict)

			var tag language.Tag
			if unicode.IsUpper(rune(tt.scr[0])) {
				// The value is a script: check it as a language.Script first
				// and then wrap it in a tag for the checks below.
				x := language.MustParseScript(tt.scr)
				if n := d.Name(x); n != name {
					t.Errorf("Name(%v) = %q; want %q", x, n, name)
				}
				if n := p.Sprint(Script(x)); n != fmtName {
					t.Errorf("Script(%v) = %q; want %q", x, n, fmtName)
				}
				tag, _ = language.Raw.Compose(x)
			} else {
				tag = language.Raw.MustParse(tt.scr)
			}

			if n := d.Name(tag); n != name {
				t.Errorf("Name(%v) = %q; want %q", tag, n, name)
			}
			if n := p.Sprint(Script(tag)); n != fmtName {
				t.Errorf("Script(%v) = %q; want %q", tag, n, fmtName)
			}
		})
	}
}
// TestRegion checks, per dictionary language, the name that the Regions Namer
// returns and the string that a message Printer produces for the Region
// formatter. A test value starting with an upper-case letter is parsed as a
// region and checked both as a language.Region and as a tag composed from
// that region. Any other value is parsed as a tag and checked as a tag only.
func TestRegion(t *testing.T) {
	type regionTest struct {
		locale string // language of the dictionary
		reg    string // region, or tag if it does not start with an upper-case letter
		want   string // expected name
	}
	cases := []regionTest{
		{"nl", "NL", "Nederland"},
		{"en", "US", "United States"},
		{"en", "ZZ", "Unknown Region"},
		{"en-GB", "NL", "Netherlands"},
		// Canonical equivalents
		{"en", "UK", "United Kingdom"},
		// No region
		{"en", "pt", "Unknown Region"},
		{"en", "und", "Unknown Region"},
		// Don't introduce regions with canonicalization.
		{"en", "mo", "Unknown Region"},
	}
	for _, tc := range cases {
		t.Run(tc.locale+"/"+tc.reg, func(t *testing.T) {
			locale := language.MustParse(tc.locale)
			printer := message.NewPrinter(locale)
			namer := Regions(locale)

			var tag language.Tag
			if !unicode.IsUpper(rune(tc.reg[0])) {
				tag = language.Raw.MustParse(tc.reg)
			} else {
				// The value is a region: check it as a language.Region first
				// and then wrap it in a tag for the checks below.
				region := language.MustParseRegion(tc.reg)
				if got := namer.Name(region); got != tc.want {
					t.Errorf("Name(%v) = %q; want %q", region, got, tc.want)
				}
				if got := printer.Sprint(Region(region)); got != tc.want {
					t.Errorf("Region(%v) = %q; want %q", region, got, tc.want)
				}
				tag, _ = language.Raw.Compose(region)
			}

			if got := namer.Name(tag); got != tc.want {
				t.Errorf("Name(%v) = %q; want %q", tag, got, tc.want)
			}
			if got := printer.Sprint(Region(tag)); got != tc.want {
				t.Errorf("Region(%v) = %q; want %q", tag, got, tc.want)
			}
		})
	}
}
// TestSelf checks the names that the Self Namer returns for a set of tags.
// An empty expected name means that Self is expected to have no name for the
// tag.
func TestSelf(t *testing.T) {
	cases := []struct {
		tag  string
		want string
	}{
		{"nl", "Nederlands"},
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: check if this is a CLDR bug.
		// {"nl-BE", "Vlaams"},
		{"nl-BE", "Nederlands"},
		{"en-GB", "British English"},
		{lastLang2zu.String(), "isiZulu"},
		{firstLang2aa.String(), ""},  // not defined
		{lastLang3zza.String(), ""},  // not defined
		{firstLang3ace.String(), ""}, // not defined
		{firstTagAr001.String(), "العربية الرسمية الحديثة"},
		{"ar", "العربية"},
		{lastTagZhHant.String(), "繁體中文"},
		{"aaa", ""},
		{"zzj", ""},
		// Drop entries that are not in the requested script, even if there is
		// an entry for the language.
		{"aa-Hans", ""},
		{"af-Arab", ""},
		{"zu-Cyrl", ""},
		// Append the country name in the language of the matching language.
		{"af-NA", "Afrikaans"},
		{"zh", "中文"},
		// zh-TW should match zh-Hant instead of zh!
		{"zh-TW", "繁體中文"},
		{"zh-Hant", "繁體中文"},
		{"zh-Hans", "简体中文"},
		{"zh-Hant-TW", "繁體中文"},
		{"zh-Hans-TW", "简体中文"},
		// Take the entry for sr which has the matching script.
		// TODO: Capitalization changed as of CLDR 26, but change seems
		// arbitrary. Revisit capitalization with revision 27. See
		// https://unicode.org/cldr/trac/ticket/8051.
		{"sr", "српски"},
		// TODO: sr-ME should show up as Serbian or Montenegrin, not Serbo-
		// Croatian. This is an artifact of the current algorithm, which is the
		// way it is to have the preferred behavior for other languages such as
		// Chinese. We can hardwire this case in the table generator or package
		// code, but we first check if CLDR can be updated.
		// {"sr-ME", "Srpski"}, // Is Srpskohrvatski
		{"sr-Latn-ME", "srpskohrvatski"},
		{"sr-Cyrl-ME", "српски"},
		{"sr-NL", "српски"},
		// NOTE: kk is defined, but in Cyrillic script. For China, Arab is the
		// dominant script. We do not have data for kk-Arab and we chose to not
		// fall back in such cases.
		{"kk-CN", ""},
	}
	for i, tc := range cases {
		got := Self.Name(language.Raw.MustParse(tc.tag))
		if got == tc.want {
			continue
		}
		t.Errorf("%d:%s: was %q; want %q", i, tc.tag, got, tc.want)
	}
}
// TestEquivalence checks that Self and the Romanian Tags, Languages, and
// Scripts Namers each return the same name for the tags "ro-MD" and "mo".
func TestEquivalence(t *testing.T) {
	type namerCase struct {
		desc  string
		namer Namer
	}
	cases := []namerCase{
		{"Self", Self},
		{"Tags", Tags(language.Romanian)},
		{"Languages", Languages(language.Romanian)},
		{"Scripts", Scripts(language.Romanian)},
	}
	for _, c := range cases {
		t.Run(c.desc, func(t *testing.T) {
			roMD := language.Raw.MustParse("ro-MD")
			nameRo := c.namer.Name(roMD)
			mo := language.Raw.MustParse("mo")
			nameMo := c.namer.Name(mo)
			if nameRo == nameMo {
				return
			}
			t.Errorf("%q != %q", nameRo, nameMo)
		})
	}
}
// TestDictionaryLang checks the names returned by the Tags Namer of the
// predefined dictionaries. For tags that consist of nothing but a base
// language, the Languages Namer of the dictionary is checked as well.
func TestDictionaryLang(t *testing.T) {
	tests := []struct {
		d    *Dictionary
		tag  string
		name string
	}{
		{English, "en", "English"},
		{Portuguese, "af", "africâner"},
		{EuropeanPortuguese, "af", "africanês"},
		{English, "nl-BE", "Flemish"},
	}
	for i, test := range tests {
		tag := language.MustParse(test.tag)
		// The messages name the Namer that failed and quote the values, so
		// that the two checks can be told apart and an empty result (no data)
		// is visible.
		if got := test.d.Tags().Name(tag); got != test.name {
			t.Errorf("%d:%v: Tags().Name = %q; want %q", i, tag, got, test.name)
		}
		// A tag composed from only the base language equals the original tag
		// exactly when the original has no further subtags.
		if base, _ := language.Compose(tag.Base()); base == tag {
			if got := test.d.Languages().Name(base); got != test.name {
				t.Errorf("%d:%v: Languages().Name = %q; want %q", i, tag, got, test.name)
			}
		}
	}
}
// TestDictionaryRegion checks the names returned by the Regions Namer of the
// predefined dictionaries.
func TestDictionaryRegion(t *testing.T) {
	cases := []struct {
		dict   *Dictionary
		region string
		want   string
	}{
		{English, "FR", "France"},
		{Portuguese, "009", "Oceania"},
		{EuropeanPortuguese, "009", "Oceânia"},
	}
	for i, tc := range cases {
		region := language.MustParseRegion(tc.region)
		got := tc.dict.Regions().Name(region)
		if got == tc.want {
			continue
		}
		t.Errorf("%d:%v: got %s; want %s", i, region, got, tc.want)
	}
}
// TestDictionaryScript checks the names returned by the Scripts Namer of the
// predefined dictionaries.
func TestDictionaryScript(t *testing.T) {
	cases := []struct {
		dict   *Dictionary
		script string
		want   string
	}{
		{English, "Cyrl", "Cyrillic"},
		{EuropeanPortuguese, "Gujr", "guzerate"},
	}
	for i, tc := range cases {
		script := language.MustParseScript(tc.script)
		got := tc.dict.Scripts().Name(script)
		if got == tc.want {
			continue
		}
		t.Errorf("%d:%v: got %s; want %s", i, script, got, tc.want)
	}
}