| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package language |
| |
| import "errors" |
| |
// scriptRegionFlags is an 8-bit set of flag bits.
// NOTE(review): presumably the type of the flags field carried by the
// likely-subtag table entries that addTags inspects; those tables are
// declared elsewhere, so confirm against their definitions.
type scriptRegionFlags uint8

// Flag bits tested by addTags on likely-subtag table entries. The constants
// are untyped, so they can be combined with any integer flags field.
const (
	// isList marks an entry that does not hold its values directly but
	// refers to a run of entries in the companion list table.
	isList = 1 << 0
	// scriptInFrom is checked by addTags when matching on a script; entries
	// without it are treated as ones "for which the script was not defined".
	scriptInFrom = 1 << 1
	// regionInFrom is not referenced in this part of the file.
	// NOTE(review): presumably the region counterpart of scriptInFrom — confirm.
	regionInFrom = 1 << 2
)
| |
| func (t *Tag) setUndefinedLang(id langID) { |
| if t.lang == 0 { |
| t.lang = id |
| } |
| } |
| |
| func (t *Tag) setUndefinedScript(id scriptID) { |
| if t.script == 0 { |
| t.script = id |
| } |
| } |
| |
| func (t *Tag) setUndefinedRegion(id regionID) { |
| if t.region == 0 || t.region.Contains(id) { |
| t.region = id |
| } |
| } |
| |
// ErrMissingLikelyTagsData indicates no information was available
// to compute likely values of missing tags. It is the error addTags
// returns when none of its lookups applies to the given tag.
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
| |
| // addLikelySubtags sets subtags to their most likely value, given the locale. |
| // In most cases this means setting fields for unknown values, but in some |
| // cases it may alter a value. It returns an ErrMissingLikelyTagsData error |
| // if the given locale cannot be expanded. |
| func (t Tag) addLikelySubtags() (Tag, error) { |
| id, err := addTags(t) |
| if err != nil { |
| return t, err |
| } else if id.equalTags(t) { |
| return t, nil |
| } |
| id.remakeString() |
| return id, nil |
| } |
| |
| // specializeRegion attempts to specialize a group region. |
| func specializeRegion(t *Tag) bool { |
| if i := regionInclusion[t.region]; i < nRegionGroups { |
| x := likelyRegionGroup[i] |
| if langID(x.lang) == t.lang && scriptID(x.script) == t.script { |
| t.region = regionID(x.region) |
| } |
| return true |
| } |
| return false |
| } |
| |
// addTags fills in the missing language, script and region of t with their
// most likely values and returns the result. t is passed by value, so the
// caller's tag is not modified.
//
// The lookups are tried in this order, and the first one that matches
// returns:
//   - Private-use tags are returned unchanged.
//   - Script and region both set: if the language is set too, only
//     specializeRegion is applied; otherwise the language is taken from the
//     likelyRegion entry whose script equals t.script.
//   - Language set (and below langNoIndexOffset): lang-script and
//     lang-region matches from the likelyLang list entries.
//   - Language unset: und-script via likelyScript, then und-region via
//     likelyRegionGroup or likelyRegion.
//   - Finally the default likelyLang entry for the language supplies
//     whatever is still unset, and an unset language becomes _en.
//
// It returns t with ErrMissingLikelyTagsData when nothing matched and t.lang
// is not below langNoIndexOffset.
func addTags(t Tag) (Tag, error) {
	// We leave private use identifiers alone.
	if t.private() {
		return t, nil
	}
	if t.script != 0 && t.region != 0 {
		if t.lang != 0 {
			// already fully specified
			specializeRegion(&t)
			return t, nil
		}
		// Search matches for und-script-region. Note that for these cases
		// region will never be a group so there is no need to check for this.
		list := likelyRegion[t.region : t.region+1]
		if x := list[0]; x.flags&isList != 0 {
			// List entry: x.lang is the start index into likelyRegionList
			// and x.script is the number of entries to scan.
			list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
		}
		for _, x := range list {
			// Deviating from the spec. See match_test.go for details.
			if scriptID(x.script) == t.script {
				t.setUndefinedLang(langID(x.lang))
				return t, nil
			}
		}
		// No entry with a matching script: fall through to the
		// und-script and und-region lookups below (t.lang is zero here).
	}
	if t.lang != 0 {
		// Search matches for lang-script and lang-region, where lang != und.
		if t.lang < langNoIndexOffset {
			x := likelyLang[t.lang]
			if x.flags&isList != 0 {
				// List entry: x.region is the start index into
				// likelyLangList and x.script is the number of entries.
				list := likelyLangList[x.region : x.region+uint16(x.script)]
				if t.script != 0 {
					for _, x := range list {
						if scriptID(x.script) == t.script && x.flags&scriptInFrom != 0 {
							t.setUndefinedRegion(regionID(x.region))
							return t, nil
						}
					}
				} else if t.region != 0 {
					// count is the number of matching entries. goodScript
					// stays true only while every matching entry carries the
					// same script as the first match. tt is a working copy,
					// so t is untouched unless a result is accepted.
					count := 0
					goodScript := true
					tt := t
					for _, x := range list {
						// We visit all entries for which the script was not
						// defined, including the ones where the region was not
						// defined. This allows for proper disambiguation within
						// regions.
						if x.flags&scriptInFrom == 0 && t.region.Contains(regionID(x.region)) {
							tt.region = regionID(x.region)
							tt.setUndefinedScript(scriptID(x.script))
							goodScript = goodScript && tt.script == scriptID(x.script)
							count++
						}
					}
					if count == 1 {
						return tt, nil
					}
					// Even if we fail to find a unique Region, we might have
					// an unambiguous script.
					if goodScript {
						t.script = tt.script
					}
				}
			}
		}
	} else {
		// Search matches for und-script.
		if t.script != 0 {
			x := likelyScript[t.script]
			if x.region != 0 {
				t.setUndefinedRegion(regionID(x.region))
				t.setUndefinedLang(langID(x.lang))
				return t, nil
			}
		}
		// Search matches for und-region. If und-script-region exists, it would
		// have been found earlier.
		if t.region != 0 {
			if i := regionInclusion[t.region]; i < nRegionGroups {
				// Group region: take the values from the group's likely
				// entry. There is no return here, so the final "lang"
				// lookup below still runs on the updated tag.
				x := likelyRegionGroup[i]
				if x.region != 0 {
					t.setUndefinedLang(langID(x.lang))
					t.setUndefinedScript(scriptID(x.script))
					t.region = regionID(x.region)
				}
			} else {
				x := likelyRegion[t.region]
				if x.flags&isList != 0 {
					// List entry: use the first entry of the run.
					x = likelyRegionList[x.lang]
				}
				// NOTE(review): this compares the whole flags value with
				// scriptInFrom instead of masking the bit as the other
				// checks do — confirm that this is intended.
				if x.script != 0 && x.flags != scriptInFrom {
					t.setUndefinedLang(langID(x.lang))
					t.setUndefinedScript(scriptID(x.script))
					return t, nil
				}
			}
		}
	}

	// Search matches for lang.
	if t.lang < langNoIndexOffset {
		x := likelyLang[t.lang]
		if x.flags&isList != 0 {
			// List entry: use the first entry of the run.
			x = likelyLangList[x.region]
		}
		if x.region != 0 {
			t.setUndefinedScript(scriptID(x.script))
			t.setUndefinedRegion(regionID(x.region))
		}
		specializeRegion(&t)
		if t.lang == 0 {
			t.lang = _en // default language
		}
		return t, nil
	}
	return t, ErrMissingLikelyTagsData
}
| |
| func (t *Tag) setTagsFrom(id Tag) { |
| t.lang = id.lang |
| t.script = id.script |
| t.region = id.region |
| } |
| |
| // minimize removes the region or script subtags from t such that |
| // t.addLikelySubtags() == t.minimize().addLikelySubtags(). |
| func (t Tag) minimize() (Tag, error) { |
| t, err := minimizeTags(t) |
| if err != nil { |
| return t, err |
| } |
| t.remakeString() |
| return t, nil |
| } |
| |
| // minimizeTags mimics the behavior of the ICU 51 C implementation. |
| func minimizeTags(t Tag) (Tag, error) { |
| if t.equalTags(und) { |
| return t, nil |
| } |
| max, err := addTags(t) |
| if err != nil { |
| return t, err |
| } |
| for _, id := range [...]Tag{ |
| {lang: t.lang}, |
| {lang: t.lang, region: t.region}, |
| {lang: t.lang, script: t.script}, |
| } { |
| if x, err := addTags(id); err == nil && max.equalTags(x) { |
| t.setTagsFrom(id) |
| break |
| } |
| } |
| return t, nil |
| } |
| |
| func (t Tag) variants() string { |
| if t.pVariant == 0 { |
| return "" |
| } |
| return t.str[t.pVariant:t.pExt] |
| } |
| |
| // variantOrPrivateTagStr returns variants or private use tags. |
| func (t Tag) variantOrPrivateTagStr() string { |
| if t.pExt > 0 { |
| return t.str[t.pVariant:t.pExt] |
| } |
| return t.str[t.pVariant:] |
| } |